1/*
2 * jsimd_i386.c
3 *
4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright 2009-2011, 2013-2014 D. R. Commander
6 *
7 * Based on the x86 SIMD extension for IJG JPEG library,
8 * Copyright (C) 1999-2006, MIYASAKA Masaru.
9 * For conditions of distribution and use, see copyright notice in jsimdext.inc
10 *
11 * This file contains the interface between the "normal" portions
12 * of the library and the SIMD implementations when running on a
13 * 32-bit x86 architecture.
14 */
15
16#define JPEG_INTERNALS
17#include "../jinclude.h"
18#include "../jpeglib.h"
19#include "../jsimd.h"
20#include "../jdct.h"
21#include "../jsimddct.h"
22#include "jsimd.h"
23
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment. This macro allows us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is non-zero when the address `ptr` is a multiple
 * of 2^order bytes.  Both arguments are parenthesized so that expressions
 * such as `table + 4` are cast and shifted as a whole, and the address is
 * converted to size_t (already used throughout the library headers) so the
 * cast never truncates the pointer.
 */
#define IS_ALIGNED(ptr, order) \
  ((((size_t)(ptr)) & ((((size_t)1) << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */
31
/* Bitmask of usable SIMD extensions; all-ones means "not probed yet". */
static unsigned int simd_support = ~0U;
33
34/*
35 * Check what SIMD accelerations are supported.
36 *
37 * FIXME: This code is racy under a multi-threaded environment.
38 */
39LOCAL(void)
40init_simd (void)
41{
42  char *env = NULL;
43
44  if (simd_support != ~0U)
45    return;
46
47  simd_support = jpeg_simd_cpu_support();
48
49  /* Force different settings through environment variables */
50  env = getenv("JSIMD_FORCEMMX");
51  if ((env != NULL) && (strcmp(env, "1") == 0))
52    simd_support &= JSIMD_MMX;
53  env = getenv("JSIMD_FORCE3DNOW");
54  if ((env != NULL) && (strcmp(env, "1") == 0))
55    simd_support &= JSIMD_3DNOW|JSIMD_MMX;
56  env = getenv("JSIMD_FORCESSE");
57  if ((env != NULL) && (strcmp(env, "1") == 0))
58    simd_support &= JSIMD_SSE|JSIMD_MMX;
59  env = getenv("JSIMD_FORCESSE2");
60  if ((env != NULL) && (strcmp(env, "1") == 0))
61    simd_support &= JSIMD_SSE2;
62  env = getenv("JSIMD_FORCENONE");
63  if ((env != NULL) && (strcmp(env, "1") == 0))
64    simd_support = 0;
65}
66
67GLOBAL(int)
68jsimd_can_rgb_ycc (void)
69{
70  init_simd();
71
72  /* The code is optimised for these values only */
73  if (BITS_IN_JSAMPLE != 8)
74    return 0;
75  if (sizeof(JDIMENSION) != 4)
76    return 0;
77  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
78    return 0;
79
80  if ((simd_support & JSIMD_SSE2) &&
81      IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
82    return 1;
83  if (simd_support & JSIMD_MMX)
84    return 1;
85
86  return 0;
87}
88
89GLOBAL(int)
90jsimd_can_rgb_gray (void)
91{
92  init_simd();
93
94  /* The code is optimised for these values only */
95  if (BITS_IN_JSAMPLE != 8)
96    return 0;
97  if (sizeof(JDIMENSION) != 4)
98    return 0;
99  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
100    return 0;
101
102  if ((simd_support & JSIMD_SSE2) &&
103      IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
104    return 1;
105  if (simd_support & JSIMD_MMX)
106    return 1;
107
108  return 0;
109}
110
111GLOBAL(int)
112jsimd_can_ycc_rgb (void)
113{
114  init_simd();
115
116  /* The code is optimised for these values only */
117  if (BITS_IN_JSAMPLE != 8)
118    return 0;
119  if (sizeof(JDIMENSION) != 4)
120    return 0;
121  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
122    return 0;
123
124  if ((simd_support & JSIMD_SSE2) &&
125      IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
126    return 1;
127  if (simd_support & JSIMD_MMX)
128    return 1;
129
130  return 0;
131}
132
133GLOBAL(int)
134jsimd_can_ycc_rgb565 (void)
135{
136  return 0;
137}
138
139GLOBAL(void)
140jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
141                       JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
142                       JDIMENSION output_row, int num_rows)
143{
144  void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
145  void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
146
147  switch(cinfo->in_color_space) {
148    case JCS_EXT_RGB:
149      sse2fct=jsimd_extrgb_ycc_convert_sse2;
150      mmxfct=jsimd_extrgb_ycc_convert_mmx;
151      break;
152    case JCS_EXT_RGBX:
153    case JCS_EXT_RGBA:
154      sse2fct=jsimd_extrgbx_ycc_convert_sse2;
155      mmxfct=jsimd_extrgbx_ycc_convert_mmx;
156      break;
157    case JCS_EXT_BGR:
158      sse2fct=jsimd_extbgr_ycc_convert_sse2;
159      mmxfct=jsimd_extbgr_ycc_convert_mmx;
160      break;
161    case JCS_EXT_BGRX:
162    case JCS_EXT_BGRA:
163      sse2fct=jsimd_extbgrx_ycc_convert_sse2;
164      mmxfct=jsimd_extbgrx_ycc_convert_mmx;
165      break;
166    case JCS_EXT_XBGR:
167    case JCS_EXT_ABGR:
168      sse2fct=jsimd_extxbgr_ycc_convert_sse2;
169      mmxfct=jsimd_extxbgr_ycc_convert_mmx;
170      break;
171    case JCS_EXT_XRGB:
172    case JCS_EXT_ARGB:
173      sse2fct=jsimd_extxrgb_ycc_convert_sse2;
174      mmxfct=jsimd_extxrgb_ycc_convert_mmx;
175      break;
176    default:
177      sse2fct=jsimd_rgb_ycc_convert_sse2;
178      mmxfct=jsimd_rgb_ycc_convert_mmx;
179      break;
180  }
181
182  if ((simd_support & JSIMD_SSE2) &&
183      IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
184    sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
185  else if (simd_support & JSIMD_MMX)
186    mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
187}
188
189GLOBAL(void)
190jsimd_rgb_gray_convert (j_compress_ptr cinfo,
191                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
192                        JDIMENSION output_row, int num_rows)
193{
194  void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
195  void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
196
197  switch(cinfo->in_color_space) {
198    case JCS_EXT_RGB:
199      sse2fct=jsimd_extrgb_gray_convert_sse2;
200      mmxfct=jsimd_extrgb_gray_convert_mmx;
201      break;
202    case JCS_EXT_RGBX:
203    case JCS_EXT_RGBA:
204      sse2fct=jsimd_extrgbx_gray_convert_sse2;
205      mmxfct=jsimd_extrgbx_gray_convert_mmx;
206      break;
207    case JCS_EXT_BGR:
208      sse2fct=jsimd_extbgr_gray_convert_sse2;
209      mmxfct=jsimd_extbgr_gray_convert_mmx;
210      break;
211    case JCS_EXT_BGRX:
212    case JCS_EXT_BGRA:
213      sse2fct=jsimd_extbgrx_gray_convert_sse2;
214      mmxfct=jsimd_extbgrx_gray_convert_mmx;
215      break;
216    case JCS_EXT_XBGR:
217    case JCS_EXT_ABGR:
218      sse2fct=jsimd_extxbgr_gray_convert_sse2;
219      mmxfct=jsimd_extxbgr_gray_convert_mmx;
220      break;
221    case JCS_EXT_XRGB:
222    case JCS_EXT_ARGB:
223      sse2fct=jsimd_extxrgb_gray_convert_sse2;
224      mmxfct=jsimd_extxrgb_gray_convert_mmx;
225      break;
226    default:
227      sse2fct=jsimd_rgb_gray_convert_sse2;
228      mmxfct=jsimd_rgb_gray_convert_mmx;
229      break;
230  }
231
232  if ((simd_support & JSIMD_SSE2) &&
233      IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
234    sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
235  else if (simd_support & JSIMD_MMX)
236    mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
237}
238
239GLOBAL(void)
240jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
241                       JSAMPIMAGE input_buf, JDIMENSION input_row,
242                       JSAMPARRAY output_buf, int num_rows)
243{
244  void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
245  void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
246
247  switch(cinfo->out_color_space) {
248    case JCS_EXT_RGB:
249      sse2fct=jsimd_ycc_extrgb_convert_sse2;
250      mmxfct=jsimd_ycc_extrgb_convert_mmx;
251      break;
252    case JCS_EXT_RGBX:
253    case JCS_EXT_RGBA:
254      sse2fct=jsimd_ycc_extrgbx_convert_sse2;
255      mmxfct=jsimd_ycc_extrgbx_convert_mmx;
256      break;
257    case JCS_EXT_BGR:
258      sse2fct=jsimd_ycc_extbgr_convert_sse2;
259      mmxfct=jsimd_ycc_extbgr_convert_mmx;
260      break;
261    case JCS_EXT_BGRX:
262    case JCS_EXT_BGRA:
263      sse2fct=jsimd_ycc_extbgrx_convert_sse2;
264      mmxfct=jsimd_ycc_extbgrx_convert_mmx;
265      break;
266    case JCS_EXT_XBGR:
267    case JCS_EXT_ABGR:
268      sse2fct=jsimd_ycc_extxbgr_convert_sse2;
269      mmxfct=jsimd_ycc_extxbgr_convert_mmx;
270      break;
271    case JCS_EXT_XRGB:
272    case JCS_EXT_ARGB:
273      sse2fct=jsimd_ycc_extxrgb_convert_sse2;
274      mmxfct=jsimd_ycc_extxrgb_convert_mmx;
275      break;
276    default:
277      sse2fct=jsimd_ycc_rgb_convert_sse2;
278      mmxfct=jsimd_ycc_rgb_convert_mmx;
279      break;
280  }
281
282  if ((simd_support & JSIMD_SSE2) &&
283      IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
284    sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
285  else if (simd_support & JSIMD_MMX)
286    mmxfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
287}
288
289GLOBAL(void)
290jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
291                          JSAMPIMAGE input_buf, JDIMENSION input_row,
292                          JSAMPARRAY output_buf, int num_rows)
293{
294}
295
296GLOBAL(int)
297jsimd_can_h2v2_downsample (void)
298{
299  init_simd();
300
301  /* The code is optimised for these values only */
302  if (BITS_IN_JSAMPLE != 8)
303    return 0;
304  if (sizeof(JDIMENSION) != 4)
305    return 0;
306
307  if (simd_support & JSIMD_SSE2)
308    return 1;
309  if (simd_support & JSIMD_MMX)
310    return 1;
311
312  return 0;
313}
314
315GLOBAL(int)
316jsimd_can_h2v1_downsample (void)
317{
318  init_simd();
319
320  /* The code is optimised for these values only */
321  if (BITS_IN_JSAMPLE != 8)
322    return 0;
323  if (sizeof(JDIMENSION) != 4)
324    return 0;
325
326  if (simd_support & JSIMD_SSE2)
327    return 1;
328  if (simd_support & JSIMD_MMX)
329    return 1;
330
331  return 0;
332}
333
334GLOBAL(void)
335jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
336                       JSAMPARRAY input_data, JSAMPARRAY output_data)
337{
338  if (simd_support & JSIMD_SSE2)
339    jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
340                               compptr->v_samp_factor,
341                               compptr->width_in_blocks, input_data,
342                               output_data);
343  else if (simd_support & JSIMD_MMX)
344    jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
345                              compptr->v_samp_factor, compptr->width_in_blocks,
346                              input_data, output_data);
347}
348
349GLOBAL(void)
350jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
351                       JSAMPARRAY input_data, JSAMPARRAY output_data)
352{
353  if (simd_support & JSIMD_SSE2)
354    jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
355                               compptr->v_samp_factor,
356                               compptr->width_in_blocks, input_data,
357                               output_data);
358  else if (simd_support & JSIMD_MMX)
359    jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
360                              compptr->v_samp_factor, compptr->width_in_blocks,
361                              input_data, output_data);
362}
363
364GLOBAL(int)
365jsimd_can_h2v2_upsample (void)
366{
367  init_simd();
368
369  /* The code is optimised for these values only */
370  if (BITS_IN_JSAMPLE != 8)
371    return 0;
372  if (sizeof(JDIMENSION) != 4)
373    return 0;
374
375  if (simd_support & JSIMD_SSE2)
376    return 1;
377  if (simd_support & JSIMD_MMX)
378    return 1;
379
380  return 0;
381}
382
383GLOBAL(int)
384jsimd_can_h2v1_upsample (void)
385{
386  init_simd();
387
388  /* The code is optimised for these values only */
389  if (BITS_IN_JSAMPLE != 8)
390    return 0;
391  if (sizeof(JDIMENSION) != 4)
392    return 0;
393
394  if (simd_support & JSIMD_SSE2)
395    return 1;
396  if (simd_support & JSIMD_MMX)
397    return 1;
398
399  return 0;
400}
401
402GLOBAL(void)
403jsimd_h2v2_upsample (j_decompress_ptr cinfo,
404                     jpeg_component_info * compptr,
405                     JSAMPARRAY input_data,
406                     JSAMPARRAY * output_data_ptr)
407{
408  if (simd_support & JSIMD_SSE2)
409    jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
410                             input_data, output_data_ptr);
411  else if (simd_support & JSIMD_MMX)
412    jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
413                            input_data, output_data_ptr);
414}
415
416GLOBAL(void)
417jsimd_h2v1_upsample (j_decompress_ptr cinfo,
418                     jpeg_component_info * compptr,
419                     JSAMPARRAY input_data,
420                     JSAMPARRAY * output_data_ptr)
421{
422  if (simd_support & JSIMD_SSE2)
423    jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
424                             input_data, output_data_ptr);
425  else if (simd_support & JSIMD_MMX)
426    jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
427                            input_data, output_data_ptr);
428}
429
430GLOBAL(int)
431jsimd_can_h2v2_fancy_upsample (void)
432{
433  init_simd();
434
435  /* The code is optimised for these values only */
436  if (BITS_IN_JSAMPLE != 8)
437    return 0;
438  if (sizeof(JDIMENSION) != 4)
439    return 0;
440
441  if ((simd_support & JSIMD_SSE2) &&
442      IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
443    return 1;
444  if (simd_support & JSIMD_MMX)
445    return 1;
446
447  return 0;
448}
449
450GLOBAL(int)
451jsimd_can_h2v1_fancy_upsample (void)
452{
453  init_simd();
454
455  /* The code is optimised for these values only */
456  if (BITS_IN_JSAMPLE != 8)
457    return 0;
458  if (sizeof(JDIMENSION) != 4)
459    return 0;
460
461  if ((simd_support & JSIMD_SSE2) &&
462      IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
463    return 1;
464  if (simd_support & JSIMD_MMX)
465    return 1;
466
467  return 0;
468}
469
470GLOBAL(void)
471jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
472                           jpeg_component_info * compptr,
473                           JSAMPARRAY input_data,
474                           JSAMPARRAY * output_data_ptr)
475{
476  if ((simd_support & JSIMD_SSE2) &&
477      IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
478    jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
479                                   compptr->downsampled_width, input_data,
480                                   output_data_ptr);
481  else if (simd_support & JSIMD_MMX)
482    jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor,
483                                  compptr->downsampled_width, input_data,
484                                  output_data_ptr);
485}
486
487GLOBAL(void)
488jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
489                           jpeg_component_info * compptr,
490                           JSAMPARRAY input_data,
491                           JSAMPARRAY * output_data_ptr)
492{
493  if ((simd_support & JSIMD_SSE2) &&
494      IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
495    jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
496                                   compptr->downsampled_width, input_data,
497                                   output_data_ptr);
498  else if (simd_support & JSIMD_MMX)
499    jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor,
500                                  compptr->downsampled_width, input_data,
501                                  output_data_ptr);
502}
503
504GLOBAL(int)
505jsimd_can_h2v2_merged_upsample (void)
506{
507  init_simd();
508
509  /* The code is optimised for these values only */
510  if (BITS_IN_JSAMPLE != 8)
511    return 0;
512  if (sizeof(JDIMENSION) != 4)
513    return 0;
514
515  if ((simd_support & JSIMD_SSE2) &&
516      IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
517    return 1;
518  if (simd_support & JSIMD_MMX)
519    return 1;
520
521  return 0;
522}
523
524GLOBAL(int)
525jsimd_can_h2v1_merged_upsample (void)
526{
527  init_simd();
528
529  /* The code is optimised for these values only */
530  if (BITS_IN_JSAMPLE != 8)
531    return 0;
532  if (sizeof(JDIMENSION) != 4)
533    return 0;
534
535  if ((simd_support & JSIMD_SSE2) &&
536      IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
537    return 1;
538  if (simd_support & JSIMD_MMX)
539    return 1;
540
541  return 0;
542}
543
544GLOBAL(void)
545jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
546                            JSAMPIMAGE input_buf,
547                            JDIMENSION in_row_group_ctr,
548                            JSAMPARRAY output_buf)
549{
550  void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
551  void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
552
553  switch(cinfo->out_color_space) {
554    case JCS_EXT_RGB:
555      sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2;
556      mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx;
557      break;
558    case JCS_EXT_RGBX:
559    case JCS_EXT_RGBA:
560      sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2;
561      mmxfct=jsimd_h2v2_extrgbx_merged_upsample_mmx;
562      break;
563    case JCS_EXT_BGR:
564      sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2;
565      mmxfct=jsimd_h2v2_extbgr_merged_upsample_mmx;
566      break;
567    case JCS_EXT_BGRX:
568    case JCS_EXT_BGRA:
569      sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2;
570      mmxfct=jsimd_h2v2_extbgrx_merged_upsample_mmx;
571      break;
572    case JCS_EXT_XBGR:
573    case JCS_EXT_ABGR:
574      sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2;
575      mmxfct=jsimd_h2v2_extxbgr_merged_upsample_mmx;
576      break;
577    case JCS_EXT_XRGB:
578    case JCS_EXT_ARGB:
579      sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2;
580      mmxfct=jsimd_h2v2_extxrgb_merged_upsample_mmx;
581      break;
582    default:
583      sse2fct=jsimd_h2v2_merged_upsample_sse2;
584      mmxfct=jsimd_h2v2_merged_upsample_mmx;
585      break;
586  }
587
588  if ((simd_support & JSIMD_SSE2) &&
589      IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
590    sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
591  else if (simd_support & JSIMD_MMX)
592    mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
593}
594
595GLOBAL(void)
596jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
597                            JSAMPIMAGE input_buf,
598                            JDIMENSION in_row_group_ctr,
599                            JSAMPARRAY output_buf)
600{
601  void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
602  void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
603
604  switch(cinfo->out_color_space) {
605    case JCS_EXT_RGB:
606      sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2;
607      mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx;
608      break;
609    case JCS_EXT_RGBX:
610    case JCS_EXT_RGBA:
611      sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2;
612      mmxfct=jsimd_h2v1_extrgbx_merged_upsample_mmx;
613      break;
614    case JCS_EXT_BGR:
615      sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2;
616      mmxfct=jsimd_h2v1_extbgr_merged_upsample_mmx;
617      break;
618    case JCS_EXT_BGRX:
619    case JCS_EXT_BGRA:
620      sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2;
621      mmxfct=jsimd_h2v1_extbgrx_merged_upsample_mmx;
622      break;
623    case JCS_EXT_XBGR:
624    case JCS_EXT_ABGR:
625      sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2;
626      mmxfct=jsimd_h2v1_extxbgr_merged_upsample_mmx;
627      break;
628    case JCS_EXT_XRGB:
629    case JCS_EXT_ARGB:
630      sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2;
631      mmxfct=jsimd_h2v1_extxrgb_merged_upsample_mmx;
632      break;
633    default:
634      sse2fct=jsimd_h2v1_merged_upsample_sse2;
635      mmxfct=jsimd_h2v1_merged_upsample_mmx;
636      break;
637  }
638
639  if ((simd_support & JSIMD_SSE2) &&
640      IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
641    sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
642  else if (simd_support & JSIMD_MMX)
643    mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
644}
645
646GLOBAL(int)
647jsimd_can_convsamp (void)
648{
649  init_simd();
650
651  /* The code is optimised for these values only */
652  if (DCTSIZE != 8)
653    return 0;
654  if (BITS_IN_JSAMPLE != 8)
655    return 0;
656  if (sizeof(JDIMENSION) != 4)
657    return 0;
658  if (sizeof(DCTELEM) != 2)
659    return 0;
660
661  if (simd_support & JSIMD_SSE2)
662    return 1;
663  if (simd_support & JSIMD_MMX)
664    return 1;
665
666  return 0;
667}
668
669GLOBAL(int)
670jsimd_can_convsamp_float (void)
671{
672  init_simd();
673
674  /* The code is optimised for these values only */
675  if (DCTSIZE != 8)
676    return 0;
677  if (BITS_IN_JSAMPLE != 8)
678    return 0;
679  if (sizeof(JDIMENSION) != 4)
680    return 0;
681  if (sizeof(FAST_FLOAT) != 4)
682    return 0;
683
684  if (simd_support & JSIMD_SSE2)
685    return 1;
686  if (simd_support & JSIMD_SSE)
687    return 1;
688  if (simd_support & JSIMD_3DNOW)
689    return 1;
690
691  return 0;
692}
693
694GLOBAL(void)
695jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
696                DCTELEM * workspace)
697{
698  if (simd_support & JSIMD_SSE2)
699    jsimd_convsamp_sse2(sample_data, start_col, workspace);
700  else if (simd_support & JSIMD_MMX)
701    jsimd_convsamp_mmx(sample_data, start_col, workspace);
702}
703
704GLOBAL(void)
705jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
706                      FAST_FLOAT * workspace)
707{
708  if (simd_support & JSIMD_SSE2)
709    jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
710  else if (simd_support & JSIMD_SSE)
711    jsimd_convsamp_float_sse(sample_data, start_col, workspace);
712  else if (simd_support & JSIMD_3DNOW)
713    jsimd_convsamp_float_3dnow(sample_data, start_col, workspace);
714}
715
716GLOBAL(int)
717jsimd_can_fdct_islow (void)
718{
719  init_simd();
720
721  /* The code is optimised for these values only */
722  if (DCTSIZE != 8)
723    return 0;
724  if (sizeof(DCTELEM) != 2)
725    return 0;
726
727  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
728    return 1;
729  if (simd_support & JSIMD_MMX)
730    return 1;
731
732  return 0;
733}
734
735GLOBAL(int)
736jsimd_can_fdct_ifast (void)
737{
738  init_simd();
739
740  /* The code is optimised for these values only */
741  if (DCTSIZE != 8)
742    return 0;
743  if (sizeof(DCTELEM) != 2)
744    return 0;
745
746  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
747    return 1;
748  if (simd_support & JSIMD_MMX)
749    return 1;
750
751  return 0;
752}
753
754GLOBAL(int)
755jsimd_can_fdct_float (void)
756{
757  init_simd();
758
759  /* The code is optimised for these values only */
760  if (DCTSIZE != 8)
761    return 0;
762  if (sizeof(FAST_FLOAT) != 4)
763    return 0;
764
765  if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
766    return 1;
767  if (simd_support & JSIMD_3DNOW)
768    return 1;
769
770  return 0;
771}
772
773GLOBAL(void)
774jsimd_fdct_islow (DCTELEM * data)
775{
776  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
777    jsimd_fdct_islow_sse2(data);
778  else if (simd_support & JSIMD_MMX)
779    jsimd_fdct_islow_mmx(data);
780}
781
782GLOBAL(void)
783jsimd_fdct_ifast (DCTELEM * data)
784{
785  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
786    jsimd_fdct_ifast_sse2(data);
787  else if (simd_support & JSIMD_MMX)
788    jsimd_fdct_ifast_mmx(data);
789}
790
791GLOBAL(void)
792jsimd_fdct_float (FAST_FLOAT * data)
793{
794  if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
795    jsimd_fdct_float_sse(data);
796  else if (simd_support & JSIMD_3DNOW)
797    jsimd_fdct_float_3dnow(data);
798}
799
800GLOBAL(int)
801jsimd_can_quantize (void)
802{
803  init_simd();
804
805  /* The code is optimised for these values only */
806  if (DCTSIZE != 8)
807    return 0;
808  if (sizeof(JCOEF) != 2)
809    return 0;
810  if (sizeof(DCTELEM) != 2)
811    return 0;
812
813  if (simd_support & JSIMD_SSE2)
814    return 1;
815  if (simd_support & JSIMD_MMX)
816    return 1;
817
818  return 0;
819}
820
821GLOBAL(int)
822jsimd_can_quantize_float (void)
823{
824  init_simd();
825
826  /* The code is optimised for these values only */
827  if (DCTSIZE != 8)
828    return 0;
829  if (sizeof(JCOEF) != 2)
830    return 0;
831  if (sizeof(FAST_FLOAT) != 4)
832    return 0;
833
834  if (simd_support & JSIMD_SSE2)
835    return 1;
836  if (simd_support & JSIMD_SSE)
837    return 1;
838  if (simd_support & JSIMD_3DNOW)
839    return 1;
840
841  return 0;
842}
843
844GLOBAL(void)
845jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
846                DCTELEM * workspace)
847{
848  if (simd_support & JSIMD_SSE2)
849    jsimd_quantize_sse2(coef_block, divisors, workspace);
850  else if (simd_support & JSIMD_MMX)
851    jsimd_quantize_mmx(coef_block, divisors, workspace);
852}
853
854GLOBAL(void)
855jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
856                      FAST_FLOAT * workspace)
857{
858  if (simd_support & JSIMD_SSE2)
859    jsimd_quantize_float_sse2(coef_block, divisors, workspace);
860  else if (simd_support & JSIMD_SSE)
861    jsimd_quantize_float_sse(coef_block, divisors, workspace);
862  else if (simd_support & JSIMD_3DNOW)
863    jsimd_quantize_float_3dnow(coef_block, divisors, workspace);
864}
865
866GLOBAL(int)
867jsimd_can_idct_2x2 (void)
868{
869  init_simd();
870
871  /* The code is optimised for these values only */
872  if (DCTSIZE != 8)
873    return 0;
874  if (sizeof(JCOEF) != 2)
875    return 0;
876  if (BITS_IN_JSAMPLE != 8)
877    return 0;
878  if (sizeof(JDIMENSION) != 4)
879    return 0;
880  if (sizeof(ISLOW_MULT_TYPE) != 2)
881    return 0;
882
883  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
884    return 1;
885  if (simd_support & JSIMD_MMX)
886    return 1;
887
888  return 0;
889}
890
891GLOBAL(int)
892jsimd_can_idct_4x4 (void)
893{
894  init_simd();
895
896  /* The code is optimised for these values only */
897  if (DCTSIZE != 8)
898    return 0;
899  if (sizeof(JCOEF) != 2)
900    return 0;
901  if (BITS_IN_JSAMPLE != 8)
902    return 0;
903  if (sizeof(JDIMENSION) != 4)
904    return 0;
905  if (sizeof(ISLOW_MULT_TYPE) != 2)
906    return 0;
907
908  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
909    return 1;
910  if (simd_support & JSIMD_MMX)
911    return 1;
912
913  return 0;
914}
915
916GLOBAL(void)
917jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
918                JCOEFPTR coef_block, JSAMPARRAY output_buf,
919                JDIMENSION output_col)
920{
921  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
922    jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf,
923                        output_col);
924  else if (simd_support & JSIMD_MMX)
925    jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col);
926}
927
928GLOBAL(void)
929jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
930                JCOEFPTR coef_block, JSAMPARRAY output_buf,
931                JDIMENSION output_col)
932{
933  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
934    jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf,
935                        output_col);
936  else if (simd_support & JSIMD_MMX)
937    jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col);
938}
939
940GLOBAL(int)
941jsimd_can_idct_islow (void)
942{
943  init_simd();
944
945  /* The code is optimised for these values only */
946  if (DCTSIZE != 8)
947    return 0;
948  if (sizeof(JCOEF) != 2)
949    return 0;
950  if (BITS_IN_JSAMPLE != 8)
951    return 0;
952  if (sizeof(JDIMENSION) != 4)
953    return 0;
954  if (sizeof(ISLOW_MULT_TYPE) != 2)
955    return 0;
956
957  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
958    return 1;
959  if (simd_support & JSIMD_MMX)
960    return 1;
961
962  return 0;
963}
964
965GLOBAL(int)
966jsimd_can_idct_ifast (void)
967{
968  init_simd();
969
970  /* The code is optimised for these values only */
971  if (DCTSIZE != 8)
972    return 0;
973  if (sizeof(JCOEF) != 2)
974    return 0;
975  if (BITS_IN_JSAMPLE != 8)
976    return 0;
977  if (sizeof(JDIMENSION) != 4)
978    return 0;
979  if (sizeof(IFAST_MULT_TYPE) != 2)
980    return 0;
981  if (IFAST_SCALE_BITS != 2)
982    return 0;
983
984  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
985    return 1;
986  if (simd_support & JSIMD_MMX)
987    return 1;
988
989  return 0;
990}
991
992GLOBAL(int)
993jsimd_can_idct_float (void)
994{
995  init_simd();
996
997  if (DCTSIZE != 8)
998    return 0;
999  if (sizeof(JCOEF) != 2)
1000    return 0;
1001  if (BITS_IN_JSAMPLE != 8)
1002    return 0;
1003  if (sizeof(JDIMENSION) != 4)
1004    return 0;
1005  if (sizeof(FAST_FLOAT) != 4)
1006    return 0;
1007  if (sizeof(FLOAT_MULT_TYPE) != 4)
1008    return 0;
1009
1010  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1011    return 1;
1012  if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
1013    return 1;
1014  if (simd_support & JSIMD_3DNOW)
1015    return 1;
1016
1017  return 0;
1018}
1019
1020GLOBAL(void)
1021jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1022                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
1023                  JDIMENSION output_col)
1024{
1025  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
1026    jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
1027                          output_col);
1028  else if (simd_support & JSIMD_MMX)
1029    jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf,
1030                         output_col);
1031}
1032
1033GLOBAL(void)
1034jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1035                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
1036                  JDIMENSION output_col)
1037{
1038  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
1039    jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
1040                          output_col);
1041  else if (simd_support & JSIMD_MMX)
1042    jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf,
1043                         output_col);
1044}
1045
1046GLOBAL(void)
1047jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1048                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
1049                  JDIMENSION output_col)
1050{
1051  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1052    jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
1053                          output_col);
1054  else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
1055    jsimd_idct_float_sse(compptr->dct_table, coef_block, output_buf,
1056                         output_col);
1057  else if (simd_support & JSIMD_3DNOW)
1058    jsimd_idct_float_3dnow(compptr->dct_table, coef_block, output_buf,
1059                           output_col);
1060}
1061
1062