jsimd_mips.c revision a6b7fbd3521a88305897cbea7db4d0eef9e0ec55
/*
 * jsimd_mips.c
 *
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 * Copyright 2009-2011 D. R. Commander
 * Copyright (C) 2013, MIPS Technologies, Inc., California
 *
 * Based on the x86 SIMD extension for IJG JPEG library,
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
 * For conditions of distribution and use, see copyright notice in jsimdext.inc
 *
 * This file contains the interface between the "normal" portions
 * of the library and the SIMD implementations when running on
 * MIPS architecture.
 *
 * Based on the stubs from 'jsimd_none.c'
 */
18
19#define JPEG_INTERNALS
20#include "../jinclude.h"
21#include "../jpeglib.h"
22#include "../jsimd.h"
23#include "../jdct.h"
24#include "../jsimddct.h"
25#include "jsimd.h"
26
27#include <stdio.h>
28#include <string.h>
29#include <ctype.h>
30
/* Bitmask of detected SIMD features; all-ones means "not yet probed". */
static unsigned int simd_support = ~0U;
32
33#if defined(__linux__)
34
35LOCAL(int)
36parse_proc_cpuinfo(const char* search_string)
37{
38  const char* file_name = "/proc/cpuinfo";
39  char cpuinfo_line[256];
40  FILE* f = NULL;
41  simd_support = 0;
42
43  if ((f = fopen(file_name, "r")) != NULL) {
44    while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f) != NULL) {
45      if (strstr(cpuinfo_line, search_string) != NULL) {
46        fclose(f);
47        simd_support |= JSIMD_MIPS_DSPR2;
48        return 1;
49      }
50    }
51    fclose(f);
52  }
53  /* Did not find string in the proc file, or not Linux ELF. */
54  return 0;
55}
56#endif
57
58/*
59 * Check what SIMD accelerations are supported.
60 *
61 * FIXME: This code is racy under a multi-threaded environment.
62 */
63LOCAL(void)
64init_simd (void)
65{
66  if (simd_support != ~0U)
67    return;
68
69  simd_support = 0;
70
71#if defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2)
72  simd_support |= JSIMD_MIPS_DSPR2;
73#elif defined(__linux__)
74  /* We still have a chance to use MIPS DSPR2 regardless of globally used
75   * -mdspr2 options passed to gcc by performing runtime detection via
76   * /proc/cpuinfo parsing on linux */
77  if (!parse_proc_cpuinfo("MIPS 74K"))
78    return;
79#endif
80}
81
82GLOBAL(int)
83jsimd_can_rgb_ycc (void)
84{
85  init_simd();
86
87  /* The code is optimised for these values only */
88  if (BITS_IN_JSAMPLE != 8)
89    return 0;
90  if (sizeof(JDIMENSION) != 4)
91    return 0;
92  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
93    return 0;
94  if (simd_support & JSIMD_MIPS_DSPR2)
95    return 1;
96
97  return 0;
98}
99
100GLOBAL(int)
101jsimd_can_rgb_gray (void)
102{
103  init_simd();
104
105  /* The code is optimised for these values only */
106  if (BITS_IN_JSAMPLE != 8)
107    return 0;
108  if (sizeof(JDIMENSION) != 4)
109    return 0;
110  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
111    return 0;
112  if (simd_support & JSIMD_MIPS_DSPR2)
113    return 1;
114
115  return 0;
116}
117
118GLOBAL(int)
119jsimd_can_ycc_rgb (void)
120{
121  init_simd();
122
123  /* The code is optimised for these values only */
124  if (BITS_IN_JSAMPLE != 8)
125    return 0;
126  if (sizeof(JDIMENSION) != 4)
127    return 0;
128  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
129    return 0;
130  if (simd_support & JSIMD_MIPS_DSPR2)
131    return 1;
132
133  return 0;
134}
135
136GLOBAL(void)
137jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
138                       JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
139                       JDIMENSION output_row, int num_rows)
140{
141  void (*mipsdspr2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
142  switch(cinfo->in_color_space)
143  {
144    case JCS_EXT_RGB:
145      mipsdspr2fct=jsimd_extrgb_ycc_convert_mips_dspr2;
146      break;
147    case JCS_EXT_RGBX:
148    case JCS_EXT_RGBA:
149      mipsdspr2fct=jsimd_extrgbx_ycc_convert_mips_dspr2;
150      break;
151    case JCS_EXT_BGR:
152      mipsdspr2fct=jsimd_extbgr_ycc_convert_mips_dspr2;
153      break;
154    case JCS_EXT_BGRX:
155    case JCS_EXT_BGRA:
156      mipsdspr2fct=jsimd_extbgrx_ycc_convert_mips_dspr2;
157      break;
158    case JCS_EXT_XBGR:
159    case JCS_EXT_ABGR:
160      mipsdspr2fct=jsimd_extxbgr_ycc_convert_mips_dspr2;
161
162      break;
163    case JCS_EXT_XRGB:
164    case JCS_EXT_ARGB:
165      mipsdspr2fct=jsimd_extxrgb_ycc_convert_mips_dspr2;
166      break;
167    default:
168      mipsdspr2fct=jsimd_extrgb_ycc_convert_mips_dspr2;
169      break;
170  }
171
172  if (simd_support & JSIMD_MIPS_DSPR2)
173    mipsdspr2fct(cinfo->image_width, input_buf,
174        output_buf, output_row, num_rows);
175}
176
177GLOBAL(void)
178jsimd_rgb_gray_convert (j_compress_ptr cinfo,
179                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
180                        JDIMENSION output_row, int num_rows)
181{
182  void (*mipsdspr2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
183  switch(cinfo->in_color_space)
184  {
185    case JCS_EXT_RGB:
186      mipsdspr2fct=jsimd_extrgb_gray_convert_mips_dspr2;
187      break;
188    case JCS_EXT_RGBX:
189    case JCS_EXT_RGBA:
190      mipsdspr2fct=jsimd_extrgbx_gray_convert_mips_dspr2;
191      break;
192    case JCS_EXT_BGR:
193      mipsdspr2fct=jsimd_extbgr_gray_convert_mips_dspr2;
194      break;
195    case JCS_EXT_BGRX:
196    case JCS_EXT_BGRA:
197      mipsdspr2fct=jsimd_extbgrx_gray_convert_mips_dspr2;
198      break;
199    case JCS_EXT_XBGR:
200    case JCS_EXT_ABGR:
201      mipsdspr2fct=jsimd_extxbgr_gray_convert_mips_dspr2;
202      break;
203    case JCS_EXT_XRGB:
204    case JCS_EXT_ARGB:
205      mipsdspr2fct=jsimd_extxrgb_gray_convert_mips_dspr2;
206      break;
207    default:
208      mipsdspr2fct=jsimd_extrgb_gray_convert_mips_dspr2;
209      break;
210  }
211
212  if (simd_support & JSIMD_MIPS_DSPR2)
213    mipsdspr2fct(cinfo->image_width, input_buf,
214        output_buf, output_row, num_rows);
215
216}
217
218GLOBAL(void)
219jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
220                       JSAMPIMAGE input_buf, JDIMENSION input_row,
221                       JSAMPARRAY output_buf, int num_rows)
222{
223  void (*mipsdspr2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
224
225  switch(cinfo->out_color_space)
226  {
227    case JCS_EXT_RGB:
228      mipsdspr2fct=jsimd_ycc_extrgb_convert_mips_dspr2;
229      break;
230    case JCS_EXT_RGBX:
231    case JCS_EXT_RGBA:
232      mipsdspr2fct=jsimd_ycc_extrgbx_convert_mips_dspr2;
233      break;
234    case JCS_EXT_BGR:
235      mipsdspr2fct=jsimd_ycc_extbgr_convert_mips_dspr2;
236      break;
237    case JCS_EXT_BGRX:
238    case JCS_EXT_BGRA:
239      mipsdspr2fct=jsimd_ycc_extbgrx_convert_mips_dspr2;
240      break;
241    case JCS_EXT_XBGR:
242    case JCS_EXT_ABGR:
243      mipsdspr2fct=jsimd_ycc_extxbgr_convert_mips_dspr2;
244      break;
245    case JCS_EXT_XRGB:
246    case JCS_EXT_ARGB:
247      mipsdspr2fct=jsimd_ycc_extxrgb_convert_mips_dspr2;
248      break;
249  default:
250      mipsdspr2fct=jsimd_ycc_extrgb_convert_mips_dspr2;
251      break;
252  }
253
254  if (simd_support & JSIMD_MIPS_DSPR2)
255    mipsdspr2fct(cinfo->output_width, input_buf,
256        input_row, output_buf, num_rows);
257}
258
259GLOBAL(int)
260jsimd_can_h2v2_downsample (void)
261{
262  init_simd();
263
264  /* The code is optimised for these values only */
265  if (BITS_IN_JSAMPLE != 8)
266    return 0;
267  if (sizeof(JDIMENSION) != 4)
268    return 0;
269  if (simd_support & JSIMD_MIPS_DSPR2)
270    return 1;
271
272  return 0;
273}
274
275GLOBAL(int)
276jsimd_can_h2v1_downsample (void)
277{
278  init_simd();
279
280  /* The code is optimised for these values only */
281  if (BITS_IN_JSAMPLE != 8)
282    return 0;
283  if (sizeof(JDIMENSION) != 4)
284    return 0;
285  if (simd_support & JSIMD_MIPS_DSPR2)
286    return 1;
287
288  return 0;
289}
290
291GLOBAL(void)
292jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
293                       JSAMPARRAY input_data, JSAMPARRAY output_data)
294{
295  if (simd_support & JSIMD_MIPS_DSPR2)
296    jsimd_h2v2_downsample_mips_dspr2(cinfo->image_width,
297        cinfo->max_v_samp_factor, compptr->v_samp_factor,
298        compptr->width_in_blocks, input_data, output_data);
299}
300
301GLOBAL(void)
302jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
303                       JSAMPARRAY input_data, JSAMPARRAY output_data)
304{
305  if (simd_support & JSIMD_MIPS_DSPR2)
306    jsimd_h2v1_downsample_mips_dspr2(cinfo->image_width,
307        cinfo->max_v_samp_factor, compptr->v_samp_factor,
308        compptr->width_in_blocks, input_data, output_data);
309}
310
311GLOBAL(int)
312jsimd_can_h2v2_upsample (void)
313{
314  init_simd();
315
316  /* The code is optimised for these values only */
317  if (BITS_IN_JSAMPLE != 8)
318    return 0;
319  if (sizeof(JDIMENSION) != 4)
320    return 0;
321  if (simd_support & JSIMD_MIPS_DSPR2)
322    return 1;
323
324  return 0;
325}
326
327GLOBAL(int)
328jsimd_can_h2v1_upsample (void)
329{
330  init_simd();
331
332  /* The code is optimised for these values only */
333  if (BITS_IN_JSAMPLE != 8)
334    return 0;
335  if (sizeof(JDIMENSION) != 4)
336    return 0;
337  if (simd_support & JSIMD_MIPS_DSPR2)
338    return 1;
339
340  return 0;
341}
342
343GLOBAL(void)
344jsimd_h2v2_upsample (j_decompress_ptr cinfo,
345                     jpeg_component_info * compptr,
346                     JSAMPARRAY input_data,
347                     JSAMPARRAY * output_data_ptr)
348{
349  if (simd_support & JSIMD_MIPS_DSPR2)
350    jsimd_h2v2_upsample_mips_dspr2(cinfo->max_v_samp_factor,
351        cinfo->output_width, input_data, output_data_ptr);
352}
353
354GLOBAL(void)
355jsimd_h2v1_upsample (j_decompress_ptr cinfo,
356                     jpeg_component_info * compptr,
357                     JSAMPARRAY input_data,
358                     JSAMPARRAY * output_data_ptr)
359{
360  if (simd_support & JSIMD_MIPS_DSPR2)
361    jsimd_h2v1_upsample_mips_dspr2(cinfo->max_v_samp_factor,
362        cinfo->output_width, input_data, output_data_ptr);
363}
364
365GLOBAL(int)
366jsimd_can_h2v2_fancy_upsample (void)
367{
368  init_simd();
369
370  /* The code is optimised for these values only */
371  if (BITS_IN_JSAMPLE != 8)
372    return 0;
373  if (sizeof(JDIMENSION) != 4)
374    return 0;
375  if (simd_support & JSIMD_MIPS_DSPR2)
376    return 1;
377
378  return 0;
379}
380
381GLOBAL(int)
382jsimd_can_h2v1_fancy_upsample (void)
383{
384  init_simd();
385
386  /* The code is optimised for these values only */
387  if (BITS_IN_JSAMPLE != 8)
388    return 0;
389  if (sizeof(JDIMENSION) != 4)
390    return 0;
391  if (simd_support & JSIMD_MIPS_DSPR2)
392    return 1;
393
394  return 0;
395}
396
397GLOBAL(void)
398jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
399                           jpeg_component_info * compptr,
400                           JSAMPARRAY input_data,
401                           JSAMPARRAY * output_data_ptr)
402{
403  if (simd_support & JSIMD_MIPS_DSPR2)
404    jsimd_h2v2_fancy_upsample_mips_dspr2(cinfo->max_v_samp_factor,
405        compptr->downsampled_width, input_data, output_data_ptr);
406}
407
408GLOBAL(void)
409jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
410                           jpeg_component_info * compptr,
411                           JSAMPARRAY input_data,
412                           JSAMPARRAY * output_data_ptr)
413{
414  if (simd_support & JSIMD_MIPS_DSPR2)
415    jsimd_h2v1_fancy_upsample_mips_dspr2(cinfo->max_v_samp_factor,
416        compptr->downsampled_width, input_data, output_data_ptr);
417}
418
419GLOBAL(int)
420jsimd_can_h2v2_merged_upsample (void)
421{
422  return 0;
423}
424
425GLOBAL(int)
426jsimd_can_h2v1_merged_upsample (void)
427{
428  return 0;
429}
430
431GLOBAL(void)
432jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
433                            JSAMPIMAGE input_buf,
434                            JDIMENSION in_row_group_ctr,
435                            JSAMPARRAY output_buf)
436{
437}
438
439GLOBAL(void)
440jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
441                            JSAMPIMAGE input_buf,
442                            JDIMENSION in_row_group_ctr,
443                            JSAMPARRAY output_buf)
444{
445}
446
447GLOBAL(int)
448jsimd_can_convsamp (void)
449{
450  return 0;
451}
452
453GLOBAL(int)
454jsimd_can_convsamp_float (void)
455{
456  return 0;
457}
458
459GLOBAL(void)
460jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
461                DCTELEM * workspace)
462{
463}
464
465GLOBAL(void)
466jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
467                      FAST_FLOAT * workspace)
468{
469}
470
471GLOBAL(int)
472jsimd_can_fdct_islow (void)
473{
474  init_simd();
475
476  /* The code is optimised for these values only */
477  if (DCTSIZE != 8)
478    return 0;
479  if (sizeof(DCTELEM) != 2)
480    return 0;
481
482  if (simd_support & JSIMD_MIPS_DSPR2)
483    return 1;
484
485  return 0;
486}
487
488GLOBAL(int)
489jsimd_can_fdct_ifast (void)
490{
491  return 0;
492}
493
494GLOBAL(int)
495jsimd_can_fdct_float (void)
496{
497  return 0;
498}
499
500GLOBAL(void)
501jsimd_fdct_islow (DCTELEM * data)
502{
503  if (simd_support & JSIMD_MIPS_DSPR2)
504    jsimd_fdct_islow_mips_dspr2(data);
505}
506
507GLOBAL(void)
508jsimd_fdct_ifast (DCTELEM * data)
509{
510}
511
512GLOBAL(void)
513jsimd_fdct_float (FAST_FLOAT * data)
514{
515}
516
517GLOBAL(int)
518jsimd_can_quantize (void)
519{
520  init_simd();
521
522  /* The code is optimised for these values only */
523  if (DCTSIZE != 8)
524    return 0;
525  if (sizeof(JCOEF) != 2)
526    return 0;
527  if (sizeof(DCTELEM) != 2)
528    return 0;
529
530  if (simd_support & JSIMD_MIPS_DSPR2)
531    return 1;
532
533  return 0;
534}
535
536GLOBAL(int)
537jsimd_can_quantize_float (void)
538{
539  return 0;
540}
541
542GLOBAL(void)
543jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
544                DCTELEM * workspace)
545{
546  if (simd_support & JSIMD_MIPS_DSPR2)
547    jsimd_quantize_mips_dspr2(coef_block, divisors, workspace);
548}
549
550GLOBAL(void)
551jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
552                      FAST_FLOAT * workspace)
553{
554}
555
556GLOBAL(int)
557jsimd_can_idct_2x2 (void)
558{
559  init_simd();
560
561  /* The code is optimised for these values only */
562  if (DCTSIZE != 8)
563    return 0;
564  if (sizeof(JCOEF) != 2)
565    return 0;
566  if (BITS_IN_JSAMPLE != 8)
567    return 0;
568  if (sizeof(JDIMENSION) != 4)
569    return 0;
570  if (sizeof(ISLOW_MULT_TYPE) != 2)
571    return 0;
572
573  if ((simd_support & JSIMD_MIPS_DSPR2))
574    return 1;
575
576  return 0;
577}
578
579GLOBAL(int)
580jsimd_can_idct_4x4 (void)
581{
582  init_simd();
583
584  /* The code is optimised for these values only */
585  if (DCTSIZE != 8)
586    return 0;
587  if (sizeof(JCOEF) != 2)
588    return 0;
589  if (BITS_IN_JSAMPLE != 8)
590    return 0;
591  if (sizeof(JDIMENSION) != 4)
592    return 0;
593  if (sizeof(ISLOW_MULT_TYPE) != 2)
594    return 0;
595
596  if ((simd_support & JSIMD_MIPS_DSPR2))
597    return 1;
598
599  return 0;
600}
601
602GLOBAL(int)
603jsimd_can_idct_6x6 (void)
604{
605  init_simd();
606
607  /* The code is optimised for these values only */
608  if (DCTSIZE != 8)
609    return 0;
610  if (sizeof(JCOEF) != 2)
611    return 0;
612  if (BITS_IN_JSAMPLE != 8)
613    return 0;
614  if (sizeof(JDIMENSION) != 4)
615    return 0;
616  if (sizeof(ISLOW_MULT_TYPE) != 2)
617    return 0;
618
619  if ((simd_support & JSIMD_MIPS_DSPR2))
620    return 1;
621
622  return 0;
623}
624
625GLOBAL(int)
626jsimd_can_idct_12x12 (void)
627{
628  init_simd();
629
630  if (BITS_IN_JSAMPLE != 8)
631    return 0;
632  if (DCTSIZE != 8)
633    return 0;
634  if (sizeof(JCOEF) != 2)
635    return 0;
636  if (sizeof(JDIMENSION) != 4)
637    return 0;
638  if (sizeof(ISLOW_MULT_TYPE) != 2)
639    return 0;
640
641  if (simd_support & JSIMD_MIPS_DSPR2)
642    return 1;
643
644  return 0;
645}
646
647GLOBAL(void)
648jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
649                JCOEFPTR coef_block, JSAMPARRAY output_buf,
650                JDIMENSION output_col)
651{
652  if ((simd_support & JSIMD_MIPS_DSPR2))
653    jsimd_idct_2x2_mips_dspr2(compptr->dct_table, coef_block,
654                              output_buf, output_col);
655}
656
657GLOBAL(void)
658jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
659                JCOEFPTR coef_block, JSAMPARRAY output_buf,
660                JDIMENSION output_col)
661{
662  if ((simd_support & JSIMD_MIPS_DSPR2))
663  {
664    int workspace[DCTSIZE*4];  /* buffers data between passes */
665    jsimd_idct_4x4_mips_dspr2(compptr->dct_table, coef_block,
666                              output_buf, output_col, workspace);
667  }
668}
669
670GLOBAL(void)
671jsimd_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
672           JCOEFPTR coef_block, JSAMPARRAY output_buf,
673           JDIMENSION output_col)
674{
675    if ((simd_support & JSIMD_MIPS_DSPR2))
676      jsimd_idct_6x6_mips_dspr2(compptr->dct_table, coef_block,
677                                output_buf, output_col);
678}
679
680GLOBAL(void)
681jsimd_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
682                  JCOEFPTR coef_block,
683                  JSAMPARRAY output_buf, JDIMENSION output_col)
684{
685  if (simd_support & JSIMD_MIPS_DSPR2) {
686    int workspace[96];
687    int output[12] = {
688      (int)(output_buf[0] + output_col),
689      (int)(output_buf[1] + output_col),
690      (int)(output_buf[2] + output_col),
691      (int)(output_buf[3] + output_col),
692      (int)(output_buf[4] + output_col),
693      (int)(output_buf[5] + output_col),
694      (int)(output_buf[6] + output_col),
695      (int)(output_buf[7] + output_col),
696      (int)(output_buf[8] + output_col),
697      (int)(output_buf[9] + output_col),
698      (int)(output_buf[10] + output_col),
699      (int)(output_buf[11] + output_col),
700    };
701    jsimd_idct_12x12_pass1_mips_dspr2(coef_block,
702                                      compptr->dct_table, workspace);
703    jsimd_idct_12x12_pass2_mips_dspr2(workspace, output);
704  }
705}
706
707GLOBAL(int)
708jsimd_can_idct_islow (void)
709{
710  return 0;
711}
712
713GLOBAL(int)
714jsimd_can_idct_ifast (void)
715{
716  return 0;
717}
718
719GLOBAL(int)
720jsimd_can_idct_float (void)
721{
722  return 0;
723}
724
725GLOBAL(void)
726jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
727                JCOEFPTR coef_block, JSAMPARRAY output_buf,
728                JDIMENSION output_col)
729{
730}
731
732GLOBAL(void)
733jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
734                JCOEFPTR coef_block, JSAMPARRAY output_buf,
735                JDIMENSION output_col)
736{
737}
738
739GLOBAL(void)
740jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
741                JCOEFPTR coef_block, JSAMPARRAY output_buf,
742                JDIMENSION output_col)
743{
744}
745