1/*
2 * jsimd_arm64.c
3 *
4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright 2009-2011, 2013-2014 D. R. Commander
6 *
7 * Based on the x86 SIMD extension for IJG JPEG library,
8 * Copyright (C) 1999-2006, MIYASAKA Masaru.
9 * For conditions of distribution and use, see copyright notice in jsimdext.inc
10 *
11 * This file contains the interface between the "normal" portions
12 * of the library and the SIMD implementations when running on a
13 * 64-bit ARM architecture.
14 */
15
16#define JPEG_INTERNALS
17#include "../jinclude.h"
18#include "../jpeglib.h"
19#include "../jsimd.h"
20#include "../jdct.h"
21#include "../jsimddct.h"
22#include "jsimd.h"
23
24#include <stdio.h>
25#include <string.h>
26#include <ctype.h>
27
/*
 * Bitmask of usable SIMD extensions.  All-ones (~0U) is the "not yet
 * probed" sentinel that init_simd() tests for; the initializer is written
 * as an unsigned constant so it matches that comparison without relying on
 * an implicit signed-to-unsigned conversion.
 */
static unsigned int simd_support = ~0U;
29
30/*
31 * Check what SIMD accelerations are supported.
32 *
33 * FIXME: This code is racy under a multi-threaded environment.
34 */
35
36/*
37 * ARMv8 architectures support NEON extensions by default.
38 * It is no longer optional as it was with ARMv7.
39 */
40
41
42LOCAL(void)
43init_simd (void)
44{
45  char *env = NULL;
46
47  if (simd_support != ~0U)
48    return;
49
50  simd_support = 0;
51
52  simd_support |= JSIMD_ARM_NEON;
53
54  /* Force different settings through environment variables */
55  env = getenv("JSIMD_FORCENEON");
56  if ((env != NULL) && (strcmp(env, "1") == 0))
57    simd_support &= JSIMD_ARM_NEON;
58  env = getenv("JSIMD_FORCENONE");
59  if ((env != NULL) && (strcmp(env, "1") == 0))
60    simd_support = 0;
61}
62
63GLOBAL(int)
64jsimd_can_rgb_ycc (void)
65{
66  init_simd();
67
68  return 0;
69}
70
71GLOBAL(int)
72jsimd_can_rgb_gray (void)
73{
74  init_simd();
75
76  return 0;
77}
78
79GLOBAL(int)
80jsimd_can_ycc_rgb (void)
81{
82  init_simd();
83
84  /* The code is optimised for these values only */
85  if (BITS_IN_JSAMPLE != 8)
86    return 0;
87  if (sizeof(JDIMENSION) != 4)
88    return 0;
89  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
90    return 0;
91
92  if (simd_support & JSIMD_ARM_NEON)
93    return 1;
94
95  return 0;
96}
97
98GLOBAL(int)
99jsimd_can_ycc_rgb565 (void)
100{
101  init_simd();
102
103  /* The code is optimised for these values only */
104  if (BITS_IN_JSAMPLE != 8)
105    return 0;
106  if (sizeof(JDIMENSION) != 4)
107    return 0;
108
109  if (simd_support & JSIMD_ARM_NEON)
110    return 1;
111
112  return 0;
113}
114
115GLOBAL(void)
116jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
117                       JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
118                       JDIMENSION output_row, int num_rows)
119{
120}
121
122GLOBAL(void)
123jsimd_rgb_gray_convert (j_compress_ptr cinfo,
124                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
125                        JDIMENSION output_row, int num_rows)
126{
127}
128
129GLOBAL(void)
130jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
131                       JSAMPIMAGE input_buf, JDIMENSION input_row,
132                       JSAMPARRAY output_buf, int num_rows)
133{
134  void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
135
136  switch(cinfo->out_color_space) {
137    case JCS_EXT_RGB:
138      neonfct=jsimd_ycc_extrgb_convert_neon;
139      break;
140    case JCS_EXT_RGBX:
141    case JCS_EXT_RGBA:
142      neonfct=jsimd_ycc_extrgbx_convert_neon;
143      break;
144    case JCS_EXT_BGR:
145      neonfct=jsimd_ycc_extbgr_convert_neon;
146      break;
147    case JCS_EXT_BGRX:
148    case JCS_EXT_BGRA:
149      neonfct=jsimd_ycc_extbgrx_convert_neon;
150      break;
151    case JCS_EXT_XBGR:
152    case JCS_EXT_ABGR:
153      neonfct=jsimd_ycc_extxbgr_convert_neon;
154      break;
155    case JCS_EXT_XRGB:
156    case JCS_EXT_ARGB:
157      neonfct=jsimd_ycc_extxrgb_convert_neon;
158      break;
159    default:
160      neonfct=jsimd_ycc_extrgb_convert_neon;
161      break;
162  }
163
164  if (simd_support & JSIMD_ARM_NEON)
165    neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
166}
167
168GLOBAL(void)
169jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
170                          JSAMPIMAGE input_buf, JDIMENSION input_row,
171                          JSAMPARRAY output_buf, int num_rows)
172{
173  if (simd_support & JSIMD_ARM_NEON)
174    jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row,
175                                  output_buf, num_rows);
176}
177
178GLOBAL(int)
179jsimd_can_h2v2_downsample (void)
180{
181  init_simd();
182
183  return 0;
184}
185
186GLOBAL(int)
187jsimd_can_h2v1_downsample (void)
188{
189  init_simd();
190
191  return 0;
192}
193
194GLOBAL(void)
195jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
196                       JSAMPARRAY input_data, JSAMPARRAY output_data)
197{
198}
199
200GLOBAL(void)
201jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
202                       JSAMPARRAY input_data, JSAMPARRAY output_data)
203{
204}
205
206GLOBAL(int)
207jsimd_can_h2v2_upsample (void)
208{
209  init_simd();
210
211  return 0;
212}
213
214GLOBAL(int)
215jsimd_can_h2v1_upsample (void)
216{
217  init_simd();
218
219  return 0;
220}
221
222GLOBAL(void)
223jsimd_h2v2_upsample (j_decompress_ptr cinfo,
224                     jpeg_component_info * compptr,
225                     JSAMPARRAY input_data,
226                     JSAMPARRAY * output_data_ptr)
227{
228}
229
230GLOBAL(void)
231jsimd_h2v1_upsample (j_decompress_ptr cinfo,
232                     jpeg_component_info * compptr,
233                     JSAMPARRAY input_data,
234                     JSAMPARRAY * output_data_ptr)
235{
236}
237
238GLOBAL(int)
239jsimd_can_h2v2_fancy_upsample (void)
240{
241  init_simd();
242
243  return 0;
244}
245
246GLOBAL(int)
247jsimd_can_h2v1_fancy_upsample (void)
248{
249  init_simd();
250
251  return 0;
252}
253
254GLOBAL(void)
255jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
256                           jpeg_component_info * compptr,
257                           JSAMPARRAY input_data,
258                           JSAMPARRAY * output_data_ptr)
259{
260}
261
262GLOBAL(void)
263jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
264                           jpeg_component_info * compptr,
265                           JSAMPARRAY input_data,
266                           JSAMPARRAY * output_data_ptr)
267{
268}
269
270GLOBAL(int)
271jsimd_can_h2v2_merged_upsample (void)
272{
273  init_simd();
274
275  return 0;
276}
277
278GLOBAL(int)
279jsimd_can_h2v1_merged_upsample (void)
280{
281  init_simd();
282
283  return 0;
284}
285
286GLOBAL(void)
287jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
288                            JSAMPIMAGE input_buf,
289                            JDIMENSION in_row_group_ctr,
290                            JSAMPARRAY output_buf)
291{
292}
293
294GLOBAL(void)
295jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
296                            JSAMPIMAGE input_buf,
297                            JDIMENSION in_row_group_ctr,
298                            JSAMPARRAY output_buf)
299{
300}
301
302GLOBAL(int)
303jsimd_can_convsamp (void)
304{
305  init_simd();
306
307  return 0;
308}
309
310GLOBAL(int)
311jsimd_can_convsamp_float (void)
312{
313  init_simd();
314
315  return 0;
316}
317
318GLOBAL(void)
319jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
320                DCTELEM * workspace)
321{
322}
323
324GLOBAL(void)
325jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
326                      FAST_FLOAT * workspace)
327{
328}
329
330GLOBAL(int)
331jsimd_can_fdct_islow (void)
332{
333  init_simd();
334
335  return 0;
336}
337
338GLOBAL(int)
339jsimd_can_fdct_ifast (void)
340{
341  init_simd();
342
343  return 0;
344}
345
346GLOBAL(int)
347jsimd_can_fdct_float (void)
348{
349  init_simd();
350
351  return 0;
352}
353
354GLOBAL(void)
355jsimd_fdct_islow (DCTELEM * data)
356{
357}
358
359GLOBAL(void)
360jsimd_fdct_ifast (DCTELEM * data)
361{
362}
363
364GLOBAL(void)
365jsimd_fdct_float (FAST_FLOAT * data)
366{
367}
368
369GLOBAL(int)
370jsimd_can_quantize (void)
371{
372  init_simd();
373
374  return 0;
375}
376
377GLOBAL(int)
378jsimd_can_quantize_float (void)
379{
380  init_simd();
381
382  return 0;
383}
384
385GLOBAL(void)
386jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
387                DCTELEM * workspace)
388{
389}
390
391GLOBAL(void)
392jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
393                      FAST_FLOAT * workspace)
394{
395}
396
397GLOBAL(int)
398jsimd_can_idct_2x2 (void)
399{
400  init_simd();
401
402  /* The code is optimised for these values only */
403  if (DCTSIZE != 8)
404    return 0;
405  if (sizeof(JCOEF) != 2)
406    return 0;
407  if (BITS_IN_JSAMPLE != 8)
408    return 0;
409  if (sizeof(JDIMENSION) != 4)
410    return 0;
411  if (sizeof(ISLOW_MULT_TYPE) != 2)
412    return 0;
413
414  if (simd_support & JSIMD_ARM_NEON)
415    return 1;
416
417  return 0;
418}
419
420GLOBAL(int)
421jsimd_can_idct_4x4 (void)
422{
423  init_simd();
424
425  /* The code is optimised for these values only */
426  if (DCTSIZE != 8)
427    return 0;
428  if (sizeof(JCOEF) != 2)
429    return 0;
430  if (BITS_IN_JSAMPLE != 8)
431    return 0;
432  if (sizeof(JDIMENSION) != 4)
433    return 0;
434  if (sizeof(ISLOW_MULT_TYPE) != 2)
435    return 0;
436
437  if (simd_support & JSIMD_ARM_NEON)
438    return 1;
439
440  return 0;
441}
442
443GLOBAL(void)
444jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
445                JCOEFPTR coef_block, JSAMPARRAY output_buf,
446                JDIMENSION output_col)
447{
448  if (simd_support & JSIMD_ARM_NEON)
449    jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf,
450                        output_col);
451}
452
453GLOBAL(void)
454jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
455                JCOEFPTR coef_block, JSAMPARRAY output_buf,
456                JDIMENSION output_col)
457{
458  if (simd_support & JSIMD_ARM_NEON)
459    jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf,
460                        output_col);
461}
462
463GLOBAL(int)
464jsimd_can_idct_islow (void)
465{
466  init_simd();
467
468  /* The code is optimised for these values only */
469  if (DCTSIZE != 8)
470    return 0;
471  if (sizeof(JCOEF) != 2)
472    return 0;
473  if (BITS_IN_JSAMPLE != 8)
474    return 0;
475  if (sizeof(JDIMENSION) != 4)
476    return 0;
477  if (sizeof(ISLOW_MULT_TYPE) != 2)
478    return 0;
479
480  if (simd_support & JSIMD_ARM_NEON)
481    return 1;
482
483  return 0;
484}
485
486GLOBAL(int)
487jsimd_can_idct_ifast (void)
488{
489  init_simd();
490
491  /* The code is optimised for these values only */
492  if (DCTSIZE != 8)
493    return 0;
494  if (sizeof(JCOEF) != 2)
495    return 0;
496  if (BITS_IN_JSAMPLE != 8)
497    return 0;
498  if (sizeof(JDIMENSION) != 4)
499    return 0;
500  if (sizeof(IFAST_MULT_TYPE) != 2)
501    return 0;
502  if (IFAST_SCALE_BITS != 2)
503    return 0;
504
505  if (simd_support & JSIMD_ARM_NEON)
506    return 1;
507
508  return 0;
509}
510
511GLOBAL(int)
512jsimd_can_idct_float (void)
513{
514  init_simd();
515
516  return 0;
517}
518
519GLOBAL(void)
520jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
521                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
522                  JDIMENSION output_col)
523{
524  if (simd_support & JSIMD_ARM_NEON)
525    jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf,
526                          output_col);
527}
528
529GLOBAL(void)
530jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
531                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
532                  JDIMENSION output_col)
533{
534  if (simd_support & JSIMD_ARM_NEON)
535    jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf,
536                          output_col);
537}
538
539GLOBAL(void)
540jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
541                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
542                  JDIMENSION output_col)
543{
544}
545