/* jsimd_mips.c revision a6b7fbd3521a88305897cbea7db4d0eef9e0ec55 */
1/* 2 * jsimd_mips.c 3 * 4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB 5 * Copyright 2009-2011 D. R. Commander 6 * Copyright (C) 2013, MIPS Technologies, Inc., California 7 * 8 * Based on the x86 SIMD extension for IJG JPEG library, 9 * Copyright (C) 1999-2006, MIYASAKA Masaru. 10 * For conditions of distribution and use, see copyright notice in jsimdext.inc 11 * 12 * This file contains the interface between the "normal" portions 13 * of the library and the SIMD implementations when running on 14 * MIPS architecture. 15 * 16 * Based on the stubs from 'jsimd_none.c' 17 */ 18 19#define JPEG_INTERNALS 20#include "../jinclude.h" 21#include "../jpeglib.h" 22#include "../jsimd.h" 23#include "../jdct.h" 24#include "../jsimddct.h" 25#include "jsimd.h" 26 27#include <stdio.h> 28#include <string.h> 29#include <ctype.h> 30 31static unsigned int simd_support = ~0; 32 33#if defined(__linux__) 34 35LOCAL(int) 36parse_proc_cpuinfo(const char* search_string) 37{ 38 const char* file_name = "/proc/cpuinfo"; 39 char cpuinfo_line[256]; 40 FILE* f = NULL; 41 simd_support = 0; 42 43 if ((f = fopen(file_name, "r")) != NULL) { 44 while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f) != NULL) { 45 if (strstr(cpuinfo_line, search_string) != NULL) { 46 fclose(f); 47 simd_support |= JSIMD_MIPS_DSPR2; 48 return 1; 49 } 50 } 51 fclose(f); 52 } 53 /* Did not find string in the proc file, or not Linux ELF. */ 54 return 0; 55} 56#endif 57 58/* 59 * Check what SIMD accelerations are supported. 60 * 61 * FIXME: This code is racy under a multi-threaded environment. 
62 */ 63LOCAL(void) 64init_simd (void) 65{ 66 if (simd_support != ~0U) 67 return; 68 69 simd_support = 0; 70 71#if defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2) 72 simd_support |= JSIMD_MIPS_DSPR2; 73#elif defined(__linux__) 74 /* We still have a chance to use MIPS DSPR2 regardless of globally used 75 * -mdspr2 options passed to gcc by performing runtime detection via 76 * /proc/cpuinfo parsing on linux */ 77 if (!parse_proc_cpuinfo("MIPS 74K")) 78 return; 79#endif 80} 81 82GLOBAL(int) 83jsimd_can_rgb_ycc (void) 84{ 85 init_simd(); 86 87 /* The code is optimised for these values only */ 88 if (BITS_IN_JSAMPLE != 8) 89 return 0; 90 if (sizeof(JDIMENSION) != 4) 91 return 0; 92 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 93 return 0; 94 if (simd_support & JSIMD_MIPS_DSPR2) 95 return 1; 96 97 return 0; 98} 99 100GLOBAL(int) 101jsimd_can_rgb_gray (void) 102{ 103 init_simd(); 104 105 /* The code is optimised for these values only */ 106 if (BITS_IN_JSAMPLE != 8) 107 return 0; 108 if (sizeof(JDIMENSION) != 4) 109 return 0; 110 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 111 return 0; 112 if (simd_support & JSIMD_MIPS_DSPR2) 113 return 1; 114 115 return 0; 116} 117 118GLOBAL(int) 119jsimd_can_ycc_rgb (void) 120{ 121 init_simd(); 122 123 /* The code is optimised for these values only */ 124 if (BITS_IN_JSAMPLE != 8) 125 return 0; 126 if (sizeof(JDIMENSION) != 4) 127 return 0; 128 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 129 return 0; 130 if (simd_support & JSIMD_MIPS_DSPR2) 131 return 1; 132 133 return 0; 134} 135 136GLOBAL(void) 137jsimd_rgb_ycc_convert (j_compress_ptr cinfo, 138 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, 139 JDIMENSION output_row, int num_rows) 140{ 141 void (*mipsdspr2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 142 switch(cinfo->in_color_space) 143 { 144 case JCS_EXT_RGB: 145 mipsdspr2fct=jsimd_extrgb_ycc_convert_mips_dspr2; 146 break; 147 case JCS_EXT_RGBX: 148 case JCS_EXT_RGBA: 149 
mipsdspr2fct=jsimd_extrgbx_ycc_convert_mips_dspr2; 150 break; 151 case JCS_EXT_BGR: 152 mipsdspr2fct=jsimd_extbgr_ycc_convert_mips_dspr2; 153 break; 154 case JCS_EXT_BGRX: 155 case JCS_EXT_BGRA: 156 mipsdspr2fct=jsimd_extbgrx_ycc_convert_mips_dspr2; 157 break; 158 case JCS_EXT_XBGR: 159 case JCS_EXT_ABGR: 160 mipsdspr2fct=jsimd_extxbgr_ycc_convert_mips_dspr2; 161 162 break; 163 case JCS_EXT_XRGB: 164 case JCS_EXT_ARGB: 165 mipsdspr2fct=jsimd_extxrgb_ycc_convert_mips_dspr2; 166 break; 167 default: 168 mipsdspr2fct=jsimd_extrgb_ycc_convert_mips_dspr2; 169 break; 170 } 171 172 if (simd_support & JSIMD_MIPS_DSPR2) 173 mipsdspr2fct(cinfo->image_width, input_buf, 174 output_buf, output_row, num_rows); 175} 176 177GLOBAL(void) 178jsimd_rgb_gray_convert (j_compress_ptr cinfo, 179 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, 180 JDIMENSION output_row, int num_rows) 181{ 182 void (*mipsdspr2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 183 switch(cinfo->in_color_space) 184 { 185 case JCS_EXT_RGB: 186 mipsdspr2fct=jsimd_extrgb_gray_convert_mips_dspr2; 187 break; 188 case JCS_EXT_RGBX: 189 case JCS_EXT_RGBA: 190 mipsdspr2fct=jsimd_extrgbx_gray_convert_mips_dspr2; 191 break; 192 case JCS_EXT_BGR: 193 mipsdspr2fct=jsimd_extbgr_gray_convert_mips_dspr2; 194 break; 195 case JCS_EXT_BGRX: 196 case JCS_EXT_BGRA: 197 mipsdspr2fct=jsimd_extbgrx_gray_convert_mips_dspr2; 198 break; 199 case JCS_EXT_XBGR: 200 case JCS_EXT_ABGR: 201 mipsdspr2fct=jsimd_extxbgr_gray_convert_mips_dspr2; 202 break; 203 case JCS_EXT_XRGB: 204 case JCS_EXT_ARGB: 205 mipsdspr2fct=jsimd_extxrgb_gray_convert_mips_dspr2; 206 break; 207 default: 208 mipsdspr2fct=jsimd_extrgb_gray_convert_mips_dspr2; 209 break; 210 } 211 212 if (simd_support & JSIMD_MIPS_DSPR2) 213 mipsdspr2fct(cinfo->image_width, input_buf, 214 output_buf, output_row, num_rows); 215 216} 217 218GLOBAL(void) 219jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, 220 JSAMPIMAGE input_buf, JDIMENSION input_row, 221 JSAMPARRAY output_buf, int 
num_rows) 222{ 223 void (*mipsdspr2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); 224 225 switch(cinfo->out_color_space) 226 { 227 case JCS_EXT_RGB: 228 mipsdspr2fct=jsimd_ycc_extrgb_convert_mips_dspr2; 229 break; 230 case JCS_EXT_RGBX: 231 case JCS_EXT_RGBA: 232 mipsdspr2fct=jsimd_ycc_extrgbx_convert_mips_dspr2; 233 break; 234 case JCS_EXT_BGR: 235 mipsdspr2fct=jsimd_ycc_extbgr_convert_mips_dspr2; 236 break; 237 case JCS_EXT_BGRX: 238 case JCS_EXT_BGRA: 239 mipsdspr2fct=jsimd_ycc_extbgrx_convert_mips_dspr2; 240 break; 241 case JCS_EXT_XBGR: 242 case JCS_EXT_ABGR: 243 mipsdspr2fct=jsimd_ycc_extxbgr_convert_mips_dspr2; 244 break; 245 case JCS_EXT_XRGB: 246 case JCS_EXT_ARGB: 247 mipsdspr2fct=jsimd_ycc_extxrgb_convert_mips_dspr2; 248 break; 249 default: 250 mipsdspr2fct=jsimd_ycc_extrgb_convert_mips_dspr2; 251 break; 252 } 253 254 if (simd_support & JSIMD_MIPS_DSPR2) 255 mipsdspr2fct(cinfo->output_width, input_buf, 256 input_row, output_buf, num_rows); 257} 258 259GLOBAL(int) 260jsimd_can_h2v2_downsample (void) 261{ 262 init_simd(); 263 264 /* The code is optimised for these values only */ 265 if (BITS_IN_JSAMPLE != 8) 266 return 0; 267 if (sizeof(JDIMENSION) != 4) 268 return 0; 269 if (simd_support & JSIMD_MIPS_DSPR2) 270 return 1; 271 272 return 0; 273} 274 275GLOBAL(int) 276jsimd_can_h2v1_downsample (void) 277{ 278 init_simd(); 279 280 /* The code is optimised for these values only */ 281 if (BITS_IN_JSAMPLE != 8) 282 return 0; 283 if (sizeof(JDIMENSION) != 4) 284 return 0; 285 if (simd_support & JSIMD_MIPS_DSPR2) 286 return 1; 287 288 return 0; 289} 290 291GLOBAL(void) 292jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, 293 JSAMPARRAY input_data, JSAMPARRAY output_data) 294{ 295 if (simd_support & JSIMD_MIPS_DSPR2) 296 jsimd_h2v2_downsample_mips_dspr2(cinfo->image_width, 297 cinfo->max_v_samp_factor, compptr->v_samp_factor, 298 compptr->width_in_blocks, input_data, output_data); 299} 300 301GLOBAL(void) 
302jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, 303 JSAMPARRAY input_data, JSAMPARRAY output_data) 304{ 305 if (simd_support & JSIMD_MIPS_DSPR2) 306 jsimd_h2v1_downsample_mips_dspr2(cinfo->image_width, 307 cinfo->max_v_samp_factor, compptr->v_samp_factor, 308 compptr->width_in_blocks, input_data, output_data); 309} 310 311GLOBAL(int) 312jsimd_can_h2v2_upsample (void) 313{ 314 init_simd(); 315 316 /* The code is optimised for these values only */ 317 if (BITS_IN_JSAMPLE != 8) 318 return 0; 319 if (sizeof(JDIMENSION) != 4) 320 return 0; 321 if (simd_support & JSIMD_MIPS_DSPR2) 322 return 1; 323 324 return 0; 325} 326 327GLOBAL(int) 328jsimd_can_h2v1_upsample (void) 329{ 330 init_simd(); 331 332 /* The code is optimised for these values only */ 333 if (BITS_IN_JSAMPLE != 8) 334 return 0; 335 if (sizeof(JDIMENSION) != 4) 336 return 0; 337 if (simd_support & JSIMD_MIPS_DSPR2) 338 return 1; 339 340 return 0; 341} 342 343GLOBAL(void) 344jsimd_h2v2_upsample (j_decompress_ptr cinfo, 345 jpeg_component_info * compptr, 346 JSAMPARRAY input_data, 347 JSAMPARRAY * output_data_ptr) 348{ 349 if (simd_support & JSIMD_MIPS_DSPR2) 350 jsimd_h2v2_upsample_mips_dspr2(cinfo->max_v_samp_factor, 351 cinfo->output_width, input_data, output_data_ptr); 352} 353 354GLOBAL(void) 355jsimd_h2v1_upsample (j_decompress_ptr cinfo, 356 jpeg_component_info * compptr, 357 JSAMPARRAY input_data, 358 JSAMPARRAY * output_data_ptr) 359{ 360 if (simd_support & JSIMD_MIPS_DSPR2) 361 jsimd_h2v1_upsample_mips_dspr2(cinfo->max_v_samp_factor, 362 cinfo->output_width, input_data, output_data_ptr); 363} 364 365GLOBAL(int) 366jsimd_can_h2v2_fancy_upsample (void) 367{ 368 init_simd(); 369 370 /* The code is optimised for these values only */ 371 if (BITS_IN_JSAMPLE != 8) 372 return 0; 373 if (sizeof(JDIMENSION) != 4) 374 return 0; 375 if (simd_support & JSIMD_MIPS_DSPR2) 376 return 1; 377 378 return 0; 379} 380 381GLOBAL(int) 382jsimd_can_h2v1_fancy_upsample (void) 383{ 384 
init_simd(); 385 386 /* The code is optimised for these values only */ 387 if (BITS_IN_JSAMPLE != 8) 388 return 0; 389 if (sizeof(JDIMENSION) != 4) 390 return 0; 391 if (simd_support & JSIMD_MIPS_DSPR2) 392 return 1; 393 394 return 0; 395} 396 397GLOBAL(void) 398jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, 399 jpeg_component_info * compptr, 400 JSAMPARRAY input_data, 401 JSAMPARRAY * output_data_ptr) 402{ 403 if (simd_support & JSIMD_MIPS_DSPR2) 404 jsimd_h2v2_fancy_upsample_mips_dspr2(cinfo->max_v_samp_factor, 405 compptr->downsampled_width, input_data, output_data_ptr); 406} 407 408GLOBAL(void) 409jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, 410 jpeg_component_info * compptr, 411 JSAMPARRAY input_data, 412 JSAMPARRAY * output_data_ptr) 413{ 414 if (simd_support & JSIMD_MIPS_DSPR2) 415 jsimd_h2v1_fancy_upsample_mips_dspr2(cinfo->max_v_samp_factor, 416 compptr->downsampled_width, input_data, output_data_ptr); 417} 418 419GLOBAL(int) 420jsimd_can_h2v2_merged_upsample (void) 421{ 422 return 0; 423} 424 425GLOBAL(int) 426jsimd_can_h2v1_merged_upsample (void) 427{ 428 return 0; 429} 430 431GLOBAL(void) 432jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, 433 JSAMPIMAGE input_buf, 434 JDIMENSION in_row_group_ctr, 435 JSAMPARRAY output_buf) 436{ 437} 438 439GLOBAL(void) 440jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, 441 JSAMPIMAGE input_buf, 442 JDIMENSION in_row_group_ctr, 443 JSAMPARRAY output_buf) 444{ 445} 446 447GLOBAL(int) 448jsimd_can_convsamp (void) 449{ 450 return 0; 451} 452 453GLOBAL(int) 454jsimd_can_convsamp_float (void) 455{ 456 return 0; 457} 458 459GLOBAL(void) 460jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, 461 DCTELEM * workspace) 462{ 463} 464 465GLOBAL(void) 466jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, 467 FAST_FLOAT * workspace) 468{ 469} 470 471GLOBAL(int) 472jsimd_can_fdct_islow (void) 473{ 474 init_simd(); 475 476 /* The code is optimised for these values only */ 477 if (DCTSIZE != 
8) 478 return 0; 479 if (sizeof(DCTELEM) != 2) 480 return 0; 481 482 if (simd_support & JSIMD_MIPS_DSPR2) 483 return 1; 484 485 return 0; 486} 487 488GLOBAL(int) 489jsimd_can_fdct_ifast (void) 490{ 491 return 0; 492} 493 494GLOBAL(int) 495jsimd_can_fdct_float (void) 496{ 497 return 0; 498} 499 500GLOBAL(void) 501jsimd_fdct_islow (DCTELEM * data) 502{ 503 if (simd_support & JSIMD_MIPS_DSPR2) 504 jsimd_fdct_islow_mips_dspr2(data); 505} 506 507GLOBAL(void) 508jsimd_fdct_ifast (DCTELEM * data) 509{ 510} 511 512GLOBAL(void) 513jsimd_fdct_float (FAST_FLOAT * data) 514{ 515} 516 517GLOBAL(int) 518jsimd_can_quantize (void) 519{ 520 init_simd(); 521 522 /* The code is optimised for these values only */ 523 if (DCTSIZE != 8) 524 return 0; 525 if (sizeof(JCOEF) != 2) 526 return 0; 527 if (sizeof(DCTELEM) != 2) 528 return 0; 529 530 if (simd_support & JSIMD_MIPS_DSPR2) 531 return 1; 532 533 return 0; 534} 535 536GLOBAL(int) 537jsimd_can_quantize_float (void) 538{ 539 return 0; 540} 541 542GLOBAL(void) 543jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors, 544 DCTELEM * workspace) 545{ 546 if (simd_support & JSIMD_MIPS_DSPR2) 547 jsimd_quantize_mips_dspr2(coef_block, divisors, workspace); 548} 549 550GLOBAL(void) 551jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, 552 FAST_FLOAT * workspace) 553{ 554} 555 556GLOBAL(int) 557jsimd_can_idct_2x2 (void) 558{ 559 init_simd(); 560 561 /* The code is optimised for these values only */ 562 if (DCTSIZE != 8) 563 return 0; 564 if (sizeof(JCOEF) != 2) 565 return 0; 566 if (BITS_IN_JSAMPLE != 8) 567 return 0; 568 if (sizeof(JDIMENSION) != 4) 569 return 0; 570 if (sizeof(ISLOW_MULT_TYPE) != 2) 571 return 0; 572 573 if ((simd_support & JSIMD_MIPS_DSPR2)) 574 return 1; 575 576 return 0; 577} 578 579GLOBAL(int) 580jsimd_can_idct_4x4 (void) 581{ 582 init_simd(); 583 584 /* The code is optimised for these values only */ 585 if (DCTSIZE != 8) 586 return 0; 587 if (sizeof(JCOEF) != 2) 588 return 0; 589 if (BITS_IN_JSAMPLE != 
8) 590 return 0; 591 if (sizeof(JDIMENSION) != 4) 592 return 0; 593 if (sizeof(ISLOW_MULT_TYPE) != 2) 594 return 0; 595 596 if ((simd_support & JSIMD_MIPS_DSPR2)) 597 return 1; 598 599 return 0; 600} 601 602GLOBAL(int) 603jsimd_can_idct_6x6 (void) 604{ 605 init_simd(); 606 607 /* The code is optimised for these values only */ 608 if (DCTSIZE != 8) 609 return 0; 610 if (sizeof(JCOEF) != 2) 611 return 0; 612 if (BITS_IN_JSAMPLE != 8) 613 return 0; 614 if (sizeof(JDIMENSION) != 4) 615 return 0; 616 if (sizeof(ISLOW_MULT_TYPE) != 2) 617 return 0; 618 619 if ((simd_support & JSIMD_MIPS_DSPR2)) 620 return 1; 621 622 return 0; 623} 624 625GLOBAL(int) 626jsimd_can_idct_12x12 (void) 627{ 628 init_simd(); 629 630 if (BITS_IN_JSAMPLE != 8) 631 return 0; 632 if (DCTSIZE != 8) 633 return 0; 634 if (sizeof(JCOEF) != 2) 635 return 0; 636 if (sizeof(JDIMENSION) != 4) 637 return 0; 638 if (sizeof(ISLOW_MULT_TYPE) != 2) 639 return 0; 640 641 if (simd_support & JSIMD_MIPS_DSPR2) 642 return 1; 643 644 return 0; 645} 646 647GLOBAL(void) 648jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 649 JCOEFPTR coef_block, JSAMPARRAY output_buf, 650 JDIMENSION output_col) 651{ 652 if ((simd_support & JSIMD_MIPS_DSPR2)) 653 jsimd_idct_2x2_mips_dspr2(compptr->dct_table, coef_block, 654 output_buf, output_col); 655} 656 657GLOBAL(void) 658jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 659 JCOEFPTR coef_block, JSAMPARRAY output_buf, 660 JDIMENSION output_col) 661{ 662 if ((simd_support & JSIMD_MIPS_DSPR2)) 663 { 664 int workspace[DCTSIZE*4]; /* buffers data between passes */ 665 jsimd_idct_4x4_mips_dspr2(compptr->dct_table, coef_block, 666 output_buf, output_col, workspace); 667 } 668} 669 670GLOBAL(void) 671jsimd_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 672 JCOEFPTR coef_block, JSAMPARRAY output_buf, 673 JDIMENSION output_col) 674{ 675 if ((simd_support & JSIMD_MIPS_DSPR2)) 676 jsimd_idct_6x6_mips_dspr2(compptr->dct_table, 
coef_block, 677 output_buf, output_col); 678} 679 680GLOBAL(void) 681jsimd_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 682 JCOEFPTR coef_block, 683 JSAMPARRAY output_buf, JDIMENSION output_col) 684{ 685 if (simd_support & JSIMD_MIPS_DSPR2) { 686 int workspace[96]; 687 int output[12] = { 688 (int)(output_buf[0] + output_col), 689 (int)(output_buf[1] + output_col), 690 (int)(output_buf[2] + output_col), 691 (int)(output_buf[3] + output_col), 692 (int)(output_buf[4] + output_col), 693 (int)(output_buf[5] + output_col), 694 (int)(output_buf[6] + output_col), 695 (int)(output_buf[7] + output_col), 696 (int)(output_buf[8] + output_col), 697 (int)(output_buf[9] + output_col), 698 (int)(output_buf[10] + output_col), 699 (int)(output_buf[11] + output_col), 700 }; 701 jsimd_idct_12x12_pass1_mips_dspr2(coef_block, 702 compptr->dct_table, workspace); 703 jsimd_idct_12x12_pass2_mips_dspr2(workspace, output); 704 } 705} 706 707GLOBAL(int) 708jsimd_can_idct_islow (void) 709{ 710 return 0; 711} 712 713GLOBAL(int) 714jsimd_can_idct_ifast (void) 715{ 716 return 0; 717} 718 719GLOBAL(int) 720jsimd_can_idct_float (void) 721{ 722 return 0; 723} 724 725GLOBAL(void) 726jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, 727 JCOEFPTR coef_block, JSAMPARRAY output_buf, 728 JDIMENSION output_col) 729{ 730} 731 732GLOBAL(void) 733jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr, 734 JCOEFPTR coef_block, JSAMPARRAY output_buf, 735 JDIMENSION output_col) 736{ 737} 738 739GLOBAL(void) 740jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, 741 JCOEFPTR coef_block, JSAMPARRAY output_buf, 742 JDIMENSION output_col) 743{ 744} 745