1/* 2 * jsimd_i386.c 3 * 4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB 5 * Copyright 2009-2011, 2013-2014 D. R. Commander 6 * 7 * Based on the x86 SIMD extension for IJG JPEG library, 8 * Copyright (C) 1999-2006, MIYASAKA Masaru. 9 * For conditions of distribution and use, see copyright notice in jsimdext.inc 10 * 11 * This file contains the interface between the "normal" portions 12 * of the library and the SIMD implementations when running on a 13 * 32-bit x86 architecture. 14 */ 15 16#define JPEG_INTERNALS 17#include "../jinclude.h" 18#include "../jpeglib.h" 19#include "../jsimd.h" 20#include "../jdct.h" 21#include "../jsimddct.h" 22#include "jsimd.h" 23 24/* 25 * In the PIC cases, we have no guarantee that constants will keep 26 * their alignment. This macro allows us to verify it at runtime. 27 */ 28#define IS_ALIGNED(ptr, order) (((unsigned)ptr & ((1 << order) - 1)) == 0) 29 30#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ 31 32static unsigned int simd_support = ~0; 33 34/* 35 * Check what SIMD accelerations are supported. 36 * 37 * FIXME: This code is racy under a multi-threaded environment. 38 */ 39LOCAL(void) 40init_simd (void) 41{ 42 char *env = NULL; 43 44 if (simd_support != ~0U) 45 return; 46 47 simd_support = jpeg_simd_cpu_support(); 48 49 /* Force different settings through environment variables */ 50 env = getenv("JSIMD_FORCEMMX"); 51 if ((env != NULL) && (strcmp(env, "1") == 0)) 52 simd_support &= JSIMD_MMX; 53 env = getenv("JSIMD_FORCE3DNOW"); 54 if ((env != NULL) && (strcmp(env, "1") == 0)) 55 simd_support &= JSIMD_3DNOW|JSIMD_MMX; 56 env = getenv("JSIMD_FORCESSE"); 57 if ((env != NULL) && (strcmp(env, "1") == 0)) 58 simd_support &= JSIMD_SSE|JSIMD_MMX; 59 env = getenv("JSIMD_FORCESSE2"); 60 if ((env != NULL) && (strcmp(env, "1") == 0)) 61 simd_support &= JSIMD_SSE2; 62 env = getenv("JSIMD_FORCENONE"); 63 if ((env != NULL) && (strcmp(env, "1") == 0)) 64 simd_support = 0; 65} 66 67GLOBAL(int) 68jsimd_can_rgb_ycc (void) 69{ 70 init_simd(); 71 72 /* The code is optimised for these values only */ 73 if (BITS_IN_JSAMPLE != 8) 74 return 0; 75 if (sizeof(JDIMENSION) != 4) 76 return 0; 77 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 78 return 0; 79 80 if ((simd_support & JSIMD_SSE2) && 81 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) 82 return 1; 83 if (simd_support & JSIMD_MMX) 84 return 1; 85 86 return 0; 87} 88 89GLOBAL(int) 90jsimd_can_rgb_gray (void) 91{ 92 init_simd(); 93 94 /* The code is optimised for these values only */ 95 if (BITS_IN_JSAMPLE != 8) 96 return 0; 97 if (sizeof(JDIMENSION) != 4) 98 return 0; 99 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 100 return 0; 101 102 if ((simd_support & JSIMD_SSE2) && 103 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) 104 return 1; 105 if (simd_support & JSIMD_MMX) 106 return 1; 107 108 return 0; 109} 110 111GLOBAL(int) 112jsimd_can_ycc_rgb (void) 113{ 114 init_simd(); 115 116 /* The code is optimised for these values only */ 117 if (BITS_IN_JSAMPLE != 8) 118 return 0; 119 if (sizeof(JDIMENSION) != 4) 120 return 0; 121 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 122 return 0; 123 124 if ((simd_support & JSIMD_SSE2) && 125 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) 126 return 1; 127 if (simd_support & JSIMD_MMX) 128 return 1; 129 130 return 0; 131} 132 133GLOBAL(int) 134jsimd_can_ycc_rgb565 (void) 135{ 136 return 0; 137} 138 139GLOBAL(void) 140jsimd_rgb_ycc_convert (j_compress_ptr cinfo, 141 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, 142 JDIMENSION output_row, int num_rows) 143{ 144 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 145 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 146 147 switch(cinfo->in_color_space) { 148 case JCS_EXT_RGB: 149 sse2fct=jsimd_extrgb_ycc_convert_sse2; 150 mmxfct=jsimd_extrgb_ycc_convert_mmx; 151 break; 152 case JCS_EXT_RGBX: 153 case JCS_EXT_RGBA: 154 sse2fct=jsimd_extrgbx_ycc_convert_sse2; 155 mmxfct=jsimd_extrgbx_ycc_convert_mmx; 156 break; 157 case JCS_EXT_BGR: 158 sse2fct=jsimd_extbgr_ycc_convert_sse2; 159 mmxfct=jsimd_extbgr_ycc_convert_mmx; 160 break; 161 case JCS_EXT_BGRX: 162 case JCS_EXT_BGRA: 163 sse2fct=jsimd_extbgrx_ycc_convert_sse2; 164 mmxfct=jsimd_extbgrx_ycc_convert_mmx; 165 break; 166 case JCS_EXT_XBGR: 167 case JCS_EXT_ABGR: 168 sse2fct=jsimd_extxbgr_ycc_convert_sse2; 169 mmxfct=jsimd_extxbgr_ycc_convert_mmx; 170 break; 171 case JCS_EXT_XRGB: 172 case JCS_EXT_ARGB: 173 sse2fct=jsimd_extxrgb_ycc_convert_sse2; 174 mmxfct=jsimd_extxrgb_ycc_convert_mmx; 175 break; 176 default: 177 sse2fct=jsimd_rgb_ycc_convert_sse2; 178 mmxfct=jsimd_rgb_ycc_convert_mmx; 179 break; 180 } 181 182 if ((simd_support & JSIMD_SSE2) && 183 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) 184 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); 185 else if (simd_support & JSIMD_MMX) 186 mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); 187} 188 189GLOBAL(void) 190jsimd_rgb_gray_convert (j_compress_ptr cinfo, 191 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, 192 JDIMENSION output_row, int num_rows) 193{ 194 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 195 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 196 197 switch(cinfo->in_color_space) { 198 case JCS_EXT_RGB: 199 sse2fct=jsimd_extrgb_gray_convert_sse2; 200 mmxfct=jsimd_extrgb_gray_convert_mmx; 201 break; 202 case JCS_EXT_RGBX: 203 case JCS_EXT_RGBA: 204 sse2fct=jsimd_extrgbx_gray_convert_sse2; 205 mmxfct=jsimd_extrgbx_gray_convert_mmx; 206 break; 207 case JCS_EXT_BGR: 208 sse2fct=jsimd_extbgr_gray_convert_sse2; 209 mmxfct=jsimd_extbgr_gray_convert_mmx; 210 break; 211 case JCS_EXT_BGRX: 212 case JCS_EXT_BGRA: 213 sse2fct=jsimd_extbgrx_gray_convert_sse2; 214 mmxfct=jsimd_extbgrx_gray_convert_mmx; 215 break; 216 case JCS_EXT_XBGR: 217 case JCS_EXT_ABGR: 218 sse2fct=jsimd_extxbgr_gray_convert_sse2; 219 mmxfct=jsimd_extxbgr_gray_convert_mmx; 220 break; 221 case JCS_EXT_XRGB: 222 case JCS_EXT_ARGB: 223 sse2fct=jsimd_extxrgb_gray_convert_sse2; 224 mmxfct=jsimd_extxrgb_gray_convert_mmx; 225 break; 226 default: 227 sse2fct=jsimd_rgb_gray_convert_sse2; 228 mmxfct=jsimd_rgb_gray_convert_mmx; 229 break; 230 } 231 232 if ((simd_support & JSIMD_SSE2) && 233 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) 234 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); 235 else if (simd_support & JSIMD_MMX) 236 mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); 237} 238 239GLOBAL(void) 240jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, 241 JSAMPIMAGE input_buf, JDIMENSION input_row, 242 JSAMPARRAY output_buf, int num_rows) 243{ 244 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); 245 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); 246 247 switch(cinfo->out_color_space) { 248 case JCS_EXT_RGB: 249 sse2fct=jsimd_ycc_extrgb_convert_sse2; 250 mmxfct=jsimd_ycc_extrgb_convert_mmx; 251 break; 252 case JCS_EXT_RGBX: 253 case JCS_EXT_RGBA: 254 sse2fct=jsimd_ycc_extrgbx_convert_sse2; 255 mmxfct=jsimd_ycc_extrgbx_convert_mmx; 256 break; 257 case JCS_EXT_BGR: 258 sse2fct=jsimd_ycc_extbgr_convert_sse2; 259 mmxfct=jsimd_ycc_extbgr_convert_mmx; 260 break; 261 case JCS_EXT_BGRX: 262 case JCS_EXT_BGRA: 263 sse2fct=jsimd_ycc_extbgrx_convert_sse2; 264 mmxfct=jsimd_ycc_extbgrx_convert_mmx; 265 break; 266 case JCS_EXT_XBGR: 267 case JCS_EXT_ABGR: 268 sse2fct=jsimd_ycc_extxbgr_convert_sse2; 269 mmxfct=jsimd_ycc_extxbgr_convert_mmx; 270 break; 271 case JCS_EXT_XRGB: 272 case JCS_EXT_ARGB: 273 sse2fct=jsimd_ycc_extxrgb_convert_sse2; 274 mmxfct=jsimd_ycc_extxrgb_convert_mmx; 275 break; 276 default: 277 sse2fct=jsimd_ycc_rgb_convert_sse2; 278 mmxfct=jsimd_ycc_rgb_convert_mmx; 279 break; 280 } 281 282 if ((simd_support & JSIMD_SSE2) && 283 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) 284 sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); 285 else if (simd_support & JSIMD_MMX) 286 mmxfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); 287} 288 289GLOBAL(void) 290jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, 291 JSAMPIMAGE input_buf, JDIMENSION input_row, 292 JSAMPARRAY output_buf, int num_rows) 293{ 294} 295 296GLOBAL(int) 297jsimd_can_h2v2_downsample (void) 298{ 299 init_simd(); 300 301 /* The code is optimised for these values only */ 302 if (BITS_IN_JSAMPLE != 8) 303 return 0; 304 if (sizeof(JDIMENSION) != 4) 305 return 0; 306 307 if (simd_support & JSIMD_SSE2) 308 return 1; 309 if (simd_support & JSIMD_MMX) 310 return 1; 311 312 return 0; 313} 314 315GLOBAL(int) 316jsimd_can_h2v1_downsample (void) 317{ 318 init_simd(); 319 320 /* The code is optimised for these values only */ 321 if (BITS_IN_JSAMPLE != 8) 322 return 0; 323 if (sizeof(JDIMENSION) != 4) 324 return 0; 325 326 if (simd_support & JSIMD_SSE2) 327 return 1; 328 if (simd_support & JSIMD_MMX) 329 return 1; 330 331 return 0; 332} 333 334GLOBAL(void) 335jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, 336 JSAMPARRAY input_data, JSAMPARRAY output_data) 337{ 338 if (simd_support & JSIMD_SSE2) 339 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, 340 compptr->v_samp_factor, 341 compptr->width_in_blocks, input_data, 342 output_data); 343 else if (simd_support & JSIMD_MMX) 344 jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, 345 compptr->v_samp_factor, compptr->width_in_blocks, 346 input_data, output_data); 347} 348 349GLOBAL(void) 350jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, 351 JSAMPARRAY input_data, JSAMPARRAY output_data) 352{ 353 if (simd_support & JSIMD_SSE2) 354 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, 355 compptr->v_samp_factor, 356 compptr->width_in_blocks, input_data, 357 output_data); 358 else if (simd_support & JSIMD_MMX) 359 jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, 360 compptr->v_samp_factor, compptr->width_in_blocks, 361 input_data, output_data); 362} 363 364GLOBAL(int) 365jsimd_can_h2v2_upsample (void) 366{ 367 init_simd(); 368 369 /* The code is optimised for these values only */ 370 if (BITS_IN_JSAMPLE != 8) 371 return 0; 372 if (sizeof(JDIMENSION) != 4) 373 return 0; 374 375 if (simd_support & JSIMD_SSE2) 376 return 1; 377 if (simd_support & JSIMD_MMX) 378 return 1; 379 380 return 0; 381} 382 383GLOBAL(int) 384jsimd_can_h2v1_upsample (void) 385{ 386 init_simd(); 387 388 /* The code is optimised for these values only */ 389 if (BITS_IN_JSAMPLE != 8) 390 return 0; 391 if (sizeof(JDIMENSION) != 4) 392 return 0; 393 394 if (simd_support & JSIMD_SSE2) 395 return 1; 396 if (simd_support & JSIMD_MMX) 397 return 1; 398 399 return 0; 400} 401 402GLOBAL(void) 403jsimd_h2v2_upsample (j_decompress_ptr cinfo, 404 jpeg_component_info * compptr, 405 JSAMPARRAY input_data, 406 JSAMPARRAY * output_data_ptr) 407{ 408 if (simd_support & JSIMD_SSE2) 409 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, 410 input_data, output_data_ptr); 411 else if (simd_support & JSIMD_MMX) 412 jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width, 413 input_data, output_data_ptr); 414} 415 416GLOBAL(void) 417jsimd_h2v1_upsample (j_decompress_ptr cinfo, 418 jpeg_component_info * compptr, 419 JSAMPARRAY input_data, 420 JSAMPARRAY * output_data_ptr) 421{ 422 if (simd_support & JSIMD_SSE2) 423 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, 424 input_data, output_data_ptr); 425 else if (simd_support & JSIMD_MMX) 426 jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width, 427 input_data, output_data_ptr); 428} 429 430GLOBAL(int) 431jsimd_can_h2v2_fancy_upsample (void) 432{ 433 init_simd(); 434 435 /* The code is optimised for these values only */ 436 if (BITS_IN_JSAMPLE != 8) 437 return 0; 438 if (sizeof(JDIMENSION) != 4) 439 return 0; 440 441 if ((simd_support & JSIMD_SSE2) && 442 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) 443 return 1; 444 if (simd_support & JSIMD_MMX) 445 return 1; 446 447 return 0; 448} 449 450GLOBAL(int) 451jsimd_can_h2v1_fancy_upsample (void) 452{ 453 init_simd(); 454 455 /* The code is optimised for these values only */ 456 if (BITS_IN_JSAMPLE != 8) 457 return 0; 458 if (sizeof(JDIMENSION) != 4) 459 return 0; 460 461 if ((simd_support & JSIMD_SSE2) && 462 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) 463 return 1; 464 if (simd_support & JSIMD_MMX) 465 return 1; 466 467 return 0; 468} 469 470GLOBAL(void) 471jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, 472 jpeg_component_info * compptr, 473 JSAMPARRAY input_data, 474 JSAMPARRAY * output_data_ptr) 475{ 476 if ((simd_support & JSIMD_SSE2) && 477 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) 478 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, 479 compptr->downsampled_width, input_data, 480 output_data_ptr); 481 else if (simd_support & JSIMD_MMX) 482 jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor, 483 compptr->downsampled_width, input_data, 484 output_data_ptr); 485} 486 487GLOBAL(void) 488jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, 489 jpeg_component_info * compptr, 490 JSAMPARRAY input_data, 491 JSAMPARRAY * output_data_ptr) 492{ 493 if ((simd_support & JSIMD_SSE2) && 494 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) 495 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, 496 compptr->downsampled_width, input_data, 497 output_data_ptr); 498 else if (simd_support & JSIMD_MMX) 499 jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor, 500 compptr->downsampled_width, input_data, 501 output_data_ptr); 502} 503 504GLOBAL(int) 505jsimd_can_h2v2_merged_upsample (void) 506{ 507 init_simd(); 508 509 /* The code is optimised for these values only */ 510 if (BITS_IN_JSAMPLE != 8) 511 return 0; 512 if (sizeof(JDIMENSION) != 4) 513 return 0; 514 515 if ((simd_support & JSIMD_SSE2) && 516 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) 517 return 1; 518 if (simd_support & JSIMD_MMX) 519 return 1; 520 521 return 0; 522} 523 524GLOBAL(int) 525jsimd_can_h2v1_merged_upsample (void) 526{ 527 init_simd(); 528 529 /* The code is optimised for these values only */ 530 if (BITS_IN_JSAMPLE != 8) 531 return 0; 532 if (sizeof(JDIMENSION) != 4) 533 return 0; 534 535 if ((simd_support & JSIMD_SSE2) && 536 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) 537 return 1; 538 if (simd_support & JSIMD_MMX) 539 return 1; 540 541 return 0; 542} 543 544GLOBAL(void) 545jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, 546 JSAMPIMAGE input_buf, 547 JDIMENSION in_row_group_ctr, 548 JSAMPARRAY output_buf) 549{ 550 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); 551 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); 552 553 switch(cinfo->out_color_space) { 554 case JCS_EXT_RGB: 555 sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2; 556 mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx; 557 break; 558 case JCS_EXT_RGBX: 559 case JCS_EXT_RGBA: 560 sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2; 561 mmxfct=jsimd_h2v2_extrgbx_merged_upsample_mmx; 562 break; 563 case JCS_EXT_BGR: 564 sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2; 565 mmxfct=jsimd_h2v2_extbgr_merged_upsample_mmx; 566 break; 567 case JCS_EXT_BGRX: 568 case JCS_EXT_BGRA: 569 sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2; 570 mmxfct=jsimd_h2v2_extbgrx_merged_upsample_mmx; 571 break; 572 case JCS_EXT_XBGR: 573 case JCS_EXT_ABGR: 574 sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2; 575 mmxfct=jsimd_h2v2_extxbgr_merged_upsample_mmx; 576 break; 577 case JCS_EXT_XRGB: 578 case JCS_EXT_ARGB: 579 sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2; 580 mmxfct=jsimd_h2v2_extxrgb_merged_upsample_mmx; 581 break; 582 default: 583 sse2fct=jsimd_h2v2_merged_upsample_sse2; 584 mmxfct=jsimd_h2v2_merged_upsample_mmx; 585 break; 586 } 587 588 if ((simd_support & JSIMD_SSE2) && 589 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) 590 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); 591 else if (simd_support & JSIMD_MMX) 592 mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); 593} 594 595GLOBAL(void) 596jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, 597 JSAMPIMAGE input_buf, 598 JDIMENSION in_row_group_ctr, 599 JSAMPARRAY output_buf) 600{ 601 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); 602 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); 603 604 switch(cinfo->out_color_space) { 605 case JCS_EXT_RGB: 606 sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2; 607 mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx; 608 break; 609 case JCS_EXT_RGBX: 610 case JCS_EXT_RGBA: 611 sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2; 612 mmxfct=jsimd_h2v1_extrgbx_merged_upsample_mmx; 613 break; 614 case JCS_EXT_BGR: 615 sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2; 616 mmxfct=jsimd_h2v1_extbgr_merged_upsample_mmx; 617 break; 618 case JCS_EXT_BGRX: 619 case JCS_EXT_BGRA: 620 sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2; 621 mmxfct=jsimd_h2v1_extbgrx_merged_upsample_mmx; 622 break; 623 case JCS_EXT_XBGR: 624 case JCS_EXT_ABGR: 625 sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2; 626 mmxfct=jsimd_h2v1_extxbgr_merged_upsample_mmx; 627 break; 628 case JCS_EXT_XRGB: 629 case JCS_EXT_ARGB: 630 sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2; 631 mmxfct=jsimd_h2v1_extxrgb_merged_upsample_mmx; 632 break; 633 default: 634 sse2fct=jsimd_h2v1_merged_upsample_sse2; 635 mmxfct=jsimd_h2v1_merged_upsample_mmx; 636 break; 637 } 638 639 if ((simd_support & JSIMD_SSE2) && 640 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) 641 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); 642 else if (simd_support & JSIMD_MMX) 643 mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); 644} 645 646GLOBAL(int) 647jsimd_can_convsamp (void) 648{ 649 init_simd(); 650 651 /* The code is optimised for these values only */ 652 if (DCTSIZE != 8) 653 return 0; 654 if (BITS_IN_JSAMPLE != 8) 655 return 0; 656 if (sizeof(JDIMENSION) != 4) 657 return 0; 658 if (sizeof(DCTELEM) != 2) 659 return 0; 660 661 if (simd_support & JSIMD_SSE2) 662 return 1; 663 if (simd_support & JSIMD_MMX) 664 return 1; 665 666 return 0; 667} 668 669GLOBAL(int) 670jsimd_can_convsamp_float (void) 671{ 672 init_simd(); 673 674 /* The code is optimised for these values only */ 675 if (DCTSIZE != 8) 676 return 0; 677 if (BITS_IN_JSAMPLE != 8) 678 return 0; 679 if (sizeof(JDIMENSION) != 4) 680 return 0; 681 if (sizeof(FAST_FLOAT) != 4) 682 return 0; 683 684 if (simd_support & JSIMD_SSE2) 685 return 1; 686 if (simd_support & JSIMD_SSE) 687 return 1; 688 if (simd_support & JSIMD_3DNOW) 689 return 1; 690 691 return 0; 692} 693 694GLOBAL(void) 695jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, 696 DCTELEM * workspace) 697{ 698 if (simd_support & JSIMD_SSE2) 699 jsimd_convsamp_sse2(sample_data, start_col, workspace); 700 else if (simd_support & JSIMD_MMX) 701 jsimd_convsamp_mmx(sample_data, start_col, workspace); 702} 703 704GLOBAL(void) 705jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, 706 FAST_FLOAT * workspace) 707{ 708 if (simd_support & JSIMD_SSE2) 709 jsimd_convsamp_float_sse2(sample_data, start_col, workspace); 710 else if (simd_support & JSIMD_SSE) 711 jsimd_convsamp_float_sse(sample_data, start_col, workspace); 712 else if (simd_support & JSIMD_3DNOW) 713 jsimd_convsamp_float_3dnow(sample_data, start_col, workspace); 714} 715 716GLOBAL(int) 717jsimd_can_fdct_islow (void) 718{ 719 init_simd(); 720 721 /* The code is optimised for these values only */ 722 if (DCTSIZE != 8) 723 return 0; 724 if (sizeof(DCTELEM) != 2) 725 return 0; 726 727 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) 728 return 1; 729 if (simd_support & JSIMD_MMX) 730 return 1; 731 732 return 0; 733} 734 735GLOBAL(int) 736jsimd_can_fdct_ifast (void) 737{ 738 init_simd(); 739 740 /* The code is optimised for these values only */ 741 if (DCTSIZE != 8) 742 return 0; 743 if (sizeof(DCTELEM) != 2) 744 return 0; 745 746 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2)) 747 return 1; 748 if (simd_support & JSIMD_MMX) 749 return 1; 750 751 return 0; 752} 753 754GLOBAL(int) 755jsimd_can_fdct_float (void) 756{ 757 init_simd(); 758 759 /* The code is optimised for these values only */ 760 if (DCTSIZE != 8) 761 return 0; 762 if (sizeof(FAST_FLOAT) != 4) 763 return 0; 764 765 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) 766 return 1; 767 if (simd_support & JSIMD_3DNOW) 768 return 1; 769 770 return 0; 771} 772 773GLOBAL(void) 774jsimd_fdct_islow (DCTELEM * data) 775{ 776 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) 777 jsimd_fdct_islow_sse2(data); 778 else if (simd_support & JSIMD_MMX) 779 jsimd_fdct_islow_mmx(data); 780} 781 782GLOBAL(void) 783jsimd_fdct_ifast (DCTELEM * data) 784{ 785 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) 786 jsimd_fdct_ifast_sse2(data); 787 else if (simd_support & JSIMD_MMX) 788 jsimd_fdct_ifast_mmx(data); 789} 790 791GLOBAL(void) 792jsimd_fdct_float (FAST_FLOAT * data) 793{ 794 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) 795 jsimd_fdct_float_sse(data); 796 else if (simd_support & JSIMD_3DNOW) 797 jsimd_fdct_float_3dnow(data); 798} 799 800GLOBAL(int) 801jsimd_can_quantize (void) 802{ 803 init_simd(); 804 805 /* The code is optimised for these values only */ 806 if (DCTSIZE != 8) 807 return 0; 808 if (sizeof(JCOEF) != 2) 809 return 0; 810 if (sizeof(DCTELEM) != 2) 811 return 0; 812 813 if (simd_support & JSIMD_SSE2) 814 return 1; 815 if (simd_support & JSIMD_MMX) 816 return 1; 817 818 return 0; 819} 820 821GLOBAL(int) 822jsimd_can_quantize_float (void) 823{ 824 init_simd(); 825 826 /* The code is optimised for these values only */ 827 if (DCTSIZE != 8) 828 return 0; 829 if (sizeof(JCOEF) != 2) 830 return 0; 831 if (sizeof(FAST_FLOAT) != 4) 832 return 0; 833 834 if (simd_support & JSIMD_SSE2) 835 return 1; 836 if (simd_support & JSIMD_SSE) 837 return 1; 838 if (simd_support & JSIMD_3DNOW) 839 return 1; 840 841 return 0; 842} 843 844GLOBAL(void) 845jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors, 846 DCTELEM * workspace) 847{ 848 if (simd_support & JSIMD_SSE2) 849 jsimd_quantize_sse2(coef_block, divisors, workspace); 850 else if (simd_support & JSIMD_MMX) 851 jsimd_quantize_mmx(coef_block, divisors, workspace); 852} 853 854GLOBAL(void) 855jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, 856 FAST_FLOAT * workspace) 857{ 858 if (simd_support & JSIMD_SSE2) 859 jsimd_quantize_float_sse2(coef_block, divisors, workspace); 860 else if (simd_support & JSIMD_SSE) 861 jsimd_quantize_float_sse(coef_block, divisors, workspace); 862 else if (simd_support & JSIMD_3DNOW) 863 jsimd_quantize_float_3dnow(coef_block, divisors, workspace); 864} 865 866GLOBAL(int) 867jsimd_can_idct_2x2 (void) 868{ 869 init_simd(); 870 871 /* The code is optimised for these values only */ 872 if (DCTSIZE != 8) 873 return 0; 874 if (sizeof(JCOEF) != 2) 875 return 0; 876 if (BITS_IN_JSAMPLE != 8) 877 return 0; 878 if (sizeof(JDIMENSION) != 4) 879 return 0; 880 if (sizeof(ISLOW_MULT_TYPE) != 2) 881 return 0; 882 883 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) 884 return 1; 885 if (simd_support & JSIMD_MMX) 886 return 1; 887 888 return 0; 889} 890 891GLOBAL(int) 892jsimd_can_idct_4x4 (void) 893{ 894 init_simd(); 895 896 /* The code is optimised for these values only */ 897 if (DCTSIZE != 8) 898 return 0; 899 if (sizeof(JCOEF) != 2) 900 return 0; 901 if (BITS_IN_JSAMPLE != 8) 902 return 0; 903 if (sizeof(JDIMENSION) != 4) 904 return 0; 905 if (sizeof(ISLOW_MULT_TYPE) != 2) 906 return 0; 907 908 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) 909 return 1; 910 if (simd_support & JSIMD_MMX) 911 return 1; 912 913 return 0; 914} 915 916GLOBAL(void) 917jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 918 JCOEFPTR coef_block, JSAMPARRAY output_buf, 919 JDIMENSION output_col) 920{ 921 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) 922 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, 923 output_col); 924 else if (simd_support & JSIMD_MMX) 925 jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col); 926} 927 928GLOBAL(void) 929jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 930 JCOEFPTR coef_block, JSAMPARRAY output_buf, 931 JDIMENSION output_col) 932{ 933 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) 934 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, 935 output_col); 936 else if (simd_support & JSIMD_MMX) 937 jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col); 938} 939 940GLOBAL(int) 941jsimd_can_idct_islow (void) 942{ 943 init_simd(); 944 945 /* The code is optimised for these values only */ 946 if (DCTSIZE != 8) 947 return 0; 948 if (sizeof(JCOEF) != 2) 949 return 0; 950 if (BITS_IN_JSAMPLE != 8) 951 return 0; 952 if (sizeof(JDIMENSION) != 4) 953 return 0; 954 if (sizeof(ISLOW_MULT_TYPE) != 2) 955 return 0; 956 957 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) 958 return 1; 959 if (simd_support & JSIMD_MMX) 960 return 1; 961 962 return 0; 963} 964 965GLOBAL(int) 966jsimd_can_idct_ifast (void) 967{ 968 init_simd(); 969 970 /* The code is optimised for these values only */ 971 if (DCTSIZE != 8) 972 return 0; 973 if (sizeof(JCOEF) != 2) 974 return 0; 975 if (BITS_IN_JSAMPLE != 8) 976 return 0; 977 if (sizeof(JDIMENSION) != 4) 978 return 0; 979 if (sizeof(IFAST_MULT_TYPE) != 2) 980 return 0; 981 if (IFAST_SCALE_BITS != 2) 982 return 0; 983 984 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) 985 return 1; 986 if (simd_support & JSIMD_MMX) 987 return 1; 988 989 return 0; 990} 991 992GLOBAL(int) 993jsimd_can_idct_float (void) 994{ 995 init_simd(); 996 997 if (DCTSIZE != 8) 998 return 0; 999 if (sizeof(JCOEF) != 2) 1000 return 0; 1001 if (BITS_IN_JSAMPLE != 8) 1002 return 0; 1003 if (sizeof(JDIMENSION) != 4) 1004 return 0; 1005 if (sizeof(FAST_FLOAT) != 4) 1006 return 0; 1007 if (sizeof(FLOAT_MULT_TYPE) != 4) 1008 return 0; 1009 1010 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) 1011 return 1; 1012 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) 1013 return 1; 1014 if (simd_support & JSIMD_3DNOW) 1015 return 1; 1016 1017 return 0; 1018} 1019 1020GLOBAL(void) 1021jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1022 JCOEFPTR coef_block, JSAMPARRAY output_buf, 1023 JDIMENSION output_col) 1024{ 1025 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) 1026 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, 1027 output_col); 1028 else if (simd_support & JSIMD_MMX) 1029 jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf, 1030 output_col); 1031} 1032 1033GLOBAL(void) 1034jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1035 JCOEFPTR coef_block, JSAMPARRAY output_buf, 1036 JDIMENSION output_col) 1037{ 1038 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) 1039 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, 1040 output_col); 1041 else if (simd_support & JSIMD_MMX) 1042 jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf, 1043 output_col); 1044} 1045 1046GLOBAL(void) 1047jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1048 JCOEFPTR coef_block, JSAMPARRAY output_buf, 1049 JDIMENSION output_col) 1050{ 1051 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) 1052 jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf, 1053 output_col); 1054 else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) 1055 jsimd_idct_float_sse(compptr->dct_table, coef_block, output_buf, 1056 output_col); 1057 else if (simd_support & JSIMD_3DNOW) 1058 jsimd_idct_float_3dnow(compptr->dct_table, coef_block, output_buf, 1059 output_col); 1060} 1061 1062