jsimd.c (27656B)
1 /* 2 * jsimd_x86_64.c 3 * 4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB 5 * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022-2023, D. R. Commander. 6 * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois. 7 * 8 * Based on the x86 SIMD extension for IJG JPEG library, 9 * Copyright (C) 1999-2006, MIYASAKA Masaru. 10 * For conditions of distribution and use, see copyright notice in jsimdext.inc 11 * 12 * This file contains the interface between the "normal" portions 13 * of the library and the SIMD implementations when running on a 14 * 64-bit x86 architecture. 15 */ 16 17 #define JPEG_INTERNALS 18 #include "../../jinclude.h" 19 #include "../../jpeglib.h" 20 #include "../../jsimd.h" 21 #include "../../jdct.h" 22 #include "../../jsimddct.h" 23 #include "../jsimd.h" 24 25 /* 26 * In the PIC cases, we have no guarantee that constants will keep 27 * their alignment. This macro allows us to verify it at runtime. 28 */ 29 #define IS_ALIGNED(ptr, order) (((size_t)ptr & ((1 << order) - 1)) == 0) 30 31 #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ 32 #define IS_ALIGNED_AVX(ptr) (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */ 33 34 static THREAD_LOCAL unsigned int simd_support = (unsigned int)(~0); 35 static THREAD_LOCAL unsigned int simd_huffman = 1; 36 37 /* 38 * Check what SIMD accelerations are supported. 39 */ 40 LOCAL(void) 41 init_simd(void) 42 { 43 #ifndef NO_GETENV 44 char env[2] = { 0 }; 45 #endif 46 47 if (simd_support != ~0U) 48 return; 49 50 simd_support = jpeg_simd_cpu_support(); 51 52 #ifndef NO_GETENV 53 /* Force different settings through environment variables */ 54 if (!GETENV_S(env, 2, "JSIMD_FORCESSE2") && !strcmp(env, "1")) 55 simd_support &= JSIMD_SSE2; 56 if (!GETENV_S(env, 2, "JSIMD_FORCEAVX2") && !strcmp(env, "1")) 57 simd_support &= JSIMD_AVX2; 58 if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1")) 59 simd_support = 0; 60 if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1")) 61 simd_huffman = 0; 62 #endif 63 } 64 65 GLOBAL(int) 66 jsimd_can_rgb_ycc(void) 67 { 68 init_simd(); 69 70 /* The code is optimised for these values only */ 71 if (BITS_IN_JSAMPLE != 8) 72 return 0; 73 if (sizeof(JDIMENSION) != 4) 74 return 0; 75 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 76 return 0; 77 78 if ((simd_support & JSIMD_AVX2) && 79 IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2)) 80 return 1; 81 if ((simd_support & JSIMD_SSE2) && 82 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) 83 return 1; 84 85 return 0; 86 } 87 88 GLOBAL(int) 89 jsimd_can_rgb_gray(void) 90 { 91 init_simd(); 92 93 /* The code is optimised for these values only */ 94 if (BITS_IN_JSAMPLE != 8) 95 return 0; 96 if (sizeof(JDIMENSION) != 4) 97 return 0; 98 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 99 return 0; 100 101 if ((simd_support & JSIMD_AVX2) && 102 IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2)) 103 return 1; 104 if ((simd_support & JSIMD_SSE2) && 105 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) 106 return 1; 107 108 return 0; 109 } 110 111 GLOBAL(int) 112 jsimd_can_ycc_rgb(void) 113 { 114 init_simd(); 115 116 /* The code is optimised for these values only */ 117 if (BITS_IN_JSAMPLE != 8) 118 return 0; 119 if (sizeof(JDIMENSION) != 4) 120 return 0; 121 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 122 return 0; 123 124 if ((simd_support & JSIMD_AVX2) && 125 IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2)) 126 return 1; 127 if ((simd_support & JSIMD_SSE2) && 128 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) 129 return 1; 130 131 return 0; 132 } 133 134 GLOBAL(int) 135 jsimd_can_ycc_rgb565(void) 136 { 137 return 0; 138 } 139 140 GLOBAL(void) 141 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, 142 JSAMPIMAGE output_buf, JDIMENSION output_row, 143 int num_rows) 144 { 145 void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 146 void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 147 148 if (simd_support == ~0U) 149 init_simd(); 150 151 switch (cinfo->in_color_space) { 152 case JCS_EXT_RGB: 153 avx2fct = jsimd_extrgb_ycc_convert_avx2; 154 sse2fct = jsimd_extrgb_ycc_convert_sse2; 155 break; 156 case JCS_EXT_RGBX: 157 case JCS_EXT_RGBA: 158 avx2fct = jsimd_extrgbx_ycc_convert_avx2; 159 sse2fct = jsimd_extrgbx_ycc_convert_sse2; 160 break; 161 case JCS_EXT_BGR: 162 avx2fct = jsimd_extbgr_ycc_convert_avx2; 163 sse2fct = jsimd_extbgr_ycc_convert_sse2; 164 break; 165 case JCS_EXT_BGRX: 166 case JCS_EXT_BGRA: 167 avx2fct = jsimd_extbgrx_ycc_convert_avx2; 168 sse2fct = jsimd_extbgrx_ycc_convert_sse2; 169 break; 170 case JCS_EXT_XBGR: 171 case JCS_EXT_ABGR: 172 avx2fct = jsimd_extxbgr_ycc_convert_avx2; 173 sse2fct = jsimd_extxbgr_ycc_convert_sse2; 174 break; 175 case JCS_EXT_XRGB: 176 case JCS_EXT_ARGB: 177 avx2fct = jsimd_extxrgb_ycc_convert_avx2; 178 sse2fct = jsimd_extxrgb_ycc_convert_sse2; 179 break; 180 default: 181 avx2fct = jsimd_rgb_ycc_convert_avx2; 182 sse2fct = jsimd_rgb_ycc_convert_sse2; 183 break; 184 } 185 186 if (simd_support & JSIMD_AVX2) 187 avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); 188 else 189 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); 190 } 191 192 GLOBAL(void) 193 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, 194 JSAMPIMAGE output_buf, JDIMENSION output_row, 195 int num_rows) 196 { 197 void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 198 void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 199 200 if (simd_support == ~0U) 201 init_simd(); 202 203 switch (cinfo->in_color_space) { 204 case JCS_EXT_RGB: 205 avx2fct = jsimd_extrgb_gray_convert_avx2; 206 sse2fct = jsimd_extrgb_gray_convert_sse2; 207 break; 208 case JCS_EXT_RGBX: 209 case JCS_EXT_RGBA: 210 avx2fct = jsimd_extrgbx_gray_convert_avx2; 211 sse2fct = jsimd_extrgbx_gray_convert_sse2; 212 break; 213 case JCS_EXT_BGR: 214 avx2fct = jsimd_extbgr_gray_convert_avx2; 215 sse2fct = jsimd_extbgr_gray_convert_sse2; 216 break; 217 case JCS_EXT_BGRX: 218 case JCS_EXT_BGRA: 219 avx2fct = jsimd_extbgrx_gray_convert_avx2; 220 sse2fct = jsimd_extbgrx_gray_convert_sse2; 221 break; 222 case JCS_EXT_XBGR: 223 case JCS_EXT_ABGR: 224 avx2fct = jsimd_extxbgr_gray_convert_avx2; 225 sse2fct = jsimd_extxbgr_gray_convert_sse2; 226 break; 227 case JCS_EXT_XRGB: 228 case JCS_EXT_ARGB: 229 avx2fct = jsimd_extxrgb_gray_convert_avx2; 230 sse2fct = jsimd_extxrgb_gray_convert_sse2; 231 break; 232 default: 233 avx2fct = jsimd_rgb_gray_convert_avx2; 234 sse2fct = jsimd_rgb_gray_convert_sse2; 235 break; 236 } 237 238 if (simd_support & JSIMD_AVX2) 239 avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); 240 else 241 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); 242 } 243 244 GLOBAL(void) 245 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, 246 JDIMENSION input_row, JSAMPARRAY output_buf, 247 int num_rows) 248 { 249 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); 250 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); 251 252 if (simd_support == ~0U) 253 init_simd(); 254 255 switch (cinfo->out_color_space) { 256 case JCS_EXT_RGB: 257 avx2fct = jsimd_ycc_extrgb_convert_avx2; 258 sse2fct = jsimd_ycc_extrgb_convert_sse2; 259 break; 260 case JCS_EXT_RGBX: 261 case JCS_EXT_RGBA: 262 avx2fct = jsimd_ycc_extrgbx_convert_avx2; 263 sse2fct = jsimd_ycc_extrgbx_convert_sse2; 264 break; 265 case JCS_EXT_BGR: 266 avx2fct = jsimd_ycc_extbgr_convert_avx2; 267 sse2fct = jsimd_ycc_extbgr_convert_sse2; 268 break; 269 case JCS_EXT_BGRX: 270 case JCS_EXT_BGRA: 271 avx2fct = jsimd_ycc_extbgrx_convert_avx2; 272 sse2fct = jsimd_ycc_extbgrx_convert_sse2; 273 break; 274 case JCS_EXT_XBGR: 275 case JCS_EXT_ABGR: 276 avx2fct = jsimd_ycc_extxbgr_convert_avx2; 277 sse2fct = jsimd_ycc_extxbgr_convert_sse2; 278 break; 279 case JCS_EXT_XRGB: 280 case JCS_EXT_ARGB: 281 avx2fct = jsimd_ycc_extxrgb_convert_avx2; 282 sse2fct = jsimd_ycc_extxrgb_convert_sse2; 283 break; 284 default: 285 avx2fct = jsimd_ycc_rgb_convert_avx2; 286 sse2fct = jsimd_ycc_rgb_convert_sse2; 287 break; 288 } 289 290 if (simd_support & JSIMD_AVX2) 291 avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); 292 else 293 sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); 294 } 295 296 GLOBAL(void) 297 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, 298 JDIMENSION input_row, JSAMPARRAY output_buf, 299 int num_rows) 300 { 301 } 302 303 GLOBAL(int) 304 jsimd_can_h2v2_downsample(void) 305 { 306 init_simd(); 307 308 /* The code is optimised for these values only */ 309 if (BITS_IN_JSAMPLE != 8) 310 return 0; 311 if (sizeof(JDIMENSION) != 4) 312 return 0; 313 314 if (simd_support & JSIMD_AVX2) 315 return 1; 316 if (simd_support & JSIMD_SSE2) 317 return 1; 318 319 return 0; 320 } 321 322 GLOBAL(int) 323 jsimd_can_h2v1_downsample(void) 324 { 325 init_simd(); 326 327 /* The code is optimised for these values only */ 328 if (BITS_IN_JSAMPLE != 8) 329 return 0; 330 if (sizeof(JDIMENSION) != 4) 331 return 0; 332 333 if (simd_support & JSIMD_AVX2) 334 return 1; 335 if (simd_support & JSIMD_SSE2) 336 return 1; 337 338 return 0; 339 } 340 341 GLOBAL(void) 342 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, 343 JSAMPARRAY input_data, JSAMPARRAY output_data) 344 { 345 if (simd_support == ~0U) 346 init_simd(); 347 348 if (simd_support & JSIMD_AVX2) 349 jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor, 350 compptr->v_samp_factor, 351 compptr->width_in_blocks, input_data, 352 output_data); 353 else 354 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, 355 compptr->v_samp_factor, 356 compptr->width_in_blocks, input_data, 357 output_data); 358 } 359 360 GLOBAL(void) 361 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, 362 JSAMPARRAY input_data, JSAMPARRAY output_data) 363 { 364 if (simd_support == ~0U) 365 init_simd(); 366 367 if (simd_support & JSIMD_AVX2) 368 jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor, 369 compptr->v_samp_factor, 370 compptr->width_in_blocks, input_data, 371 output_data); 372 else 373 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, 374 compptr->v_samp_factor, 375 compptr->width_in_blocks, input_data, 376 output_data); 377 } 378 379 GLOBAL(int) 380 jsimd_can_h2v2_upsample(void) 381 { 382 init_simd(); 383 384 /* The code is optimised for these values only */ 385 if (BITS_IN_JSAMPLE != 8) 386 return 0; 387 if (sizeof(JDIMENSION) != 4) 388 return 0; 389 390 if (simd_support & JSIMD_AVX2) 391 return 1; 392 if (simd_support & JSIMD_SSE2) 393 return 1; 394 395 return 0; 396 } 397 398 GLOBAL(int) 399 jsimd_can_h2v1_upsample(void) 400 { 401 init_simd(); 402 403 /* The code is optimised for these values only */ 404 if (BITS_IN_JSAMPLE != 8) 405 return 0; 406 if (sizeof(JDIMENSION) != 4) 407 return 0; 408 409 if (simd_support & JSIMD_AVX2) 410 return 1; 411 if (simd_support & JSIMD_SSE2) 412 return 1; 413 414 return 0; 415 } 416 417 GLOBAL(void) 418 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, 419 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) 420 { 421 if (simd_support == ~0U) 422 init_simd(); 423 424 if (simd_support & JSIMD_AVX2) 425 jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width, 426 input_data, output_data_ptr); 427 else 428 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, 429 input_data, output_data_ptr); 430 } 431 432 GLOBAL(void) 433 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, 434 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) 435 { 436 if (simd_support == ~0U) 437 init_simd(); 438 439 if (simd_support & JSIMD_AVX2) 440 jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width, 441 input_data, output_data_ptr); 442 else 443 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, 444 input_data, output_data_ptr); 445 } 446 447 GLOBAL(int) 448 jsimd_can_h2v2_fancy_upsample(void) 449 { 450 init_simd(); 451 452 /* The code is optimised for these values only */ 453 if (BITS_IN_JSAMPLE != 8) 454 return 0; 455 if (sizeof(JDIMENSION) != 4) 456 return 0; 457 458 if ((simd_support & JSIMD_AVX2) && 459 IS_ALIGNED_AVX(jconst_fancy_upsample_avx2)) 460 return 1; 461 if ((simd_support & JSIMD_SSE2) && 462 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) 463 return 1; 464 465 return 0; 466 } 467 468 GLOBAL(int) 469 jsimd_can_h2v1_fancy_upsample(void) 470 { 471 init_simd(); 472 473 /* The code is optimised for these values only */ 474 if (BITS_IN_JSAMPLE != 8) 475 return 0; 476 if (sizeof(JDIMENSION) != 4) 477 return 0; 478 479 if ((simd_support & JSIMD_AVX2) && 480 IS_ALIGNED_AVX(jconst_fancy_upsample_avx2)) 481 return 1; 482 if ((simd_support & JSIMD_SSE2) && 483 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) 484 return 1; 485 486 return 0; 487 } 488 489 GLOBAL(void) 490 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, 491 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) 492 { 493 if (simd_support == ~0U) 494 init_simd(); 495 496 if (simd_support & JSIMD_AVX2) 497 jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor, 498 compptr->downsampled_width, input_data, 499 output_data_ptr); 500 else 501 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, 502 compptr->downsampled_width, input_data, 503 output_data_ptr); 504 } 505 506 GLOBAL(void) 507 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, 508 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) 509 { 510 if (simd_support == ~0U) 511 init_simd(); 512 513 if (simd_support & JSIMD_AVX2) 514 jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor, 515 compptr->downsampled_width, input_data, 516 output_data_ptr); 517 else 518 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, 519 compptr->downsampled_width, input_data, 520 output_data_ptr); 521 } 522 523 GLOBAL(int) 524 jsimd_can_h2v2_merged_upsample(void) 525 { 526 init_simd(); 527 528 /* The code is optimised for these values only */ 529 if (BITS_IN_JSAMPLE != 8) 530 return 0; 531 if (sizeof(JDIMENSION) != 4) 532 return 0; 533 534 if ((simd_support & JSIMD_AVX2) && 535 IS_ALIGNED_AVX(jconst_merged_upsample_avx2)) 536 return 1; 537 if ((simd_support & JSIMD_SSE2) && 538 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) 539 return 1; 540 541 return 0; 542 } 543 544 GLOBAL(int) 545 jsimd_can_h2v1_merged_upsample(void) 546 { 547 init_simd(); 548 549 /* The code is optimised for these values only */ 550 if (BITS_IN_JSAMPLE != 8) 551 return 0; 552 if (sizeof(JDIMENSION) != 4) 553 return 0; 554 555 if ((simd_support & JSIMD_AVX2) && 556 IS_ALIGNED_AVX(jconst_merged_upsample_avx2)) 557 return 1; 558 if ((simd_support & JSIMD_SSE2) && 559 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) 560 return 1; 561 562 return 0; 563 } 564 565 GLOBAL(void) 566 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, 567 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) 568 { 569 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); 570 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); 571 572 if (simd_support == ~0U) 573 init_simd(); 574 575 switch (cinfo->out_color_space) { 576 case JCS_EXT_RGB: 577 avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2; 578 sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2; 579 break; 580 case JCS_EXT_RGBX: 581 case JCS_EXT_RGBA: 582 avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2; 583 sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2; 584 break; 585 case JCS_EXT_BGR: 586 avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2; 587 sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2; 588 break; 589 case JCS_EXT_BGRX: 590 case JCS_EXT_BGRA: 591 avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2; 592 sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2; 593 break; 594 case JCS_EXT_XBGR: 595 case JCS_EXT_ABGR: 596 avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2; 597 sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2; 598 break; 599 case JCS_EXT_XRGB: 600 case JCS_EXT_ARGB: 601 avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2; 602 sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2; 603 break; 604 default: 605 avx2fct = jsimd_h2v2_merged_upsample_avx2; 606 sse2fct = jsimd_h2v2_merged_upsample_sse2; 607 break; 608 } 609 610 if (simd_support & JSIMD_AVX2) 611 avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); 612 else 613 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); 614 } 615 616 GLOBAL(void) 617 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, 618 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) 619 { 620 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); 621 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); 622 623 if (simd_support == ~0U) 624 init_simd(); 625 626 switch (cinfo->out_color_space) { 627 case JCS_EXT_RGB: 628 avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2; 629 sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2; 630 break; 631 case JCS_EXT_RGBX: 632 case JCS_EXT_RGBA: 633 avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2; 634 sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2; 635 break; 636 case JCS_EXT_BGR: 637 avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2; 638 sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2; 639 break; 640 case JCS_EXT_BGRX: 641 case JCS_EXT_BGRA: 642 avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2; 643 sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2; 644 break; 645 case JCS_EXT_XBGR: 646 case JCS_EXT_ABGR: 647 avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2; 648 sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2; 649 break; 650 case JCS_EXT_XRGB: 651 case JCS_EXT_ARGB: 652 avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2; 653 sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2; 654 break; 655 default: 656 avx2fct = jsimd_h2v1_merged_upsample_avx2; 657 sse2fct = jsimd_h2v1_merged_upsample_sse2; 658 break; 659 } 660 661 if (simd_support & JSIMD_AVX2) 662 avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); 663 else 664 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); 665 } 666 667 GLOBAL(int) 668 jsimd_can_convsamp(void) 669 { 670 init_simd(); 671 672 /* The code is optimised for these values only */ 673 if (DCTSIZE != 8) 674 return 0; 675 if (BITS_IN_JSAMPLE != 8) 676 return 0; 677 if (sizeof(JDIMENSION) != 4) 678 return 0; 679 if (sizeof(DCTELEM) != 2) 680 return 0; 681 682 if (simd_support & JSIMD_AVX2) 683 return 1; 684 if (simd_support & JSIMD_SSE2) 685 return 1; 686 687 return 0; 688 } 689 690 GLOBAL(int) 691 jsimd_can_convsamp_float(void) 692 { 693 init_simd(); 694 695 /* The code is optimised for these values only */ 696 if (DCTSIZE != 8) 697 return 0; 698 if (BITS_IN_JSAMPLE != 8) 699 return 0; 700 if (sizeof(JDIMENSION) != 4) 701 return 0; 702 if (sizeof(FAST_FLOAT) != 4) 703 return 0; 704 705 if (simd_support & JSIMD_SSE2) 706 return 1; 707 708 return 0; 709 } 710 711 GLOBAL(void) 712 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, 713 DCTELEM *workspace) 714 { 715 if (simd_support == ~0U) 716 init_simd(); 717 718 if (simd_support & JSIMD_AVX2) 719 jsimd_convsamp_avx2(sample_data, start_col, workspace); 720 else 721 jsimd_convsamp_sse2(sample_data, start_col, workspace); 722 } 723 724 GLOBAL(void) 725 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col, 726 FAST_FLOAT *workspace) 727 { 728 jsimd_convsamp_float_sse2(sample_data, start_col, workspace); 729 } 730 731 GLOBAL(int) 732 jsimd_can_fdct_islow(void) 733 { 734 init_simd(); 735 736 /* The code is optimised for these values only */ 737 if (DCTSIZE != 8) 738 return 0; 739 if (sizeof(DCTELEM) != 2) 740 return 0; 741 742 if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2)) 743 return 1; 744 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) 745 return 1; 746 747 return 0; 748 } 749 750 GLOBAL(int) 751 jsimd_can_fdct_ifast(void) 752 { 753 init_simd(); 754 755 /* The code is optimised for these values only */ 756 if (DCTSIZE != 8) 757 return 0; 758 if (sizeof(DCTELEM) != 2) 759 return 0; 760 761 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2)) 762 return 1; 763 764 return 0; 765 } 766 767 GLOBAL(int) 768 jsimd_can_fdct_float(void) 769 { 770 init_simd(); 771 772 /* The code is optimised for these values only */ 773 if (DCTSIZE != 8) 774 return 0; 775 if (sizeof(FAST_FLOAT) != 4) 776 return 0; 777 778 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) 779 return 1; 780 781 return 0; 782 } 783 784 GLOBAL(void) 785 jsimd_fdct_islow(DCTELEM *data) 786 { 787 if (simd_support == ~0U) 788 init_simd(); 789 790 if (simd_support & JSIMD_AVX2) 791 jsimd_fdct_islow_avx2(data); 792 else 793 jsimd_fdct_islow_sse2(data); 794 } 795 796 GLOBAL(void) 797 jsimd_fdct_ifast(DCTELEM *data) 798 { 799 jsimd_fdct_ifast_sse2(data); 800 } 801 802 GLOBAL(void) 803 jsimd_fdct_float(FAST_FLOAT *data) 804 { 805 jsimd_fdct_float_sse(data); 806 } 807 808 GLOBAL(int) 809 jsimd_can_quantize(void) 810 { 811 init_simd(); 812 813 /* The code is optimised for these values only */ 814 if (DCTSIZE != 8) 815 return 0; 816 if (sizeof(JCOEF) != 2) 817 return 0; 818 if (sizeof(DCTELEM) != 2) 819 return 0; 820 821 if (simd_support & JSIMD_AVX2) 822 return 1; 823 if (simd_support & JSIMD_SSE2) 824 return 1; 825 826 return 0; 827 } 828 829 GLOBAL(int) 830 jsimd_can_quantize_float(void) 831 { 832 init_simd(); 833 834 /* The code is optimised for these values only */ 835 if (DCTSIZE != 8) 836 return 0; 837 if (sizeof(JCOEF) != 2) 838 return 0; 839 if (sizeof(FAST_FLOAT) != 4) 840 return 0; 841 842 if (simd_support & JSIMD_SSE2) 843 return 1; 844 845 return 0; 846 } 847 848 GLOBAL(void) 849 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace) 850 { 851 if (simd_support == ~0U) 852 init_simd(); 853 854 if (simd_support & JSIMD_AVX2) 855 jsimd_quantize_avx2(coef_block, divisors, workspace); 856 else 857 jsimd_quantize_sse2(coef_block, divisors, workspace); 858 } 859 860 GLOBAL(void) 861 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors, 862 FAST_FLOAT *workspace) 863 { 864 jsimd_quantize_float_sse2(coef_block, divisors, workspace); 865 } 866 867 GLOBAL(int) 868 jsimd_can_idct_2x2(void) 869 { 870 init_simd(); 871 872 /* The code is optimised for these values only */ 873 if (DCTSIZE != 8) 874 return 0; 875 if (sizeof(JCOEF) != 2) 876 return 0; 877 if (BITS_IN_JSAMPLE != 8) 878 return 0; 879 if (sizeof(JDIMENSION) != 4) 880 return 0; 881 if (sizeof(ISLOW_MULT_TYPE) != 2) 882 return 0; 883 884 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) 885 return 1; 886 887 return 0; 888 } 889 890 GLOBAL(int) 891 jsimd_can_idct_4x4(void) 892 { 893 init_simd(); 894 895 /* The code is optimised for these values only */ 896 if (DCTSIZE != 8) 897 return 0; 898 if (sizeof(JCOEF) != 2) 899 return 0; 900 if (BITS_IN_JSAMPLE != 8) 901 return 0; 902 if (sizeof(JDIMENSION) != 4) 903 return 0; 904 if (sizeof(ISLOW_MULT_TYPE) != 2) 905 return 0; 906 907 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) 908 return 1; 909 910 return 0; 911 } 912 913 GLOBAL(void) 914 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr, 915 JCOEFPTR coef_block, JSAMPARRAY output_buf, 916 JDIMENSION output_col) 917 { 918 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col); 919 } 920 921 GLOBAL(void) 922 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr, 923 JCOEFPTR coef_block, JSAMPARRAY output_buf, 924 JDIMENSION output_col) 925 { 926 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col); 927 } 928 929 GLOBAL(int) 930 jsimd_can_idct_islow(void) 931 { 932 init_simd(); 933 934 /* The code is optimised for these values only */ 935 if (DCTSIZE != 8) 936 return 0; 937 if (sizeof(JCOEF) != 2) 938 return 0; 939 if (BITS_IN_JSAMPLE != 8) 940 return 0; 941 if (sizeof(JDIMENSION) != 4) 942 return 0; 943 if (sizeof(ISLOW_MULT_TYPE) != 2) 944 return 0; 945 946 if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2)) 947 return 1; 948 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) 949 return 1; 950 951 return 0; 952 } 953 954 GLOBAL(int) 955 jsimd_can_idct_ifast(void) 956 { 957 init_simd(); 958 959 /* The code is optimised for these values only */ 960 if (DCTSIZE != 8) 961 return 0; 962 if (sizeof(JCOEF) != 2) 963 return 0; 964 if (BITS_IN_JSAMPLE != 8) 965 return 0; 966 if (sizeof(JDIMENSION) != 4) 967 return 0; 968 if (sizeof(IFAST_MULT_TYPE) != 2) 969 return 0; 970 if (IFAST_SCALE_BITS != 2) 971 return 0; 972 973 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) 974 return 1; 975 976 return 0; 977 } 978 979 GLOBAL(int) 980 jsimd_can_idct_float(void) 981 { 982 init_simd(); 983 984 if (DCTSIZE != 8) 985 return 0; 986 if (sizeof(JCOEF) != 2) 987 return 0; 988 if (BITS_IN_JSAMPLE != 8) 989 return 0; 990 if (sizeof(JDIMENSION) != 4) 991 return 0; 992 if (sizeof(FAST_FLOAT) != 4) 993 return 0; 994 if (sizeof(FLOAT_MULT_TYPE) != 4) 995 return 0; 996 997 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) 998 return 1; 999 1000 return 0; 1001 } 1002 1003 GLOBAL(void) 1004 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr, 1005 JCOEFPTR coef_block, JSAMPARRAY output_buf, 1006 JDIMENSION output_col) 1007 { 1008 if (simd_support == ~0U) 1009 init_simd(); 1010 1011 if (simd_support & JSIMD_AVX2) 1012 jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf, 1013 output_col); 1014 else 1015 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, 1016 output_col); 1017 } 1018 1019 GLOBAL(void) 1020 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr, 1021 JCOEFPTR coef_block, JSAMPARRAY output_buf, 1022 JDIMENSION output_col) 1023 { 1024 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, 1025 output_col); 1026 } 1027 1028 GLOBAL(void) 1029 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr, 1030 JCOEFPTR coef_block, JSAMPARRAY output_buf, 1031 JDIMENSION output_col) 1032 { 1033 jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf, 1034 output_col); 1035 } 1036 1037 GLOBAL(int) 1038 jsimd_can_huff_encode_one_block(void) 1039 { 1040 init_simd(); 1041 1042 if (DCTSIZE != 8) 1043 return 0; 1044 if (sizeof(JCOEF) != 2) 1045 return 0; 1046 1047 if ((simd_support & JSIMD_SSE2) && simd_huffman && 1048 IS_ALIGNED_SSE(jconst_huff_encode_one_block)) 1049 return 1; 1050 1051 return 0; 1052 } 1053 1054 GLOBAL(JOCTET *) 1055 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block, 1056 int last_dc_val, c_derived_tbl *dctbl, 1057 c_derived_tbl *actbl) 1058 { 1059 return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val, 1060 dctbl, actbl); 1061 } 1062 1063 GLOBAL(int) 1064 jsimd_can_encode_mcu_AC_first_prepare(void) 1065 { 1066 init_simd(); 1067 1068 if (DCTSIZE != 8) 1069 return 0; 1070 if (sizeof(JCOEF) != 2) 1071 return 0; 1072 if (simd_support & JSIMD_SSE2) 1073 return 1; 1074 1075 return 0; 1076 } 1077 1078 GLOBAL(void) 1079 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, 1080 const int *jpeg_natural_order_start, int Sl, 1081 int Al, UJCOEF *values, size_t *zerobits) 1082 { 1083 jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start, 1084 Sl, Al, values, zerobits); 1085 } 1086 1087 GLOBAL(int) 1088 jsimd_can_encode_mcu_AC_refine_prepare(void) 1089 { 1090 init_simd(); 1091 1092 if (DCTSIZE != 8) 1093 return 0; 1094 if (sizeof(JCOEF) != 2) 1095 return 0; 1096 if (simd_support & JSIMD_SSE2) 1097 return 1; 1098 1099 return 0; 1100 } 1101 1102 GLOBAL(int) 1103 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, 1104 const int *jpeg_natural_order_start, int Sl, 1105 int Al, UJCOEF *absvalues, size_t *bits) 1106 { 1107 return jsimd_encode_mcu_AC_refine_prepare_sse2(block, 1108 jpeg_natural_order_start, 1109 Sl, Al, absvalues, bits); 1110 }