tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

jsimd.c (27656B)


      1 /*
      2 * jsimd_x86_64.c
      3 *
      4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
      5 * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022-2023, D. R. Commander.
      6 * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
      7 *
      8 * Based on the x86 SIMD extension for IJG JPEG library,
      9 * Copyright (C) 1999-2006, MIYASAKA Masaru.
     10 * For conditions of distribution and use, see copyright notice in jsimdext.inc
     11 *
     12 * This file contains the interface between the "normal" portions
     13 * of the library and the SIMD implementations when running on a
     14 * 64-bit x86 architecture.
     15 */
     16 
     17 #define JPEG_INTERNALS
     18 #include "../../jinclude.h"
     19 #include "../../jpeglib.h"
     20 #include "../../jsimd.h"
     21 #include "../../jdct.h"
     22 #include "../../jsimddct.h"
     23 #include "../jsimd.h"
     24 
     25 /*
     26 * In the PIC cases, we have no guarantee that constants will keep
     27 * their alignment. This macro allows us to verify it at runtime.
     28 */
     29 #define IS_ALIGNED(ptr, order)  (((size_t)ptr & ((1 << order) - 1)) == 0)
     30 
     31 #define IS_ALIGNED_SSE(ptr)  (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
     32 #define IS_ALIGNED_AVX(ptr)  (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */
     33 
     34 static THREAD_LOCAL unsigned int simd_support = (unsigned int)(~0);
     35 static THREAD_LOCAL unsigned int simd_huffman = 1;
     36 
     37 /*
     38 * Check what SIMD accelerations are supported.
     39 */
     40 LOCAL(void)
     41 init_simd(void)
     42 {
     43 #ifndef NO_GETENV
     44  char env[2] = { 0 };
     45 #endif
     46 
     47  if (simd_support != ~0U)
     48    return;
     49 
     50  simd_support = jpeg_simd_cpu_support();
     51 
     52 #ifndef NO_GETENV
     53  /* Force different settings through environment variables */
     54  if (!GETENV_S(env, 2, "JSIMD_FORCESSE2") && !strcmp(env, "1"))
     55    simd_support &= JSIMD_SSE2;
     56  if (!GETENV_S(env, 2, "JSIMD_FORCEAVX2") && !strcmp(env, "1"))
     57    simd_support &= JSIMD_AVX2;
     58  if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
     59    simd_support = 0;
     60  if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
     61    simd_huffman = 0;
     62 #endif
     63 }
     64 
     65 GLOBAL(int)
     66 jsimd_can_rgb_ycc(void)
     67 {
     68  init_simd();
     69 
     70  /* The code is optimised for these values only */
     71  if (BITS_IN_JSAMPLE != 8)
     72    return 0;
     73  if (sizeof(JDIMENSION) != 4)
     74    return 0;
     75  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
     76    return 0;
     77 
     78  if ((simd_support & JSIMD_AVX2) &&
     79      IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2))
     80    return 1;
     81  if ((simd_support & JSIMD_SSE2) &&
     82      IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
     83    return 1;
     84 
     85  return 0;
     86 }
     87 
     88 GLOBAL(int)
     89 jsimd_can_rgb_gray(void)
     90 {
     91  init_simd();
     92 
     93  /* The code is optimised for these values only */
     94  if (BITS_IN_JSAMPLE != 8)
     95    return 0;
     96  if (sizeof(JDIMENSION) != 4)
     97    return 0;
     98  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
     99    return 0;
    100 
    101  if ((simd_support & JSIMD_AVX2) &&
    102      IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2))
    103    return 1;
    104  if ((simd_support & JSIMD_SSE2) &&
    105      IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
    106    return 1;
    107 
    108  return 0;
    109 }
    110 
    111 GLOBAL(int)
    112 jsimd_can_ycc_rgb(void)
    113 {
    114  init_simd();
    115 
    116  /* The code is optimised for these values only */
    117  if (BITS_IN_JSAMPLE != 8)
    118    return 0;
    119  if (sizeof(JDIMENSION) != 4)
    120    return 0;
    121  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
    122    return 0;
    123 
    124  if ((simd_support & JSIMD_AVX2) &&
    125      IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2))
    126    return 1;
    127  if ((simd_support & JSIMD_SSE2) &&
    128      IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
    129    return 1;
    130 
    131  return 0;
    132 }
    133 
    134 GLOBAL(int)
    135 jsimd_can_ycc_rgb565(void)
    136 {
    137  return 0;
    138 }
    139 
    140 GLOBAL(void)
    141 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
    142                      JSAMPIMAGE output_buf, JDIMENSION output_row,
    143                      int num_rows)
    144 {
    145  void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
    146  void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
    147 
    148  if (simd_support == ~0U)
    149    init_simd();
    150 
    151  switch (cinfo->in_color_space) {
    152  case JCS_EXT_RGB:
    153    avx2fct = jsimd_extrgb_ycc_convert_avx2;
    154    sse2fct = jsimd_extrgb_ycc_convert_sse2;
    155    break;
    156  case JCS_EXT_RGBX:
    157  case JCS_EXT_RGBA:
    158    avx2fct = jsimd_extrgbx_ycc_convert_avx2;
    159    sse2fct = jsimd_extrgbx_ycc_convert_sse2;
    160    break;
    161  case JCS_EXT_BGR:
    162    avx2fct = jsimd_extbgr_ycc_convert_avx2;
    163    sse2fct = jsimd_extbgr_ycc_convert_sse2;
    164    break;
    165  case JCS_EXT_BGRX:
    166  case JCS_EXT_BGRA:
    167    avx2fct = jsimd_extbgrx_ycc_convert_avx2;
    168    sse2fct = jsimd_extbgrx_ycc_convert_sse2;
    169    break;
    170  case JCS_EXT_XBGR:
    171  case JCS_EXT_ABGR:
    172    avx2fct = jsimd_extxbgr_ycc_convert_avx2;
    173    sse2fct = jsimd_extxbgr_ycc_convert_sse2;
    174    break;
    175  case JCS_EXT_XRGB:
    176  case JCS_EXT_ARGB:
    177    avx2fct = jsimd_extxrgb_ycc_convert_avx2;
    178    sse2fct = jsimd_extxrgb_ycc_convert_sse2;
    179    break;
    180  default:
    181    avx2fct = jsimd_rgb_ycc_convert_avx2;
    182    sse2fct = jsimd_rgb_ycc_convert_sse2;
    183    break;
    184  }
    185 
    186  if (simd_support & JSIMD_AVX2)
    187    avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
    188  else
    189    sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
    190 }
    191 
    192 GLOBAL(void)
    193 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
    194                       JSAMPIMAGE output_buf, JDIMENSION output_row,
    195                       int num_rows)
    196 {
    197  void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
    198  void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
    199 
    200  if (simd_support == ~0U)
    201    init_simd();
    202 
    203  switch (cinfo->in_color_space) {
    204  case JCS_EXT_RGB:
    205    avx2fct = jsimd_extrgb_gray_convert_avx2;
    206    sse2fct = jsimd_extrgb_gray_convert_sse2;
    207    break;
    208  case JCS_EXT_RGBX:
    209  case JCS_EXT_RGBA:
    210    avx2fct = jsimd_extrgbx_gray_convert_avx2;
    211    sse2fct = jsimd_extrgbx_gray_convert_sse2;
    212    break;
    213  case JCS_EXT_BGR:
    214    avx2fct = jsimd_extbgr_gray_convert_avx2;
    215    sse2fct = jsimd_extbgr_gray_convert_sse2;
    216    break;
    217  case JCS_EXT_BGRX:
    218  case JCS_EXT_BGRA:
    219    avx2fct = jsimd_extbgrx_gray_convert_avx2;
    220    sse2fct = jsimd_extbgrx_gray_convert_sse2;
    221    break;
    222  case JCS_EXT_XBGR:
    223  case JCS_EXT_ABGR:
    224    avx2fct = jsimd_extxbgr_gray_convert_avx2;
    225    sse2fct = jsimd_extxbgr_gray_convert_sse2;
    226    break;
    227  case JCS_EXT_XRGB:
    228  case JCS_EXT_ARGB:
    229    avx2fct = jsimd_extxrgb_gray_convert_avx2;
    230    sse2fct = jsimd_extxrgb_gray_convert_sse2;
    231    break;
    232  default:
    233    avx2fct = jsimd_rgb_gray_convert_avx2;
    234    sse2fct = jsimd_rgb_gray_convert_sse2;
    235    break;
    236  }
    237 
    238  if (simd_support & JSIMD_AVX2)
    239    avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
    240  else
    241    sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
    242 }
    243 
    244 GLOBAL(void)
    245 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
    246                      JDIMENSION input_row, JSAMPARRAY output_buf,
    247                      int num_rows)
    248 {
    249  void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
    250  void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
    251 
    252  if (simd_support == ~0U)
    253    init_simd();
    254 
    255  switch (cinfo->out_color_space) {
    256  case JCS_EXT_RGB:
    257    avx2fct = jsimd_ycc_extrgb_convert_avx2;
    258    sse2fct = jsimd_ycc_extrgb_convert_sse2;
    259    break;
    260  case JCS_EXT_RGBX:
    261  case JCS_EXT_RGBA:
    262    avx2fct = jsimd_ycc_extrgbx_convert_avx2;
    263    sse2fct = jsimd_ycc_extrgbx_convert_sse2;
    264    break;
    265  case JCS_EXT_BGR:
    266    avx2fct = jsimd_ycc_extbgr_convert_avx2;
    267    sse2fct = jsimd_ycc_extbgr_convert_sse2;
    268    break;
    269  case JCS_EXT_BGRX:
    270  case JCS_EXT_BGRA:
    271    avx2fct = jsimd_ycc_extbgrx_convert_avx2;
    272    sse2fct = jsimd_ycc_extbgrx_convert_sse2;
    273    break;
    274  case JCS_EXT_XBGR:
    275  case JCS_EXT_ABGR:
    276    avx2fct = jsimd_ycc_extxbgr_convert_avx2;
    277    sse2fct = jsimd_ycc_extxbgr_convert_sse2;
    278    break;
    279  case JCS_EXT_XRGB:
    280  case JCS_EXT_ARGB:
    281    avx2fct = jsimd_ycc_extxrgb_convert_avx2;
    282    sse2fct = jsimd_ycc_extxrgb_convert_sse2;
    283    break;
    284  default:
    285    avx2fct = jsimd_ycc_rgb_convert_avx2;
    286    sse2fct = jsimd_ycc_rgb_convert_sse2;
    287    break;
    288  }
    289 
    290  if (simd_support & JSIMD_AVX2)
    291    avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
    292  else
    293    sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
    294 }
    295 
    296 GLOBAL(void)
    297 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
    298                         JDIMENSION input_row, JSAMPARRAY output_buf,
    299                         int num_rows)
    300 {
    301 }
    302 
    303 GLOBAL(int)
    304 jsimd_can_h2v2_downsample(void)
    305 {
    306  init_simd();
    307 
    308  /* The code is optimised for these values only */
    309  if (BITS_IN_JSAMPLE != 8)
    310    return 0;
    311  if (sizeof(JDIMENSION) != 4)
    312    return 0;
    313 
    314  if (simd_support & JSIMD_AVX2)
    315    return 1;
    316  if (simd_support & JSIMD_SSE2)
    317    return 1;
    318 
    319  return 0;
    320 }
    321 
    322 GLOBAL(int)
    323 jsimd_can_h2v1_downsample(void)
    324 {
    325  init_simd();
    326 
    327  /* The code is optimised for these values only */
    328  if (BITS_IN_JSAMPLE != 8)
    329    return 0;
    330  if (sizeof(JDIMENSION) != 4)
    331    return 0;
    332 
    333  if (simd_support & JSIMD_AVX2)
    334    return 1;
    335  if (simd_support & JSIMD_SSE2)
    336    return 1;
    337 
    338  return 0;
    339 }
    340 
    341 GLOBAL(void)
    342 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
    343                      JSAMPARRAY input_data, JSAMPARRAY output_data)
    344 {
    345  if (simd_support == ~0U)
    346    init_simd();
    347 
    348  if (simd_support & JSIMD_AVX2)
    349    jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
    350                               compptr->v_samp_factor,
    351                               compptr->width_in_blocks, input_data,
    352                               output_data);
    353  else
    354    jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
    355                               compptr->v_samp_factor,
    356                               compptr->width_in_blocks, input_data,
    357                               output_data);
    358 }
    359 
    360 GLOBAL(void)
    361 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
    362                      JSAMPARRAY input_data, JSAMPARRAY output_data)
    363 {
    364  if (simd_support == ~0U)
    365    init_simd();
    366 
    367  if (simd_support & JSIMD_AVX2)
    368    jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
    369                               compptr->v_samp_factor,
    370                               compptr->width_in_blocks, input_data,
    371                               output_data);
    372  else
    373    jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
    374                               compptr->v_samp_factor,
    375                               compptr->width_in_blocks, input_data,
    376                               output_data);
    377 }
    378 
    379 GLOBAL(int)
    380 jsimd_can_h2v2_upsample(void)
    381 {
    382  init_simd();
    383 
    384  /* The code is optimised for these values only */
    385  if (BITS_IN_JSAMPLE != 8)
    386    return 0;
    387  if (sizeof(JDIMENSION) != 4)
    388    return 0;
    389 
    390  if (simd_support & JSIMD_AVX2)
    391    return 1;
    392  if (simd_support & JSIMD_SSE2)
    393    return 1;
    394 
    395  return 0;
    396 }
    397 
    398 GLOBAL(int)
    399 jsimd_can_h2v1_upsample(void)
    400 {
    401  init_simd();
    402 
    403  /* The code is optimised for these values only */
    404  if (BITS_IN_JSAMPLE != 8)
    405    return 0;
    406  if (sizeof(JDIMENSION) != 4)
    407    return 0;
    408 
    409  if (simd_support & JSIMD_AVX2)
    410    return 1;
    411  if (simd_support & JSIMD_SSE2)
    412    return 1;
    413 
    414  return 0;
    415 }
    416 
    417 GLOBAL(void)
    418 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
    419                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
    420 {
    421  if (simd_support == ~0U)
    422    init_simd();
    423 
    424  if (simd_support & JSIMD_AVX2)
    425    jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
    426                             input_data, output_data_ptr);
    427  else
    428    jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
    429                             input_data, output_data_ptr);
    430 }
    431 
    432 GLOBAL(void)
    433 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
    434                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
    435 {
    436  if (simd_support == ~0U)
    437    init_simd();
    438 
    439  if (simd_support & JSIMD_AVX2)
    440    jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
    441                             input_data, output_data_ptr);
    442  else
    443    jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
    444                             input_data, output_data_ptr);
    445 }
    446 
    447 GLOBAL(int)
    448 jsimd_can_h2v2_fancy_upsample(void)
    449 {
    450  init_simd();
    451 
    452  /* The code is optimised for these values only */
    453  if (BITS_IN_JSAMPLE != 8)
    454    return 0;
    455  if (sizeof(JDIMENSION) != 4)
    456    return 0;
    457 
    458  if ((simd_support & JSIMD_AVX2) &&
    459      IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
    460    return 1;
    461  if ((simd_support & JSIMD_SSE2) &&
    462      IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
    463    return 1;
    464 
    465  return 0;
    466 }
    467 
    468 GLOBAL(int)
    469 jsimd_can_h2v1_fancy_upsample(void)
    470 {
    471  init_simd();
    472 
    473  /* The code is optimised for these values only */
    474  if (BITS_IN_JSAMPLE != 8)
    475    return 0;
    476  if (sizeof(JDIMENSION) != 4)
    477    return 0;
    478 
    479  if ((simd_support & JSIMD_AVX2) &&
    480      IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
    481    return 1;
    482  if ((simd_support & JSIMD_SSE2) &&
    483      IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
    484    return 1;
    485 
    486  return 0;
    487 }
    488 
    489 GLOBAL(void)
    490 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
    491                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
    492 {
    493  if (simd_support == ~0U)
    494    init_simd();
    495 
    496  if (simd_support & JSIMD_AVX2)
    497    jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
    498                                   compptr->downsampled_width, input_data,
    499                                   output_data_ptr);
    500  else
    501    jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
    502                                   compptr->downsampled_width, input_data,
    503                                   output_data_ptr);
    504 }
    505 
    506 GLOBAL(void)
    507 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
    508                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
    509 {
    510  if (simd_support == ~0U)
    511    init_simd();
    512 
    513  if (simd_support & JSIMD_AVX2)
    514    jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
    515                                   compptr->downsampled_width, input_data,
    516                                   output_data_ptr);
    517  else
    518    jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
    519                                   compptr->downsampled_width, input_data,
    520                                   output_data_ptr);
    521 }
    522 
    523 GLOBAL(int)
    524 jsimd_can_h2v2_merged_upsample(void)
    525 {
    526  init_simd();
    527 
    528  /* The code is optimised for these values only */
    529  if (BITS_IN_JSAMPLE != 8)
    530    return 0;
    531  if (sizeof(JDIMENSION) != 4)
    532    return 0;
    533 
    534  if ((simd_support & JSIMD_AVX2) &&
    535      IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
    536    return 1;
    537  if ((simd_support & JSIMD_SSE2) &&
    538      IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
    539    return 1;
    540 
    541  return 0;
    542 }
    543 
    544 GLOBAL(int)
    545 jsimd_can_h2v1_merged_upsample(void)
    546 {
    547  init_simd();
    548 
    549  /* The code is optimised for these values only */
    550  if (BITS_IN_JSAMPLE != 8)
    551    return 0;
    552  if (sizeof(JDIMENSION) != 4)
    553    return 0;
    554 
    555  if ((simd_support & JSIMD_AVX2) &&
    556      IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
    557    return 1;
    558  if ((simd_support & JSIMD_SSE2) &&
    559      IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
    560    return 1;
    561 
    562  return 0;
    563 }
    564 
    565 GLOBAL(void)
    566 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
    567                           JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
    568 {
    569  void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
    570  void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
    571 
    572  if (simd_support == ~0U)
    573    init_simd();
    574 
    575  switch (cinfo->out_color_space) {
    576  case JCS_EXT_RGB:
    577    avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
    578    sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2;
    579    break;
    580  case JCS_EXT_RGBX:
    581  case JCS_EXT_RGBA:
    582    avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2;
    583    sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2;
    584    break;
    585  case JCS_EXT_BGR:
    586    avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2;
    587    sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2;
    588    break;
    589  case JCS_EXT_BGRX:
    590  case JCS_EXT_BGRA:
    591    avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2;
    592    sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2;
    593    break;
    594  case JCS_EXT_XBGR:
    595  case JCS_EXT_ABGR:
    596    avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2;
    597    sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2;
    598    break;
    599  case JCS_EXT_XRGB:
    600  case JCS_EXT_ARGB:
    601    avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2;
    602    sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2;
    603    break;
    604  default:
    605    avx2fct = jsimd_h2v2_merged_upsample_avx2;
    606    sse2fct = jsimd_h2v2_merged_upsample_sse2;
    607    break;
    608  }
    609 
    610  if (simd_support & JSIMD_AVX2)
    611    avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
    612  else
    613    sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
    614 }
    615 
    616 GLOBAL(void)
    617 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
    618                           JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
    619 {
    620  void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
    621  void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
    622 
    623  if (simd_support == ~0U)
    624    init_simd();
    625 
    626  switch (cinfo->out_color_space) {
    627  case JCS_EXT_RGB:
    628    avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
    629    sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2;
    630    break;
    631  case JCS_EXT_RGBX:
    632  case JCS_EXT_RGBA:
    633    avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2;
    634    sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2;
    635    break;
    636  case JCS_EXT_BGR:
    637    avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2;
    638    sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2;
    639    break;
    640  case JCS_EXT_BGRX:
    641  case JCS_EXT_BGRA:
    642    avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2;
    643    sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2;
    644    break;
    645  case JCS_EXT_XBGR:
    646  case JCS_EXT_ABGR:
    647    avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2;
    648    sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2;
    649    break;
    650  case JCS_EXT_XRGB:
    651  case JCS_EXT_ARGB:
    652    avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2;
    653    sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2;
    654    break;
    655  default:
    656    avx2fct = jsimd_h2v1_merged_upsample_avx2;
    657    sse2fct = jsimd_h2v1_merged_upsample_sse2;
    658    break;
    659  }
    660 
    661  if (simd_support & JSIMD_AVX2)
    662    avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
    663  else
    664    sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
    665 }
    666 
    667 GLOBAL(int)
    668 jsimd_can_convsamp(void)
    669 {
    670  init_simd();
    671 
    672  /* The code is optimised for these values only */
    673  if (DCTSIZE != 8)
    674    return 0;
    675  if (BITS_IN_JSAMPLE != 8)
    676    return 0;
    677  if (sizeof(JDIMENSION) != 4)
    678    return 0;
    679  if (sizeof(DCTELEM) != 2)
    680    return 0;
    681 
    682  if (simd_support & JSIMD_AVX2)
    683    return 1;
    684  if (simd_support & JSIMD_SSE2)
    685    return 1;
    686 
    687  return 0;
    688 }
    689 
    690 GLOBAL(int)
    691 jsimd_can_convsamp_float(void)
    692 {
    693  init_simd();
    694 
    695  /* The code is optimised for these values only */
    696  if (DCTSIZE != 8)
    697    return 0;
    698  if (BITS_IN_JSAMPLE != 8)
    699    return 0;
    700  if (sizeof(JDIMENSION) != 4)
    701    return 0;
    702  if (sizeof(FAST_FLOAT) != 4)
    703    return 0;
    704 
    705  if (simd_support & JSIMD_SSE2)
    706    return 1;
    707 
    708  return 0;
    709 }
    710 
    711 GLOBAL(void)
    712 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
    713               DCTELEM *workspace)
    714 {
    715  if (simd_support == ~0U)
    716    init_simd();
    717 
    718  if (simd_support & JSIMD_AVX2)
    719    jsimd_convsamp_avx2(sample_data, start_col, workspace);
    720  else
    721    jsimd_convsamp_sse2(sample_data, start_col, workspace);
    722 }
    723 
    724 GLOBAL(void)
    725 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
    726                     FAST_FLOAT *workspace)
    727 {
    728  jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
    729 }
    730 
    731 GLOBAL(int)
    732 jsimd_can_fdct_islow(void)
    733 {
    734  init_simd();
    735 
    736  /* The code is optimised for these values only */
    737  if (DCTSIZE != 8)
    738    return 0;
    739  if (sizeof(DCTELEM) != 2)
    740    return 0;
    741 
    742  if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2))
    743    return 1;
    744  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
    745    return 1;
    746 
    747  return 0;
    748 }
    749 
    750 GLOBAL(int)
    751 jsimd_can_fdct_ifast(void)
    752 {
    753  init_simd();
    754 
    755  /* The code is optimised for these values only */
    756  if (DCTSIZE != 8)
    757    return 0;
    758  if (sizeof(DCTELEM) != 2)
    759    return 0;
    760 
    761  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
    762    return 1;
    763 
    764  return 0;
    765 }
    766 
    767 GLOBAL(int)
    768 jsimd_can_fdct_float(void)
    769 {
    770  init_simd();
    771 
    772  /* The code is optimised for these values only */
    773  if (DCTSIZE != 8)
    774    return 0;
    775  if (sizeof(FAST_FLOAT) != 4)
    776    return 0;
    777 
    778  if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
    779    return 1;
    780 
    781  return 0;
    782 }
    783 
    784 GLOBAL(void)
    785 jsimd_fdct_islow(DCTELEM *data)
    786 {
    787  if (simd_support == ~0U)
    788    init_simd();
    789 
    790  if (simd_support & JSIMD_AVX2)
    791    jsimd_fdct_islow_avx2(data);
    792  else
    793    jsimd_fdct_islow_sse2(data);
    794 }
    795 
    796 GLOBAL(void)
    797 jsimd_fdct_ifast(DCTELEM *data)
    798 {
    799  jsimd_fdct_ifast_sse2(data);
    800 }
    801 
    802 GLOBAL(void)
    803 jsimd_fdct_float(FAST_FLOAT *data)
    804 {
    805  jsimd_fdct_float_sse(data);
    806 }
    807 
    808 GLOBAL(int)
    809 jsimd_can_quantize(void)
    810 {
    811  init_simd();
    812 
    813  /* The code is optimised for these values only */
    814  if (DCTSIZE != 8)
    815    return 0;
    816  if (sizeof(JCOEF) != 2)
    817    return 0;
    818  if (sizeof(DCTELEM) != 2)
    819    return 0;
    820 
    821  if (simd_support & JSIMD_AVX2)
    822    return 1;
    823  if (simd_support & JSIMD_SSE2)
    824    return 1;
    825 
    826  return 0;
    827 }
    828 
    829 GLOBAL(int)
    830 jsimd_can_quantize_float(void)
    831 {
    832  init_simd();
    833 
    834  /* The code is optimised for these values only */
    835  if (DCTSIZE != 8)
    836    return 0;
    837  if (sizeof(JCOEF) != 2)
    838    return 0;
    839  if (sizeof(FAST_FLOAT) != 4)
    840    return 0;
    841 
    842  if (simd_support & JSIMD_SSE2)
    843    return 1;
    844 
    845  return 0;
    846 }
    847 
    848 GLOBAL(void)
    849 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
    850 {
    851  if (simd_support == ~0U)
    852    init_simd();
    853 
    854  if (simd_support & JSIMD_AVX2)
    855    jsimd_quantize_avx2(coef_block, divisors, workspace);
    856  else
    857    jsimd_quantize_sse2(coef_block, divisors, workspace);
    858 }
    859 
    860 GLOBAL(void)
    861 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
    862                     FAST_FLOAT *workspace)
    863 {
    864  jsimd_quantize_float_sse2(coef_block, divisors, workspace);
    865 }
    866 
    867 GLOBAL(int)
    868 jsimd_can_idct_2x2(void)
    869 {
    870  init_simd();
    871 
    872  /* The code is optimised for these values only */
    873  if (DCTSIZE != 8)
    874    return 0;
    875  if (sizeof(JCOEF) != 2)
    876    return 0;
    877  if (BITS_IN_JSAMPLE != 8)
    878    return 0;
    879  if (sizeof(JDIMENSION) != 4)
    880    return 0;
    881  if (sizeof(ISLOW_MULT_TYPE) != 2)
    882    return 0;
    883 
    884  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
    885    return 1;
    886 
    887  return 0;
    888 }
    889 
    890 GLOBAL(int)
    891 jsimd_can_idct_4x4(void)
    892 {
    893  init_simd();
    894 
    895  /* The code is optimised for these values only */
    896  if (DCTSIZE != 8)
    897    return 0;
    898  if (sizeof(JCOEF) != 2)
    899    return 0;
    900  if (BITS_IN_JSAMPLE != 8)
    901    return 0;
    902  if (sizeof(JDIMENSION) != 4)
    903    return 0;
    904  if (sizeof(ISLOW_MULT_TYPE) != 2)
    905    return 0;
    906 
    907  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
    908    return 1;
    909 
    910  return 0;
    911 }
    912 
    913 GLOBAL(void)
    914 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
    915               JCOEFPTR coef_block, JSAMPARRAY output_buf,
    916               JDIMENSION output_col)
    917 {
    918  jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
    919 }
    920 
    921 GLOBAL(void)
    922 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
    923               JCOEFPTR coef_block, JSAMPARRAY output_buf,
    924               JDIMENSION output_col)
    925 {
    926  jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
    927 }
    928 
    929 GLOBAL(int)
    930 jsimd_can_idct_islow(void)
    931 {
    932  init_simd();
    933 
    934  /* The code is optimised for these values only */
    935  if (DCTSIZE != 8)
    936    return 0;
    937  if (sizeof(JCOEF) != 2)
    938    return 0;
    939  if (BITS_IN_JSAMPLE != 8)
    940    return 0;
    941  if (sizeof(JDIMENSION) != 4)
    942    return 0;
    943  if (sizeof(ISLOW_MULT_TYPE) != 2)
    944    return 0;
    945 
    946  if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2))
    947    return 1;
    948  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
    949    return 1;
    950 
    951  return 0;
    952 }
    953 
    954 GLOBAL(int)
    955 jsimd_can_idct_ifast(void)
    956 {
    957  init_simd();
    958 
    959  /* The code is optimised for these values only */
    960  if (DCTSIZE != 8)
    961    return 0;
    962  if (sizeof(JCOEF) != 2)
    963    return 0;
    964  if (BITS_IN_JSAMPLE != 8)
    965    return 0;
    966  if (sizeof(JDIMENSION) != 4)
    967    return 0;
    968  if (sizeof(IFAST_MULT_TYPE) != 2)
    969    return 0;
    970  if (IFAST_SCALE_BITS != 2)
    971    return 0;
    972 
    973  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
    974    return 1;
    975 
    976  return 0;
    977 }
    978 
    979 GLOBAL(int)
    980 jsimd_can_idct_float(void)
    981 {
    982  init_simd();
    983 
    984  if (DCTSIZE != 8)
    985    return 0;
    986  if (sizeof(JCOEF) != 2)
    987    return 0;
    988  if (BITS_IN_JSAMPLE != 8)
    989    return 0;
    990  if (sizeof(JDIMENSION) != 4)
    991    return 0;
    992  if (sizeof(FAST_FLOAT) != 4)
    993    return 0;
    994  if (sizeof(FLOAT_MULT_TYPE) != 4)
    995    return 0;
    996 
    997  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
    998    return 1;
    999 
   1000  return 0;
   1001 }
   1002 
   1003 GLOBAL(void)
   1004 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
   1005                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
   1006                 JDIMENSION output_col)
   1007 {
   1008  if (simd_support == ~0U)
   1009    init_simd();
   1010 
   1011  if (simd_support & JSIMD_AVX2)
   1012    jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
   1013                          output_col);
   1014  else
   1015    jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
   1016                          output_col);
   1017 }
   1018 
   1019 GLOBAL(void)
   1020 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
   1021                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
   1022                 JDIMENSION output_col)
   1023 {
   1024  jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
   1025                        output_col);
   1026 }
   1027 
   1028 GLOBAL(void)
   1029 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
   1030                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
   1031                 JDIMENSION output_col)
   1032 {
   1033  jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
   1034                        output_col);
   1035 }
   1036 
   1037 GLOBAL(int)
   1038 jsimd_can_huff_encode_one_block(void)
   1039 {
   1040  init_simd();
   1041 
   1042  if (DCTSIZE != 8)
   1043    return 0;
   1044  if (sizeof(JCOEF) != 2)
   1045    return 0;
   1046 
   1047  if ((simd_support & JSIMD_SSE2) && simd_huffman &&
   1048      IS_ALIGNED_SSE(jconst_huff_encode_one_block))
   1049    return 1;
   1050 
   1051  return 0;
   1052 }
   1053 
   1054 GLOBAL(JOCTET *)
   1055 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
   1056                            int last_dc_val, c_derived_tbl *dctbl,
   1057                            c_derived_tbl *actbl)
   1058 {
   1059  return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
   1060                                          dctbl, actbl);
   1061 }
   1062 
   1063 GLOBAL(int)
   1064 jsimd_can_encode_mcu_AC_first_prepare(void)
   1065 {
   1066  init_simd();
   1067 
   1068  if (DCTSIZE != 8)
   1069    return 0;
   1070  if (sizeof(JCOEF) != 2)
   1071    return 0;
   1072  if (simd_support & JSIMD_SSE2)
   1073    return 1;
   1074 
   1075  return 0;
   1076 }
   1077 
   1078 GLOBAL(void)
   1079 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
   1080                                  const int *jpeg_natural_order_start, int Sl,
   1081                                  int Al, UJCOEF *values, size_t *zerobits)
   1082 {
   1083  jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
   1084                                         Sl, Al, values, zerobits);
   1085 }
   1086 
   1087 GLOBAL(int)
   1088 jsimd_can_encode_mcu_AC_refine_prepare(void)
   1089 {
   1090  init_simd();
   1091 
   1092  if (DCTSIZE != 8)
   1093    return 0;
   1094  if (sizeof(JCOEF) != 2)
   1095    return 0;
   1096  if (simd_support & JSIMD_SSE2)
   1097    return 1;
   1098 
   1099  return 0;
   1100 }
   1101 
   1102 GLOBAL(int)
   1103 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
   1104                                   const int *jpeg_natural_order_start, int Sl,
   1105                                   int Al, UJCOEF *absvalues, size_t *bits)
   1106 {
   1107  return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
   1108                                                 jpeg_natural_order_start,
   1109                                                 Sl, Al, absvalues, bits);
   1110 }