tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

jsimd.c (35555B)


      1 /*
      2 * jsimd_i386.c
      3 *
      4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
      5 * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022-2023, D. R. Commander.
      6 * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
      7 *
      8 * Based on the x86 SIMD extension for IJG JPEG library,
      9 * Copyright (C) 1999-2006, MIYASAKA Masaru.
     10 * For conditions of distribution and use, see copyright notice in jsimdext.inc
     11 *
     12 * This file contains the interface between the "normal" portions
     13 * of the library and the SIMD implementations when running on a
     14 * 32-bit x86 architecture.
     15 */
     16 
     17 #define JPEG_INTERNALS
     18 #include "../../jinclude.h"
     19 #include "../../jpeglib.h"
     20 #include "../../jsimd.h"
     21 #include "../../jdct.h"
     22 #include "../../jsimddct.h"
     23 #include "../jsimd.h"
     24 
     25 /*
     26 * In the PIC cases, we have no guarantee that constants will keep
     27 * their alignment. This macro allows us to verify it at runtime.
     28 */
     29 #define IS_ALIGNED(ptr, order)  (((unsigned)ptr & ((1 << order) - 1)) == 0)
     30 
     31 #define IS_ALIGNED_SSE(ptr)  (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
     32 #define IS_ALIGNED_AVX(ptr)  (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */
     33 
/*
 * Cached SIMD capability bitmask.  The all-ones initial value is the
 * "not yet probed" sentinel that init_simd() tests for before querying the
 * CPU.  (THREAD_LOCAL presumably expands to a thread-local storage
 * specifier -- confirm in the project headers.)
 */
static THREAD_LOCAL unsigned int simd_support = (unsigned int)(~0);
/* Starts enabled; init_simd() clears it when JSIMD_NOHUFFENC is "1". */
static THREAD_LOCAL unsigned int simd_huffman = 1;
     36 
     37 /*
     38 * Check what SIMD accelerations are supported.
     39 */
     40 LOCAL(void)
     41 init_simd(void)
     42 {
     43 #ifndef NO_GETENV
     44  char env[2] = { 0 };
     45 #endif
     46 
     47  if (simd_support != ~0U)
     48    return;
     49 
     50  simd_support = jpeg_simd_cpu_support();
     51 
     52 #ifndef NO_GETENV
     53  /* Force different settings through environment variables */
     54  if (!GETENV_S(env, 2, "JSIMD_FORCEMMX") && !strcmp(env, "1"))
     55    simd_support &= JSIMD_MMX;
     56  if (!GETENV_S(env, 2, "JSIMD_FORCE3DNOW") && !strcmp(env, "1"))
     57    simd_support &= JSIMD_3DNOW | JSIMD_MMX;
     58  if (!GETENV_S(env, 2, "JSIMD_FORCESSE") && !strcmp(env, "1"))
     59    simd_support &= JSIMD_SSE | JSIMD_MMX;
     60  if (!GETENV_S(env, 2, "JSIMD_FORCESSE2") && !strcmp(env, "1"))
     61    simd_support &= JSIMD_SSE2;
     62  if (!GETENV_S(env, 2, "JSIMD_FORCEAVX2") && !strcmp(env, "1"))
     63    simd_support &= JSIMD_AVX2;
     64  if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
     65    simd_support = 0;
     66  if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
     67    simd_huffman = 0;
     68 #endif
     69 }
     70 
     71 GLOBAL(int)
     72 jsimd_can_rgb_ycc(void)
     73 {
     74  init_simd();
     75 
     76  /* The code is optimised for these values only */
     77  if (BITS_IN_JSAMPLE != 8)
     78    return 0;
     79  if (sizeof(JDIMENSION) != 4)
     80    return 0;
     81  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
     82    return 0;
     83 
     84  if ((simd_support & JSIMD_AVX2) &&
     85      IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2))
     86    return 1;
     87  if ((simd_support & JSIMD_SSE2) &&
     88      IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
     89    return 1;
     90  if (simd_support & JSIMD_MMX)
     91    return 1;
     92 
     93  return 0;
     94 }
     95 
     96 GLOBAL(int)
     97 jsimd_can_rgb_gray(void)
     98 {
     99  init_simd();
    100 
    101  /* The code is optimised for these values only */
    102  if (BITS_IN_JSAMPLE != 8)
    103    return 0;
    104  if (sizeof(JDIMENSION) != 4)
    105    return 0;
    106  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
    107    return 0;
    108 
    109  if ((simd_support & JSIMD_AVX2) &&
    110      IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2))
    111    return 1;
    112  if ((simd_support & JSIMD_SSE2) &&
    113      IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
    114    return 1;
    115  if (simd_support & JSIMD_MMX)
    116    return 1;
    117 
    118  return 0;
    119 }
    120 
    121 GLOBAL(int)
    122 jsimd_can_ycc_rgb(void)
    123 {
    124  init_simd();
    125 
    126  /* The code is optimised for these values only */
    127  if (BITS_IN_JSAMPLE != 8)
    128    return 0;
    129  if (sizeof(JDIMENSION) != 4)
    130    return 0;
    131  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
    132    return 0;
    133 
    134  if ((simd_support & JSIMD_AVX2) &&
    135      IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2))
    136    return 1;
    137  if ((simd_support & JSIMD_SSE2) &&
    138      IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
    139    return 1;
    140  if (simd_support & JSIMD_MMX)
    141    return 1;
    142 
    143  return 0;
    144 }
    145 
/*
 * Always returns 0: this file never reports a SIMD YCC -> RGB565 converter
 * as usable.
 */
GLOBAL(int)
jsimd_can_ycc_rgb565(void)
{
  return 0;
}
    151 
    152 GLOBAL(void)
    153 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
    154                      JSAMPIMAGE output_buf, JDIMENSION output_row,
    155                      int num_rows)
    156 {
    157  void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
    158  void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
    159  void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
    160 
    161  if (simd_support == ~0U)
    162    init_simd();
    163 
    164  switch (cinfo->in_color_space) {
    165  case JCS_EXT_RGB:
    166    avx2fct = jsimd_extrgb_ycc_convert_avx2;
    167    sse2fct = jsimd_extrgb_ycc_convert_sse2;
    168    mmxfct = jsimd_extrgb_ycc_convert_mmx;
    169    break;
    170  case JCS_EXT_RGBX:
    171  case JCS_EXT_RGBA:
    172    avx2fct = jsimd_extrgbx_ycc_convert_avx2;
    173    sse2fct = jsimd_extrgbx_ycc_convert_sse2;
    174    mmxfct = jsimd_extrgbx_ycc_convert_mmx;
    175    break;
    176  case JCS_EXT_BGR:
    177    avx2fct = jsimd_extbgr_ycc_convert_avx2;
    178    sse2fct = jsimd_extbgr_ycc_convert_sse2;
    179    mmxfct = jsimd_extbgr_ycc_convert_mmx;
    180    break;
    181  case JCS_EXT_BGRX:
    182  case JCS_EXT_BGRA:
    183    avx2fct = jsimd_extbgrx_ycc_convert_avx2;
    184    sse2fct = jsimd_extbgrx_ycc_convert_sse2;
    185    mmxfct = jsimd_extbgrx_ycc_convert_mmx;
    186    break;
    187  case JCS_EXT_XBGR:
    188  case JCS_EXT_ABGR:
    189    avx2fct = jsimd_extxbgr_ycc_convert_avx2;
    190    sse2fct = jsimd_extxbgr_ycc_convert_sse2;
    191    mmxfct = jsimd_extxbgr_ycc_convert_mmx;
    192    break;
    193  case JCS_EXT_XRGB:
    194  case JCS_EXT_ARGB:
    195    avx2fct = jsimd_extxrgb_ycc_convert_avx2;
    196    sse2fct = jsimd_extxrgb_ycc_convert_sse2;
    197    mmxfct = jsimd_extxrgb_ycc_convert_mmx;
    198    break;
    199  default:
    200    avx2fct = jsimd_rgb_ycc_convert_avx2;
    201    sse2fct = jsimd_rgb_ycc_convert_sse2;
    202    mmxfct = jsimd_rgb_ycc_convert_mmx;
    203    break;
    204  }
    205 
    206  if (simd_support & JSIMD_AVX2)
    207    avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
    208  else if (simd_support & JSIMD_SSE2)
    209    sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
    210  else
    211    mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
    212 }
    213 
    214 GLOBAL(void)
    215 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
    216                       JSAMPIMAGE output_buf, JDIMENSION output_row,
    217                       int num_rows)
    218 {
    219  void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
    220  void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
    221  void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
    222 
    223  if (simd_support == ~0U)
    224    init_simd();
    225 
    226  switch (cinfo->in_color_space) {
    227  case JCS_EXT_RGB:
    228    avx2fct = jsimd_extrgb_gray_convert_avx2;
    229    sse2fct = jsimd_extrgb_gray_convert_sse2;
    230    mmxfct = jsimd_extrgb_gray_convert_mmx;
    231    break;
    232  case JCS_EXT_RGBX:
    233  case JCS_EXT_RGBA:
    234    avx2fct = jsimd_extrgbx_gray_convert_avx2;
    235    sse2fct = jsimd_extrgbx_gray_convert_sse2;
    236    mmxfct = jsimd_extrgbx_gray_convert_mmx;
    237    break;
    238  case JCS_EXT_BGR:
    239    avx2fct = jsimd_extbgr_gray_convert_avx2;
    240    sse2fct = jsimd_extbgr_gray_convert_sse2;
    241    mmxfct = jsimd_extbgr_gray_convert_mmx;
    242    break;
    243  case JCS_EXT_BGRX:
    244  case JCS_EXT_BGRA:
    245    avx2fct = jsimd_extbgrx_gray_convert_avx2;
    246    sse2fct = jsimd_extbgrx_gray_convert_sse2;
    247    mmxfct = jsimd_extbgrx_gray_convert_mmx;
    248    break;
    249  case JCS_EXT_XBGR:
    250  case JCS_EXT_ABGR:
    251    avx2fct = jsimd_extxbgr_gray_convert_avx2;
    252    sse2fct = jsimd_extxbgr_gray_convert_sse2;
    253    mmxfct = jsimd_extxbgr_gray_convert_mmx;
    254    break;
    255  case JCS_EXT_XRGB:
    256  case JCS_EXT_ARGB:
    257    avx2fct = jsimd_extxrgb_gray_convert_avx2;
    258    sse2fct = jsimd_extxrgb_gray_convert_sse2;
    259    mmxfct = jsimd_extxrgb_gray_convert_mmx;
    260    break;
    261  default:
    262    avx2fct = jsimd_rgb_gray_convert_avx2;
    263    sse2fct = jsimd_rgb_gray_convert_sse2;
    264    mmxfct = jsimd_rgb_gray_convert_mmx;
    265    break;
    266  }
    267 
    268  if (simd_support & JSIMD_AVX2)
    269    avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
    270  else if (simd_support & JSIMD_SSE2)
    271    sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
    272  else
    273    mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
    274 }
    275 
    276 GLOBAL(void)
    277 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
    278                      JDIMENSION input_row, JSAMPARRAY output_buf,
    279                      int num_rows)
    280 {
    281  void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
    282  void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
    283  void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
    284 
    285  if (simd_support == ~0U)
    286    init_simd();
    287 
    288  switch (cinfo->out_color_space) {
    289  case JCS_EXT_RGB:
    290    avx2fct = jsimd_ycc_extrgb_convert_avx2;
    291    sse2fct = jsimd_ycc_extrgb_convert_sse2;
    292    mmxfct = jsimd_ycc_extrgb_convert_mmx;
    293    break;
    294  case JCS_EXT_RGBX:
    295  case JCS_EXT_RGBA:
    296    avx2fct = jsimd_ycc_extrgbx_convert_avx2;
    297    sse2fct = jsimd_ycc_extrgbx_convert_sse2;
    298    mmxfct = jsimd_ycc_extrgbx_convert_mmx;
    299    break;
    300  case JCS_EXT_BGR:
    301    avx2fct = jsimd_ycc_extbgr_convert_avx2;
    302    sse2fct = jsimd_ycc_extbgr_convert_sse2;
    303    mmxfct = jsimd_ycc_extbgr_convert_mmx;
    304    break;
    305  case JCS_EXT_BGRX:
    306  case JCS_EXT_BGRA:
    307    avx2fct = jsimd_ycc_extbgrx_convert_avx2;
    308    sse2fct = jsimd_ycc_extbgrx_convert_sse2;
    309    mmxfct = jsimd_ycc_extbgrx_convert_mmx;
    310    break;
    311  case JCS_EXT_XBGR:
    312  case JCS_EXT_ABGR:
    313    avx2fct = jsimd_ycc_extxbgr_convert_avx2;
    314    sse2fct = jsimd_ycc_extxbgr_convert_sse2;
    315    mmxfct = jsimd_ycc_extxbgr_convert_mmx;
    316    break;
    317  case JCS_EXT_XRGB:
    318  case JCS_EXT_ARGB:
    319    avx2fct = jsimd_ycc_extxrgb_convert_avx2;
    320    sse2fct = jsimd_ycc_extxrgb_convert_sse2;
    321    mmxfct = jsimd_ycc_extxrgb_convert_mmx;
    322    break;
    323  default:
    324    avx2fct = jsimd_ycc_rgb_convert_avx2;
    325    sse2fct = jsimd_ycc_rgb_convert_sse2;
    326    mmxfct = jsimd_ycc_rgb_convert_mmx;
    327    break;
    328  }
    329 
    330  if (simd_support & JSIMD_AVX2)
    331    avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
    332  else if (simd_support & JSIMD_SSE2)
    333    sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
    334  else
    335    mmxfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
    336 }
    337 
/*
 * Intentionally empty stub: jsimd_can_ycc_rgb565() in this file always
 * returns 0, and this function performs no work and ignores all of its
 * arguments.
 */
GLOBAL(void)
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
                         JDIMENSION input_row, JSAMPARRAY output_buf,
                         int num_rows)
{
}
    344 
    345 GLOBAL(int)
    346 jsimd_can_h2v2_downsample(void)
    347 {
    348  init_simd();
    349 
    350  /* The code is optimised for these values only */
    351  if (BITS_IN_JSAMPLE != 8)
    352    return 0;
    353  if (sizeof(JDIMENSION) != 4)
    354    return 0;
    355 
    356  if (simd_support & JSIMD_AVX2)
    357    return 1;
    358  if (simd_support & JSIMD_SSE2)
    359    return 1;
    360  if (simd_support & JSIMD_MMX)
    361    return 1;
    362 
    363  return 0;
    364 }
    365 
    366 GLOBAL(int)
    367 jsimd_can_h2v1_downsample(void)
    368 {
    369  init_simd();
    370 
    371  /* The code is optimised for these values only */
    372  if (BITS_IN_JSAMPLE != 8)
    373    return 0;
    374  if (sizeof(JDIMENSION) != 4)
    375    return 0;
    376 
    377  if (simd_support & JSIMD_AVX2)
    378    return 1;
    379  if (simd_support & JSIMD_SSE2)
    380    return 1;
    381  if (simd_support & JSIMD_MMX)
    382    return 1;
    383 
    384  return 0;
    385 }
    386 
    387 GLOBAL(void)
    388 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
    389                      JSAMPARRAY input_data, JSAMPARRAY output_data)
    390 {
    391  if (simd_support == ~0U)
    392    init_simd();
    393 
    394  if (simd_support & JSIMD_AVX2)
    395    jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
    396                               compptr->v_samp_factor,
    397                               compptr->width_in_blocks, input_data,
    398                               output_data);
    399  else if (simd_support & JSIMD_SSE2)
    400    jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
    401                               compptr->v_samp_factor,
    402                               compptr->width_in_blocks, input_data,
    403                               output_data);
    404  else
    405    jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
    406                              compptr->v_samp_factor, compptr->width_in_blocks,
    407                              input_data, output_data);
    408 }
    409 
    410 GLOBAL(void)
    411 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
    412                      JSAMPARRAY input_data, JSAMPARRAY output_data)
    413 {
    414  if (simd_support == ~0U)
    415    init_simd();
    416 
    417  if (simd_support & JSIMD_AVX2)
    418    jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
    419                               compptr->v_samp_factor,
    420                               compptr->width_in_blocks, input_data,
    421                               output_data);
    422  else if (simd_support & JSIMD_SSE2)
    423    jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
    424                               compptr->v_samp_factor,
    425                               compptr->width_in_blocks, input_data,
    426                               output_data);
    427  else
    428    jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
    429                              compptr->v_samp_factor, compptr->width_in_blocks,
    430                              input_data, output_data);
    431 }
    432 
    433 GLOBAL(int)
    434 jsimd_can_h2v2_upsample(void)
    435 {
    436  init_simd();
    437 
    438  /* The code is optimised for these values only */
    439  if (BITS_IN_JSAMPLE != 8)
    440    return 0;
    441  if (sizeof(JDIMENSION) != 4)
    442    return 0;
    443 
    444  if (simd_support & JSIMD_AVX2)
    445    return 1;
    446  if (simd_support & JSIMD_SSE2)
    447    return 1;
    448  if (simd_support & JSIMD_MMX)
    449    return 1;
    450 
    451  return 0;
    452 }
    453 
    454 GLOBAL(int)
    455 jsimd_can_h2v1_upsample(void)
    456 {
    457  init_simd();
    458 
    459  /* The code is optimised for these values only */
    460  if (BITS_IN_JSAMPLE != 8)
    461    return 0;
    462  if (sizeof(JDIMENSION) != 4)
    463    return 0;
    464 
    465  if (simd_support & JSIMD_AVX2)
    466    return 1;
    467  if (simd_support & JSIMD_SSE2)
    468    return 1;
    469  if (simd_support & JSIMD_MMX)
    470    return 1;
    471 
    472  return 0;
    473 }
    474 
    475 GLOBAL(void)
    476 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
    477                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
    478 {
    479  if (simd_support == ~0U)
    480    init_simd();
    481 
    482  if (simd_support & JSIMD_AVX2)
    483    jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
    484                             input_data, output_data_ptr);
    485  else if (simd_support & JSIMD_SSE2)
    486    jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
    487                             input_data, output_data_ptr);
    488  else
    489    jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
    490                            input_data, output_data_ptr);
    491 }
    492 
    493 GLOBAL(void)
    494 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
    495                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
    496 {
    497  if (simd_support == ~0U)
    498    init_simd();
    499 
    500  if (simd_support & JSIMD_AVX2)
    501    jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
    502                             input_data, output_data_ptr);
    503  else if (simd_support & JSIMD_SSE2)
    504    jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
    505                             input_data, output_data_ptr);
    506  else
    507    jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
    508                            input_data, output_data_ptr);
    509 }
    510 
    511 GLOBAL(int)
    512 jsimd_can_h2v2_fancy_upsample(void)
    513 {
    514  init_simd();
    515 
    516  /* The code is optimised for these values only */
    517  if (BITS_IN_JSAMPLE != 8)
    518    return 0;
    519  if (sizeof(JDIMENSION) != 4)
    520    return 0;
    521 
    522  if ((simd_support & JSIMD_AVX2) &&
    523      IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
    524    return 1;
    525  if ((simd_support & JSIMD_SSE2) &&
    526      IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
    527    return 1;
    528  if (simd_support & JSIMD_MMX)
    529    return 1;
    530 
    531  return 0;
    532 }
    533 
    534 GLOBAL(int)
    535 jsimd_can_h2v1_fancy_upsample(void)
    536 {
    537  init_simd();
    538 
    539  /* The code is optimised for these values only */
    540  if (BITS_IN_JSAMPLE != 8)
    541    return 0;
    542  if (sizeof(JDIMENSION) != 4)
    543    return 0;
    544 
    545  if ((simd_support & JSIMD_AVX2) &&
    546      IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
    547    return 1;
    548  if ((simd_support & JSIMD_SSE2) &&
    549      IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
    550    return 1;
    551  if (simd_support & JSIMD_MMX)
    552    return 1;
    553 
    554  return 0;
    555 }
    556 
    557 GLOBAL(void)
    558 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
    559                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
    560 {
    561  if (simd_support == ~0U)
    562    init_simd();
    563 
    564  if (simd_support & JSIMD_AVX2)
    565    jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
    566                                   compptr->downsampled_width, input_data,
    567                                   output_data_ptr);
    568  else if (simd_support & JSIMD_SSE2)
    569    jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
    570                                   compptr->downsampled_width, input_data,
    571                                   output_data_ptr);
    572  else
    573    jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor,
    574                                  compptr->downsampled_width, input_data,
    575                                  output_data_ptr);
    576 }
    577 
    578 GLOBAL(void)
    579 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
    580                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
    581 {
    582  if (simd_support == ~0U)
    583    init_simd();
    584 
    585  if (simd_support & JSIMD_AVX2)
    586    jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
    587                                   compptr->downsampled_width, input_data,
    588                                   output_data_ptr);
    589  else if (simd_support & JSIMD_SSE2)
    590    jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
    591                                   compptr->downsampled_width, input_data,
    592                                   output_data_ptr);
    593  else
    594    jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor,
    595                                  compptr->downsampled_width, input_data,
    596                                  output_data_ptr);
    597 }
    598 
    599 GLOBAL(int)
    600 jsimd_can_h2v2_merged_upsample(void)
    601 {
    602  init_simd();
    603 
    604  /* The code is optimised for these values only */
    605  if (BITS_IN_JSAMPLE != 8)
    606    return 0;
    607  if (sizeof(JDIMENSION) != 4)
    608    return 0;
    609 
    610  if ((simd_support & JSIMD_AVX2) &&
    611      IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
    612    return 1;
    613  if ((simd_support & JSIMD_SSE2) &&
    614      IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
    615    return 1;
    616  if (simd_support & JSIMD_MMX)
    617    return 1;
    618 
    619  return 0;
    620 }
    621 
    622 GLOBAL(int)
    623 jsimd_can_h2v1_merged_upsample(void)
    624 {
    625  init_simd();
    626 
    627  /* The code is optimised for these values only */
    628  if (BITS_IN_JSAMPLE != 8)
    629    return 0;
    630  if (sizeof(JDIMENSION) != 4)
    631    return 0;
    632 
    633  if ((simd_support & JSIMD_AVX2) &&
    634      IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
    635    return 1;
    636  if ((simd_support & JSIMD_SSE2) &&
    637      IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
    638    return 1;
    639  if (simd_support & JSIMD_MMX)
    640    return 1;
    641 
    642  return 0;
    643 }
    644 
    645 GLOBAL(void)
    646 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
    647                           JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
    648 {
    649  void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
    650  void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
    651  void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
    652 
    653  if (simd_support == ~0U)
    654    init_simd();
    655 
    656  switch (cinfo->out_color_space) {
    657  case JCS_EXT_RGB:
    658    avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
    659    sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2;
    660    mmxfct = jsimd_h2v2_extrgb_merged_upsample_mmx;
    661    break;
    662  case JCS_EXT_RGBX:
    663  case JCS_EXT_RGBA:
    664    avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2;
    665    sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2;
    666    mmxfct = jsimd_h2v2_extrgbx_merged_upsample_mmx;
    667    break;
    668  case JCS_EXT_BGR:
    669    avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2;
    670    sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2;
    671    mmxfct = jsimd_h2v2_extbgr_merged_upsample_mmx;
    672    break;
    673  case JCS_EXT_BGRX:
    674  case JCS_EXT_BGRA:
    675    avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2;
    676    sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2;
    677    mmxfct = jsimd_h2v2_extbgrx_merged_upsample_mmx;
    678    break;
    679  case JCS_EXT_XBGR:
    680  case JCS_EXT_ABGR:
    681    avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2;
    682    sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2;
    683    mmxfct = jsimd_h2v2_extxbgr_merged_upsample_mmx;
    684    break;
    685  case JCS_EXT_XRGB:
    686  case JCS_EXT_ARGB:
    687    avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2;
    688    sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2;
    689    mmxfct = jsimd_h2v2_extxrgb_merged_upsample_mmx;
    690    break;
    691  default:
    692    avx2fct = jsimd_h2v2_merged_upsample_avx2;
    693    sse2fct = jsimd_h2v2_merged_upsample_sse2;
    694    mmxfct = jsimd_h2v2_merged_upsample_mmx;
    695    break;
    696  }
    697 
    698  if (simd_support & JSIMD_AVX2)
    699    avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
    700  else if (simd_support & JSIMD_SSE2)
    701    sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
    702  else
    703    mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
    704 }
    705 
    706 GLOBAL(void)
    707 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
    708                           JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
    709 {
    710  void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
    711  void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
    712  void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
    713 
    714  if (simd_support == ~0U)
    715    init_simd();
    716 
    717  switch (cinfo->out_color_space) {
    718  case JCS_EXT_RGB:
    719    avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
    720    sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2;
    721    mmxfct = jsimd_h2v1_extrgb_merged_upsample_mmx;
    722    break;
    723  case JCS_EXT_RGBX:
    724  case JCS_EXT_RGBA:
    725    avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2;
    726    sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2;
    727    mmxfct = jsimd_h2v1_extrgbx_merged_upsample_mmx;
    728    break;
    729  case JCS_EXT_BGR:
    730    avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2;
    731    sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2;
    732    mmxfct = jsimd_h2v1_extbgr_merged_upsample_mmx;
    733    break;
    734  case JCS_EXT_BGRX:
    735  case JCS_EXT_BGRA:
    736    avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2;
    737    sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2;
    738    mmxfct = jsimd_h2v1_extbgrx_merged_upsample_mmx;
    739    break;
    740  case JCS_EXT_XBGR:
    741  case JCS_EXT_ABGR:
    742    avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2;
    743    sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2;
    744    mmxfct = jsimd_h2v1_extxbgr_merged_upsample_mmx;
    745    break;
    746  case JCS_EXT_XRGB:
    747  case JCS_EXT_ARGB:
    748    avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2;
    749    sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2;
    750    mmxfct = jsimd_h2v1_extxrgb_merged_upsample_mmx;
    751    break;
    752  default:
    753    avx2fct = jsimd_h2v1_merged_upsample_avx2;
    754    sse2fct = jsimd_h2v1_merged_upsample_sse2;
    755    mmxfct = jsimd_h2v1_merged_upsample_mmx;
    756    break;
    757  }
    758 
    759  if (simd_support & JSIMD_AVX2)
    760    avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
    761  else if (simd_support & JSIMD_SSE2)
    762    sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
    763  else
    764    mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
    765 }
    766 
    767 GLOBAL(int)
    768 jsimd_can_convsamp(void)
    769 {
    770  init_simd();
    771 
    772  /* The code is optimised for these values only */
    773  if (DCTSIZE != 8)
    774    return 0;
    775  if (BITS_IN_JSAMPLE != 8)
    776    return 0;
    777  if (sizeof(JDIMENSION) != 4)
    778    return 0;
    779  if (sizeof(DCTELEM) != 2)
    780    return 0;
    781 
    782  if (simd_support & JSIMD_AVX2)
    783    return 1;
    784  if (simd_support & JSIMD_SSE2)
    785    return 1;
    786  if (simd_support & JSIMD_MMX)
    787    return 1;
    788 
    789  return 0;
    790 }
    791 
    792 GLOBAL(int)
    793 jsimd_can_convsamp_float(void)
    794 {
    795  init_simd();
    796 
    797  /* The code is optimised for these values only */
    798  if (DCTSIZE != 8)
    799    return 0;
    800  if (BITS_IN_JSAMPLE != 8)
    801    return 0;
    802  if (sizeof(JDIMENSION) != 4)
    803    return 0;
    804  if (sizeof(FAST_FLOAT) != 4)
    805    return 0;
    806 
    807  if (simd_support & JSIMD_SSE2)
    808    return 1;
    809  if (simd_support & JSIMD_SSE)
    810    return 1;
    811  if (simd_support & JSIMD_3DNOW)
    812    return 1;
    813 
    814  return 0;
    815 }
    816 
    817 GLOBAL(void)
    818 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
    819               DCTELEM *workspace)
    820 {
    821  if (simd_support == ~0U)
    822    init_simd();
    823 
    824  if (simd_support & JSIMD_AVX2)
    825    jsimd_convsamp_avx2(sample_data, start_col, workspace);
    826  else if (simd_support & JSIMD_SSE2)
    827    jsimd_convsamp_sse2(sample_data, start_col, workspace);
    828  else
    829    jsimd_convsamp_mmx(sample_data, start_col, workspace);
    830 }
    831 
    832 GLOBAL(void)
    833 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
    834                     FAST_FLOAT *workspace)
    835 {
    836  if (simd_support == ~0U)
    837    init_simd();
    838 
    839  if (simd_support & JSIMD_SSE2)
    840    jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
    841  else if (simd_support & JSIMD_SSE)
    842    jsimd_convsamp_float_sse(sample_data, start_col, workspace);
    843  else
    844    jsimd_convsamp_float_3dnow(sample_data, start_col, workspace);
    845 }
    846 
    847 GLOBAL(int)
    848 jsimd_can_fdct_islow(void)
    849 {
    850  init_simd();
    851 
    852  /* The code is optimised for these values only */
    853  if (DCTSIZE != 8)
    854    return 0;
    855  if (sizeof(DCTELEM) != 2)
    856    return 0;
    857 
    858  if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2))
    859    return 1;
    860  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
    861    return 1;
    862  if (simd_support & JSIMD_MMX)
    863    return 1;
    864 
    865  return 0;
    866 }
    867 
    868 GLOBAL(int)
    869 jsimd_can_fdct_ifast(void)
    870 {
    871  init_simd();
    872 
    873  /* The code is optimised for these values only */
    874  if (DCTSIZE != 8)
    875    return 0;
    876  if (sizeof(DCTELEM) != 2)
    877    return 0;
    878 
    879  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
    880    return 1;
    881  if (simd_support & JSIMD_MMX)
    882    return 1;
    883 
    884  return 0;
    885 }
    886 
    887 GLOBAL(int)
    888 jsimd_can_fdct_float(void)
    889 {
    890  init_simd();
    891 
    892  /* The code is optimised for these values only */
    893  if (DCTSIZE != 8)
    894    return 0;
    895  if (sizeof(FAST_FLOAT) != 4)
    896    return 0;
    897 
    898  if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
    899    return 1;
    900  if (simd_support & JSIMD_3DNOW)
    901    return 1;
    902 
    903  return 0;
    904 }
    905 
    906 GLOBAL(void)
    907 jsimd_fdct_islow(DCTELEM *data)
    908 {
    909  if (simd_support == ~0U)
    910    init_simd();
    911 
    912  if (simd_support & JSIMD_AVX2)
    913    jsimd_fdct_islow_avx2(data);
    914  else if (simd_support & JSIMD_SSE2)
    915    jsimd_fdct_islow_sse2(data);
    916  else
    917    jsimd_fdct_islow_mmx(data);
    918 }
    919 
    920 GLOBAL(void)
    921 jsimd_fdct_ifast(DCTELEM *data)
    922 {
    923  if (simd_support == ~0U)
    924    init_simd();
    925 
    926  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
    927    jsimd_fdct_ifast_sse2(data);
    928  else
    929    jsimd_fdct_ifast_mmx(data);
    930 }
    931 
    932 GLOBAL(void)
    933 jsimd_fdct_float(FAST_FLOAT *data)
    934 {
    935  if (simd_support == ~0U)
    936    init_simd();
    937 
    938  if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
    939    jsimd_fdct_float_sse(data);
    940  else if (simd_support & JSIMD_3DNOW)
    941    jsimd_fdct_float_3dnow(data);
    942 }
    943 
    944 GLOBAL(int)
    945 jsimd_can_quantize(void)
    946 {
    947  init_simd();
    948 
    949  /* The code is optimised for these values only */
    950  if (DCTSIZE != 8)
    951    return 0;
    952  if (sizeof(JCOEF) != 2)
    953    return 0;
    954  if (sizeof(DCTELEM) != 2)
    955    return 0;
    956 
    957  if (simd_support & JSIMD_AVX2)
    958    return 1;
    959  if (simd_support & JSIMD_SSE2)
    960    return 1;
    961  if (simd_support & JSIMD_MMX)
    962    return 1;
    963 
    964  return 0;
    965 }
    966 
    967 GLOBAL(int)
    968 jsimd_can_quantize_float(void)
    969 {
    970  init_simd();
    971 
    972  /* The code is optimised for these values only */
    973  if (DCTSIZE != 8)
    974    return 0;
    975  if (sizeof(JCOEF) != 2)
    976    return 0;
    977  if (sizeof(FAST_FLOAT) != 4)
    978    return 0;
    979 
    980  if (simd_support & JSIMD_SSE2)
    981    return 1;
    982  if (simd_support & JSIMD_SSE)
    983    return 1;
    984  if (simd_support & JSIMD_3DNOW)
    985    return 1;
    986 
    987  return 0;
    988 }
    989 
    990 GLOBAL(void)
    991 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
    992 {
    993  if (simd_support == ~0U)
    994    init_simd();
    995 
    996  if (simd_support & JSIMD_AVX2)
    997    jsimd_quantize_avx2(coef_block, divisors, workspace);
    998  else if (simd_support & JSIMD_SSE2)
    999    jsimd_quantize_sse2(coef_block, divisors, workspace);
   1000  else
   1001    jsimd_quantize_mmx(coef_block, divisors, workspace);
   1002 }
   1003 
   1004 GLOBAL(void)
   1005 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
   1006                     FAST_FLOAT *workspace)
   1007 {
   1008  if (simd_support == ~0U)
   1009    init_simd();
   1010 
   1011  if (simd_support & JSIMD_SSE2)
   1012    jsimd_quantize_float_sse2(coef_block, divisors, workspace);
   1013  else if (simd_support & JSIMD_SSE)
   1014    jsimd_quantize_float_sse(coef_block, divisors, workspace);
   1015  else
   1016    jsimd_quantize_float_3dnow(coef_block, divisors, workspace);
   1017 }
   1018 
   1019 GLOBAL(int)
   1020 jsimd_can_idct_2x2(void)
   1021 {
   1022  init_simd();
   1023 
   1024  /* The code is optimised for these values only */
   1025  if (DCTSIZE != 8)
   1026    return 0;
   1027  if (sizeof(JCOEF) != 2)
   1028    return 0;
   1029  if (BITS_IN_JSAMPLE != 8)
   1030    return 0;
   1031  if (sizeof(JDIMENSION) != 4)
   1032    return 0;
   1033  if (sizeof(ISLOW_MULT_TYPE) != 2)
   1034    return 0;
   1035 
   1036  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
   1037    return 1;
   1038  if (simd_support & JSIMD_MMX)
   1039    return 1;
   1040 
   1041  return 0;
   1042 }
   1043 
   1044 GLOBAL(int)
   1045 jsimd_can_idct_4x4(void)
   1046 {
   1047  init_simd();
   1048 
   1049  /* The code is optimised for these values only */
   1050  if (DCTSIZE != 8)
   1051    return 0;
   1052  if (sizeof(JCOEF) != 2)
   1053    return 0;
   1054  if (BITS_IN_JSAMPLE != 8)
   1055    return 0;
   1056  if (sizeof(JDIMENSION) != 4)
   1057    return 0;
   1058  if (sizeof(ISLOW_MULT_TYPE) != 2)
   1059    return 0;
   1060 
   1061  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
   1062    return 1;
   1063  if (simd_support & JSIMD_MMX)
   1064    return 1;
   1065 
   1066  return 0;
   1067 }
   1068 
   1069 GLOBAL(void)
   1070 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
   1071               JCOEFPTR coef_block, JSAMPARRAY output_buf,
   1072               JDIMENSION output_col)
   1073 {
   1074  if (simd_support == ~0U)
   1075    init_simd();
   1076 
   1077  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
   1078    jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf,
   1079                        output_col);
   1080  else
   1081    jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col);
   1082 }
   1083 
   1084 GLOBAL(void)
   1085 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
   1086               JCOEFPTR coef_block, JSAMPARRAY output_buf,
   1087               JDIMENSION output_col)
   1088 {
   1089  if (simd_support == ~0U)
   1090    init_simd();
   1091 
   1092  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
   1093    jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf,
   1094                        output_col);
   1095  else
   1096    jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col);
   1097 }
   1098 
   1099 GLOBAL(int)
   1100 jsimd_can_idct_islow(void)
   1101 {
   1102  init_simd();
   1103 
   1104  /* The code is optimised for these values only */
   1105  if (DCTSIZE != 8)
   1106    return 0;
   1107  if (sizeof(JCOEF) != 2)
   1108    return 0;
   1109  if (BITS_IN_JSAMPLE != 8)
   1110    return 0;
   1111  if (sizeof(JDIMENSION) != 4)
   1112    return 0;
   1113  if (sizeof(ISLOW_MULT_TYPE) != 2)
   1114    return 0;
   1115 
   1116  if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2))
   1117    return 1;
   1118  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
   1119    return 1;
   1120  if (simd_support & JSIMD_MMX)
   1121    return 1;
   1122 
   1123  return 0;
   1124 }
   1125 
   1126 GLOBAL(int)
   1127 jsimd_can_idct_ifast(void)
   1128 {
   1129  init_simd();
   1130 
   1131  /* The code is optimised for these values only */
   1132  if (DCTSIZE != 8)
   1133    return 0;
   1134  if (sizeof(JCOEF) != 2)
   1135    return 0;
   1136  if (BITS_IN_JSAMPLE != 8)
   1137    return 0;
   1138  if (sizeof(JDIMENSION) != 4)
   1139    return 0;
   1140  if (sizeof(IFAST_MULT_TYPE) != 2)
   1141    return 0;
   1142  if (IFAST_SCALE_BITS != 2)
   1143    return 0;
   1144 
   1145  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
   1146    return 1;
   1147  if (simd_support & JSIMD_MMX)
   1148    return 1;
   1149 
   1150  return 0;
   1151 }
   1152 
   1153 GLOBAL(int)
   1154 jsimd_can_idct_float(void)
   1155 {
   1156  init_simd();
   1157 
   1158  if (DCTSIZE != 8)
   1159    return 0;
   1160  if (sizeof(JCOEF) != 2)
   1161    return 0;
   1162  if (BITS_IN_JSAMPLE != 8)
   1163    return 0;
   1164  if (sizeof(JDIMENSION) != 4)
   1165    return 0;
   1166  if (sizeof(FAST_FLOAT) != 4)
   1167    return 0;
   1168  if (sizeof(FLOAT_MULT_TYPE) != 4)
   1169    return 0;
   1170 
   1171  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
   1172    return 1;
   1173  if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
   1174    return 1;
   1175  if (simd_support & JSIMD_3DNOW)
   1176    return 1;
   1177 
   1178  return 0;
   1179 }
   1180 
   1181 GLOBAL(void)
   1182 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
   1183                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
   1184                 JDIMENSION output_col)
   1185 {
   1186  if (simd_support == ~0U)
   1187    init_simd();
   1188 
   1189  if (simd_support & JSIMD_AVX2)
   1190    jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
   1191                          output_col);
   1192  else if (simd_support & JSIMD_SSE2)
   1193    jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
   1194                          output_col);
   1195  else
   1196    jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf,
   1197                         output_col);
   1198 }
   1199 
   1200 GLOBAL(void)
   1201 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
   1202                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
   1203                 JDIMENSION output_col)
   1204 {
   1205  if (simd_support == ~0U)
   1206    init_simd();
   1207 
   1208  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
   1209    jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
   1210                          output_col);
   1211  else
   1212    jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf,
   1213                         output_col);
   1214 }
   1215 
   1216 GLOBAL(void)
   1217 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
   1218                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
   1219                 JDIMENSION output_col)
   1220 {
   1221  if (simd_support == ~0U)
   1222    init_simd();
   1223 
   1224  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
   1225    jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
   1226                          output_col);
   1227  else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
   1228    jsimd_idct_float_sse(compptr->dct_table, coef_block, output_buf,
   1229                         output_col);
   1230  else
   1231    jsimd_idct_float_3dnow(compptr->dct_table, coef_block, output_buf,
   1232                           output_col);
   1233 }
   1234 
   1235 GLOBAL(int)
   1236 jsimd_can_huff_encode_one_block(void)
   1237 {
   1238  init_simd();
   1239 
   1240  if (DCTSIZE != 8)
   1241    return 0;
   1242  if (sizeof(JCOEF) != 2)
   1243    return 0;
   1244 
   1245  if ((simd_support & JSIMD_SSE2) && simd_huffman &&
   1246      IS_ALIGNED_SSE(jconst_huff_encode_one_block))
   1247    return 1;
   1248 
   1249  return 0;
   1250 }
   1251 
   1252 GLOBAL(JOCTET *)
   1253 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
   1254                            int last_dc_val, c_derived_tbl *dctbl,
   1255                            c_derived_tbl *actbl)
   1256 {
   1257  return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
   1258                                          dctbl, actbl);
   1259 }
   1260 
   1261 GLOBAL(int)
   1262 jsimd_can_encode_mcu_AC_first_prepare(void)
   1263 {
   1264  init_simd();
   1265 
   1266  if (DCTSIZE != 8)
   1267    return 0;
   1268  if (sizeof(JCOEF) != 2)
   1269    return 0;
   1270  if (SIZEOF_SIZE_T != 4)
   1271    return 0;
   1272  if (simd_support & JSIMD_SSE2)
   1273    return 1;
   1274 
   1275  return 0;
   1276 }
   1277 
   1278 GLOBAL(void)
   1279 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
   1280                                  const int *jpeg_natural_order_start, int Sl,
   1281                                  int Al, UJCOEF *values, size_t *zerobits)
   1282 {
   1283  jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
   1284                                         Sl, Al, values, zerobits);
   1285 }
   1286 
   1287 GLOBAL(int)
   1288 jsimd_can_encode_mcu_AC_refine_prepare(void)
   1289 {
   1290  init_simd();
   1291 
   1292  if (DCTSIZE != 8)
   1293    return 0;
   1294  if (sizeof(JCOEF) != 2)
   1295    return 0;
   1296  if (SIZEOF_SIZE_T != 4)
   1297    return 0;
   1298  if (simd_support & JSIMD_SSE2)
   1299    return 1;
   1300 
   1301  return 0;
   1302 }
   1303 
   1304 GLOBAL(int)
   1305 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
   1306                                   const int *jpeg_natural_order_start, int Sl,
   1307                                   int Al, UJCOEF *absvalues, size_t *bits)
   1308 {
   1309  return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
   1310                                                 jpeg_natural_order_start,
   1311                                                 Sl, Al, absvalues, bits);
   1312 }