tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

jsimd.c (24260B)


      1 /*
      2 * jsimd_arm64.c
      3 *
      4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
      5 * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
      6 * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2020, 2022, D. R. Commander.
      7 * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
      8 * Copyright (C) 2020, Arm Limited.
      9 *
     10 * Based on the x86 SIMD extension for IJG JPEG library,
     11 * Copyright (C) 1999-2006, MIYASAKA Masaru.
     12 * For conditions of distribution and use, see copyright notice in jsimdext.inc
     13 *
     14 * This file contains the interface between the "normal" portions
     15 * of the library and the SIMD implementations when running on a
     16 * 64-bit Arm architecture.
     17 */
     18 
     19 #define JPEG_INTERNALS
     20 #include "../../../jinclude.h"
     21 #include "../../../jpeglib.h"
     22 #include "../../../jsimd.h"
     23 #include "../../../jdct.h"
     24 #include "../../../jsimddct.h"
     25 #include "../../jsimd.h"
     26 
     27 #include <ctype.h>
     28 
/* Bit flags stored in simd_features.  When a bit is clear (and NEON_INTRINSICS
   is not defined), the dispatch functions in this file select the
   corresponding "_slowld3", "_slowst3", or "_slowtbl" routine instead of the
   default one. */
#define JSIMD_FASTLD3  1
#define JSIMD_FASTST3  2
#define JSIMD_FASTTBL  4

/* Per-thread SIMD state.  simd_support holds ~0 until init_simd() has run;
   afterwards it holds the enabled JSIMD_* bits. */
static THREAD_LOCAL unsigned int simd_support = ~0;
/* Non-zero while SIMD Huffman encoding may be used (see
   jsimd_can_huff_encode_one_block()). */
static THREAD_LOCAL unsigned int simd_huffman = 1;
/* Combination of the JSIMD_FAST* bits above; all are set by default. */
static THREAD_LOCAL unsigned int simd_features = JSIMD_FASTLD3 |
                                                 JSIMD_FASTST3 | JSIMD_FASTTBL;
     37 
     38 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
     39 
/* Largest line-buffer size (in bytes) that init_simd() will try when reading
   /proc/cpuinfo before it stops retrying. */
#define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT  (1024 * 1024)
     41 
     42 LOCAL(int)
     43 check_cpuinfo(char *buffer, const char *field, char *value)
     44 {
     45  char *p;
     46 
     47  if (*value == 0)
     48    return 0;
     49  if (strncmp(buffer, field, strlen(field)) != 0)
     50    return 0;
     51  buffer += strlen(field);
     52  while (isspace(*buffer))
     53    buffer++;
     54 
     55  /* Check if 'value' is present in the buffer as a separate word */
     56  while ((p = strstr(buffer, value))) {
     57    if (p > buffer && !isspace(*(p - 1))) {
     58      buffer++;
     59      continue;
     60    }
     61    p += strlen(value);
     62    if (*p != 0 && !isspace(*p)) {
     63      buffer++;
     64      continue;
     65    }
     66    return 1;
     67  }
     68  return 0;
     69 }
     70 
     71 LOCAL(int)
     72 parse_proc_cpuinfo(int bufsize)
     73 {
     74  char *buffer = (char *)malloc(bufsize);
     75  FILE *fd;
     76 
     77  if (!buffer)
     78    return 0;
     79 
     80  fd = fopen("/proc/cpuinfo", "r");
     81  if (fd) {
     82    while (fgets(buffer, bufsize, fd)) {
     83      if (!strchr(buffer, '\n') && !feof(fd)) {
     84        /* "impossible" happened - insufficient size of the buffer! */
     85        fclose(fd);
     86        free(buffer);
     87        return 0;
     88      }
     89      if (check_cpuinfo(buffer, "CPU part", "0xd03") ||
     90          check_cpuinfo(buffer, "CPU part", "0xd07"))
     91        /* The Cortex-A53 has a slow tbl implementation.  We can gain a few
     92           percent speedup by disabling the use of that instruction.  The
     93           speedup on Cortex-A57 is more subtle but still measurable. */
     94        simd_features &= ~JSIMD_FASTTBL;
     95      else if (check_cpuinfo(buffer, "CPU part", "0x0a1"))
     96        /* The SIMD version of Huffman encoding is slower than the C version on
     97           Cavium ThunderX.  Also, ld3 and st3 are abyssmally slow on that
     98           CPU. */
     99        simd_huffman = simd_features = 0;
    100    }
    101    fclose(fd);
    102  }
    103  free(buffer);
    104  return 1;
    105 }
    106 
    107 #endif
    108 
    109 /*
    110 * Check what SIMD accelerations are supported.
    111 */
    112 
    113 /*
    114 * Armv8 architectures support Neon extensions by default.
    115 * It is no longer optional as it was with Armv7.
    116 */
    117 
    118 
    119 LOCAL(void)
    120 init_simd(void)
    121 {
    122 #ifndef NO_GETENV
    123  char env[2] = { 0 };
    124 #endif
    125 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
    126  int bufsize = 1024; /* an initial guess for the line buffer size limit */
    127 #endif
    128 
    129  if (simd_support != ~0U)
    130    return;
    131 
    132  simd_support = 0;
    133 
    134  simd_support |= JSIMD_NEON;
    135 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
    136  while (!parse_proc_cpuinfo(bufsize)) {
    137    bufsize *= 2;
    138    if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
    139      break;
    140  }
    141 #endif
    142 
    143 #ifndef NO_GETENV
    144  /* Force different settings through environment variables */
    145  if (!GETENV_S(env, 2, "JSIMD_FORCENEON") && !strcmp(env, "1"))
    146    simd_support = JSIMD_NEON;
    147  if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
    148    simd_support = 0;
    149  if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
    150    simd_huffman = 0;
    151  if (!GETENV_S(env, 2, "JSIMD_FASTLD3") && !strcmp(env, "1"))
    152    simd_features |= JSIMD_FASTLD3;
    153  if (!GETENV_S(env, 2, "JSIMD_FASTLD3") && !strcmp(env, "0"))
    154    simd_features &= ~JSIMD_FASTLD3;
    155  if (!GETENV_S(env, 2, "JSIMD_FASTST3") && !strcmp(env, "1"))
    156    simd_features |= JSIMD_FASTST3;
    157  if (!GETENV_S(env, 2, "JSIMD_FASTST3") && !strcmp(env, "0"))
    158    simd_features &= ~JSIMD_FASTST3;
    159 #endif
    160 }
    161 
    162 GLOBAL(int)
    163 jsimd_can_rgb_ycc(void)
    164 {
    165  init_simd();
    166 
    167  /* The code is optimised for these values only */
    168  if (BITS_IN_JSAMPLE != 8)
    169    return 0;
    170  if (sizeof(JDIMENSION) != 4)
    171    return 0;
    172  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
    173    return 0;
    174 
    175  if (simd_support & JSIMD_NEON)
    176    return 1;
    177 
    178  return 0;
    179 }
    180 
    181 GLOBAL(int)
    182 jsimd_can_rgb_gray(void)
    183 {
    184  init_simd();
    185 
    186  /* The code is optimised for these values only */
    187  if (BITS_IN_JSAMPLE != 8)
    188    return 0;
    189  if (sizeof(JDIMENSION) != 4)
    190    return 0;
    191  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
    192    return 0;
    193 
    194  if (simd_support & JSIMD_NEON)
    195    return 1;
    196 
    197  return 0;
    198 }
    199 
    200 GLOBAL(int)
    201 jsimd_can_ycc_rgb(void)
    202 {
    203  init_simd();
    204 
    205  /* The code is optimised for these values only */
    206  if (BITS_IN_JSAMPLE != 8)
    207    return 0;
    208  if (sizeof(JDIMENSION) != 4)
    209    return 0;
    210  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
    211    return 0;
    212 
    213  if (simd_support & JSIMD_NEON)
    214    return 1;
    215 
    216  return 0;
    217 }
    218 
    219 GLOBAL(int)
    220 jsimd_can_ycc_rgb565(void)
    221 {
    222  init_simd();
    223 
    224  /* The code is optimised for these values only */
    225  if (BITS_IN_JSAMPLE != 8)
    226    return 0;
    227  if (sizeof(JDIMENSION) != 4)
    228    return 0;
    229 
    230  if (simd_support & JSIMD_NEON)
    231    return 1;
    232 
    233  return 0;
    234 }
    235 
    236 GLOBAL(void)
    237 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
    238                      JSAMPIMAGE output_buf, JDIMENSION output_row,
    239                      int num_rows)
    240 {
    241  void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
    242 
    243  switch (cinfo->in_color_space) {
    244  case JCS_EXT_RGB:
    245 #ifndef NEON_INTRINSICS
    246    if (simd_features & JSIMD_FASTLD3)
    247 #endif
    248      neonfct = jsimd_extrgb_ycc_convert_neon;
    249 #ifndef NEON_INTRINSICS
    250    else
    251      neonfct = jsimd_extrgb_ycc_convert_neon_slowld3;
    252 #endif
    253    break;
    254  case JCS_EXT_RGBX:
    255  case JCS_EXT_RGBA:
    256    neonfct = jsimd_extrgbx_ycc_convert_neon;
    257    break;
    258  case JCS_EXT_BGR:
    259 #ifndef NEON_INTRINSICS
    260    if (simd_features & JSIMD_FASTLD3)
    261 #endif
    262      neonfct = jsimd_extbgr_ycc_convert_neon;
    263 #ifndef NEON_INTRINSICS
    264    else
    265      neonfct = jsimd_extbgr_ycc_convert_neon_slowld3;
    266 #endif
    267    break;
    268  case JCS_EXT_BGRX:
    269  case JCS_EXT_BGRA:
    270    neonfct = jsimd_extbgrx_ycc_convert_neon;
    271    break;
    272  case JCS_EXT_XBGR:
    273  case JCS_EXT_ABGR:
    274    neonfct = jsimd_extxbgr_ycc_convert_neon;
    275    break;
    276  case JCS_EXT_XRGB:
    277  case JCS_EXT_ARGB:
    278    neonfct = jsimd_extxrgb_ycc_convert_neon;
    279    break;
    280  default:
    281 #ifndef NEON_INTRINSICS
    282    if (simd_features & JSIMD_FASTLD3)
    283 #endif
    284      neonfct = jsimd_extrgb_ycc_convert_neon;
    285 #ifndef NEON_INTRINSICS
    286    else
    287      neonfct = jsimd_extrgb_ycc_convert_neon_slowld3;
    288 #endif
    289    break;
    290  }
    291 
    292  neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
    293 }
    294 
    295 GLOBAL(void)
    296 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
    297                       JSAMPIMAGE output_buf, JDIMENSION output_row,
    298                       int num_rows)
    299 {
    300  void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
    301 
    302  switch (cinfo->in_color_space) {
    303  case JCS_EXT_RGB:
    304    neonfct = jsimd_extrgb_gray_convert_neon;
    305    break;
    306  case JCS_EXT_RGBX:
    307  case JCS_EXT_RGBA:
    308    neonfct = jsimd_extrgbx_gray_convert_neon;
    309    break;
    310  case JCS_EXT_BGR:
    311    neonfct = jsimd_extbgr_gray_convert_neon;
    312    break;
    313  case JCS_EXT_BGRX:
    314  case JCS_EXT_BGRA:
    315    neonfct = jsimd_extbgrx_gray_convert_neon;
    316    break;
    317  case JCS_EXT_XBGR:
    318  case JCS_EXT_ABGR:
    319    neonfct = jsimd_extxbgr_gray_convert_neon;
    320    break;
    321  case JCS_EXT_XRGB:
    322  case JCS_EXT_ARGB:
    323    neonfct = jsimd_extxrgb_gray_convert_neon;
    324    break;
    325  default:
    326    neonfct = jsimd_extrgb_gray_convert_neon;
    327    break;
    328  }
    329 
    330  neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
    331 }
    332 
    333 GLOBAL(void)
    334 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
    335                      JDIMENSION input_row, JSAMPARRAY output_buf,
    336                      int num_rows)
    337 {
    338  void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
    339 
    340  switch (cinfo->out_color_space) {
    341  case JCS_EXT_RGB:
    342 #ifndef NEON_INTRINSICS
    343    if (simd_features & JSIMD_FASTST3)
    344 #endif
    345      neonfct = jsimd_ycc_extrgb_convert_neon;
    346 #ifndef NEON_INTRINSICS
    347    else
    348      neonfct = jsimd_ycc_extrgb_convert_neon_slowst3;
    349 #endif
    350    break;
    351  case JCS_EXT_RGBX:
    352  case JCS_EXT_RGBA:
    353    neonfct = jsimd_ycc_extrgbx_convert_neon;
    354    break;
    355  case JCS_EXT_BGR:
    356 #ifndef NEON_INTRINSICS
    357    if (simd_features & JSIMD_FASTST3)
    358 #endif
    359      neonfct = jsimd_ycc_extbgr_convert_neon;
    360 #ifndef NEON_INTRINSICS
    361    else
    362      neonfct = jsimd_ycc_extbgr_convert_neon_slowst3;
    363 #endif
    364    break;
    365  case JCS_EXT_BGRX:
    366  case JCS_EXT_BGRA:
    367    neonfct = jsimd_ycc_extbgrx_convert_neon;
    368    break;
    369  case JCS_EXT_XBGR:
    370  case JCS_EXT_ABGR:
    371    neonfct = jsimd_ycc_extxbgr_convert_neon;
    372    break;
    373  case JCS_EXT_XRGB:
    374  case JCS_EXT_ARGB:
    375    neonfct = jsimd_ycc_extxrgb_convert_neon;
    376    break;
    377  default:
    378 #ifndef NEON_INTRINSICS
    379    if (simd_features & JSIMD_FASTST3)
    380 #endif
    381      neonfct = jsimd_ycc_extrgb_convert_neon;
    382 #ifndef NEON_INTRINSICS
    383    else
    384      neonfct = jsimd_ycc_extrgb_convert_neon_slowst3;
    385 #endif
    386    break;
    387  }
    388 
    389  neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
    390 }
    391 
    392 GLOBAL(void)
    393 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
    394                         JDIMENSION input_row, JSAMPARRAY output_buf,
    395                         int num_rows)
    396 {
    397  jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row,
    398                                output_buf, num_rows);
    399 }
    400 
    401 GLOBAL(int)
    402 jsimd_can_h2v2_downsample(void)
    403 {
    404  init_simd();
    405 
    406  /* The code is optimised for these values only */
    407  if (BITS_IN_JSAMPLE != 8)
    408    return 0;
    409  if (DCTSIZE != 8)
    410    return 0;
    411  if (sizeof(JDIMENSION) != 4)
    412    return 0;
    413 
    414  if (simd_support & JSIMD_NEON)
    415    return 1;
    416 
    417  return 0;
    418 }
    419 
    420 GLOBAL(int)
    421 jsimd_can_h2v1_downsample(void)
    422 {
    423  init_simd();
    424 
    425  /* The code is optimised for these values only */
    426  if (BITS_IN_JSAMPLE != 8)
    427    return 0;
    428  if (DCTSIZE != 8)
    429    return 0;
    430  if (sizeof(JDIMENSION) != 4)
    431    return 0;
    432 
    433  if (simd_support & JSIMD_NEON)
    434    return 1;
    435 
    436  return 0;
    437 }
    438 
    439 GLOBAL(void)
    440 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
    441                      JSAMPARRAY input_data, JSAMPARRAY output_data)
    442 {
    443  jsimd_h2v2_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor,
    444                             compptr->v_samp_factor, compptr->width_in_blocks,
    445                             input_data, output_data);
    446 }
    447 
    448 GLOBAL(void)
    449 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
    450                      JSAMPARRAY input_data, JSAMPARRAY output_data)
    451 {
    452  jsimd_h2v1_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor,
    453                             compptr->v_samp_factor, compptr->width_in_blocks,
    454                             input_data, output_data);
    455 }
    456 
    457 GLOBAL(int)
    458 jsimd_can_h2v2_upsample(void)
    459 {
    460  init_simd();
    461 
    462  /* The code is optimised for these values only */
    463  if (BITS_IN_JSAMPLE != 8)
    464    return 0;
    465  if (sizeof(JDIMENSION) != 4)
    466    return 0;
    467 
    468  if (simd_support & JSIMD_NEON)
    469    return 1;
    470 
    471  return 0;
    472 }
    473 
    474 GLOBAL(int)
    475 jsimd_can_h2v1_upsample(void)
    476 {
    477  init_simd();
    478 
    479  /* The code is optimised for these values only */
    480  if (BITS_IN_JSAMPLE != 8)
    481    return 0;
    482  if (sizeof(JDIMENSION) != 4)
    483    return 0;
    484  if (simd_support & JSIMD_NEON)
    485    return 1;
    486 
    487  return 0;
    488 }
    489 
    490 GLOBAL(void)
    491 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
    492                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
    493 {
    494  jsimd_h2v2_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width,
    495                           input_data, output_data_ptr);
    496 }
    497 
    498 GLOBAL(void)
    499 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
    500                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
    501 {
    502  jsimd_h2v1_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width,
    503                           input_data, output_data_ptr);
    504 }
    505 
    506 GLOBAL(int)
    507 jsimd_can_h2v2_fancy_upsample(void)
    508 {
    509  init_simd();
    510 
    511  /* The code is optimised for these values only */
    512  if (BITS_IN_JSAMPLE != 8)
    513    return 0;
    514  if (sizeof(JDIMENSION) != 4)
    515    return 0;
    516 
    517  if (simd_support & JSIMD_NEON)
    518    return 1;
    519 
    520  return 0;
    521 }
    522 
    523 GLOBAL(int)
    524 jsimd_can_h2v1_fancy_upsample(void)
    525 {
    526  init_simd();
    527 
    528  /* The code is optimised for these values only */
    529  if (BITS_IN_JSAMPLE != 8)
    530    return 0;
    531  if (sizeof(JDIMENSION) != 4)
    532    return 0;
    533 
    534  if (simd_support & JSIMD_NEON)
    535    return 1;
    536 
    537  return 0;
    538 }
    539 
    540 GLOBAL(int)
    541 jsimd_can_h1v2_fancy_upsample(void)
    542 {
    543  init_simd();
    544 
    545  /* The code is optimised for these values only */
    546  if (BITS_IN_JSAMPLE != 8)
    547    return 0;
    548  if (sizeof(JDIMENSION) != 4)
    549    return 0;
    550 
    551  if (simd_support & JSIMD_NEON)
    552    return 1;
    553 
    554  return 0;
    555 }
    556 
    557 GLOBAL(void)
    558 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
    559                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
    560 {
    561  jsimd_h2v2_fancy_upsample_neon(cinfo->max_v_samp_factor,
    562                                 compptr->downsampled_width, input_data,
    563                                 output_data_ptr);
    564 }
    565 
    566 GLOBAL(void)
    567 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
    568                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
    569 {
    570  jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor,
    571                                 compptr->downsampled_width, input_data,
    572                                 output_data_ptr);
    573 }
    574 
    575 GLOBAL(void)
    576 jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
    577                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
    578 {
    579  jsimd_h1v2_fancy_upsample_neon(cinfo->max_v_samp_factor,
    580                                 compptr->downsampled_width, input_data,
    581                                 output_data_ptr);
    582 }
    583 
    584 GLOBAL(int)
    585 jsimd_can_h2v2_merged_upsample(void)
    586 {
    587  init_simd();
    588 
    589  /* The code is optimised for these values only */
    590  if (BITS_IN_JSAMPLE != 8)
    591    return 0;
    592  if (sizeof(JDIMENSION) != 4)
    593    return 0;
    594 
    595  if (simd_support & JSIMD_NEON)
    596    return 1;
    597 
    598  return 0;
    599 }
    600 
    601 GLOBAL(int)
    602 jsimd_can_h2v1_merged_upsample(void)
    603 {
    604  init_simd();
    605 
    606  /* The code is optimised for these values only */
    607  if (BITS_IN_JSAMPLE != 8)
    608    return 0;
    609  if (sizeof(JDIMENSION) != 4)
    610    return 0;
    611 
    612  if (simd_support & JSIMD_NEON)
    613    return 1;
    614 
    615  return 0;
    616 }
    617 
    618 GLOBAL(void)
    619 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
    620                           JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
    621 {
    622  void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
    623 
    624  switch (cinfo->out_color_space) {
    625    case JCS_EXT_RGB:
    626      neonfct = jsimd_h2v2_extrgb_merged_upsample_neon;
    627      break;
    628    case JCS_EXT_RGBX:
    629    case JCS_EXT_RGBA:
    630      neonfct = jsimd_h2v2_extrgbx_merged_upsample_neon;
    631      break;
    632    case JCS_EXT_BGR:
    633      neonfct = jsimd_h2v2_extbgr_merged_upsample_neon;
    634      break;
    635    case JCS_EXT_BGRX:
    636    case JCS_EXT_BGRA:
    637      neonfct = jsimd_h2v2_extbgrx_merged_upsample_neon;
    638      break;
    639    case JCS_EXT_XBGR:
    640    case JCS_EXT_ABGR:
    641      neonfct = jsimd_h2v2_extxbgr_merged_upsample_neon;
    642      break;
    643    case JCS_EXT_XRGB:
    644    case JCS_EXT_ARGB:
    645      neonfct = jsimd_h2v2_extxrgb_merged_upsample_neon;
    646      break;
    647    default:
    648      neonfct = jsimd_h2v2_extrgb_merged_upsample_neon;
    649      break;
    650  }
    651 
    652  neonfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
    653 }
    654 
    655 GLOBAL(void)
    656 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
    657                           JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
    658 {
    659  void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
    660 
    661  switch (cinfo->out_color_space) {
    662    case JCS_EXT_RGB:
    663      neonfct = jsimd_h2v1_extrgb_merged_upsample_neon;
    664      break;
    665    case JCS_EXT_RGBX:
    666    case JCS_EXT_RGBA:
    667      neonfct = jsimd_h2v1_extrgbx_merged_upsample_neon;
    668      break;
    669    case JCS_EXT_BGR:
    670      neonfct = jsimd_h2v1_extbgr_merged_upsample_neon;
    671      break;
    672    case JCS_EXT_BGRX:
    673    case JCS_EXT_BGRA:
    674      neonfct = jsimd_h2v1_extbgrx_merged_upsample_neon;
    675      break;
    676    case JCS_EXT_XBGR:
    677    case JCS_EXT_ABGR:
    678      neonfct = jsimd_h2v1_extxbgr_merged_upsample_neon;
    679      break;
    680    case JCS_EXT_XRGB:
    681    case JCS_EXT_ARGB:
    682      neonfct = jsimd_h2v1_extxrgb_merged_upsample_neon;
    683      break;
    684    default:
    685      neonfct = jsimd_h2v1_extrgb_merged_upsample_neon;
    686      break;
    687  }
    688 
    689  neonfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
    690 }
    691 
    692 GLOBAL(int)
    693 jsimd_can_convsamp(void)
    694 {
    695  init_simd();
    696 
    697  /* The code is optimised for these values only */
    698  if (DCTSIZE != 8)
    699    return 0;
    700  if (BITS_IN_JSAMPLE != 8)
    701    return 0;
    702  if (sizeof(JDIMENSION) != 4)
    703    return 0;
    704  if (sizeof(DCTELEM) != 2)
    705    return 0;
    706 
    707  if (simd_support & JSIMD_NEON)
    708    return 1;
    709 
    710  return 0;
    711 }
    712 
    713 GLOBAL(int)
    714 jsimd_can_convsamp_float(void)
    715 {
    716  return 0;
    717 }
    718 
    719 GLOBAL(void)
    720 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
    721               DCTELEM *workspace)
    722 {
    723  jsimd_convsamp_neon(sample_data, start_col, workspace);
    724 }
    725 
    726 GLOBAL(void)
    727 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
    728                     FAST_FLOAT *workspace)
    729 {
    730 }
    731 
    732 GLOBAL(int)
    733 jsimd_can_fdct_islow(void)
    734 {
    735  init_simd();
    736 
    737  /* The code is optimised for these values only */
    738  if (DCTSIZE != 8)
    739    return 0;
    740  if (sizeof(DCTELEM) != 2)
    741    return 0;
    742 
    743  if (simd_support & JSIMD_NEON)
    744    return 1;
    745 
    746  return 0;
    747 }
    748 
    749 GLOBAL(int)
    750 jsimd_can_fdct_ifast(void)
    751 {
    752  init_simd();
    753 
    754  /* The code is optimised for these values only */
    755  if (DCTSIZE != 8)
    756    return 0;
    757  if (sizeof(DCTELEM) != 2)
    758    return 0;
    759 
    760  if (simd_support & JSIMD_NEON)
    761    return 1;
    762 
    763  return 0;
    764 }
    765 
    766 GLOBAL(int)
    767 jsimd_can_fdct_float(void)
    768 {
    769  return 0;
    770 }
    771 
    772 GLOBAL(void)
    773 jsimd_fdct_islow(DCTELEM *data)
    774 {
    775  jsimd_fdct_islow_neon(data);
    776 }
    777 
    778 GLOBAL(void)
    779 jsimd_fdct_ifast(DCTELEM *data)
    780 {
    781  jsimd_fdct_ifast_neon(data);
    782 }
    783 
    784 GLOBAL(void)
    785 jsimd_fdct_float(FAST_FLOAT *data)
    786 {
    787 }
    788 
    789 GLOBAL(int)
    790 jsimd_can_quantize(void)
    791 {
    792  init_simd();
    793 
    794  /* The code is optimised for these values only */
    795  if (DCTSIZE != 8)
    796    return 0;
    797  if (sizeof(JCOEF) != 2)
    798    return 0;
    799  if (sizeof(DCTELEM) != 2)
    800    return 0;
    801 
    802  if (simd_support & JSIMD_NEON)
    803    return 1;
    804 
    805  return 0;
    806 }
    807 
    808 GLOBAL(int)
    809 jsimd_can_quantize_float(void)
    810 {
    811  return 0;
    812 }
    813 
    814 GLOBAL(void)
    815 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
    816 {
    817  jsimd_quantize_neon(coef_block, divisors, workspace);
    818 }
    819 
    820 GLOBAL(void)
    821 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
    822                     FAST_FLOAT *workspace)
    823 {
    824 }
    825 
    826 GLOBAL(int)
    827 jsimd_can_idct_2x2(void)
    828 {
    829  init_simd();
    830 
    831  /* The code is optimised for these values only */
    832  if (DCTSIZE != 8)
    833    return 0;
    834  if (sizeof(JCOEF) != 2)
    835    return 0;
    836  if (BITS_IN_JSAMPLE != 8)
    837    return 0;
    838  if (sizeof(JDIMENSION) != 4)
    839    return 0;
    840  if (sizeof(ISLOW_MULT_TYPE) != 2)
    841    return 0;
    842 
    843  if (simd_support & JSIMD_NEON)
    844    return 1;
    845 
    846  return 0;
    847 }
    848 
    849 GLOBAL(int)
    850 jsimd_can_idct_4x4(void)
    851 {
    852  init_simd();
    853 
    854  /* The code is optimised for these values only */
    855  if (DCTSIZE != 8)
    856    return 0;
    857  if (sizeof(JCOEF) != 2)
    858    return 0;
    859  if (BITS_IN_JSAMPLE != 8)
    860    return 0;
    861  if (sizeof(JDIMENSION) != 4)
    862    return 0;
    863  if (sizeof(ISLOW_MULT_TYPE) != 2)
    864    return 0;
    865 
    866  if (simd_support & JSIMD_NEON)
    867    return 1;
    868 
    869  return 0;
    870 }
    871 
    872 GLOBAL(void)
    873 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
    874               JCOEFPTR coef_block, JSAMPARRAY output_buf,
    875               JDIMENSION output_col)
    876 {
    877  jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col);
    878 }
    879 
    880 GLOBAL(void)
    881 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
    882               JCOEFPTR coef_block, JSAMPARRAY output_buf,
    883               JDIMENSION output_col)
    884 {
    885  jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col);
    886 }
    887 
    888 GLOBAL(int)
    889 jsimd_can_idct_islow(void)
    890 {
    891  init_simd();
    892 
    893  /* The code is optimised for these values only */
    894  if (DCTSIZE != 8)
    895    return 0;
    896  if (sizeof(JCOEF) != 2)
    897    return 0;
    898  if (BITS_IN_JSAMPLE != 8)
    899    return 0;
    900  if (sizeof(JDIMENSION) != 4)
    901    return 0;
    902  if (sizeof(ISLOW_MULT_TYPE) != 2)
    903    return 0;
    904 
    905  if (simd_support & JSIMD_NEON)
    906    return 1;
    907 
    908  return 0;
    909 }
    910 
    911 GLOBAL(int)
    912 jsimd_can_idct_ifast(void)
    913 {
    914  init_simd();
    915 
    916  /* The code is optimised for these values only */
    917  if (DCTSIZE != 8)
    918    return 0;
    919  if (sizeof(JCOEF) != 2)
    920    return 0;
    921  if (BITS_IN_JSAMPLE != 8)
    922    return 0;
    923  if (sizeof(JDIMENSION) != 4)
    924    return 0;
    925  if (sizeof(IFAST_MULT_TYPE) != 2)
    926    return 0;
    927  if (IFAST_SCALE_BITS != 2)
    928    return 0;
    929 
    930  if (simd_support & JSIMD_NEON)
    931    return 1;
    932 
    933  return 0;
    934 }
    935 
    936 GLOBAL(int)
    937 jsimd_can_idct_float(void)
    938 {
    939  return 0;
    940 }
    941 
    942 GLOBAL(void)
    943 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
    944                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
    945                 JDIMENSION output_col)
    946 {
    947  jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf,
    948                        output_col);
    949 }
    950 
    951 GLOBAL(void)
    952 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
    953                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
    954                 JDIMENSION output_col)
    955 {
    956  jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf,
    957                        output_col);
    958 }
    959 
    960 GLOBAL(void)
    961 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
    962                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
    963                 JDIMENSION output_col)
    964 {
    965 }
    966 
    967 GLOBAL(int)
    968 jsimd_can_huff_encode_one_block(void)
    969 {
    970  init_simd();
    971 
    972  if (DCTSIZE != 8)
    973    return 0;
    974  if (sizeof(JCOEF) != 2)
    975    return 0;
    976 
    977  if (simd_support & JSIMD_NEON && simd_huffman)
    978    return 1;
    979 
    980  return 0;
    981 }
    982 
    983 GLOBAL(JOCTET *)
    984 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
    985                            int last_dc_val, c_derived_tbl *dctbl,
    986                            c_derived_tbl *actbl)
    987 {
    988 #ifndef NEON_INTRINSICS
    989  if (simd_features & JSIMD_FASTTBL)
    990 #endif
    991    return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val,
    992                                            dctbl, actbl);
    993 #ifndef NEON_INTRINSICS
    994  else
    995    return jsimd_huff_encode_one_block_neon_slowtbl(state, buffer, block,
    996                                                    last_dc_val, dctbl, actbl);
    997 #endif
    998 }
    999 
   1000 GLOBAL(int)
   1001 jsimd_can_encode_mcu_AC_first_prepare(void)
   1002 {
   1003  init_simd();
   1004 
   1005  if (DCTSIZE != 8)
   1006    return 0;
   1007  if (sizeof(JCOEF) != 2)
   1008    return 0;
   1009  if (SIZEOF_SIZE_T != 8)
   1010    return 0;
   1011 
   1012  if (simd_support & JSIMD_NEON)
   1013    return 1;
   1014 
   1015  return 0;
   1016 }
   1017 
   1018 GLOBAL(void)
   1019 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
   1020                                  const int *jpeg_natural_order_start, int Sl,
   1021                                  int Al, UJCOEF *values, size_t *zerobits)
   1022 {
   1023  jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start,
   1024                                         Sl, Al, values, zerobits);
   1025 }
   1026 
   1027 GLOBAL(int)
   1028 jsimd_can_encode_mcu_AC_refine_prepare(void)
   1029 {
   1030  init_simd();
   1031 
   1032  if (DCTSIZE != 8)
   1033    return 0;
   1034  if (sizeof(JCOEF) != 2)
   1035    return 0;
   1036  if (SIZEOF_SIZE_T != 8)
   1037    return 0;
   1038 
   1039  if (simd_support & JSIMD_NEON)
   1040    return 1;
   1041 
   1042  return 0;
   1043 }
   1044 
   1045 GLOBAL(int)
   1046 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
   1047                                   const int *jpeg_natural_order_start, int Sl,
   1048                                   int Al, UJCOEF *absvalues, size_t *bits)
   1049 {
   1050  return jsimd_encode_mcu_AC_refine_prepare_neon(block,
   1051                                                 jpeg_natural_order_start,
   1052                                                 Sl, Al, absvalues, bits);
   1053 }