jsimd.c (24260B)
1 /* 2 * jsimd_arm64.c 3 * 4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB 5 * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies). 6 * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2020, 2022, D. R. Commander. 7 * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois. 8 * Copyright (C) 2020, Arm Limited. 9 * 10 * Based on the x86 SIMD extension for IJG JPEG library, 11 * Copyright (C) 1999-2006, MIYASAKA Masaru. 12 * For conditions of distribution and use, see copyright notice in jsimdext.inc 13 * 14 * This file contains the interface between the "normal" portions 15 * of the library and the SIMD implementations when running on a 16 * 64-bit Arm architecture. 17 */ 18 19 #define JPEG_INTERNALS 20 #include "../../../jinclude.h" 21 #include "../../../jpeglib.h" 22 #include "../../../jsimd.h" 23 #include "../../../jdct.h" 24 #include "../../../jsimddct.h" 25 #include "../../jsimd.h" 26 27 #include <ctype.h> 28 29 #define JSIMD_FASTLD3 1 30 #define JSIMD_FASTST3 2 31 #define JSIMD_FASTTBL 4 32 33 static THREAD_LOCAL unsigned int simd_support = ~0; 34 static THREAD_LOCAL unsigned int simd_huffman = 1; 35 static THREAD_LOCAL unsigned int simd_features = JSIMD_FASTLD3 | 36 JSIMD_FASTST3 | JSIMD_FASTTBL; 37 38 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) 39 40 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024) 41 42 LOCAL(int) 43 check_cpuinfo(char *buffer, const char *field, char *value) 44 { 45 char *p; 46 47 if (*value == 0) 48 return 0; 49 if (strncmp(buffer, field, strlen(field)) != 0) 50 return 0; 51 buffer += strlen(field); 52 while (isspace(*buffer)) 53 buffer++; 54 55 /* Check if 'value' is present in the buffer as a separate word */ 56 while ((p = strstr(buffer, value))) { 57 if (p > buffer && !isspace(*(p - 1))) { 58 buffer++; 59 continue; 60 } 61 p += strlen(value); 62 if (*p != 0 && !isspace(*p)) { 63 buffer++; 64 continue; 65 } 66 return 1; 67 } 68 return 0; 69 } 70 71 LOCAL(int) 72 parse_proc_cpuinfo(int bufsize) 73 { 74 char *buffer = (char *)malloc(bufsize); 75 FILE *fd; 76 77 if (!buffer) 78 return 0; 79 80 fd = fopen("/proc/cpuinfo", "r"); 81 if (fd) { 82 while (fgets(buffer, bufsize, fd)) { 83 if (!strchr(buffer, '\n') && !feof(fd)) { 84 /* "impossible" happened - insufficient size of the buffer! */ 85 fclose(fd); 86 free(buffer); 87 return 0; 88 } 89 if (check_cpuinfo(buffer, "CPU part", "0xd03") || 90 check_cpuinfo(buffer, "CPU part", "0xd07")) 91 /* The Cortex-A53 has a slow tbl implementation. We can gain a few 92 percent speedup by disabling the use of that instruction. The 93 speedup on Cortex-A57 is more subtle but still measurable. */ 94 simd_features &= ~JSIMD_FASTTBL; 95 else if (check_cpuinfo(buffer, "CPU part", "0x0a1")) 96 /* The SIMD version of Huffman encoding is slower than the C version on 97 Cavium ThunderX. Also, ld3 and st3 are abyssmally slow on that 98 CPU. */ 99 simd_huffman = simd_features = 0; 100 } 101 fclose(fd); 102 } 103 free(buffer); 104 return 1; 105 } 106 107 #endif 108 109 /* 110 * Check what SIMD accelerations are supported. 111 */ 112 113 /* 114 * Armv8 architectures support Neon extensions by default. 115 * It is no longer optional as it was with Armv7. 116 */ 117 118 119 LOCAL(void) 120 init_simd(void) 121 { 122 #ifndef NO_GETENV 123 char env[2] = { 0 }; 124 #endif 125 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) 126 int bufsize = 1024; /* an initial guess for the line buffer size limit */ 127 #endif 128 129 if (simd_support != ~0U) 130 return; 131 132 simd_support = 0; 133 134 simd_support |= JSIMD_NEON; 135 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) 136 while (!parse_proc_cpuinfo(bufsize)) { 137 bufsize *= 2; 138 if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT) 139 break; 140 } 141 #endif 142 143 #ifndef NO_GETENV 144 /* Force different settings through environment variables */ 145 if (!GETENV_S(env, 2, "JSIMD_FORCENEON") && !strcmp(env, "1")) 146 simd_support = JSIMD_NEON; 147 if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1")) 148 simd_support = 0; 149 if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1")) 150 simd_huffman = 0; 151 if (!GETENV_S(env, 2, "JSIMD_FASTLD3") && !strcmp(env, "1")) 152 simd_features |= JSIMD_FASTLD3; 153 if (!GETENV_S(env, 2, "JSIMD_FASTLD3") && !strcmp(env, "0")) 154 simd_features &= ~JSIMD_FASTLD3; 155 if (!GETENV_S(env, 2, "JSIMD_FASTST3") && !strcmp(env, "1")) 156 simd_features |= JSIMD_FASTST3; 157 if (!GETENV_S(env, 2, "JSIMD_FASTST3") && !strcmp(env, "0")) 158 simd_features &= ~JSIMD_FASTST3; 159 #endif 160 } 161 162 GLOBAL(int) 163 jsimd_can_rgb_ycc(void) 164 { 165 init_simd(); 166 167 /* The code is optimised for these values only */ 168 if (BITS_IN_JSAMPLE != 8) 169 return 0; 170 if (sizeof(JDIMENSION) != 4) 171 return 0; 172 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 173 return 0; 174 175 if (simd_support & JSIMD_NEON) 176 return 1; 177 178 return 0; 179 } 180 181 GLOBAL(int) 182 jsimd_can_rgb_gray(void) 183 { 184 init_simd(); 185 186 /* The code is optimised for these values only */ 187 if (BITS_IN_JSAMPLE != 8) 188 return 0; 189 if (sizeof(JDIMENSION) != 4) 190 return 0; 191 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 192 return 0; 193 194 if (simd_support & JSIMD_NEON) 195 return 1; 196 197 return 0; 198 } 199 200 GLOBAL(int) 201 jsimd_can_ycc_rgb(void) 202 { 203 init_simd(); 204 205 /* The code is optimised for these values only */ 206 if (BITS_IN_JSAMPLE != 8) 207 return 0; 208 if (sizeof(JDIMENSION) != 4) 209 return 0; 210 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 211 return 0; 212 213 if (simd_support & JSIMD_NEON) 214 return 1; 215 216 return 0; 217 } 218 219 GLOBAL(int) 220 jsimd_can_ycc_rgb565(void) 221 { 222 init_simd(); 223 224 /* The code is optimised for these values only */ 225 if (BITS_IN_JSAMPLE != 8) 226 return 0; 227 if (sizeof(JDIMENSION) != 4) 228 return 0; 229 230 if (simd_support & JSIMD_NEON) 231 return 1; 232 233 return 0; 234 } 235 236 GLOBAL(void) 237 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, 238 JSAMPIMAGE output_buf, JDIMENSION output_row, 239 int num_rows) 240 { 241 void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 242 243 switch (cinfo->in_color_space) { 244 case JCS_EXT_RGB: 245 #ifndef NEON_INTRINSICS 246 if (simd_features & JSIMD_FASTLD3) 247 #endif 248 neonfct = jsimd_extrgb_ycc_convert_neon; 249 #ifndef NEON_INTRINSICS 250 else 251 neonfct = jsimd_extrgb_ycc_convert_neon_slowld3; 252 #endif 253 break; 254 case JCS_EXT_RGBX: 255 case JCS_EXT_RGBA: 256 neonfct = jsimd_extrgbx_ycc_convert_neon; 257 break; 258 case JCS_EXT_BGR: 259 #ifndef NEON_INTRINSICS 260 if (simd_features & JSIMD_FASTLD3) 261 #endif 262 neonfct = jsimd_extbgr_ycc_convert_neon; 263 #ifndef NEON_INTRINSICS 264 else 265 neonfct = jsimd_extbgr_ycc_convert_neon_slowld3; 266 #endif 267 break; 268 case JCS_EXT_BGRX: 269 case JCS_EXT_BGRA: 270 neonfct = jsimd_extbgrx_ycc_convert_neon; 271 break; 272 case JCS_EXT_XBGR: 273 case JCS_EXT_ABGR: 274 neonfct = jsimd_extxbgr_ycc_convert_neon; 275 break; 276 case JCS_EXT_XRGB: 277 case JCS_EXT_ARGB: 278 neonfct = jsimd_extxrgb_ycc_convert_neon; 279 break; 280 default: 281 #ifndef NEON_INTRINSICS 282 if (simd_features & JSIMD_FASTLD3) 283 #endif 284 neonfct = jsimd_extrgb_ycc_convert_neon; 285 #ifndef NEON_INTRINSICS 286 else 287 neonfct = jsimd_extrgb_ycc_convert_neon_slowld3; 288 #endif 289 break; 290 } 291 292 neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); 293 } 294 295 GLOBAL(void) 296 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, 297 JSAMPIMAGE output_buf, JDIMENSION output_row, 298 int num_rows) 299 { 300 void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 301 302 switch (cinfo->in_color_space) { 303 case JCS_EXT_RGB: 304 neonfct = jsimd_extrgb_gray_convert_neon; 305 break; 306 case JCS_EXT_RGBX: 307 case JCS_EXT_RGBA: 308 neonfct = jsimd_extrgbx_gray_convert_neon; 309 break; 310 case JCS_EXT_BGR: 311 neonfct = jsimd_extbgr_gray_convert_neon; 312 break; 313 case JCS_EXT_BGRX: 314 case JCS_EXT_BGRA: 315 neonfct = jsimd_extbgrx_gray_convert_neon; 316 break; 317 case JCS_EXT_XBGR: 318 case JCS_EXT_ABGR: 319 neonfct = jsimd_extxbgr_gray_convert_neon; 320 break; 321 case JCS_EXT_XRGB: 322 case JCS_EXT_ARGB: 323 neonfct = jsimd_extxrgb_gray_convert_neon; 324 break; 325 default: 326 neonfct = jsimd_extrgb_gray_convert_neon; 327 break; 328 } 329 330 neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); 331 } 332 333 GLOBAL(void) 334 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, 335 JDIMENSION input_row, JSAMPARRAY output_buf, 336 int num_rows) 337 { 338 void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); 339 340 switch (cinfo->out_color_space) { 341 case JCS_EXT_RGB: 342 #ifndef NEON_INTRINSICS 343 if (simd_features & JSIMD_FASTST3) 344 #endif 345 neonfct = jsimd_ycc_extrgb_convert_neon; 346 #ifndef NEON_INTRINSICS 347 else 348 neonfct = jsimd_ycc_extrgb_convert_neon_slowst3; 349 #endif 350 break; 351 case JCS_EXT_RGBX: 352 case JCS_EXT_RGBA: 353 neonfct = jsimd_ycc_extrgbx_convert_neon; 354 break; 355 case JCS_EXT_BGR: 356 #ifndef NEON_INTRINSICS 357 if (simd_features & JSIMD_FASTST3) 358 #endif 359 neonfct = jsimd_ycc_extbgr_convert_neon; 360 #ifndef NEON_INTRINSICS 361 else 362 neonfct = jsimd_ycc_extbgr_convert_neon_slowst3; 363 #endif 364 break; 365 case JCS_EXT_BGRX: 366 case JCS_EXT_BGRA: 367 neonfct = jsimd_ycc_extbgrx_convert_neon; 368 break; 369 case JCS_EXT_XBGR: 370 case JCS_EXT_ABGR: 371 neonfct = jsimd_ycc_extxbgr_convert_neon; 372 break; 373 case JCS_EXT_XRGB: 374 case JCS_EXT_ARGB: 375 neonfct = jsimd_ycc_extxrgb_convert_neon; 376 break; 377 default: 378 #ifndef NEON_INTRINSICS 379 if (simd_features & JSIMD_FASTST3) 380 #endif 381 neonfct = jsimd_ycc_extrgb_convert_neon; 382 #ifndef NEON_INTRINSICS 383 else 384 neonfct = jsimd_ycc_extrgb_convert_neon_slowst3; 385 #endif 386 break; 387 } 388 389 neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); 390 } 391 392 GLOBAL(void) 393 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, 394 JDIMENSION input_row, JSAMPARRAY output_buf, 395 int num_rows) 396 { 397 jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row, 398 output_buf, num_rows); 399 } 400 401 GLOBAL(int) 402 jsimd_can_h2v2_downsample(void) 403 { 404 init_simd(); 405 406 /* The code is optimised for these values only */ 407 if (BITS_IN_JSAMPLE != 8) 408 return 0; 409 if (DCTSIZE != 8) 410 return 0; 411 if (sizeof(JDIMENSION) != 4) 412 return 0; 413 414 if (simd_support & JSIMD_NEON) 415 return 1; 416 417 return 0; 418 } 419 420 GLOBAL(int) 421 jsimd_can_h2v1_downsample(void) 422 { 423 init_simd(); 424 425 /* The code is optimised for these values only */ 426 if (BITS_IN_JSAMPLE != 8) 427 return 0; 428 if (DCTSIZE != 8) 429 return 0; 430 if (sizeof(JDIMENSION) != 4) 431 return 0; 432 433 if (simd_support & JSIMD_NEON) 434 return 1; 435 436 return 0; 437 } 438 439 GLOBAL(void) 440 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, 441 JSAMPARRAY input_data, JSAMPARRAY output_data) 442 { 443 jsimd_h2v2_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor, 444 compptr->v_samp_factor, compptr->width_in_blocks, 445 input_data, output_data); 446 } 447 448 GLOBAL(void) 449 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, 450 JSAMPARRAY input_data, JSAMPARRAY output_data) 451 { 452 jsimd_h2v1_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor, 453 compptr->v_samp_factor, compptr->width_in_blocks, 454 input_data, output_data); 455 } 456 457 GLOBAL(int) 458 jsimd_can_h2v2_upsample(void) 459 { 460 init_simd(); 461 462 /* The code is optimised for these values only */ 463 if (BITS_IN_JSAMPLE != 8) 464 return 0; 465 if (sizeof(JDIMENSION) != 4) 466 return 0; 467 468 if (simd_support & JSIMD_NEON) 469 return 1; 470 471 return 0; 472 } 473 474 GLOBAL(int) 475 jsimd_can_h2v1_upsample(void) 476 { 477 init_simd(); 478 479 /* The code is optimised for these values only */ 480 if (BITS_IN_JSAMPLE != 8) 481 return 0; 482 if (sizeof(JDIMENSION) != 4) 483 return 0; 484 if (simd_support & JSIMD_NEON) 485 return 1; 486 487 return 0; 488 } 489 490 GLOBAL(void) 491 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, 492 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) 493 { 494 jsimd_h2v2_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width, 495 input_data, output_data_ptr); 496 } 497 498 GLOBAL(void) 499 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, 500 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) 501 { 502 jsimd_h2v1_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width, 503 input_data, output_data_ptr); 504 } 505 506 GLOBAL(int) 507 jsimd_can_h2v2_fancy_upsample(void) 508 { 509 init_simd(); 510 511 /* The code is optimised for these values only */ 512 if (BITS_IN_JSAMPLE != 8) 513 return 0; 514 if (sizeof(JDIMENSION) != 4) 515 return 0; 516 517 if (simd_support & JSIMD_NEON) 518 return 1; 519 520 return 0; 521 } 522 523 GLOBAL(int) 524 jsimd_can_h2v1_fancy_upsample(void) 525 { 526 init_simd(); 527 528 /* The code is optimised for these values only */ 529 if (BITS_IN_JSAMPLE != 8) 530 return 0; 531 if (sizeof(JDIMENSION) != 4) 532 return 0; 533 534 if (simd_support & JSIMD_NEON) 535 return 1; 536 537 return 0; 538 } 539 540 GLOBAL(int) 541 jsimd_can_h1v2_fancy_upsample(void) 542 { 543 init_simd(); 544 545 /* The code is optimised for these values only */ 546 if (BITS_IN_JSAMPLE != 8) 547 return 0; 548 if (sizeof(JDIMENSION) != 4) 549 return 0; 550 551 if (simd_support & JSIMD_NEON) 552 return 1; 553 554 return 0; 555 } 556 557 GLOBAL(void) 558 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, 559 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) 560 { 561 jsimd_h2v2_fancy_upsample_neon(cinfo->max_v_samp_factor, 562 compptr->downsampled_width, input_data, 563 output_data_ptr); 564 } 565 566 GLOBAL(void) 567 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, 568 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) 569 { 570 jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor, 571 compptr->downsampled_width, input_data, 572 output_data_ptr); 573 } 574 575 GLOBAL(void) 576 jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, 577 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) 578 { 579 jsimd_h1v2_fancy_upsample_neon(cinfo->max_v_samp_factor, 580 compptr->downsampled_width, input_data, 581 output_data_ptr); 582 } 583 584 GLOBAL(int) 585 jsimd_can_h2v2_merged_upsample(void) 586 { 587 init_simd(); 588 589 /* The code is optimised for these values only */ 590 if (BITS_IN_JSAMPLE != 8) 591 return 0; 592 if (sizeof(JDIMENSION) != 4) 593 return 0; 594 595 if (simd_support & JSIMD_NEON) 596 return 1; 597 598 return 0; 599 } 600 601 GLOBAL(int) 602 jsimd_can_h2v1_merged_upsample(void) 603 { 604 init_simd(); 605 606 /* The code is optimised for these values only */ 607 if (BITS_IN_JSAMPLE != 8) 608 return 0; 609 if (sizeof(JDIMENSION) != 4) 610 return 0; 611 612 if (simd_support & JSIMD_NEON) 613 return 1; 614 615 return 0; 616 } 617 618 GLOBAL(void) 619 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, 620 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) 621 { 622 void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); 623 624 switch (cinfo->out_color_space) { 625 case JCS_EXT_RGB: 626 neonfct = jsimd_h2v2_extrgb_merged_upsample_neon; 627 break; 628 case JCS_EXT_RGBX: 629 case JCS_EXT_RGBA: 630 neonfct = jsimd_h2v2_extrgbx_merged_upsample_neon; 631 break; 632 case JCS_EXT_BGR: 633 neonfct = jsimd_h2v2_extbgr_merged_upsample_neon; 634 break; 635 case JCS_EXT_BGRX: 636 case JCS_EXT_BGRA: 637 neonfct = jsimd_h2v2_extbgrx_merged_upsample_neon; 638 break; 639 case JCS_EXT_XBGR: 640 case JCS_EXT_ABGR: 641 neonfct = jsimd_h2v2_extxbgr_merged_upsample_neon; 642 break; 643 case JCS_EXT_XRGB: 644 case JCS_EXT_ARGB: 645 neonfct = jsimd_h2v2_extxrgb_merged_upsample_neon; 646 break; 647 default: 648 neonfct = jsimd_h2v2_extrgb_merged_upsample_neon; 649 break; 650 } 651 652 neonfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); 653 } 654 655 GLOBAL(void) 656 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, 657 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) 658 { 659 void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); 660 661 switch (cinfo->out_color_space) { 662 case JCS_EXT_RGB: 663 neonfct = jsimd_h2v1_extrgb_merged_upsample_neon; 664 break; 665 case JCS_EXT_RGBX: 666 case JCS_EXT_RGBA: 667 neonfct = jsimd_h2v1_extrgbx_merged_upsample_neon; 668 break; 669 case JCS_EXT_BGR: 670 neonfct = jsimd_h2v1_extbgr_merged_upsample_neon; 671 break; 672 case JCS_EXT_BGRX: 673 case JCS_EXT_BGRA: 674 neonfct = jsimd_h2v1_extbgrx_merged_upsample_neon; 675 break; 676 case JCS_EXT_XBGR: 677 case JCS_EXT_ABGR: 678 neonfct = jsimd_h2v1_extxbgr_merged_upsample_neon; 679 break; 680 case JCS_EXT_XRGB: 681 case JCS_EXT_ARGB: 682 neonfct = jsimd_h2v1_extxrgb_merged_upsample_neon; 683 break; 684 default: 685 neonfct = jsimd_h2v1_extrgb_merged_upsample_neon; 686 break; 687 } 688 689 neonfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); 690 } 691 692 GLOBAL(int) 693 jsimd_can_convsamp(void) 694 { 695 init_simd(); 696 697 /* The code is optimised for these values only */ 698 if (DCTSIZE != 8) 699 return 0; 700 if (BITS_IN_JSAMPLE != 8) 701 return 0; 702 if (sizeof(JDIMENSION) != 4) 703 return 0; 704 if (sizeof(DCTELEM) != 2) 705 return 0; 706 707 if (simd_support & JSIMD_NEON) 708 return 1; 709 710 return 0; 711 } 712 713 GLOBAL(int) 714 jsimd_can_convsamp_float(void) 715 { 716 return 0; 717 } 718 719 GLOBAL(void) 720 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, 721 DCTELEM *workspace) 722 { 723 jsimd_convsamp_neon(sample_data, start_col, workspace); 724 } 725 726 GLOBAL(void) 727 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col, 728 FAST_FLOAT *workspace) 729 { 730 } 731 732 GLOBAL(int) 733 jsimd_can_fdct_islow(void) 734 { 735 init_simd(); 736 737 /* The code is optimised for these values only */ 738 if (DCTSIZE != 8) 739 return 0; 740 if (sizeof(DCTELEM) != 2) 741 return 0; 742 743 if (simd_support & JSIMD_NEON) 744 return 1; 745 746 return 0; 747 } 748 749 GLOBAL(int) 750 jsimd_can_fdct_ifast(void) 751 { 752 init_simd(); 753 754 /* The code is optimised for these values only */ 755 if (DCTSIZE != 8) 756 return 0; 757 if (sizeof(DCTELEM) != 2) 758 return 0; 759 760 if (simd_support & JSIMD_NEON) 761 return 1; 762 763 return 0; 764 } 765 766 GLOBAL(int) 767 jsimd_can_fdct_float(void) 768 { 769 return 0; 770 } 771 772 GLOBAL(void) 773 jsimd_fdct_islow(DCTELEM *data) 774 { 775 jsimd_fdct_islow_neon(data); 776 } 777 778 GLOBAL(void) 779 jsimd_fdct_ifast(DCTELEM *data) 780 { 781 jsimd_fdct_ifast_neon(data); 782 } 783 784 GLOBAL(void) 785 jsimd_fdct_float(FAST_FLOAT *data) 786 { 787 } 788 789 GLOBAL(int) 790 jsimd_can_quantize(void) 791 { 792 init_simd(); 793 794 /* The code is optimised for these values only */ 795 if (DCTSIZE != 8) 796 return 0; 797 if (sizeof(JCOEF) != 2) 798 return 0; 799 if (sizeof(DCTELEM) != 2) 800 return 0; 801 802 if (simd_support & JSIMD_NEON) 803 return 1; 804 805 return 0; 806 } 807 808 GLOBAL(int) 809 jsimd_can_quantize_float(void) 810 { 811 return 0; 812 } 813 814 GLOBAL(void) 815 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace) 816 { 817 jsimd_quantize_neon(coef_block, divisors, workspace); 818 } 819 820 GLOBAL(void) 821 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors, 822 FAST_FLOAT *workspace) 823 { 824 } 825 826 GLOBAL(int) 827 jsimd_can_idct_2x2(void) 828 { 829 init_simd(); 830 831 /* The code is optimised for these values only */ 832 if (DCTSIZE != 8) 833 return 0; 834 if (sizeof(JCOEF) != 2) 835 return 0; 836 if (BITS_IN_JSAMPLE != 8) 837 return 0; 838 if (sizeof(JDIMENSION) != 4) 839 return 0; 840 if (sizeof(ISLOW_MULT_TYPE) != 2) 841 return 0; 842 843 if (simd_support & JSIMD_NEON) 844 return 1; 845 846 return 0; 847 } 848 849 GLOBAL(int) 850 jsimd_can_idct_4x4(void) 851 { 852 init_simd(); 853 854 /* The code is optimised for these values only */ 855 if (DCTSIZE != 8) 856 return 0; 857 if (sizeof(JCOEF) != 2) 858 return 0; 859 if (BITS_IN_JSAMPLE != 8) 860 return 0; 861 if (sizeof(JDIMENSION) != 4) 862 return 0; 863 if (sizeof(ISLOW_MULT_TYPE) != 2) 864 return 0; 865 866 if (simd_support & JSIMD_NEON) 867 return 1; 868 869 return 0; 870 } 871 872 GLOBAL(void) 873 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr, 874 JCOEFPTR coef_block, JSAMPARRAY output_buf, 875 JDIMENSION output_col) 876 { 877 jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col); 878 } 879 880 GLOBAL(void) 881 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr, 882 JCOEFPTR coef_block, JSAMPARRAY output_buf, 883 JDIMENSION output_col) 884 { 885 jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col); 886 } 887 888 GLOBAL(int) 889 jsimd_can_idct_islow(void) 890 { 891 init_simd(); 892 893 /* The code is optimised for these values only */ 894 if (DCTSIZE != 8) 895 return 0; 896 if (sizeof(JCOEF) != 2) 897 return 0; 898 if (BITS_IN_JSAMPLE != 8) 899 return 0; 900 if (sizeof(JDIMENSION) != 4) 901 return 0; 902 if (sizeof(ISLOW_MULT_TYPE) != 2) 903 return 0; 904 905 if (simd_support & JSIMD_NEON) 906 return 1; 907 908 return 0; 909 } 910 911 GLOBAL(int) 912 jsimd_can_idct_ifast(void) 913 { 914 init_simd(); 915 916 /* The code is optimised for these values only */ 917 if (DCTSIZE != 8) 918 return 0; 919 if (sizeof(JCOEF) != 2) 920 return 0; 921 if (BITS_IN_JSAMPLE != 8) 922 return 0; 923 if (sizeof(JDIMENSION) != 4) 924 return 0; 925 if (sizeof(IFAST_MULT_TYPE) != 2) 926 return 0; 927 if (IFAST_SCALE_BITS != 2) 928 return 0; 929 930 if (simd_support & JSIMD_NEON) 931 return 1; 932 933 return 0; 934 } 935 936 GLOBAL(int) 937 jsimd_can_idct_float(void) 938 { 939 return 0; 940 } 941 942 GLOBAL(void) 943 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr, 944 JCOEFPTR coef_block, JSAMPARRAY output_buf, 945 JDIMENSION output_col) 946 { 947 jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, 948 output_col); 949 } 950 951 GLOBAL(void) 952 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr, 953 JCOEFPTR coef_block, JSAMPARRAY output_buf, 954 JDIMENSION output_col) 955 { 956 jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, 957 output_col); 958 } 959 960 GLOBAL(void) 961 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr, 962 JCOEFPTR coef_block, JSAMPARRAY output_buf, 963 JDIMENSION output_col) 964 { 965 } 966 967 GLOBAL(int) 968 jsimd_can_huff_encode_one_block(void) 969 { 970 init_simd(); 971 972 if (DCTSIZE != 8) 973 return 0; 974 if (sizeof(JCOEF) != 2) 975 return 0; 976 977 if (simd_support & JSIMD_NEON && simd_huffman) 978 return 1; 979 980 return 0; 981 } 982 983 GLOBAL(JOCTET *) 984 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block, 985 int last_dc_val, c_derived_tbl *dctbl, 986 c_derived_tbl *actbl) 987 { 988 #ifndef NEON_INTRINSICS 989 if (simd_features & JSIMD_FASTTBL) 990 #endif 991 return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val, 992 dctbl, actbl); 993 #ifndef NEON_INTRINSICS 994 else 995 return jsimd_huff_encode_one_block_neon_slowtbl(state, buffer, block, 996 last_dc_val, dctbl, actbl); 997 #endif 998 } 999 1000 GLOBAL(int) 1001 jsimd_can_encode_mcu_AC_first_prepare(void) 1002 { 1003 init_simd(); 1004 1005 if (DCTSIZE != 8) 1006 return 0; 1007 if (sizeof(JCOEF) != 2) 1008 return 0; 1009 if (SIZEOF_SIZE_T != 8) 1010 return 0; 1011 1012 if (simd_support & JSIMD_NEON) 1013 return 1; 1014 1015 return 0; 1016 } 1017 1018 GLOBAL(void) 1019 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, 1020 const int *jpeg_natural_order_start, int Sl, 1021 int Al, UJCOEF *values, size_t *zerobits) 1022 { 1023 jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start, 1024 Sl, Al, values, zerobits); 1025 } 1026 1027 GLOBAL(int) 1028 jsimd_can_encode_mcu_AC_refine_prepare(void) 1029 { 1030 init_simd(); 1031 1032 if (DCTSIZE != 8) 1033 return 0; 1034 if (sizeof(JCOEF) != 2) 1035 return 0; 1036 if (SIZEOF_SIZE_T != 8) 1037 return 0; 1038 1039 if (simd_support & JSIMD_NEON) 1040 return 1; 1041 1042 return 0; 1043 } 1044 1045 GLOBAL(int) 1046 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, 1047 const int *jpeg_natural_order_start, int Sl, 1048 int Al, UJCOEF *absvalues, size_t *bits) 1049 { 1050 return jsimd_encode_mcu_AC_refine_prepare_neon(block, 1051 jpeg_natural_order_start, 1052 Sl, Al, absvalues, bits); 1053 }