tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

jcsample-altivec.c (5617B)


      1 /*
      2 * AltiVec optimizations for libjpeg-turbo
      3 *
      4 * Copyright (C) 2015, D. R. Commander.  All Rights Reserved.
      5 *
      6 * This software is provided 'as-is', without any express or implied
      7 * warranty.  In no event will the authors be held liable for any damages
      8 * arising from the use of this software.
      9 *
     10 * Permission is granted to anyone to use this software for any purpose,
     11 * including commercial applications, and to alter it and redistribute it
     12 * freely, subject to the following restrictions:
     13 *
     14 * 1. The origin of this software must not be misrepresented; you must not
     15 *    claim that you wrote the original software. If you use this software
     16 *    in a product, an acknowledgment in the product documentation would be
     17 *    appreciated but is not required.
     18 * 2. Altered source versions must be plainly marked as such, and must not be
     19 *    misrepresented as being the original software.
     20 * 3. This notice may not be removed or altered from any source distribution.
     21 */
     22 
     23 /* CHROMA DOWNSAMPLING */
     24 
     25 #include "jsimd_altivec.h"
     26 #include "jcsample.h"
     27 
     28 
     /*
      * AltiVec h2v1 chroma downsampling: each output sample is computed from one
      * horizontally adjacent pair of input samples as (even + odd + bias) >> 1.
      * One output row is produced from each input row.
      *
      * image_width, max_v_samp_factor: only forwarded to expand_right_edge().
      * v_samp_factor:   number of rows processed.
      * width_in_blocks: multiplied by DCTSIZE to obtain the output column count.
      * input_data:      source rows; the read pointer advances 32 bytes per
      *                  inner-loop iteration.
      * output_data:     destination rows; one 16-byte vector is stored per
      *                  inner-loop iteration.
      */
     29 void jsimd_h2v1_downsample_altivec(JDIMENSION image_width,
     30                                   int max_v_samp_factor,
     31                                   JDIMENSION v_samp_factor,
     32                                   JDIMENSION width_in_blocks,
     33                                   JSAMPARRAY input_data,
     34                                   JSAMPARRAY output_data)
     35 {
     36  int outrow, outcol;
     37  JDIMENSION output_cols = width_in_blocks * DCTSIZE;
     38  JSAMPROW inptr, outptr;
     39 
     40  __vector unsigned char this0, next0, out;
     41  __vector unsigned short this0e, this0o, next0e, next0o, outl, outh;
     42 
     43  /* Constants */
         /* NOTE(review): __4X2(0, 1) presumably expands to the pair 0, 1 repeated
          * four times, i.e. an alternating rounding bias -- confirm in
          * jsimd_altivec.h. */
     44  __vector unsigned short pw_bias = { __4X2(0, 1) },
     45    pw_one = { __8X(1) };
         /* Permute control that gathers the even-indexed bytes of a vector into
          * its first 8 lanes and the odd-indexed bytes into its last 8 lanes. */
     46  __vector unsigned char even_odd_index =
     47    {  0,  2,  4,  6,  8, 10, 12, 14,  1,  3,  5,  7,  9, 11, 13, 15 },
         /* NOTE(review): pb_zero is not referenced by name in this function's
          * visible text; presumably the VEC_UNPACKHU/VEC_UNPACKLU macros expand
          * to code that uses it -- confirm before renaming or removing it. */
     48    pb_zero = { __16X(0) };
     49 
         /* NOTE(review): presumably pads every input row out to output_cols * 2
          * samples so the vector loads below read defined data -- confirm against
          * the helper's definition (jcsample.h). */
     50  expand_right_edge(input_data, max_v_samp_factor, image_width,
     51                    output_cols * 2);
     52 
     53  for (outrow = 0; outrow < v_samp_factor; outrow++) {
     54    outptr = output_data[outrow];
     55    inptr = input_data[outrow];
     56 
           /* Each iteration covers up to 16 output columns (32 input bytes). */
     57    for (outcol = output_cols; outcol > 0;
     58         outcol -= 16, inptr += 32, outptr += 16) {
     59 
             /* First 16 input bytes: split into even/odd samples, widen to 16
              * bits (presumably zero-extension by the VEC_UNPACK*U macros), then
              * sum each pair, add the bias and halve. */
     60      this0 = vec_ld(0, inptr);
     61      this0 = vec_perm(this0, this0, even_odd_index);
     62      this0e = (__vector unsigned short)VEC_UNPACKHU(this0);
     63      this0o = (__vector unsigned short)VEC_UNPACKLU(this0);
     64      outl = vec_add(this0e, this0o);
     65      outl = vec_add(outl, pw_bias);
     66      outl = vec_sr(outl, pw_one);
     67 
             /* The second 16 input bytes are only loaded when more than 8 output
              * columns remain; otherwise the upper half of the result is zero. */
     68      if (outcol > 8) {
     69        next0 = vec_ld(16, inptr);
     70        next0 = vec_perm(next0, next0, even_odd_index);
     71        next0e = (__vector unsigned short)VEC_UNPACKHU(next0);
     72        next0o = (__vector unsigned short)VEC_UNPACKLU(next0);
     73        outh = vec_add(next0e, next0o);
     74        outh = vec_add(outh, pw_bias);
     75        outh = vec_sr(outh, pw_one);
     76      } else
     77        outh = vec_splat_u16(0);
     78 
             /* Narrow both halves back to bytes and store a full 16-byte vector;
              * note the full vector is written even when outcol <= 8. */
     79      out = vec_pack(outl, outh);
     80      vec_st(out, 0, outptr);
     81    }
     82  }
     83 }
     84 
     85 
     /*
      * AltiVec h2v2 chroma downsampling: each output sample is computed from a
      * 2x2 group of input samples (a horizontal pair from each of two consecutive
      * input rows) as (sum of the four + bias) >> 2.  Two input rows are consumed
      * for every output row.
      *
      * image_width, max_v_samp_factor: only forwarded to expand_right_edge().
      * v_samp_factor:   number of output rows produced.
      * width_in_blocks: multiplied by DCTSIZE to obtain the output column count.
      * input_data:      source rows; rows inrow and inrow + 1 are read together
      *                  and both read pointers advance 32 bytes per inner-loop
      *                  iteration.
      * output_data:     destination rows; one 16-byte vector is stored per
      *                  inner-loop iteration.
      */
     86 void
     87 jsimd_h2v2_downsample_altivec(JDIMENSION image_width, int max_v_samp_factor,
     88                              JDIMENSION v_samp_factor,
     89                              JDIMENSION width_in_blocks,
     90                              JSAMPARRAY input_data, JSAMPARRAY output_data)
     91 {
     92  int inrow, outrow, outcol;
     93  JDIMENSION output_cols = width_in_blocks * DCTSIZE;
     94  JSAMPROW inptr0, inptr1, outptr;
     95 
     96  __vector unsigned char this0, next0, this1, next1, out;
     97  __vector unsigned short this0e, this0o, next0e, next0o, this1e, this1o,
     98    next1e, next1o, out0l, out0h, out1l, out1h, outl, outh;
     99 
    100  /* Constants */
         /* NOTE(review): __4X2(1, 2) presumably expands to the pair 1, 2 repeated
          * four times, i.e. an alternating rounding bias -- confirm in
          * jsimd_altivec.h. */
    101  __vector unsigned short pw_bias = { __4X2(1, 2) },
    102    pw_two = { __8X(2) };
         /* Permute control that gathers the even-indexed bytes of a vector into
          * its first 8 lanes and the odd-indexed bytes into its last 8 lanes. */
    103  __vector unsigned char even_odd_index =
    104    {  0,  2,  4,  6,  8, 10, 12, 14,  1,  3,  5,  7,  9, 11, 13, 15 },
         /* NOTE(review): pb_zero is not referenced by name in this function's
          * visible text; presumably the VEC_UNPACKHU/VEC_UNPACKLU macros expand
          * to code that uses it -- confirm before renaming or removing it. */
    105    pb_zero = { __16X(0) };
    106 
         /* NOTE(review): presumably pads every input row out to output_cols * 2
          * samples so the vector loads below read defined data -- confirm against
          * the helper's definition (jcsample.h). */
    107  expand_right_edge(input_data, max_v_samp_factor, image_width,
    108                    output_cols * 2);
    109 
         /* inrow advances by two for every output row. */
    110  for (inrow = 0, outrow = 0; outrow < v_samp_factor;
    111       inrow += 2, outrow++) {
    112 
    113    inptr0 = input_data[inrow];
    114    inptr1 = input_data[inrow + 1];
    115    outptr = output_data[outrow];
    116 
           /* Each iteration covers up to 16 output columns (32 input bytes from
            * each of the two rows). */
    117    for (outcol = output_cols; outcol > 0;
    118         outcol -= 16, inptr0 += 32, inptr1 += 32, outptr += 16) {
    119 
             /* Upper row, first 16 bytes: even + odd sample of each pair, widened
              * to 16 bits (presumably zero-extension by the VEC_UNPACK*U
              * macros). */
    120      this0 = vec_ld(0, inptr0);
    121      this0 = vec_perm(this0, this0, even_odd_index);
    122      this0e = (__vector unsigned short)VEC_UNPACKHU(this0);
    123      this0o = (__vector unsigned short)VEC_UNPACKLU(this0);
    124      out0l = vec_add(this0e, this0o);
    125 
             /* Lower row, first 16 bytes: same pairwise sum. */
    126      this1 = vec_ld(0, inptr1);
    127      this1 = vec_perm(this1, this1, even_odd_index);
    128      this1e = (__vector unsigned short)VEC_UNPACKHU(this1);
    129      this1o = (__vector unsigned short)VEC_UNPACKLU(this1);
    130      out1l = vec_add(this1e, this1o);
    131 
             /* Combine both rows, add the bias and divide by four. */
    132      outl = vec_add(out0l, out1l);
    133      outl = vec_add(outl, pw_bias);
    134      outl = vec_sr(outl, pw_two);
    135 
             /* The second 16 bytes of each row are only loaded when more than 8
              * output columns remain; otherwise the upper half of the result is
              * zero. */
    136      if (outcol > 8) {
    137        next0 = vec_ld(16, inptr0);
    138        next0 = vec_perm(next0, next0, even_odd_index);
    139        next0e = (__vector unsigned short)VEC_UNPACKHU(next0);
    140        next0o = (__vector unsigned short)VEC_UNPACKLU(next0);
    141        out0h = vec_add(next0e, next0o);
    142 
    143        next1 = vec_ld(16, inptr1);
    144        next1 = vec_perm(next1, next1, even_odd_index);
    145        next1e = (__vector unsigned short)VEC_UNPACKHU(next1);
    146        next1o = (__vector unsigned short)VEC_UNPACKLU(next1);
    147        out1h = vec_add(next1e, next1o);
    148 
    149        outh = vec_add(out0h, out1h);
    150        outh = vec_add(outh, pw_bias);
    151        outh = vec_sr(outh, pw_two);
    152      } else
    153        outh = vec_splat_u16(0);
    154 
             /* Narrow both halves back to bytes and store a full 16-byte vector;
              * note the full vector is written even when outcol <= 8. */
    155      out = vec_pack(outl, outh);
    156      vec_st(out, 0, outptr);
    157    }
    158  }
    159 }