tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

jfdctfst.c (11232B)


      1 /*
      2 * This file is part of the Independent JPEG Group's software.
      3 *
      4 * The authors make NO WARRANTY or representation, either express or implied,
      5 * with respect to this software, its quality, accuracy, merchantability, or
      6 * fitness for a particular purpose.  This software is provided "AS IS", and
      7 * you, its user, assume the entire risk as to its quality and accuracy.
      8 *
      9 * This software is copyright (C) 1994-1996, Thomas G. Lane.
     10 * All Rights Reserved except as specified below.
     11 *
     12 * Permission is hereby granted to use, copy, modify, and distribute this
     13 * software (or portions thereof) for any purpose, without fee, subject to
     14 * these conditions:
     15 * (1) If any part of the source code for this software is distributed, then
     16 * this README file must be included, with this copyright and no-warranty
     17 * notice unaltered; and any additions, deletions, or changes to the original
     18 * files must be clearly indicated in accompanying documentation.
     19 * (2) If only executable code is distributed, then the accompanying
     20 * documentation must state that "this software is based in part on the work
     21 * of the Independent JPEG Group".
     22 * (3) Permission for use of this software is granted only if the user accepts
     23 * full responsibility for any undesirable consequences; the authors accept
     24 * NO LIABILITY for damages of any kind.
     25 *
     26 * These conditions apply to any software derived from or based on the IJG
     27 * code, not just to the unmodified library.  If you use our work, you ought
     28 * to acknowledge us.
     29 *
     30 * Permission is NOT granted for the use of any IJG author's name or company
     31 * name in advertising or publicity relating to this software or products
     32 * derived from it.  This software may be referred to only as "the Independent
     33 * JPEG Group's software".
     34 *
     35 * We specifically permit and encourage the use of this software as the basis
     36 * of commercial products, provided that all warranty or liability claims are
     37 * assumed by the product vendor.
     38 *
     39 * This file contains a fast, not so accurate integer implementation of the
     40 * forward DCT (Discrete Cosine Transform).
     41 *
     42 * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
     43 * on each column.  Direct algorithms are also available, but they are
     44 * much more complex and seem not to be any faster when reduced to code.
     45 *
     46 * This implementation is based on Arai, Agui, and Nakajima's algorithm for
     47 * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
     48 * Japanese, but the algorithm is described in the Pennebaker & Mitchell
     49 * JPEG textbook (see REFERENCES section in file README).  The following code
     50 * is based directly on figure 4-8 in P&M.
     51 * While an 8-point DCT cannot be done in less than 11 multiplies, it is
     52 * possible to arrange the computation so that many of the multiplies are
     53 * simple scalings of the final outputs.  These multiplies can then be
     54 * folded into the multiplications or divisions by the JPEG quantization
     55 * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
     56 * to be done in the DCT itself.
     57 * The primary disadvantage of this method is that with fixed-point math,
     58 * accuracy is lost due to imprecise representation of the scaled
     59 * quantization values.  The smaller the quantization table entry, the less
     60 * precise the scaled value, so this implementation does worse with high-
     61 * quality-setting files than with low-quality ones.
     62 */
     63 
     64 /**
     65 * @file
     66 * Independent JPEG Group's fast AAN dct.
     67 */
     68 
     69 #include <stdint.h>
     70 #include "libavutil/attributes.h"
     71 #include "fdctdsp.h"
     72 
/* Edge length of the square coefficient block handled by this file. */
#define DCTSIZE 8
/* Expands to its argument unchanged; wraps the return type of the
 * non-static transform functions defined in this file. */
#define GLOBAL(x) x
/* Plain C right shift of x by n bits. */
#define RIGHT_SHIFT(x, n) ((x) >> (n))

/*
 * This module is specialized to the case DCTSIZE = 8.
 */

#if DCTSIZE != 8
  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
#endif
     84 
     85 
     86 /* Scaling decisions are generally the same as in the LL&M algorithm;
     87 * see jfdctint.c for more details.  However, we choose to descale
     88 * (right shift) multiplication products as soon as they are formed,
     89 * rather than carrying additional fractional bits into subsequent additions.
     90 * This compromises accuracy slightly, but it lets us save a few shifts.
     91 * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
     92 * everywhere except in the multiplications proper; this saves a good deal
     93 * of work on 16-bit-int machines.
     94 *
     95 * Again to save a few shifts, the intermediate results between pass 1 and
     96 * pass 2 are not upscaled, but are represented only to integral precision.
     97 *
     98 * A final compromise is to represent the multiplicative constants to only
     99 * 8 fractional bits, rather than 13.  This saves some shifting work on some
    100 * machines, and may also reduce the cost of multiplication (since there
    101 * are fewer one-bits in the constants).
    102 */
    103 
    104 #define CONST_BITS  8
    105 
    106 
    107 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
    108 * causing a lot of useless floating-point operations at run time.
    109 * To get around this we use the following pre-calculated constants.
    110 * If you change CONST_BITS you may want to add appropriate values.
    111 * (With a reasonable C compiler, you can just rely on the FIX() macro...)
    112 */
    113 
    114 #if CONST_BITS == 8
    115 #define FIX_0_382683433  ((int32_t)   98)       /* FIX(0.382683433) */
    116 #define FIX_0_541196100  ((int32_t)  139)       /* FIX(0.541196100) */
    117 #define FIX_0_707106781  ((int32_t)  181)       /* FIX(0.707106781) */
    118 #define FIX_1_306562965  ((int32_t)  334)       /* FIX(1.306562965) */
    119 #else
    120 #define FIX_0_382683433  FIX(0.382683433)
    121 #define FIX_0_541196100  FIX(0.541196100)
    122 #define FIX_0_707106781  FIX(0.707106781)
    123 #define FIX_1_306562965  FIX(1.306562965)
    124 #endif
    125 
    126 
    127 /* We can gain a little more speed, with a further compromise in accuracy,
    128 * by omitting the addition in a descaling shift.  This yields an incorrectly
    129 * rounded result half the time...
    130 */
    131 
    132 #ifndef USE_ACCURATE_ROUNDING
    133 #undef DESCALE
    134 #define DESCALE(x,n)  RIGHT_SHIFT(x, n)
    135 #endif
    136 
    137 
    138 /* Multiply a int16_t variable by an int32_t constant, and immediately
    139 * descale to yield a int16_t result.
    140 */
    141 
    142 #define MULTIPLY(var,const)  ((int16_t) DESCALE((var) * (const), CONST_BITS))
    143 
    144 static av_always_inline void row_fdct(int16_t * data){
    145  int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    146  int tmp10, tmp11, tmp12, tmp13;
    147  int z1, z2, z3, z4, z5, z11, z13;
    148  int16_t *dataptr;
    149  int ctr;
    150 
    151  /* Pass 1: process rows. */
    152 
    153  dataptr = data;
    154  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
    155    tmp0 = dataptr[0] + dataptr[7];
    156    tmp7 = dataptr[0] - dataptr[7];
    157    tmp1 = dataptr[1] + dataptr[6];
    158    tmp6 = dataptr[1] - dataptr[6];
    159    tmp2 = dataptr[2] + dataptr[5];
    160    tmp5 = dataptr[2] - dataptr[5];
    161    tmp3 = dataptr[3] + dataptr[4];
    162    tmp4 = dataptr[3] - dataptr[4];
    163 
    164    /* Even part */
    165 
    166    tmp10 = tmp0 + tmp3;        /* phase 2 */
    167    tmp13 = tmp0 - tmp3;
    168    tmp11 = tmp1 + tmp2;
    169    tmp12 = tmp1 - tmp2;
    170 
    171    dataptr[0] = tmp10 + tmp11; /* phase 3 */
    172    dataptr[4] = tmp10 - tmp11;
    173 
    174    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
    175    dataptr[2] = tmp13 + z1;    /* phase 5 */
    176    dataptr[6] = tmp13 - z1;
    177 
    178    /* Odd part */
    179 
    180    tmp10 = tmp4 + tmp5;        /* phase 2 */
    181    tmp11 = tmp5 + tmp6;
    182    tmp12 = tmp6 + tmp7;
    183 
    184    /* The rotator is modified from fig 4-8 to avoid extra negations. */
    185    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
    186    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5;    /* c2-c6 */
    187    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5;    /* c2+c6 */
    188    z3 = MULTIPLY(tmp11, FIX_0_707106781);         /* c4 */
    189 
    190    z11 = tmp7 + z3;            /* phase 5 */
    191    z13 = tmp7 - z3;
    192 
    193    dataptr[5] = z13 + z2;      /* phase 6 */
    194    dataptr[3] = z13 - z2;
    195    dataptr[1] = z11 + z4;
    196    dataptr[7] = z11 - z4;
    197 
    198    dataptr += DCTSIZE;         /* advance pointer to next row */
    199  }
    200 }
    201 
    202 /*
    203 * Perform the forward DCT on one block of samples.
    204 */
    205 
    206 GLOBAL(void)
    207 ff_fdct_ifast (int16_t * data)
    208 {
    209  int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    210  int tmp10, tmp11, tmp12, tmp13;
    211  int z1, z2, z3, z4, z5, z11, z13;
    212  int16_t *dataptr;
    213  int ctr;
    214 
    215  row_fdct(data);
    216 
    217  /* Pass 2: process columns. */
    218 
    219  dataptr = data;
    220  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
    221    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
    222    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
    223    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
    224    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
    225    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
    226    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
    227    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
    228    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
    229 
    230    /* Even part */
    231 
    232    tmp10 = tmp0 + tmp3;        /* phase 2 */
    233    tmp13 = tmp0 - tmp3;
    234    tmp11 = tmp1 + tmp2;
    235    tmp12 = tmp1 - tmp2;
    236 
    237    dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
    238    dataptr[DCTSIZE*4] = tmp10 - tmp11;
    239 
    240    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
    241    dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
    242    dataptr[DCTSIZE*6] = tmp13 - z1;
    243 
    244    /* Odd part */
    245 
    246    tmp10 = tmp4 + tmp5;        /* phase 2 */
    247    tmp11 = tmp5 + tmp6;
    248    tmp12 = tmp6 + tmp7;
    249 
    250    /* The rotator is modified from fig 4-8 to avoid extra negations. */
    251    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
    252    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
    253    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
    254    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
    255 
    256    z11 = tmp7 + z3;            /* phase 5 */
    257    z13 = tmp7 - z3;
    258 
    259    dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
    260    dataptr[DCTSIZE*3] = z13 - z2;
    261    dataptr[DCTSIZE*1] = z11 + z4;
    262    dataptr[DCTSIZE*7] = z11 - z4;
    263 
    264    dataptr++;                  /* advance pointer to next column */
    265  }
    266 }
    267 
    268 /*
    269 * Perform the forward 2-4-8 DCT on one block of samples.
    270 */
    271 
    272 GLOBAL(void)
    273 ff_fdct_ifast248 (int16_t * data)
    274 {
    275  int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    276  int tmp10, tmp11, tmp12, tmp13;
    277  int z1;
    278  int16_t *dataptr;
    279  int ctr;
    280 
    281  row_fdct(data);
    282 
    283  /* Pass 2: process columns. */
    284 
    285  dataptr = data;
    286  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
    287    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1];
    288    tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
    289    tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
    290    tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
    291    tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1];
    292    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
    293    tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
    294    tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
    295 
    296    /* Even part */
    297 
    298    tmp10 = tmp0 + tmp3;
    299    tmp11 = tmp1 + tmp2;
    300    tmp12 = tmp1 - tmp2;
    301    tmp13 = tmp0 - tmp3;
    302 
    303    dataptr[DCTSIZE*0] = tmp10 + tmp11;
    304    dataptr[DCTSIZE*4] = tmp10 - tmp11;
    305 
    306    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
    307    dataptr[DCTSIZE*2] = tmp13 + z1;
    308    dataptr[DCTSIZE*6] = tmp13 - z1;
    309 
    310    tmp10 = tmp4 + tmp7;
    311    tmp11 = tmp5 + tmp6;
    312    tmp12 = tmp5 - tmp6;
    313    tmp13 = tmp4 - tmp7;
    314 
    315    dataptr[DCTSIZE*1] = tmp10 + tmp11;
    316    dataptr[DCTSIZE*5] = tmp10 - tmp11;
    317 
    318    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
    319    dataptr[DCTSIZE*3] = tmp13 + z1;
    320    dataptr[DCTSIZE*7] = tmp13 - z1;
    321 
    322    dataptr++;                        /* advance pointer to next column */
    323  }
    324 }
    325 
    326 
/* Remove some of the helper macros defined above.
 * NOTE(review): within the lines visible here, DCTSIZE, RIGHT_SHIFT,
 * MULTIPLY, FIX_0_382683433 and FIX_0_707106781 are not undefined --
 * confirm whether that is intentional. */
#undef GLOBAL
#undef CONST_BITS
#undef DESCALE
#undef FIX_0_541196100
#undef FIX_1_306562965