tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

rescaler_mips_dsp_r2.c (16016B)


      1 // Copyright 2014 Google Inc. All Rights Reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style license
      4 // that can be found in the COPYING file in the root of the source
      5 // tree. An additional intellectual property rights grant can be found
      6 // in the file PATENTS. All contributing project authors may
      7 // be found in the AUTHORS file in the root of the source tree.
      8 // -----------------------------------------------------------------------------
      9 //
     10 // MIPS version of rescaling functions
     11 //
     12 // Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
     13 
     14 #include "src/dsp/dsp.h"
     15 
     16 #if defined(WEBP_USE_MIPS_DSP_R2) && !defined(WEBP_REDUCE_SIZE)
     17 
     18 #include <assert.h>
     19 #include "src/utils/rescaler_utils.h"
     20 
     21 #define ROUNDER (WEBP_RESCALER_ONE >> 1)  // half of WEBP_RESCALER_ONE, used as a rounding bias
        // MULT_FIX: 64-bit product of x and y, plus ROUNDER, shifted right by
        // WEBP_RESCALER_RFIX. NOTE(review): this is round-to-nearest presumably
        // because WEBP_RESCALER_ONE == 1 << WEBP_RESCALER_RFIX; both are defined
        // outside this file -- confirm in rescaler_utils.h.
     22 #define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
        // MULT_FIX_FLOOR: the same product and shift without the ROUNDER bias.
     23 #define MULT_FIX_FLOOR(x, y) (((uint64_t)(x) * (y)) >> WEBP_RESCALER_RFIX)
     24 
     25 //------------------------------------------------------------------------------
     26 // Row export
     27 
     28 #if 0  // disabled for now. TODO(skal): make match the C-code
     29 static void ExportRowShrink_MIPSdspR2(WebPRescaler* const wrk) {
         // Exports one output row for the shrinking case: writes x_out_max
         // (= dst_width * num_channels) bytes to wrk->dst from the wrk->irow
         // accumulators and updates wrk->irow in place.
         // Samples are handled four at a time by the inline asm; the remaining
         // (x_out_max & 3) samples are handled by the scalar loops.
         // NOTE(review): this function is enclosed in an '#if 0' region, so it is
         // currently not compiled.
     30  int i;
     31  const int x_out_max = wrk->dst_width * wrk->num_channels;
     32  uint8_t* dst = wrk->dst;
     33  rescaler_t* irow = wrk->irow;
     34  const rescaler_t* frow = wrk->frow;
         // yscale == 0 selects the simpler branch that does not read frow.
     35  const int yscale = wrk->fy_scale * (-wrk->y_accum);
     36  int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;
         // temp7: fxy_scale passed to the asm as a register operand.
     37  const int temp7 = (int)wrk->fxy_scale;
         // temp6: byte length of the multiple-of-4 part of the row; the asm walks
         // the rows with 4-byte loads and a 16-byte stride per iteration.
     38  const int temp6 = (x_out_max & ~0x3) << 2;
     39  assert(!WebPRescalerOutputDone(wrk));
     40  assert(wrk->y_accum <= 0);
     41  assert(!wrk->y_expand);
     42  assert(wrk->fxy_scale != 0);
     43  if (yscale) {
           // The asm loop tests its exit condition at the bottom, so it is only
           // entered when at least one full group of four samples exists.
     44    if (x_out_max >= 4) {
     45      int temp8, temp9, temp10, temp11;
     46      __asm__ volatile (
               // temp3 * temp4 = 0x10000 * 0x8000 = 0x80000000. Every 'mult' below
               // pre-loads an accumulator with that value before 'maddu' adds the
               // actual product (presumably this is ROUNDER -- confirm).
     47        "li       %[temp3],    0x10000                    \n\t"
     48        "li       %[temp4],    0x8000                     \n\t"
               // loop_end = frow + temp6 (end of the multiple-of-4 part of frow).
     49        "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
     50      "1:                                                 \n\t"
               // Load four frow samples.
     51        "lw       %[temp0],    0(%[frow])                 \n\t"
     52        "lw       %[temp1],    4(%[frow])                 \n\t"
     53        "lw       %[temp2],    8(%[frow])                 \n\t"
     54        "lw       %[temp5],    12(%[frow])                \n\t"
               // acN = 0x80000000 + frow[n] * yscale.
               // NOTE(review): the scalar tail computes this term with
               // MULT_FIX_FLOOR (no bias); the two paths appear to differ here --
               // possibly what the 'make match the C-code' TODO refers to.
     55        "mult     $ac0,        %[temp3],    %[temp4]      \n\t"
     56        "maddu    $ac0,        %[temp0],    %[yscale]     \n\t"
     57        "mult     $ac1,        %[temp3],    %[temp4]      \n\t"
     58        "maddu    $ac1,        %[temp1],    %[yscale]     \n\t"
     59        "mult     $ac2,        %[temp3],    %[temp4]      \n\t"
     60        "maddu    $ac2,        %[temp2],    %[yscale]     \n\t"
     61        "mult     $ac3,        %[temp3],    %[temp4]      \n\t"
     62        "maddu    $ac3,        %[temp5],    %[yscale]     \n\t"
     63        "addiu    %[frow],     %[frow],     16            \n\t"
               // frac[n] = HI word of acN.
     64        "mfhi     %[temp0],    $ac0                       \n\t"
     65        "mfhi     %[temp1],    $ac1                       \n\t"
     66        "mfhi     %[temp2],    $ac2                       \n\t"
     67        "mfhi     %[temp5],    $ac3                       \n\t"
               // Load four irow accumulators.
     68        "lw       %[temp8],    0(%[irow])                 \n\t"
     69        "lw       %[temp9],    4(%[irow])                 \n\t"
     70        "lw       %[temp10],   8(%[irow])                 \n\t"
     71        "lw       %[temp11],   12(%[irow])                \n\t"
               // Advance dst and irow now; the stores below use negative offsets.
     72        "addiu    %[dst],      %[dst],      4             \n\t"
     73        "addiu    %[irow],     %[irow],     16            \n\t"
               // irow[n] - frac[n].
     74        "subu     %[temp8],    %[temp8],    %[temp0]      \n\t"
     75        "subu     %[temp9],    %[temp9],    %[temp1]      \n\t"
     76        "subu     %[temp10],   %[temp10],   %[temp2]      \n\t"
     77        "subu     %[temp11],   %[temp11],   %[temp5]      \n\t"
               // acN = 0x80000000 + (irow[n] - frac[n]) * fxy_scale.
     78        "mult     $ac0,        %[temp3],    %[temp4]      \n\t"
     79        "maddu    $ac0,        %[temp8],    %[temp7]      \n\t"
     80        "mult     $ac1,        %[temp3],    %[temp4]      \n\t"
     81        "maddu    $ac1,        %[temp9],    %[temp7]      \n\t"
     82        "mult     $ac2,        %[temp3],    %[temp4]      \n\t"
     83        "maddu    $ac2,        %[temp10],   %[temp7]      \n\t"
     84        "mult     $ac3,        %[temp3],    %[temp4]      \n\t"
     85        "maddu    $ac3,        %[temp11],   %[temp7]      \n\t"
               // v[n] = HI word of acN.
     86        "mfhi     %[temp8],    $ac0                       \n\t"
     87        "mfhi     %[temp9],    $ac1                       \n\t"
     88        "mfhi     %[temp10],   $ac2                       \n\t"
     89        "mfhi     %[temp11],   $ac3                       \n\t"
               // irow[n] = frac[n] (new fractional start).
     90        "sw       %[temp0],    -16(%[irow])               \n\t"
     91        "sw       %[temp1],    -12(%[irow])               \n\t"
     92        "sw       %[temp2],    -8(%[irow])                \n\t"
     93        "sw       %[temp5],    -4(%[irow])                \n\t"
               // Store one byte per sample.
               // NOTE(review): there is no '> 255' clamp here, unlike the scalar
               // tail below.
     94        "sb       %[temp8],    -4(%[dst])                 \n\t"
     95        "sb       %[temp9],    -3(%[dst])                 \n\t"
     96        "sb       %[temp10],   -2(%[dst])                 \n\t"
     97        "sb       %[temp11],   -1(%[dst])                 \n\t"
               // Repeat until frow reaches loop_end.
     98        "bne      %[frow],     %[loop_end], 1b            \n\t"
               // Outputs: scratch registers are early-clobber ("=&r"); frow, irow
               // and dst are read-write ("+r"), so the C variables are advanced
               // past the processed groups when the asm finishes.
     99        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
    100          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
    101          [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end),
    102          [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10),
    103          [temp11]"=&r"(temp11), [temp2]"=&r"(temp2)
    104        : [temp7]"r"(temp7), [yscale]"r"(yscale), [temp6]"r"(temp6)
               // Clobbers: memory (irow/dst are written) and all four accumulators
               // ("hi"/"lo" name $ac0).
    105        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
    106          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
    107      );
    108    }
           // Scalar tail: handles the remaining (x_out_max & 3) samples, continuing
           // from wherever the asm (if it ran) left frow/irow/dst.
    109    for (i = 0; i < (x_out_max & 0x3); ++i) {
    110      const uint32_t frac = (uint32_t)MULT_FIX_FLOOR(*frow++, yscale);
    111      const int v = (int)MULT_FIX(*irow - frac, wrk->fxy_scale);
    112      *dst++ = (v > 255) ? 255u : (uint8_t)v;
    113      *irow++ = frac;   // new fractional start
    114    }
    115  } else {
           // yscale == 0: output depends on irow only, and irow is reset to zero.
    116    if (x_out_max >= 4) {
    117      __asm__ volatile (
               // 0x10000 * 0x8000 = 0x80000000: value pre-loaded into each
               // accumulator by the 'mult' instructions below.
    118        "li       %[temp3],    0x10000                    \n\t"
    119        "li       %[temp4],    0x8000                     \n\t"
               // loop_end = irow + temp6.
    120        "addu     %[loop_end], %[irow],     %[temp6]      \n\t"
    121      "1:                                                 \n\t"
               // Load four irow accumulators.
    122        "lw       %[temp0],    0(%[irow])                 \n\t"
    123        "lw       %[temp1],    4(%[irow])                 \n\t"
    124        "lw       %[temp2],    8(%[irow])                 \n\t"
    125        "lw       %[temp5],    12(%[irow])                \n\t"
               // Advance dst and irow; the stores below use negative offsets.
    126        "addiu    %[dst],      %[dst],      4             \n\t"
    127        "addiu    %[irow],     %[irow],     16            \n\t"
               // acN = 0x80000000 + irow[n] * fxy_scale.
               // NOTE(review): the scalar tail uses MULT_FIX_FLOOR (no bias) for
               // the same quantity -- the two paths appear to differ; confirm.
    128        "mult     $ac0,        %[temp3],    %[temp4]      \n\t"
    129        "maddu    $ac0,        %[temp0],    %[temp7]      \n\t"
    130        "mult     $ac1,        %[temp3],    %[temp4]      \n\t"
    131        "maddu    $ac1,        %[temp1],    %[temp7]      \n\t"
    132        "mult     $ac2,        %[temp3],    %[temp4]      \n\t"
    133        "maddu    $ac2,        %[temp2],    %[temp7]      \n\t"
    134        "mult     $ac3,        %[temp3],    %[temp4]      \n\t"
    135        "maddu    $ac3,        %[temp5],    %[temp7]      \n\t"
               // v[n] = HI word of acN.
    136        "mfhi     %[temp0],    $ac0                       \n\t"
    137        "mfhi     %[temp1],    $ac1                       \n\t"
    138        "mfhi     %[temp2],    $ac2                       \n\t"
    139        "mfhi     %[temp5],    $ac3                       \n\t"
               // irow[n] = 0.
    140        "sw       $zero,       -16(%[irow])               \n\t"
    141        "sw       $zero,       -12(%[irow])               \n\t"
    142        "sw       $zero,       -8(%[irow])                \n\t"
    143        "sw       $zero,       -4(%[irow])                \n\t"
               // Store one byte per sample (no '> 255' clamp in this path).
    144        "sb       %[temp0],    -4(%[dst])                 \n\t"
    145        "sb       %[temp1],    -3(%[dst])                 \n\t"
    146        "sb       %[temp2],    -2(%[dst])                 \n\t"
    147        "sb       %[temp5],    -1(%[dst])                 \n\t"
               // Repeat until irow reaches loop_end.
    148        "bne      %[irow],     %[loop_end], 1b            \n\t"
    149        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
    150          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [irow]"+r"(irow),
    151          [dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2)
    152        : [temp7]"r"(temp7), [temp6]"r"(temp6)
    153        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
    154          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
    155      );
    156    }
           // Scalar tail for the remaining (x_out_max & 3) samples.
    157    for (i = 0; i < (x_out_max & 0x3); ++i) {
    158      const int v = (int)MULT_FIX_FLOOR(*irow, wrk->fxy_scale);
    159      *dst++ = (v > 255) ? 255u : (uint8_t)v;
    160      *irow++ = 0;
    161    }
    162  }
    163 }
    164 #endif  // 0
    165 
    166 static void ExportRowExpand_MIPSdspR2(WebPRescaler* const wrk) {
         // Exports one output row for the expanding case: writes x_out_max
         // (= dst_width * num_channels) bytes to wrk->dst.
         //  - y_accum == 0: each output is MULT_FIX(frow[x], fy_scale).
         //  - otherwise:    frow[x] and irow[x] are blended with weights A and B
         //                  first, then scaled by fy_scale.
         // Samples are handled four at a time by the inline asm; the remaining
         // (x_out_max & 3) samples are handled by the scalar loops. Neither frow
         // nor irow is written by this function.
    167  int i;
    168  uint8_t* dst = wrk->dst;
    169  rescaler_t* irow = wrk->irow;
    170  const int x_out_max = wrk->dst_width * wrk->num_channels;
    171  const rescaler_t* frow = wrk->frow;
    172  int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;
         // temp6: byte length of the multiple-of-4 part of the row; the asm walks
         // the rows with 4-byte loads and a 16-byte stride per iteration.
    173  const int temp6 = (x_out_max & ~0x3) << 2;
         // temp7: fy_scale passed to the asm as a register operand.
    174  const int temp7 = (int)wrk->fy_scale;
    175  assert(!WebPRescalerOutputDone(wrk));
    176  assert(wrk->y_accum <= 0);
    177  assert(wrk->y_expand);
    178  assert(wrk->y_sub != 0);
    179  if (wrk->y_accum == 0) {
           // The asm loop tests its exit condition at the bottom, so it is only
           // entered when at least one full group of four samples exists.
    180    if (x_out_max >= 4) {
    181      __asm__ volatile (
               // temp4 * temp5 = 0x10000 * 0x8000 = 0x80000000. Every 'mult' below
               // pre-loads an accumulator with that value before 'maddu' adds the
               // actual product (presumably this is ROUNDER -- confirm).
    182        "li       %[temp4],    0x10000                    \n\t"
    183        "li       %[temp5],    0x8000                     \n\t"
               // loop_end = frow + temp6.
    184        "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
    185      "1:                                                 \n\t"
               // Load four frow samples.
    186        "lw       %[temp0],    0(%[frow])                 \n\t"
    187        "lw       %[temp1],    4(%[frow])                 \n\t"
    188        "lw       %[temp2],    8(%[frow])                 \n\t"
    189        "lw       %[temp3],    12(%[frow])                \n\t"
               // Advance dst and frow; the byte stores below use negative offsets.
    190        "addiu    %[dst],      %[dst],      4             \n\t"
    191        "addiu    %[frow],     %[frow],     16            \n\t"
               // acN = 0x80000000 + frow[n] * fy_scale.
    192        "mult     $ac0,        %[temp4],    %[temp5]      \n\t"
    193        "maddu    $ac0,        %[temp0],    %[temp7]      \n\t"
    194        "mult     $ac1,        %[temp4],    %[temp5]      \n\t"
    195        "maddu    $ac1,        %[temp1],    %[temp7]      \n\t"
    196        "mult     $ac2,        %[temp4],    %[temp5]      \n\t"
    197        "maddu    $ac2,        %[temp2],    %[temp7]      \n\t"
    198        "mult     $ac3,        %[temp4],    %[temp5]      \n\t"
    199        "maddu    $ac3,        %[temp3],    %[temp7]      \n\t"
               // v[n] = HI word of acN.
    200        "mfhi     %[temp0],    $ac0                       \n\t"
    201        "mfhi     %[temp1],    $ac1                       \n\t"
    202        "mfhi     %[temp2],    $ac2                       \n\t"
    203        "mfhi     %[temp3],    $ac3                       \n\t"
               // Store one byte per sample.
               // NOTE(review): there is no '> 255' clamp here, unlike the scalar
               // tail below.
    204        "sb       %[temp0],    -4(%[dst])                 \n\t"
    205        "sb       %[temp1],    -3(%[dst])                 \n\t"
    206        "sb       %[temp2],    -2(%[dst])                 \n\t"
    207        "sb       %[temp3],    -1(%[dst])                 \n\t"
               // Repeat until frow reaches loop_end.
    208        "bne      %[frow],     %[loop_end], 1b            \n\t"
               // Outputs: scratch registers are early-clobber ("=&r"); frow and dst
               // are read-write ("+r"), so the C variables are advanced past the
               // processed groups when the asm finishes.
    209        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
    210          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
    211          [dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2)
    212        : [temp7]"r"(temp7), [temp6]"r"(temp6)
               // Clobbers: memory (dst is written) and all four accumulators
               // ("hi"/"lo" name $ac0).
    213        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
    214          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
    215      );
    216    }
           // Scalar tail: handles the remaining (x_out_max & 3) samples, continuing
           // from wherever the asm (if it ran) left frow/dst.
    217    for (i = 0; i < (x_out_max & 0x3); ++i) {
    218      const uint32_t J = *frow++;
    219      const int v = (int)MULT_FIX(J, wrk->fy_scale);
    220      *dst++ = (v > 255) ? 255u : (uint8_t)v;
    221    }
    222  } else {
           // B weights irow and A = WEBP_RESCALER_ONE - B weights frow in the blend
           // below.
    223    const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
    224    const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
    225    if (x_out_max >= 4) {
    226      int temp8, temp9, temp10, temp11;
    227      __asm__ volatile (
               // temp8 * temp9 = 0x10000 * 0x8000 = 0x80000000: value pre-loaded
               // into each accumulator by the 'mult' instructions below.
    228        "li       %[temp8],    0x10000                    \n\t"
    229        "li       %[temp9],    0x8000                     \n\t"
               // loop_end = frow + temp6.
    230        "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
    231      "1:                                                 \n\t"
               // Load four frow samples ...
    232        "lw       %[temp0],    0(%[frow])                 \n\t"
    233        "lw       %[temp1],    4(%[frow])                 \n\t"
    234        "lw       %[temp2],    8(%[frow])                 \n\t"
    235        "lw       %[temp3],    12(%[frow])                \n\t"
               // ... and the four matching irow samples.
    236        "lw       %[temp4],    0(%[irow])                 \n\t"
    237        "lw       %[temp5],    4(%[irow])                 \n\t"
    238        "lw       %[temp10],   8(%[irow])                 \n\t"
    239        "lw       %[temp11],   12(%[irow])                \n\t"
    240        "addiu    %[dst],      %[dst],      4             \n\t"
               // acN = 0x80000000 + A * frow[n] + B * irow[n].
    241        "mult     $ac0,        %[temp8],    %[temp9]      \n\t"
    242        "maddu    $ac0,        %[A],        %[temp0]      \n\t"
    243        "maddu    $ac0,        %[B],        %[temp4]      \n\t"
    244        "mult     $ac1,        %[temp8],    %[temp9]      \n\t"
    245        "maddu    $ac1,        %[A],        %[temp1]      \n\t"
    246        "maddu    $ac1,        %[B],        %[temp5]      \n\t"
    247        "mult     $ac2,        %[temp8],    %[temp9]      \n\t"
    248        "maddu    $ac2,        %[A],        %[temp2]      \n\t"
    249        "maddu    $ac2,        %[B],        %[temp10]     \n\t"
    250        "mult     $ac3,        %[temp8],    %[temp9]      \n\t"
    251        "maddu    $ac3,        %[A],        %[temp3]      \n\t"
    252        "maddu    $ac3,        %[B],        %[temp11]     \n\t"
    253        "addiu    %[frow],     %[frow],     16            \n\t"
    254        "addiu    %[irow],     %[irow],     16            \n\t"
               // J[n] = HI word of acN (the blended sample).
    255        "mfhi     %[temp0],    $ac0                       \n\t"
    256        "mfhi     %[temp1],    $ac1                       \n\t"
    257        "mfhi     %[temp2],    $ac2                       \n\t"
    258        "mfhi     %[temp3],    $ac3                       \n\t"
               // acN = 0x80000000 + J[n] * fy_scale.
    259        "mult     $ac0,        %[temp8],    %[temp9]      \n\t"
    260        "maddu    $ac0,        %[temp0],    %[temp7]      \n\t"
    261        "mult     $ac1,        %[temp8],    %[temp9]      \n\t"
    262        "maddu    $ac1,        %[temp1],    %[temp7]      \n\t"
    263        "mult     $ac2,        %[temp8],    %[temp9]      \n\t"
    264        "maddu    $ac2,        %[temp2],    %[temp7]      \n\t"
    265        "mult     $ac3,        %[temp8],    %[temp9]      \n\t"
    266        "maddu    $ac3,        %[temp3],    %[temp7]      \n\t"
               // v[n] = HI word of acN.
    267        "mfhi     %[temp0],    $ac0                       \n\t"
    268        "mfhi     %[temp1],    $ac1                       \n\t"
    269        "mfhi     %[temp2],    $ac2                       \n\t"
    270        "mfhi     %[temp3],    $ac3                       \n\t"
               // Store one byte per sample (no '> 255' clamp in this path).
    271        "sb       %[temp0],    -4(%[dst])                 \n\t"
    272        "sb       %[temp1],    -3(%[dst])                 \n\t"
    273        "sb       %[temp2],    -2(%[dst])                 \n\t"
    274        "sb       %[temp3],    -1(%[dst])                 \n\t"
               // Repeat until frow reaches loop_end.
    275        "bne      %[frow],     %[loop_end], 1b            \n\t"
    276        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
    277          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
    278          [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end),
    279          [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10),
    280          [temp11]"=&r"(temp11), [temp2]"=&r"(temp2)
    281        : [temp7]"r"(temp7), [temp6]"r"(temp6), [A]"r"(A), [B]"r"(B)
    282        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
    283          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
    284      );
    285    }
           // Scalar tail for the remaining (x_out_max & 3) samples: same blend and
           // scale as the asm, followed by a clamp to 255.
    286    for (i = 0; i < (x_out_max & 0x3); ++i) {
    287      const uint64_t I = (uint64_t)A * *frow++
    288                       + (uint64_t)B * *irow++;
    289      const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);
    290      const int v = (int)MULT_FIX(J, wrk->fy_scale);
    291      *dst++ = (v > 255) ? 255u : (uint8_t)v;
    292    }
    293  }
    294 }
    295 
    296 #undef MULT_FIX_FLOOR
    297 #undef MULT_FIX
    298 #undef ROUNDER
    299 
    300 //------------------------------------------------------------------------------
    301 // Entry point
    302 
    303 extern void WebPRescalerDspInitMIPSdspR2(void);
    304 
    305 WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMIPSdspR2(void) {
         // Points WebPRescalerExportRowExpand at the MIPS DSP-R2 implementation
         // defined in this file.
    306  WebPRescalerExportRowExpand = ExportRowExpand_MIPSdspR2;
         // The shrink variant is deliberately not installed: its implementation
         // in this file is compiled out under '#if 0'.
    307 //  WebPRescalerExportRowShrink = ExportRowShrink_MIPSdspR2;
    308 }
    309 
    310 #else  // !WEBP_USE_MIPS_DSP_R2
    311 
    312 WEBP_DSP_INIT_STUB(WebPRescalerDspInitMIPSdspR2)  // NOTE(review): presumably defines an empty WebPRescalerDspInitMIPSdspR2 for builds without DSP-R2 -- confirm in src/dsp/dsp.h
    313 
    314 #endif  // WEBP_USE_MIPS_DSP_R2