lossless_enc_mips32.c (12865B)
// Copyright 2015 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// MIPS version of lossless functions
//
// Author(s): Djordje Pesut    (djordje.pesut@imgtec.com)
//            Jovan Zelincevic (jovan.zelincevic@imgtec.com)

#include "src/dsp/dsp.h"
#include "src/dsp/lossless.h"
#include "src/dsp/lossless_common.h"

#if defined(WEBP_USE_MIPS32)

#include <assert.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>

static uint64_t FastSLog2Slow_MIPS32(uint32_t v) {
  assert(v >= LOG_LOOKUP_IDX_MAX);
  if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
    uint32_t log_cnt, y;
    uint64_t correction;
    const int c24 = 24;
    uint32_t temp;

    // Xf = 256 = 2^8
    // log_cnt is index of leading one in upper 24 bits
    __asm__ volatile(
      "clz   %[log_cnt], %[v]                     \n\t"
      "addiu %[y],       $zero,      1            \n\t"
      "subu  %[log_cnt], %[c24],     %[log_cnt]   \n\t"
      "sllv  %[y],       %[y],       %[log_cnt]   \n\t"
      "srlv  %[temp],    %[v],       %[log_cnt]   \n\t"
      : [log_cnt]"=&r"(log_cnt), [y]"=&r"(y),
        [temp]"=r"(temp)
      : [c24]"r"(c24), [v]"r"(v)
    );

    // vf = (2^log_cnt) * Xf; where y = 2^log_cnt and Xf < 256
    // Xf = floor(Xf) * (1 + (v % y) / v)
    // log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
    // The correction factor: log(1 + d) ~ d; for very small d values, so
    // log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v

    // (v % y) = (v % 2^log_cnt) = v & (2^log_cnt - 1)
    correction = LOG_2_RECIPROCAL_FIXED * (v & (y - 1));
    return (uint64_t)v * (kLog2Table[temp] +
                          ((uint64_t)log_cnt << LOG_2_PRECISION_BITS)) +
           correction;
  } else {
    return (uint64_t)(LOG_2_RECIPROCAL_FIXED_DOUBLE * v * log((double)v) + .5);
  }
}

static uint32_t FastLog2Slow_MIPS32(uint32_t v) {
  assert(v >= LOG_LOOKUP_IDX_MAX);
  if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
    uint32_t log_cnt, y;
    const int c24 = 24;
    uint32_t log_2;
    uint32_t temp;

    __asm__ volatile(
      "clz   %[log_cnt], %[v]                     \n\t"
      "addiu %[y],       $zero,      1            \n\t"
      "subu  %[log_cnt], %[c24],     %[log_cnt]   \n\t"
      "sllv  %[y],       %[y],       %[log_cnt]   \n\t"
      "srlv  %[temp],    %[v],       %[log_cnt]   \n\t"
      : [log_cnt]"=&r"(log_cnt), [y]"=&r"(y),
        [temp]"=r"(temp)
      : [c24]"r"(c24), [v]"r"(v)
    );

    log_2 = kLog2Table[temp] + (log_cnt << LOG_2_PRECISION_BITS);
    if (v >= APPROX_LOG_MAX) {
      // Since the division is still expensive, add this correction factor only
      // for large values of 'v'.
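      // (Same derivation as in FastSLog2Slow_MIPS32 above: 'y' is a power of
      // two, so (v % y) == (v & (y - 1)); the correction approximates
      // LOG_2_RECIPROCAL * (v % y) / v in fixed point, with the division by
      // 'v' carried out explicitly via DivRound below.)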
      const uint64_t correction = LOG_2_RECIPROCAL_FIXED * (v & (y - 1));
      log_2 += (uint32_t)DivRound(correction, v);
    }
    return log_2;
  } else {
    return (uint32_t)(LOG_2_RECIPROCAL_FIXED_DOUBLE * log((double)v) + .5);
  }
}

// C version of this function:
//   int i = 0;
//   int64_t cost = 0;
//   const uint32_t* pop = &population[4];
//   const uint32_t* LoopEnd = &population[length];
//   while (pop != LoopEnd) {
//     ++i;
//     cost += i * *pop;
//     cost += i * *(pop + 1);
//     pop += 2;
//   }
//   return cost;
static uint32_t ExtraCost_MIPS32(const uint32_t* const population, int length) {
  int i, temp0, temp1;
  const uint32_t* pop = &population[4];
  const uint32_t* const LoopEnd = &population[length];

  __asm__ volatile(
    "mult  $zero,    $zero                  \n\t"
    "xor   %[i],     %[i],       %[i]       \n\t"
    "beq   %[pop],   %[LoopEnd], 2f         \n\t"
  "1:                                       \n\t"
    "lw    %[temp0], 0(%[pop])              \n\t"
    "lw    %[temp1], 4(%[pop])              \n\t"
    "addiu %[i],     %[i],       1          \n\t"
    "addiu %[pop],   %[pop],     8          \n\t"
    "madd  %[i],     %[temp0]               \n\t"
    "madd  %[i],     %[temp1]               \n\t"
    "bne   %[pop],   %[LoopEnd], 1b         \n\t"
  "2:                                       \n\t"
    "mfhi  %[temp0]                         \n\t"
    "mflo  %[temp1]                         \n\t"
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
      [i]"=&r"(i), [pop]"+r"(pop)
    : [LoopEnd]"r"(LoopEnd)
    : "memory", "hi", "lo"
  );

  return ((int64_t)temp0 << 32 | temp1);
}

#define HUFFMAN_COST_PASS                                 \
  __asm__ volatile(                                       \
    "sll   %[temp1],  %[temp0],    3          \n\t"       \
    "addiu %[temp3],  %[streak],   -3         \n\t"       \
    "addu  %[temp2],  %[pstreaks], %[temp1]   \n\t"       \
    "blez  %[temp3],  1f                      \n\t"       \
    "srl   %[temp1],  %[temp1],    1          \n\t"       \
    "addu  %[temp3],  %[pcnts],    %[temp1]   \n\t"       \
    "lw    %[temp0],  4(%[temp2])             \n\t"       \
    "lw    %[temp1],  0(%[temp3])             \n\t"       \
    "addu  %[temp0],  %[temp0],    %[streak]  \n\t"       \
    "addiu %[temp1],  %[temp1],    1          \n\t"       \
    "sw    %[temp0],  4(%[temp2])             \n\t"       \
    "sw    %[temp1],  0(%[temp3])             \n\t"       \
    "b     2f                                 \n\t"       \
  "1:                                         \n\t"       \
    "lw    %[temp0],  0(%[temp2])             \n\t"       \
    "addu  %[temp0],  %[temp0],    %[streak]  \n\t"       \
    "sw    %[temp0],  0(%[temp2])             \n\t"       \
  "2:                                         \n\t"       \
    : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),           \
      [temp3]"=&r"(temp3), [temp0]"+r"(temp0)             \
    : [pstreaks]"r"(pstreaks), [pcnts]"r"(pcnts),         \
      [streak]"r"(streak)                                 \
    : "memory"                                            \
  );

// Returns the various RLE counts
static WEBP_INLINE void GetEntropyUnrefinedHelper(
    uint32_t val, int i, uint32_t* WEBP_RESTRICT const val_prev,
    int* WEBP_RESTRICT const i_prev,
    VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
    VP8LStreaks* WEBP_RESTRICT const stats) {
  int* const pstreaks = &stats->streaks[0][0];
  int* const pcnts = &stats->counts[0];
  int temp0, temp1, temp2, temp3;
  const int streak = i - *i_prev;

  // Gather info for the bit entropy.
  if (*val_prev != 0) {
    bit_entropy->sum += (*val_prev) * streak;
    bit_entropy->nonzeros += streak;
    bit_entropy->nonzero_code = *i_prev;
    bit_entropy->entropy += VP8LFastSLog2(*val_prev) * streak;
    if (bit_entropy->max_val < *val_prev) {
      bit_entropy->max_val = *val_prev;
    }
  }

  // Gather info for the Huffman cost.
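  // A plain-C sketch of what HUFFMAN_COST_PASS below computes (derived from
  // the assembly; it mirrors the generic C implementation):
  //   stats->counts[*val_prev != 0] += (streak > 3);
  //   stats->streaks[*val_prev != 0][(streak > 3)] += streak;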
  temp0 = (*val_prev != 0);
  HUFFMAN_COST_PASS

  *val_prev = val;
  *i_prev = i;
}

static void GetEntropyUnrefined_MIPS32(
    const uint32_t X[], int length,
    VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
    VP8LStreaks* WEBP_RESTRICT const stats) {
  int i;
  int i_prev = 0;
  uint32_t x_prev = X[0];

  memset(stats, 0, sizeof(*stats));
  VP8LBitEntropyInit(bit_entropy);

  for (i = 1; i < length; ++i) {
    const uint32_t x = X[i];
    if (x != x_prev) {
      GetEntropyUnrefinedHelper(x, i, &x_prev, &i_prev, bit_entropy, stats);
    }
  }
  GetEntropyUnrefinedHelper(0, i, &x_prev, &i_prev, bit_entropy, stats);

  bit_entropy->entropy = VP8LFastSLog2(bit_entropy->sum) - bit_entropy->entropy;
}

static void GetCombinedEntropyUnrefined_MIPS32(
    const uint32_t X[], const uint32_t Y[], int length,
    VP8LBitEntropy* WEBP_RESTRICT const entropy,
    VP8LStreaks* WEBP_RESTRICT const stats) {
  int i = 1;
  int i_prev = 0;
  uint32_t xy_prev = X[0] + Y[0];

  memset(stats, 0, sizeof(*stats));
  VP8LBitEntropyInit(entropy);

  for (i = 1; i < length; ++i) {
    const uint32_t xy = X[i] + Y[i];
    if (xy != xy_prev) {
      GetEntropyUnrefinedHelper(xy, i, &xy_prev, &i_prev, entropy, stats);
    }
  }
  GetEntropyUnrefinedHelper(0, i, &xy_prev, &i_prev, entropy, stats);

  entropy->entropy = VP8LFastSLog2(entropy->sum) - entropy->entropy;
}

#define ASM_START                                       \
  __asm__ volatile(                                     \
    ".set push                              \n\t"       \
    ".set at                                \n\t"       \
    ".set macro                             \n\t"       \
  "1:                                       \n\t"

// P2 = P0 + P1
// A..D - offsets
// E - flag that tells the macro whether the 'P1' pointer
//     should be incremented
// 'literal' and successive histograms can be unaligned,
// so we must use ulw and usw (unaligned load/store)
#define ADD_TO_OUT(A, B, C, D, E, P0, P1, P2)           \
    "ulw    %[temp0], " #A "(%[" #P0 "])    \n\t"       \
    "ulw    %[temp1], " #B "(%[" #P0 "])    \n\t"       \
    "ulw    %[temp2], " #C "(%[" #P0 "])    \n\t"       \
    "ulw    %[temp3], " #D "(%[" #P0 "])    \n\t"       \
    "ulw    %[temp4], " #A "(%[" #P1 "])    \n\t"       \
    "ulw    %[temp5], " #B "(%[" #P1 "])    \n\t"       \
    "ulw    %[temp6], " #C "(%[" #P1 "])    \n\t"       \
    "ulw    %[temp7], " #D "(%[" #P1 "])    \n\t"       \
    "addu   %[temp4], %[temp4],   %[temp0]  \n\t"       \
    "addu   %[temp5], %[temp5],   %[temp1]  \n\t"       \
    "addu   %[temp6], %[temp6],   %[temp2]  \n\t"       \
    "addu   %[temp7], %[temp7],   %[temp3]  \n\t"       \
    "addiu  %[" #P0 "], %[" #P0 "], 16      \n\t"       \
  ".if " #E " == 1                          \n\t"       \
    "addiu  %[" #P1 "], %[" #P1 "], 16      \n\t"       \
  ".endif                                   \n\t"       \
    "usw    %[temp4], " #A "(%[" #P2 "])    \n\t"       \
    "usw    %[temp5], " #B "(%[" #P2 "])    \n\t"       \
    "usw    %[temp6], " #C "(%[" #P2 "])    \n\t"       \
    "usw    %[temp7], " #D "(%[" #P2 "])    \n\t"       \
    "addiu  %[" #P2 "], %[" #P2 "], 16      \n\t"       \
    "bne    %[" #P0 "], %[LoopEnd], 1b      \n\t"       \
    ".set pop                               \n\t"

#define ASM_END_COMMON_0                                \
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),         \
      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),         \
      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),         \
      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),         \
      [pa]"+r"(pa), [pout]"+r"(pout)

#define ASM_END_COMMON_1                                \
    : [LoopEnd]"r"(LoopEnd)                             \
    : "memory", "at"                                    \
  );

#define ASM_END_0                                       \
    ASM_END_COMMON_0                                    \
    , [pb]"+r"(pb)                                      \
    ASM_END_COMMON_1

#define ASM_END_1                                       \
    ASM_END_COMMON_0                                    \
    ASM_END_COMMON_1

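// A plain-C sketch of the two functions below: AddVector computes
//   pout[i] = pa[i] + pb[i]
// and AddVectorEq computes
//   pout[i] += pa[i]
// for i in [0, size). The assembly handles four words per iteration; the
// (size % 4) remainder falls through to the scalar loops, which rely on the
// pointers having been advanced by the asm blocks.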
static void AddVector_MIPS32(const uint32_t* WEBP_RESTRICT pa,
                             const uint32_t* WEBP_RESTRICT pb,
                             uint32_t* WEBP_RESTRICT pout, int size) {
  uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
  const int end = ((size) / 4) * 4;
  const uint32_t* const LoopEnd = pa + end;
  int i;
  ASM_START
  ADD_TO_OUT(0, 4, 8, 12, 1, pa, pb, pout)
  ASM_END_0
  for (i = 0; i < size - end; ++i) pout[i] = pa[i] + pb[i];
}

static void AddVectorEq_MIPS32(const uint32_t* WEBP_RESTRICT pa,
                               uint32_t* WEBP_RESTRICT pout, int size) {
  uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
  const int end = ((size) / 4) * 4;
  const uint32_t* const LoopEnd = pa + end;
  int i;
  ASM_START
  ADD_TO_OUT(0, 4, 8, 12, 0, pa, pout, pout)
  ASM_END_1
  for (i = 0; i < size - end; ++i) pout[i] += pa[i];
}

#undef ASM_END_1
#undef ASM_END_0
#undef ASM_END_COMMON_1
#undef ASM_END_COMMON_0
#undef ADD_TO_OUT
#undef ASM_START

//------------------------------------------------------------------------------
// Entry point

extern void VP8LEncDspInitMIPS32(void);

WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitMIPS32(void) {
  VP8LFastSLog2Slow = FastSLog2Slow_MIPS32;
  VP8LFastLog2Slow = FastLog2Slow_MIPS32;
  VP8LExtraCost = ExtraCost_MIPS32;
  VP8LGetEntropyUnrefined = GetEntropyUnrefined_MIPS32;
  VP8LGetCombinedEntropyUnrefined = GetCombinedEntropyUnrefined_MIPS32;
  VP8LAddVector = AddVector_MIPS32;
  VP8LAddVectorEq = AddVectorEq_MIPS32;
}

#else  // !WEBP_USE_MIPS32

WEBP_DSP_INIT_STUB(VP8LEncDspInitMIPS32)

#endif  // WEBP_USE_MIPS32