tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

pitch_mipsr1.h (10276B)


      1 /* Copyright (c) 2007-2008 CSIRO
      2   Copyright (c) 2007-2009 Xiph.Org Foundation
      3   Written by Jean-Marc Valin */
      4 /**
      5   @file pitch_mipsr1.h
      6   @brief Pitch analysis: MIPS-specific inner-product and cross-correlation kernels
      7 */
      8 
      9 /*
     10   Redistribution and use in source and binary forms, with or without
     11   modification, are permitted provided that the following conditions
     12   are met:
     13 
     14   - Redistributions of source code must retain the above copyright
     15   notice, this list of conditions and the following disclaimer.
     16 
     17   - Redistributions in binary form must reproduce the above copyright
     18   notice, this list of conditions and the following disclaimer in the
     19   documentation and/or other materials provided with the distribution.
     20 
     21   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     22   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     23   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     24   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
     25   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     26   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     27   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     28   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     29   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     30   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     31   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     32 */
     33 
     34 #ifndef PITCH_MIPSR1_H
     35 #define PITCH_MIPSR1_H
     36 
     37 #include "fixed_generic_mipsr1.h"
     38 
     39 #if defined (__mips_dsp) && __mips == 32
     40 
     41 #define accumulator_t opus_int64
     42 #define MIPS_MAC(acc,a,b) \
     43    __builtin_mips_madd((acc), (int)(a), (int)(b))
     44 
     45 #define MIPS_MAC16x16_2X(acc,a2x,b2x) \
     46    __builtin_mips_dpaq_s_w_ph((acc), (a2x), (b2x))
     47 
     48 #define OVERRIDE_CELT_INNER_PROD
     49 #define OVERRIDE_DUAL_INNER_PROD
     50 #define OVERRIDE_XCORR_KERNEL
     51 
     52 #else /* any other MIPS */
     53 
     54 /* using madd is slower due to single accumulator */
     55 #define accumulator_t opus_int32
     56 #define MIPS_MAC MAC16_16
     57 
     58 #define OVERRIDE_CELT_INNER_PROD
     59 #define OVERRIDE_DUAL_INNER_PROD
     60 #define OVERRIDE_XCORR_KERNEL
     61 
     62 #endif /* any other MIPS */
     63 
     64 
     65 #if defined(OVERRIDE_CELT_INNER_PROD)
     66 
     67 static OPUS_INLINE opus_val32 celt_inner_prod(const opus_val16 *x,
     68      const opus_val16 *y, int N, int arch)
     69 {
     70   int j;
     71   accumulator_t acc = 0;
     72 
     73 #if defined (MIPS_MAC16x16_2X)
     74   const v2i16 *x2x;
     75   const v2i16 *y2x;
     76   int loops;
     77 
     78   /* misaligned */
     79   if (((long)x | (long)y) & 3)
     80       goto fallback;
     81 
     82   x2x = __builtin_assume_aligned(x, 4);
     83   y2x = __builtin_assume_aligned(y, 4);
     84   loops = N / 8;
     85   for (j = 0; j < loops; j++)
     86   {
     87      acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]);
     88      acc = MIPS_MAC16x16_2X(acc, x2x[1], y2x[1]);
     89      acc = MIPS_MAC16x16_2X(acc, x2x[2], y2x[2]);
     90      acc = MIPS_MAC16x16_2X(acc, x2x[3], y2x[3]);
     91      x2x += 4; y2x += 4;
     92   }
     93 
     94   switch (N & 7) {
     95   case 7:
     96      acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]);
     97      acc = MIPS_MAC16x16_2X(acc, x2x[1], y2x[1]);
     98      acc = MIPS_MAC16x16_2X(acc, x2x[2], y2x[2]);
     99      acc = MIPS_MAC(acc, x[N-1], y[N-1]);
    100      break;
    101   case 6:
    102      acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]);
    103      acc = MIPS_MAC16x16_2X(acc, x2x[1], y2x[1]);
    104      acc = MIPS_MAC16x16_2X(acc, x2x[2], y2x[2]);
    105      break;
    106   case 5:
    107      acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]);
    108      acc = MIPS_MAC16x16_2X(acc, x2x[1], y2x[1]);
    109      acc = MIPS_MAC(acc, x[N-1], y[N-1]);
    110      break;
    111   case 4:
    112      acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]);
    113      acc = MIPS_MAC16x16_2X(acc, x2x[1], y2x[1]);
    114      break;
    115   case 3:
    116      acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]);
    117      acc = MIPS_MAC(acc, x[N-1], y[N-1]);
    118      break;
    119   case 2:
    120      acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]);
    121      break;
    122   case 1:
    123      acc = MIPS_MAC(acc, x[N-1], y[N-1]);
    124      break;
    125   case 0:
    126      break;
    127   }
    128   return __builtin_mips_extr_w(acc, 1);
    129 
    130 fallback:
    131 #endif
    132   for (j = 0; j < N - 3; j += 4)
    133   {
    134      acc = MIPS_MAC(acc, x[j],   y[j]);
    135      acc = MIPS_MAC(acc, x[j+1], y[j+1]);
    136      acc = MIPS_MAC(acc, x[j+2], y[j+2]);
    137      acc = MIPS_MAC(acc, x[j+3], y[j+3]);
    138   }
    139 
    140   switch (N & 3) {
    141   case 3:
    142      acc = MIPS_MAC(acc, x[j],   y[j]);
    143      acc = MIPS_MAC(acc, x[j+1], y[j+1]);
    144      acc = MIPS_MAC(acc, x[j+2], y[j+2]);
    145      break;
    146   case 2:
    147      acc = MIPS_MAC(acc, x[j],   y[j]);
    148      acc = MIPS_MAC(acc, x[j+1], y[j+1]);
    149      break;
    150   case 1:
    151      acc = MIPS_MAC(acc, x[j],   y[j]);
    152      break;
    153   case 0:
    154      break;
    155   }
    156 
    157   (void)arch;
    158 
    159   return (opus_val32)acc;
    160 }
    161 #endif /* OVERRIDE_CELT_INNER_PROD */
    162 
    163 #if defined(OVERRIDE_DUAL_INNER_PROD)
    164 static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
    165      int N, opus_val32 *xy1, opus_val32 *xy2, int arch)
    166 {
    167   int j;
    168   accumulator_t acc1 = 0;
    169   accumulator_t acc2 = 0;
    170 
    171 #if defined (MIPS_MAC16x16_2X)
    172   const v2i16 *x2x;
    173   const v2i16 *y01_2x;
    174   const v2i16 *y02_2x;
    175 
    176   /* misaligned */
    177   if (((long)x | (long)y01 | (long)y02) & 3)
    178       goto fallback;
    179 
    180   x2x = __builtin_assume_aligned(x, 4);
    181   y01_2x = __builtin_assume_aligned(y01, 4);
    182   y02_2x = __builtin_assume_aligned(y02, 4);
    183   N /= 2;
    184 
    185   for (j = 0; j < N - 3; j += 4)
    186   {
    187      acc1 = MIPS_MAC16x16_2X(acc1, x2x[j],   y01_2x[j]);
    188      acc2 = MIPS_MAC16x16_2X(acc2, x2x[j],   y02_2x[j]);
    189      acc1 = MIPS_MAC16x16_2X(acc1, x2x[j+1], y01_2x[j+1]);
    190      acc2 = MIPS_MAC16x16_2X(acc2, x2x[j+1], y02_2x[j+1]);
    191      acc1 = MIPS_MAC16x16_2X(acc1, x2x[j+2], y01_2x[j+2]);
    192      acc2 = MIPS_MAC16x16_2X(acc2, x2x[j+2], y02_2x[j+2]);
    193      acc1 = MIPS_MAC16x16_2X(acc1, x2x[j+3], y01_2x[j+3]);
    194      acc2 = MIPS_MAC16x16_2X(acc2, x2x[j+3], y02_2x[j+3]);
    195   }
    196 
    197   switch (N & 3) {
    198   case 3:
    199      acc1 = MIPS_MAC16x16_2X(acc1, x2x[j],   y01_2x[j]);
    200      acc2 = MIPS_MAC16x16_2X(acc2, x2x[j],   y02_2x[j]);
    201      acc1 = MIPS_MAC16x16_2X(acc1, x2x[j+1], y01_2x[j+1]);
    202      acc2 = MIPS_MAC16x16_2X(acc2, x2x[j+1], y02_2x[j+1]);
    203      acc1 = MIPS_MAC16x16_2X(acc1, x2x[j+2], y01_2x[j+2]);
    204      acc2 = MIPS_MAC16x16_2X(acc2, x2x[j+2], y02_2x[j+2]);
    205      break;
    206   case 2:
    207      acc1 = MIPS_MAC16x16_2X(acc1, x2x[j],   y01_2x[j]);
    208      acc2 = MIPS_MAC16x16_2X(acc2, x2x[j],   y02_2x[j]);
    209      acc1 = MIPS_MAC16x16_2X(acc1, x2x[j+1], y01_2x[j+1]);
    210      acc2 = MIPS_MAC16x16_2X(acc2, x2x[j+1], y02_2x[j+1]);
    211      break;
    212   case 1:
    213      acc1 = MIPS_MAC16x16_2X(acc1, x2x[j],   y01_2x[j]);
    214      acc2 = MIPS_MAC16x16_2X(acc2, x2x[j],   y02_2x[j]);
    215      break;
    216   case 0:
    217      break;
    218   }
    219 
    220   *xy1 = __builtin_mips_extr_w(acc1, 1);
    221   *xy2 = __builtin_mips_extr_w(acc2, 1);
    222   return;
    223 
    224 fallback:
    225 #endif
    226   /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
    227   for (j = 0; j < N - 3; j += 4)
    228   {
    229      acc1 = MIPS_MAC(acc1, x[j],   y01[j]);
    230      acc2 = MIPS_MAC(acc2, x[j],   y02[j]);
    231      acc1 = MIPS_MAC(acc1, x[j+1], y01[j+1]);
    232      acc2 = MIPS_MAC(acc2, x[j+1], y02[j+1]);
    233      acc1 = MIPS_MAC(acc1, x[j+2], y01[j+2]);
    234      acc2 = MIPS_MAC(acc2, x[j+2], y02[j+2]);
    235      acc1 = MIPS_MAC(acc1, x[j+3], y01[j+3]);
    236      acc2 = MIPS_MAC(acc2, x[j+3], y02[j+3]);
    237   }
    238 
    239   if (j < N) {
    240      acc1 = MIPS_MAC(acc1, x[j],   y01[j]);
    241      acc2 = MIPS_MAC(acc2, x[j],   y02[j]);
    242      acc1 = MIPS_MAC(acc1, x[j+1], y01[j+1]);
    243      acc2 = MIPS_MAC(acc2, x[j+1], y02[j+1]);
    244   }
    245 
    246   (void)arch;
    247 
    248   *xy1 = (opus_val32)acc1;
    249   *xy2 = (opus_val32)acc2;
    250 }
    251 #endif /* OVERRIDE_DUAL_INNER_PROD */
    252 
    253 #if defined(OVERRIDE_XCORR_KERNEL)
    254 
    255 static inline void xcorr_kernel_mips(const opus_val16 * x,
    256      const opus_val16 * y, opus_val32 sum[4], int len)
    257 {
    258   int j;
    259   opus_val16 y_0, y_1, y_2, y_3;
    260 
    261    accumulator_t sum_0, sum_1, sum_2, sum_3;
    262    sum_0 =  (accumulator_t)sum[0];
    263    sum_1 =  (accumulator_t)sum[1];
    264    sum_2 =  (accumulator_t)sum[2];
    265    sum_3 =  (accumulator_t)sum[3];
    266 
    267    y_0=*y++;
    268    y_1=*y++;
    269    y_2=*y++;
    270    for (j=0;j<len-3;j+=4)
    271    {
    272        opus_val16 tmp;
    273        tmp = *x++;
    274        y_3=*y++;
    275 
    276        sum_0 = MIPS_MAC(sum_0, tmp, y_0);
    277        sum_1 = MIPS_MAC(sum_1, tmp, y_1);
    278        sum_2 = MIPS_MAC(sum_2, tmp, y_2);
    279        sum_3 = MIPS_MAC(sum_3, tmp, y_3);
    280 
    281        tmp=*x++;
    282        y_0=*y++;
    283 
    284        sum_0 = MIPS_MAC(sum_0, tmp, y_1);
    285        sum_1 = MIPS_MAC(sum_1, tmp, y_2);
    286        sum_2 = MIPS_MAC(sum_2, tmp, y_3);
    287        sum_3 = MIPS_MAC(sum_3, tmp, y_0);
    288 
    289       tmp=*x++;
    290       y_1=*y++;
    291 
    292       sum_0 = MIPS_MAC(sum_0, tmp, y_2);
    293       sum_1 = MIPS_MAC(sum_1, tmp, y_3);
    294       sum_2 = MIPS_MAC(sum_2, tmp, y_0);
    295       sum_3 = MIPS_MAC(sum_3, tmp, y_1);
    296 
    297 
    298      tmp=*x++;
    299      y_2=*y++;
    300 
    301      sum_0 = MIPS_MAC(sum_0, tmp, y_3);
    302      sum_1 = MIPS_MAC(sum_1, tmp, y_0);
    303      sum_2 = MIPS_MAC(sum_2, tmp, y_1);
    304      sum_3 = MIPS_MAC(sum_3, tmp, y_2);
    305   }
    306 
    307   switch (len & 3) {
    308   case 3:
    309      sum_0 = MIPS_MAC(sum_0, x[2], y_2);
    310      sum_1 = MIPS_MAC(sum_1, x[2], y[0]);
    311      sum_2 = MIPS_MAC(sum_2, x[2], y[1]);
    312      sum_3 = MIPS_MAC(sum_3, x[2], y[2]);
    313 
    314      sum_0 = MIPS_MAC(sum_0, x[1], y_1);
    315      sum_1 = MIPS_MAC(sum_1, x[1], y_2);
    316      sum_2 = MIPS_MAC(sum_2, x[1], y[0]);
    317      sum_3 = MIPS_MAC(sum_3, x[1], y[1]);
    318 
    319      sum_0 = MIPS_MAC(sum_0, x[0], y_0);
    320      sum_1 = MIPS_MAC(sum_1, x[0], y_1);
    321      sum_2 = MIPS_MAC(sum_2, x[0], y_2);
    322      sum_3 = MIPS_MAC(sum_3, x[0], y[0]);
    323      break;
    324   case 2:
    325      sum_0 = MIPS_MAC(sum_0, x[1], y_1);
    326      sum_1 = MIPS_MAC(sum_1, x[1], y_2);
    327      sum_2 = MIPS_MAC(sum_2, x[1], y[0]);
    328      sum_3 = MIPS_MAC(sum_3, x[1], y[1]);
    329 
    330      sum_0 = MIPS_MAC(sum_0, x[0], y_0);
    331      sum_1 = MIPS_MAC(sum_1, x[0], y_1);
    332      sum_2 = MIPS_MAC(sum_2, x[0], y_2);
    333      sum_3 = MIPS_MAC(sum_3, x[0], y[0]);
    334      break;
    335   case 1:
    336      sum_0 = MIPS_MAC(sum_0, x[0], y_0);
    337      sum_1 = MIPS_MAC(sum_1, x[0], y_1);
    338      sum_2 = MIPS_MAC(sum_2, x[0], y_2);
    339      sum_3 = MIPS_MAC(sum_3, x[0], y[0]);
    340      break;
    341   case 0:
    342      break;
    343   }
    344 
    345   sum[0] = (opus_val32)sum_0;
    346   sum[1] = (opus_val32)sum_1;
    347   sum[2] = (opus_val32)sum_2;
    348   sum[3] = (opus_val32)sum_3;
    349 }
    350 
    351 #define xcorr_kernel(x, y, sum, len, arch) \
    352    ((void)(arch), xcorr_kernel_mips(x, y, sum, len))
    353 
    354 #undef accumulator_t
    355 #undef MIPS_MAC
    356 
    357 #endif /* OVERRIDE_XCORR_KERNEL */
    358 
    359 #endif /* PITCH_MIPSR1_H */