tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

main_sse.h (20671B)


      1 /* Copyright (c) 2014, Cisco Systems, INC
      2   Written by XiangMingZhu WeiZhou MinPeng YanWang
      3 
      4   Redistribution and use in source and binary forms, with or without
      5   modification, are permitted provided that the following conditions
      6   are met:
      7 
      8   - Redistributions of source code must retain the above copyright
      9   notice, this list of conditions and the following disclaimer.
     10 
     11   - Redistributions in binary form must reproduce the above copyright
     12   notice, this list of conditions and the following disclaimer in the
     13   documentation and/or other materials provided with the distribution.
     14 
     15   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     16   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     17   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     18   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
     19   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     20   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     21   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     22   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     23   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     24   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     25   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 */
     27 
     28 #ifndef MAIN_SSE_H
     29 # define MAIN_SSE_H
     30 
     31 # ifdef HAVE_CONFIG_H
     32 #  include "config.h"
     33 # endif
     34 
     35 # if defined(OPUS_X86_MAY_HAVE_SSE4_1)
     36 /* SSE4.1 variant of silk_VQ_WMat_EC, plus the silk_VQ_WMat_EC() macro that routes calls to it (directly, or through an arch-indexed table under RTCD). No override is defined here if neither condition below holds. */
     37 void silk_VQ_WMat_EC_sse4_1(
     38    opus_int8                   *ind,                           /* O    index of best codebook vector               */
     39    opus_int32                  *res_nrg_Q15,                   /* O    best residual energy                        */
     40    opus_int32                  *rate_dist_Q8,                  /* O    best total bitrate                          */
     41    opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
     42    const opus_int32            *XX_Q17,                        /* I    correlation matrix                          */
     43    const opus_int32            *xX_Q17,                        /* I    correlation vector                          */
     44    const opus_int8             *cb_Q7,                         /* I    codebook                                    */
     45    const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
     46    const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
     47    const opus_int              subfr_len,                      /* I    number of samples per subframe              */
     48    const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
     49    const opus_int              L                               /* I    number of vectors in codebook               */
     50 );
     51 
     52 #  if defined OPUS_X86_PRESUME_SSE4_1 /* SSE4.1 presumed: call the SSE4.1 function directly; arch is evaluated and discarded */
     53 
     54 #   define OVERRIDE_silk_VQ_WMat_EC
     55 #   define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
     56                           subfr_len, max_gain_Q7, L, arch) \
     57    ((void)(arch),silk_VQ_WMat_EC_sse4_1(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
     58                          subfr_len, max_gain_Q7, L))
     59 
     60 #  elif defined(OPUS_HAVE_RTCD) /* run-time dispatch: call the table entry selected by (arch & OPUS_ARCHMASK) */
     61 
     62 extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])(
     63    opus_int8                   *ind,                           /* O    index of best codebook vector               */
     64    opus_int32                  *res_nrg_Q15,                   /* O    best residual energy                        */
     65    opus_int32                  *rate_dist_Q8,                  /* O    best total bitrate                          */
     66    opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
     67    const opus_int32            *XX_Q17,                        /* I    correlation matrix                          */
     68    const opus_int32            *xX_Q17,                        /* I    correlation vector                          */
     69    const opus_int8             *cb_Q7,                         /* I    codebook                                    */
     70    const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
     71    const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
     72    const opus_int              subfr_len,                      /* I    number of samples per subframe              */
     73    const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
     74    const opus_int              L                               /* I    number of vectors in codebook               */
     75 );
     76 
     77 #   define OVERRIDE_silk_VQ_WMat_EC
     78 #   define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
     79                           subfr_len, max_gain_Q7, L, arch) \
     80    ((*SILK_VQ_WMAT_EC_IMPL[(arch) & OPUS_ARCHMASK])(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
     81                          subfr_len, max_gain_Q7, L))
     82 
     83 #  endif /* silk_VQ_WMat_EC dispatch (OPUS_X86_PRESUME_SSE4_1 / OPUS_HAVE_RTCD) */
     84 /* SSE4.1 variant of silk_NSQ, plus the silk_NSQ() macro that routes calls to it. Macro arguments are positional, so the macro names x_Q3 and AR2_Q13 line up with the prototype parameters x16 and AR_Q13. */
     85 void silk_NSQ_sse4_1(
     86    const silk_encoder_state    *psEncC,                                      /* I    Encoder State                   */
     87    silk_nsq_state              *NSQ,                                         /* I/O  NSQ state                       */
     88    SideInfoIndices             *psIndices,                                   /* I/O  Quantization Indices            */
     89    const opus_int16            x16[],                                        /* I    Input                           */
     90    opus_int8                   pulses[],                                     /* O    Quantized pulse signal          */
     91    const opus_int16            *PredCoef_Q12,                                /* I    Short term prediction coefs     */
     92    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],      /* I    Long term prediction coefs      */
     93    const opus_int16            AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I    Noise shaping coefs             */
     94    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],            /* I    Long term shaping coefs         */
     95    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                     /* I    Spectral tilt                   */
     96    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                   /* I    Low frequency shaping coefs     */
     97    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                    /* I    Quantization step sizes         */
     98    const opus_int              pitchL[ MAX_NB_SUBFR ],                       /* I    Pitch lags                      */
     99    const opus_int              Lambda_Q10,                                   /* I    Rate/distortion tradeoff        */
    100    const opus_int              LTP_scale_Q14                                 /* I    LTP state scaling               */
    101 );
    102 
    103 #  if defined OPUS_X86_PRESUME_SSE4_1 /* SSE4.1 presumed: call the SSE4.1 function directly; arch is evaluated and discarded */
    104 
    105 #   define OVERRIDE_silk_NSQ
    106 #   define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
    107                    HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
    108    ((void)(arch),silk_NSQ_sse4_1(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
    109                   HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
    110 
    111 #  elif defined(OPUS_HAVE_RTCD) /* run-time dispatch: call the table entry selected by (arch & OPUS_ARCHMASK) */
    112 
    113 extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])(
    114    const silk_encoder_state    *psEncC,                                      /* I    Encoder State                   */
    115    silk_nsq_state              *NSQ,                                         /* I/O  NSQ state                       */
    116    SideInfoIndices             *psIndices,                                   /* I/O  Quantization Indices            */
    117    const opus_int16            x16[],                                        /* I    Input                           */
    118    opus_int8                   pulses[],                                     /* O    Quantized pulse signal          */
    119    const opus_int16            *PredCoef_Q12,                                /* I    Short term prediction coefs     */
    120    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],      /* I    Long term prediction coefs      */
    121    const opus_int16            AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I    Noise shaping coefs             */
    122    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],            /* I    Long term shaping coefs         */
    123    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                     /* I    Spectral tilt                   */
    124    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                   /* I    Low frequency shaping coefs     */
    125    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                    /* I    Quantization step sizes         */
    126    const opus_int              pitchL[ MAX_NB_SUBFR ],                       /* I    Pitch lags                      */
    127    const opus_int              Lambda_Q10,                                   /* I    Rate/distortion tradeoff        */
    128    const opus_int              LTP_scale_Q14                                 /* I    LTP state scaling               */
    129 );
    130 
    131 #   define OVERRIDE_silk_NSQ
    132 #   define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
    133                    HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
    134    ((*SILK_NSQ_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
    135                   HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
    136 
    137 #  endif /* silk_NSQ dispatch (OPUS_X86_PRESUME_SSE4_1 / OPUS_HAVE_RTCD) */
    138 /* SSE4.1 and AVX2 variants of silk_NSQ_del_dec, plus the silk_NSQ_del_dec() macro: AVX2 directly if presumed; else SSE4.1 directly if presumed and AVX2 is not a run-time candidate; else the RTCD table. */
    139 void silk_NSQ_del_dec_sse4_1(
    140    const silk_encoder_state    *psEncC,                                      /* I    Encoder State                   */
    141    silk_nsq_state              *NSQ,                                         /* I/O  NSQ state                       */
    142    SideInfoIndices             *psIndices,                                   /* I/O  Quantization Indices            */
    143    const opus_int16            x16[],                                        /* I    Input                           */
    144    opus_int8                   pulses[],                                     /* O    Quantized pulse signal          */
    145    const opus_int16            *PredCoef_Q12,                                /* I    Short term prediction coefs     */
    146    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],      /* I    Long term prediction coefs      */
    147    const opus_int16            AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I    Noise shaping coefs             */
    148    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],            /* I    Long term shaping coefs         */
    149    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                     /* I    Spectral tilt                   */
    150    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                   /* I    Low frequency shaping coefs     */
    151    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                    /* I    Quantization step sizes         */
    152    const opus_int              pitchL[ MAX_NB_SUBFR ],                       /* I    Pitch lags                      */
    153    const opus_int              Lambda_Q10,                                   /* I    Rate/distortion tradeoff        */
    154    const opus_int              LTP_scale_Q14                                 /* I    LTP state scaling               */
    155 );
    156 /* AVX2 variant; same parameter list as the SSE4.1 prototype above except for the declared element type of pitchL. */
    157 void silk_NSQ_del_dec_avx2(
    158    const silk_encoder_state *psEncC,                            /* I    Encoder State               */
    159    silk_nsq_state *NSQ,                                         /* I/O  NSQ state                   */
    160    SideInfoIndices *psIndices,                                  /* I/O  Quantization Indices        */
    161    const opus_int16 x16[],                                      /* I    Input                       */
    162    opus_int8 pulses[],                                          /* O    Quantized pulse signal      */
    163    const opus_int16 *PredCoef_Q12,                              /* I    Short term prediction coefs */
    164    const opus_int16 LTPCoef_Q14[LTP_ORDER * MAX_NB_SUBFR],      /* I    Long term prediction coefs  */
    165    const opus_int16 AR_Q13[MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER], /* I    Noise shaping coefs         */
    166    const opus_int HarmShapeGain_Q14[MAX_NB_SUBFR],              /* I    Long term shaping coefs     */
    167    const opus_int Tilt_Q14[MAX_NB_SUBFR],                       /* I    Spectral tilt               */
    168    const opus_int32 LF_shp_Q14[MAX_NB_SUBFR],                   /* I    Low frequency shaping coefs */
    169    const opus_int32 Gains_Q16[MAX_NB_SUBFR],                    /* I    Quantization step sizes     */
    170    const opus_int32 pitchL[MAX_NB_SUBFR],                       /* I    Pitch lags; NOTE(review): opus_int32 here, opus_int in the SSE4.1 prototype and RTCD table - confirm they are the same type */
    171    const opus_int Lambda_Q10,                                   /* I    Rate/distortion tradeoff    */
    172    const opus_int LTP_scale_Q14                                 /* I    LTP state scaling           */
    173 );
    174 
    175 #  if defined (OPUS_X86_PRESUME_AVX2) /* AVX2 presumed: call the AVX2 function directly; arch is evaluated and discarded */
    176 
    177 #   define OVERRIDE_silk_NSQ_del_dec
    178 #   define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
    179                            HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
    180    ((void)(arch),silk_NSQ_del_dec_avx2(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
    181                           HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
    182 
    183 #  elif defined (OPUS_X86_PRESUME_SSE4_1) && !defined(OPUS_X86_MAY_HAVE_AVX2) /* SSE4.1 presumed and OPUS_X86_MAY_HAVE_AVX2 not defined: call the SSE4.1 function directly */
    184 
    185 #   define OVERRIDE_silk_NSQ_del_dec
    186 #   define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
    187                            HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
    188    ((void)(arch),silk_NSQ_del_dec_sse4_1(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
    189                           HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
    190 
    191 #  elif defined(OPUS_HAVE_RTCD) /* run-time dispatch: call the table entry selected by (arch & OPUS_ARCHMASK) */
    192 
    193 extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
    194    const silk_encoder_state    *psEncC,                                      /* I    Encoder State                   */
    195    silk_nsq_state              *NSQ,                                         /* I/O  NSQ state                       */
    196    SideInfoIndices             *psIndices,                                   /* I/O  Quantization Indices            */
    197    const opus_int16            x16[],                                        /* I    Input                           */
    198    opus_int8                   pulses[],                                     /* O    Quantized pulse signal          */
    199    const opus_int16            *PredCoef_Q12,                                /* I    Short term prediction coefs     */
    200    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],      /* I    Long term prediction coefs      */
    201    const opus_int16            AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I    Noise shaping coefs             */
    202    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],            /* I    Long term shaping coefs         */
    203    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                     /* I    Spectral tilt                   */
    204    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                   /* I    Low frequency shaping coefs     */
    205    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                    /* I    Quantization step sizes         */
    206    const opus_int              pitchL[ MAX_NB_SUBFR ],                       /* I    Pitch lags                      */
    207    const opus_int              Lambda_Q10,                                   /* I    Rate/distortion tradeoff        */
    208    const opus_int              LTP_scale_Q14                                 /* I    LTP state scaling               */
    209 );
    210 
    211 #   define OVERRIDE_silk_NSQ_del_dec
    212 #   define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
    213                            HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
    214    ((*SILK_NSQ_DEL_DEC_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
    215                           HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
    216 
    217 #  endif /* silk_NSQ_del_dec dispatch (OPUS_X86_PRESUME_AVX2 / OPUS_X86_PRESUME_SSE4_1 / OPUS_HAVE_RTCD) */
    218 /* Prototype only; the definition is not in this header. Takes an explicit arch argument as its last parameter. */
    219 void silk_noise_shape_quantizer(
    220    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
    221    opus_int            signalType,             /* I    Signal type                     */
    222    const opus_int32    x_sc_Q10[],             /* I    Input; name suggests scaled, Q10 - confirm */
    223    opus_int8           pulses[],               /* O    Presumably the quantized pulse signal, as in silk_NSQ - confirm */
    224    opus_int16          xq[],                   /* O    Output; presumably the quantized signal - confirm */
    225    opus_int32          sLTP_Q15[],             /* I/O  LTP state                       */
    226    const opus_int16    a_Q12[],                /* I    Short term prediction coefs     */
    227    const opus_int16    b_Q14[],                /* I    Long term prediction coefs      */
    228    const opus_int16    AR_shp_Q13[],           /* I    Noise shaping AR coefs          */
    229    opus_int            lag,                    /* I    Pitch lag                       */
    230    opus_int32          HarmShapeFIRPacked_Q14, /* I    Name suggests packed harmonic-shaping FIR coefs - confirm */
    231    opus_int            Tilt_Q14,               /* I    Spectral tilt                   */
    232    opus_int32          LF_shp_Q14,             /* I    Presumably one low frequency shaping coef (cf. LF_shp_Q14[] in silk_NSQ) - confirm */
    233    opus_int32          Gain_Q16,               /* I    Presumably one quantization step size (cf. Gains_Q16[] in silk_NSQ) - confirm */
    234    opus_int            Lambda_Q10,             /* I    Presumably the rate/distortion tradeoff, as in silk_NSQ - confirm */
    235    opus_int            offset_Q10,             /* I    Name suggests a Q10 offset; semantics not visible here - confirm */
    236    opus_int            length,                 /* I    Input length                    */
    237    opus_int            shapingLPCOrder,        /* I    Noise shaping AR filter order   */
    238    opus_int            predictLPCOrder,        /* I    Prediction filter order         */
    239    int                 arch                    /* I    Architecture                    */
    240 );
    241 /* Prototype only; the definition is not in this header. Per the parameter notes, it reads VAD_N_BANDS subband energies and updates the VAD state. */
    242 /**************************/
    243 /* Noise level estimation */
    244 /**************************/
    245 void silk_VAD_GetNoiseLevels(
    246    const opus_int32            pX[ VAD_N_BANDS ],  /* I    subband energies                            */
    247    silk_VAD_state              *psSilk_VAD         /* I/O  Pointer to Silk VAD state                   */
    248 );
    249 /* SSE4.1 variant of silk_VAD_GetSA_Q8, plus the silk_VAD_GetSA_Q8() macro that routes calls to it. The meaning of the opus_int result is not documented in this header. */
    250 opus_int silk_VAD_GetSA_Q8_sse4_1(
    251    silk_encoder_state *psEnC,   /* Encoder state (non-const pointer; presumably I/O - confirm) */
    252    const opus_int16   pIn[]     /* I    Input array (read-only) */
    253 );
    254 
    255 #  if defined(OPUS_X86_PRESUME_SSE4_1) /* SSE4.1 presumed: call the SSE4.1 function directly; arch is evaluated and discarded */
    256 
    257 #   define OVERRIDE_silk_VAD_GetSA_Q8
    258 #   define silk_VAD_GetSA_Q8(psEnC, pIn, arch) ((void)(arch),silk_VAD_GetSA_Q8_sse4_1(psEnC, pIn))
    259 
    260 #  elif defined(OPUS_HAVE_RTCD) /* run-time dispatch: call the table entry selected by (arch & OPUS_ARCHMASK) */
    261 
    262 extern opus_int (*const SILK_VAD_GETSA_Q8_IMPL[OPUS_ARCHMASK + 1])(
    263     silk_encoder_state *psEnC,
    264     const opus_int16   pIn[]);
    265 
    266 #   define OVERRIDE_silk_VAD_GetSA_Q8
    267 #   define silk_VAD_GetSA_Q8(psEnC, pIn, arch) \
    268      ((*SILK_VAD_GETSA_Q8_IMPL[(arch) & OPUS_ARCHMASK])(psEnC, pIn))
    269 
    270 #  endif /* silk_VAD_GetSA_Q8 dispatch (OPUS_X86_PRESUME_SSE4_1 / OPUS_HAVE_RTCD) */
    271 /* AVX2 variant of silk_inner_product_FLP, plus the silk_inner_product_FLP() macro that routes calls to it. The whole section is skipped when FIXED_POINT is defined. */
    272 #ifndef FIXED_POINT
    273 double silk_inner_product_FLP_avx2(
    274    const silk_float    *data1,     /* I    first input array (read-only) */
    275    const silk_float    *data2,     /* I    second input array (read-only) */
    276    opus_int            dataSize    /* I    size argument; presumably the element count of both arrays - confirm */
    277 );
    278 
    279 #if defined (OPUS_X86_PRESUME_AVX2) /* AVX2 presumed: call the AVX2 function directly; arch is evaluated and discarded */
    280 
    281 #define OVERRIDE_inner_product_FLP
    282 #define silk_inner_product_FLP(data1, data2, dataSize, arch) ((void)(arch),silk_inner_product_FLP_avx2(data1, data2, dataSize))
    283 
    284 #elif defined(OPUS_HAVE_RTCD) && defined(OPUS_X86_MAY_HAVE_AVX2) /* run-time dispatch: call the table entry selected by (arch & OPUS_ARCHMASK) */
    285 
    286 #define OVERRIDE_inner_product_FLP
    287 extern double (*const SILK_INNER_PRODUCT_FLP_IMPL[OPUS_ARCHMASK + 1])(
    288    const silk_float    *data1,
    289    const silk_float    *data2,
    290    opus_int            dataSize
    291 );
    292 
    293 #define silk_inner_product_FLP(data1, data2, dataSize, arch) ((void)(arch),(*SILK_INNER_PRODUCT_FLP_IMPL[(arch) & OPUS_ARCHMASK])(data1, data2, dataSize))
    294 
    295 #endif /* silk_inner_product_FLP dispatch (OPUS_X86_PRESUME_AVX2 / OPUS_HAVE_RTCD) */
    296 #endif /* !FIXED_POINT */
    297 
    298 # endif
    299 #endif