/* celt.c (11029B) */
1 /* Copyright (c) 2007-2008 CSIRO 2 Copyright (c) 2007-2010 Xiph.Org Foundation 3 Copyright (c) 2008 Gregory Maxwell 4 Written by Jean-Marc Valin and Gregory Maxwell */ 5 /* 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions 8 are met: 9 10 - Redistributions of source code must retain the above copyright 11 notice, this list of conditions and the following disclaimer. 12 13 - Redistributions in binary form must reproduce the above copyright 14 notice, this list of conditions and the following disclaimer in the 15 documentation and/or other materials provided with the distribution. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 21 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 22 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 23 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 24 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 25 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 26 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 */ 29 30 #ifdef HAVE_CONFIG_H 31 #include "config.h" 32 #endif 33 34 #define CELT_C 35 36 #include "os_support.h" 37 #include "mdct.h" 38 #include <math.h> 39 #include "celt.h" 40 #include "pitch.h" 41 #include "bands.h" 42 #include "modes.h" 43 #include "entcode.h" 44 #include "quant_bands.h" 45 #include "rate.h" 46 #include "stack_alloc.h" 47 #include "mathops.h" 48 #include "float_cast.h" 49 #include <stdarg.h> 50 #include "celt_lpc.h" 51 #include "vq.h" 52 53 #ifndef PACKAGE_VERSION 54 #define PACKAGE_VERSION "unknown" 55 #endif 56 57 #if defined(FIXED_POINT) && defined(__mips) 58 #include "mips/celt_mipsr1.h" 59 #endif 60 61 62 int resampling_factor(opus_int32 rate) 63 { 64 int ret; 65 switch (rate) 66 { 67 #ifdef ENABLE_QEXT 68 case 96000: 69 #endif 70 case 48000: 71 ret = 1; 72 break; 73 case 24000: 74 ret = 2; 75 break; 76 case 16000: 77 ret = 3; 78 break; 79 case 12000: 80 ret = 4; 81 break; 82 case 8000: 83 ret = 6; 84 break; 85 default: 86 #ifndef CUSTOM_MODES 87 celt_assert(0); 88 #endif 89 ret = 0; 90 break; 91 } 92 return ret; 93 } 94 95 96 #if !defined(OVERRIDE_COMB_FILTER_CONST) || defined(NON_STATIC_COMB_FILTER_CONST_C) 97 /* This version should be faster on ARM */ 98 #ifdef OPUS_ARM_ASM 99 #ifndef NON_STATIC_COMB_FILTER_CONST_C 100 static 101 #endif 102 void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N, 103 celt_coef g10, celt_coef g11, celt_coef g12) 104 { 105 opus_val32 x0, x1, x2, x3, x4; 106 int i; 107 x4 = SHL32(x[-T-2], 1); 108 x3 = SHL32(x[-T-1], 1); 109 x2 = SHL32(x[-T], 1); 110 x1 = SHL32(x[-T+1], 1); 111 for (i=0;i<N-4;i+=5) 112 { 113 opus_val32 t; 114 x0=SHL32(x[i-T+2],1); 115 t = MAC_COEF_32_ARM(x[i], g10, x2); 116 t = MAC_COEF_32_ARM(t, g11, ADD32(x1,x3)); 117 t = MAC_COEF_32_ARM(t, g12, ADD32(x0,x4)); 118 t = SATURATE(t, SIG_SAT); 119 y[i] = t; 120 x4=SHL32(x[i-T+3],1); 121 t = MAC_COEF_32_ARM(x[i+1], g10, x1); 122 t = MAC_COEF_32_ARM(t, g11, ADD32(x0,x2)); 123 t = MAC_COEF_32_ARM(t, g12, ADD32(x4,x3)); 124 t = 
SATURATE(t, SIG_SAT); 125 y[i+1] = t; 126 x3=SHL32(x[i-T+4],1); 127 t = MAC_COEF_32_ARM(x[i+2], g10, x0); 128 t = MAC_COEF_32_ARM(t, g11, ADD32(x4,x1)); 129 t = MAC_COEF_32_ARM(t, g12, ADD32(x3,x2)); 130 t = SATURATE(t, SIG_SAT); 131 y[i+2] = t; 132 x2=SHL32(x[i-T+5],1); 133 t = MAC_COEF_32_ARM(x[i+3], g10, x4); 134 t = MAC_COEF_32_ARM(t, g11, ADD32(x3,x0)); 135 t = MAC_COEF_32_ARM(t, g12, ADD32(x2,x1)); 136 t = SATURATE(t, SIG_SAT); 137 y[i+3] = t; 138 x1=SHL32(x[i-T+6],1); 139 t = MAC_COEF_32_ARM(x[i+4], g10, x3); 140 t = MAC_COEF_32_ARM(t, g11, ADD32(x2,x4)); 141 t = MAC_COEF_32_ARM(t, g12, ADD32(x1,x0)); 142 t = SATURATE(t, SIG_SAT); 143 y[i+4] = t; 144 } 145 #ifdef CUSTOM_MODES 146 for (;i<N;i++) 147 { 148 opus_val32 t; 149 x0=SHL32(x[i-T+2],1); 150 t = MAC_COEF_32_ARM(x[i], g10, x2); 151 t = MAC_COEF_32_ARM(t, g11, ADD32(x1,x3)); 152 t = MAC_COEF_32_ARM(t, g12, ADD32(x0,x4)); 153 t = SATURATE(t, SIG_SAT); 154 y[i] = t; 155 x4=x3; 156 x3=x2; 157 x2=x1; 158 x1=x0; 159 } 160 #endif 161 } 162 #else 163 #ifndef NON_STATIC_COMB_FILTER_CONST_C 164 static 165 #endif 166 void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N, 167 celt_coef g10, celt_coef g11, celt_coef g12) 168 { 169 opus_val32 x0, x1, x2, x3, x4; 170 int i; 171 x4 = x[-T-2]; 172 x3 = x[-T-1]; 173 x2 = x[-T]; 174 x1 = x[-T+1]; 175 for (i=0;i<N;i++) 176 { 177 x0=x[i-T+2]; 178 y[i] = x[i] 179 + MULT_COEF_32(g10,x2) 180 + MULT_COEF_32(g11,ADD32(x1,x3)) 181 + MULT_COEF_32(g12,ADD32(x0,x4)); 182 #ifdef FIXED_POINT 183 /* A bit of bias seems to help here. 
*/ 184 y[i] = SUB32(y[i], 1); 185 #endif 186 y[i] = SATURATE(y[i], SIG_SAT); 187 x4=x3; 188 x3=x2; 189 x2=x1; 190 x1=x0; 191 } 192 193 } 194 #endif 195 #endif 196 197 #ifdef ENABLE_QEXT 198 void comb_filter_qext(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, 199 opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, 200 const celt_coef *window, int overlap, int arch) 201 { 202 VARDECL(opus_val32, mem_buf); 203 VARDECL(opus_val32, buf); 204 celt_coef new_window[120]; 205 int s; 206 int i; 207 int N2; 208 int overlap2; 209 SAVE_STACK; 210 /* Using ALLOC() instead of a regular stack allocation to minimize real stack use when using the pseudostack. 211 This is useful on some embedded systems. */ 212 ALLOC(mem_buf, COMBFILTER_MAXPERIOD+960, opus_val32); 213 ALLOC(buf, COMBFILTER_MAXPERIOD+960, opus_val32); 214 N2 = N/2; 215 overlap2=overlap/2; 216 /* At 96 kHz, we double the period and the spacing between taps, which is equivalent 217 to creating a mirror image of the filter around 24 kHz. It also means we can process 218 the even and odd samples completely independently. 
*/ 219 for (s=0;s<2;s++) { 220 opus_val32 *yptr; 221 for (i=0;i<overlap2;i++) new_window[i] = window[2*i+s]; 222 for (i=0;i<COMBFILTER_MAXPERIOD+N2;i++) mem_buf[i] = x[2*i+s-2*COMBFILTER_MAXPERIOD]; 223 if (x==y) { 224 yptr = mem_buf+COMBFILTER_MAXPERIOD; 225 } else { 226 for (i=0;i<N2;i++) buf[i] = y[2*i+s]; 227 yptr = buf; 228 } 229 comb_filter(yptr, mem_buf+COMBFILTER_MAXPERIOD, T0, T1, N2, g0, g1, tapset0, tapset1, new_window, overlap2, arch); 230 for (i=0;i<N2;i++) y[2*i+s] = yptr[i]; 231 } 232 RESTORE_STACK; 233 return; 234 } 235 #endif 236 237 #ifndef OVERRIDE_comb_filter 238 void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, 239 opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, 240 const celt_coef *window, int overlap, int arch) 241 { 242 int i; 243 /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */ 244 celt_coef g00, g01, g02, g10, g11, g12; 245 opus_val32 x0, x1, x2, x3, x4; 246 static const opus_val16 gains[3][3] = { 247 {QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)}, 248 {QCONST16(0.4638671875f, 15), QCONST16(0.2680664062f, 15), QCONST16(0.f, 15)}, 249 {QCONST16(0.7998046875f, 15), QCONST16(0.1000976562f, 15), QCONST16(0.f, 15)}}; 250 #ifdef ENABLE_QEXT 251 if (overlap==240) { 252 comb_filter_qext(y, x, T0, T1, N, g0, g1, tapset0, tapset1, window, overlap, arch); 253 return; 254 } 255 #endif 256 if (g0==0 && g1==0) 257 { 258 /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */ 259 if (x!=y) 260 OPUS_MOVE(y, x, N); 261 return; 262 } 263 /* When the gain is zero, T0 and/or T1 is set to zero. We need 264 to have then be at least 2 to avoid processing garbage data. 
*/ 265 T0 = IMAX(T0, COMBFILTER_MINPERIOD); 266 T1 = IMAX(T1, COMBFILTER_MINPERIOD); 267 g00 = MULT_COEF_TAPS(g0, gains[tapset0][0]); 268 g01 = MULT_COEF_TAPS(g0, gains[tapset0][1]); 269 g02 = MULT_COEF_TAPS(g0, gains[tapset0][2]); 270 g10 = MULT_COEF_TAPS(g1, gains[tapset1][0]); 271 g11 = MULT_COEF_TAPS(g1, gains[tapset1][1]); 272 g12 = MULT_COEF_TAPS(g1, gains[tapset1][2]); 273 x1 = x[-T1+1]; 274 x2 = x[-T1 ]; 275 x3 = x[-T1-1]; 276 x4 = x[-T1-2]; 277 /* If the filter didn't change, we don't need the overlap */ 278 if (g0==g1 && T0==T1 && tapset0==tapset1) 279 overlap=0; 280 for (i=0;i<overlap;i++) 281 { 282 celt_coef f; 283 x0=x[i-T1+2]; 284 f = MULT_COEF(window[i],window[i]); 285 y[i] = x[i] 286 + MULT_COEF_32(MULT_COEF((COEF_ONE-f),g00),x[i-T0]) 287 + MULT_COEF_32(MULT_COEF((COEF_ONE-f),g01),ADD32(x[i-T0+1],x[i-T0-1])) 288 + MULT_COEF_32(MULT_COEF((COEF_ONE-f),g02),ADD32(x[i-T0+2],x[i-T0-2])) 289 + MULT_COEF_32(MULT_COEF(f,g10),x2) 290 + MULT_COEF_32(MULT_COEF(f,g11),ADD32(x1,x3)) 291 + MULT_COEF_32(MULT_COEF(f,g12),ADD32(x0,x4)); 292 #ifdef FIXED_POINT 293 /* A bit of bias seems to help here. */ 294 y[i] = SUB32(y[i], 3); 295 #endif 296 y[i] = SATURATE(y[i], SIG_SAT); 297 x4=x3; 298 x3=x2; 299 x2=x1; 300 x1=x0; 301 302 } 303 if (g1==0) 304 { 305 /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */ 306 if (x!=y) 307 OPUS_MOVE(y+overlap, x+overlap, N-overlap); 308 return; 309 } 310 311 /* Compute the part with the constant filter. */ 312 comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12, arch); 313 } 314 #endif /* OVERRIDE_comb_filter */ 315 316 /* TF change table. Positive values mean better frequency resolution (longer 317 effective window), whereas negative values mean better time resolution 318 (shorter effective window). 
The second index is computed as: 319 4*isTransient + 2*tf_select + per_band_flag */ 320 const signed char tf_select_table[4][8] = { 321 /*isTransient=0 isTransient=1 */ 322 {0, -1, 0, -1, 0,-1, 0,-1}, /* 2.5 ms */ 323 {0, -1, 0, -2, 1, 0, 1,-1}, /* 5 ms */ 324 {0, -2, 0, -3, 2, 0, 1,-1}, /* 10 ms */ 325 {0, -2, 0, -3, 3, 0, 1,-1}, /* 20 ms */ 326 }; 327 328 329 void init_caps(const CELTMode *m,int *cap,int LM,int C) 330 { 331 int i; 332 for (i=0;i<m->nbEBands;i++) 333 { 334 int N; 335 N=(m->eBands[i+1]-m->eBands[i])<<LM; 336 cap[i] = (m->cache.caps[m->nbEBands*(2*LM+C-1)+i]+64)*C*N>>2; 337 } 338 } 339 340 341 342 const char *opus_strerror(int error) 343 { 344 static const char * const error_strings[8] = { 345 "success", 346 "invalid argument", 347 "buffer too small", 348 "internal error", 349 "corrupted stream", 350 "request not implemented", 351 "invalid state", 352 "memory allocation failed" 353 }; 354 if (error > 0 || error < -7) 355 return "unknown error"; 356 else 357 return error_strings[-error]; 358 } 359 360 const char *opus_get_version_string(void) 361 { 362 return "libopus " PACKAGE_VERSION 363 /* Applications may rely on the presence of this substring in the version 364 string to determine if they have a fixed-point or floating-point build 365 at runtime. */ 366 #ifdef FIXED_POINT 367 "-fixed" 368 #endif 369 #ifdef FUZZING 370 "-fuzzing" 371 #endif 372 ; 373 }