mem.c (13099B)
1 /* 2 * default memory allocator for libavutil 3 * Copyright (c) 2002 Fabrice Bellard 4 * 5 * This file is part of FFmpeg. 6 * 7 * FFmpeg is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * FFmpeg is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with FFmpeg; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22 /** 23 * @file 24 * default memory allocator for libavutil 25 */ 26 27 #define _XOPEN_SOURCE 600 28 29 #include "config.h" 30 31 #include <limits.h> 32 #include <stdint.h> 33 #include <stdlib.h> 34 #include <stdatomic.h> 35 #include <string.h> 36 #if HAVE_MALLOC_H 37 #include <malloc.h> 38 #endif 39 40 #include "attributes.h" 41 #include "avassert.h" 42 #include "dynarray.h" 43 #include "error.h" 44 #include "internal.h" 45 #include "intreadwrite.h" 46 #include "macros.h" 47 #include "mem.h" 48 49 #ifdef MALLOC_PREFIX 50 51 #define malloc AV_JOIN(MALLOC_PREFIX, malloc) 52 #define memalign AV_JOIN(MALLOC_PREFIX, memalign) 53 #define posix_memalign AV_JOIN(MALLOC_PREFIX, posix_memalign) 54 #define realloc AV_JOIN(MALLOC_PREFIX, realloc) 55 #define free AV_JOIN(MALLOC_PREFIX, free) 56 57 void *malloc(size_t size); 58 void *memalign(size_t align, size_t size); 59 int posix_memalign(void **ptr, size_t align, size_t size); 60 void *realloc(void *ptr, size_t size); 61 void free(void *ptr); 62 63 #endif /* MALLOC_PREFIX */ 64 65 #define ALIGN (HAVE_SIMD_ALIGN_64 ? 64 : (HAVE_SIMD_ALIGN_32 ? 32 : 16)) 66 67 #define FF_MEMORY_POISON 0x2a 68 69 /* NOTE: if you want to override these functions with your own 70 * implementations (not recommended) you have to link libav* as 71 * dynamic libraries and remove -Wl,-Bsymbolic from the linker flags. 72 * Note that this will cost performance. */ 73 74 static atomic_size_t max_alloc_size = INT_MAX; 75 76 void av_max_alloc(size_t max){ 77 atomic_store_explicit(&max_alloc_size, max, memory_order_relaxed); 78 } 79 80 static int size_mult(size_t a, size_t b, size_t *r) 81 { 82 size_t t; 83 84 #if (!defined(__INTEL_COMPILER) && AV_GCC_VERSION_AT_LEAST(5,1)) || AV_HAS_BUILTIN(__builtin_mul_overflow) 85 if (__builtin_mul_overflow(a, b, &t)) 86 return AVERROR(EINVAL); 87 #else 88 t = a * b; 89 /* Hack inspired from glibc: don't try the division if nelem and elsize 90 * are both less than sqrt(SIZE_MAX). */ 91 if ((a | b) >= ((size_t)1 << (sizeof(size_t) * 4)) && a && t / a != b) 92 return AVERROR(EINVAL); 93 #endif 94 *r = t; 95 return 0; 96 } 97 98 void *av_malloc(size_t size) 99 { 100 void *ptr = NULL; 101 102 if (size > atomic_load_explicit(&max_alloc_size, memory_order_relaxed)) 103 return NULL; 104 105 #if HAVE_POSIX_MEMALIGN 106 if (size) //OS X on SDK 10.6 has a broken posix_memalign implementation 107 if (posix_memalign(&ptr, ALIGN, size)) 108 ptr = NULL; 109 #elif HAVE_ALIGNED_MALLOC 110 ptr = _aligned_malloc(size, ALIGN); 111 #elif HAVE_MEMALIGN 112 #ifndef __DJGPP__ 113 ptr = memalign(ALIGN, size); 114 #else 115 ptr = memalign(size, ALIGN); 116 #endif 117 /* Why 64? 118 * Indeed, we should align it: 119 * on 4 for 386 120 * on 16 for 486 121 * on 32 for 586, PPro - K6-III 122 * on 64 for K7 (maybe for P3 too). 123 * Because L1 and L2 caches are aligned on those values. 124 * But I don't want to code such logic here! 125 */ 126 /* Why 32? 127 * For AVX ASM. SSE / NEON needs only 16. 128 * Why not larger? Because I did not see a difference in benchmarks ... 129 */ 130 /* benchmarks with P3 131 * memalign(64) + 1 3071, 3051, 3032 132 * memalign(64) + 2 3051, 3032, 3041 133 * memalign(64) + 4 2911, 2896, 2915 134 * memalign(64) + 8 2545, 2554, 2550 135 * memalign(64) + 16 2543, 2572, 2563 136 * memalign(64) + 32 2546, 2545, 2571 137 * memalign(64) + 64 2570, 2533, 2558 138 * 139 * BTW, malloc seems to do 8-byte alignment by default here. 140 */ 141 #else 142 ptr = malloc(size); 143 #endif 144 if(!ptr && !size) { 145 size = 1; 146 ptr= av_malloc(1); 147 } 148 #if CONFIG_MEMORY_POISONING 149 if (ptr) 150 memset(ptr, FF_MEMORY_POISON, size); 151 #endif 152 return ptr; 153 } 154 155 void *av_realloc(void *ptr, size_t size) 156 { 157 void *ret; 158 if (size > atomic_load_explicit(&max_alloc_size, memory_order_relaxed)) 159 return NULL; 160 161 #if HAVE_ALIGNED_MALLOC 162 ret = _aligned_realloc(ptr, size + !size, ALIGN); 163 #else 164 ret = realloc(ptr, size + !size); 165 #endif 166 #if CONFIG_MEMORY_POISONING 167 if (ret && !ptr) 168 memset(ret, FF_MEMORY_POISON, size); 169 #endif 170 return ret; 171 } 172 173 void *av_realloc_f(void *ptr, size_t nelem, size_t elsize) 174 { 175 size_t size; 176 void *r; 177 178 if (size_mult(elsize, nelem, &size)) { 179 av_free(ptr); 180 return NULL; 181 } 182 r = av_realloc(ptr, size); 183 if (!r) 184 av_free(ptr); 185 return r; 186 } 187 188 int av_reallocp(void *ptr, size_t size) 189 { 190 void *val; 191 192 if (!size) { 193 av_freep(ptr); 194 return 0; 195 } 196 197 memcpy(&val, ptr, sizeof(val)); 198 val = av_realloc(val, size); 199 200 if (!val) { 201 av_freep(ptr); 202 return AVERROR(ENOMEM); 203 } 204 205 memcpy(ptr, &val, sizeof(val)); 206 return 0; 207 } 208 209 void *av_malloc_array(size_t nmemb, size_t size) 210 { 211 size_t result; 212 if (size_mult(nmemb, size, &result) < 0) 213 return NULL; 214 return av_malloc(result); 215 } 216 217 void *av_realloc_array(void *ptr, size_t nmemb, size_t size) 218 { 219 size_t result; 220 if (size_mult(nmemb, size, &result) < 0) 221 return NULL; 222 return av_realloc(ptr, result); 223 } 224 225 int av_reallocp_array(void *ptr, size_t nmemb, size_t size) 226 { 227 void *val; 228 229 memcpy(&val, ptr, sizeof(val)); 230 val = av_realloc_f(val, nmemb, size); 231 memcpy(ptr, &val, sizeof(val)); 232 if (!val && nmemb && size) 233 return AVERROR(ENOMEM); 234 235 return 0; 236 } 237 238 void av_free(void *ptr) 239 { 240 #if HAVE_ALIGNED_MALLOC 241 _aligned_free(ptr); 242 #else 243 free(ptr); 244 #endif 245 } 246 247 void av_freep(void *arg) 248 { 249 void *val; 250 251 memcpy(&val, arg, sizeof(val)); 252 memcpy(arg, &(void *){ NULL }, sizeof(val)); 253 av_free(val); 254 } 255 256 void *av_mallocz(size_t size) 257 { 258 void *ptr = av_malloc(size); 259 if (ptr) 260 memset(ptr, 0, size); 261 return ptr; 262 } 263 264 void *av_calloc(size_t nmemb, size_t size) 265 { 266 size_t result; 267 if (size_mult(nmemb, size, &result) < 0) 268 return NULL; 269 return av_mallocz(result); 270 } 271 272 char *av_strdup(const char *s) 273 { 274 char *ptr = NULL; 275 if (s) { 276 size_t len = strlen(s) + 1; 277 ptr = av_realloc(NULL, len); 278 if (ptr) 279 memcpy(ptr, s, len); 280 } 281 return ptr; 282 } 283 284 char *av_strndup(const char *s, size_t len) 285 { 286 char *ret = NULL, *end; 287 288 if (!s) 289 return NULL; 290 291 end = memchr(s, 0, len); 292 if (end) 293 len = end - s; 294 295 ret = av_realloc(NULL, len + 1); 296 if (!ret) 297 return NULL; 298 299 memcpy(ret, s, len); 300 ret[len] = 0; 301 return ret; 302 } 303 304 void *av_memdup(const void *p, size_t size) 305 { 306 void *ptr = NULL; 307 if (p) { 308 ptr = av_malloc(size); 309 if (ptr) 310 memcpy(ptr, p, size); 311 } 312 return ptr; 313 } 314 315 int av_dynarray_add_nofree(void *tab_ptr, int *nb_ptr, void *elem) 316 { 317 void **tab; 318 memcpy(&tab, tab_ptr, sizeof(tab)); 319 320 FF_DYNARRAY_ADD(INT_MAX, sizeof(*tab), tab, *nb_ptr, { 321 tab[*nb_ptr] = elem; 322 memcpy(tab_ptr, &tab, sizeof(tab)); 323 }, { 324 return AVERROR(ENOMEM); 325 }); 326 return 0; 327 } 328 329 void av_dynarray_add(void *tab_ptr, int *nb_ptr, void *elem) 330 { 331 void **tab; 332 memcpy(&tab, tab_ptr, sizeof(tab)); 333 334 FF_DYNARRAY_ADD(INT_MAX, sizeof(*tab), tab, *nb_ptr, { 335 tab[*nb_ptr] = elem; 336 memcpy(tab_ptr, &tab, sizeof(tab)); 337 }, { 338 *nb_ptr = 0; 339 av_freep(tab_ptr); 340 }); 341 } 342 343 void *av_dynarray2_add(void **tab_ptr, int *nb_ptr, size_t elem_size, 344 const uint8_t *elem_data) 345 { 346 uint8_t *tab_elem_data = NULL; 347 348 FF_DYNARRAY_ADD(INT_MAX, elem_size, *tab_ptr, *nb_ptr, { 349 tab_elem_data = (uint8_t *)*tab_ptr + (*nb_ptr) * elem_size; 350 if (elem_data) 351 memcpy(tab_elem_data, elem_data, elem_size); 352 else if (CONFIG_MEMORY_POISONING) 353 memset(tab_elem_data, FF_MEMORY_POISON, elem_size); 354 }, { 355 av_freep(tab_ptr); 356 *nb_ptr = 0; 357 }); 358 return tab_elem_data; 359 } 360 361 static void fill16(uint8_t *dst, int len) 362 { 363 uint32_t v = AV_RN16(dst - 2); 364 365 v |= v << 16; 366 367 while (len >= 4) { 368 AV_WN32(dst, v); 369 dst += 4; 370 len -= 4; 371 } 372 373 while (len--) { 374 *dst = dst[-2]; 375 dst++; 376 } 377 } 378 379 static void fill24(uint8_t *dst, int len) 380 { 381 #if HAVE_BIGENDIAN 382 uint32_t v = AV_RB24(dst - 3); 383 uint32_t a = v << 8 | v >> 16; 384 uint32_t b = v << 16 | v >> 8; 385 uint32_t c = v << 24 | v; 386 #else 387 uint32_t v = AV_RL24(dst - 3); 388 uint32_t a = v | v << 24; 389 uint32_t b = v >> 8 | v << 16; 390 uint32_t c = v >> 16 | v << 8; 391 #endif 392 393 while (len >= 12) { 394 AV_WN32(dst, a); 395 AV_WN32(dst + 4, b); 396 AV_WN32(dst + 8, c); 397 dst += 12; 398 len -= 12; 399 } 400 401 if (len >= 4) { 402 AV_WN32(dst, a); 403 dst += 4; 404 len -= 4; 405 } 406 407 if (len >= 4) { 408 AV_WN32(dst, b); 409 dst += 4; 410 len -= 4; 411 } 412 413 while (len--) { 414 *dst = dst[-3]; 415 dst++; 416 } 417 } 418 419 static void fill32(uint8_t *dst, int len) 420 { 421 uint32_t v = AV_RN32(dst - 4); 422 423 #if HAVE_FAST_64BIT 424 uint64_t v2= v + ((uint64_t)v<<32); 425 while (len >= 32) { 426 AV_WN64(dst , v2); 427 AV_WN64(dst+ 8, v2); 428 AV_WN64(dst+16, v2); 429 AV_WN64(dst+24, v2); 430 dst += 32; 431 len -= 32; 432 } 433 #endif 434 435 while (len >= 4) { 436 AV_WN32(dst, v); 437 dst += 4; 438 len -= 4; 439 } 440 441 while (len--) { 442 *dst = dst[-4]; 443 dst++; 444 } 445 } 446 447 void av_memcpy_backptr(uint8_t *dst, int back, int cnt) 448 { 449 const uint8_t *src = &dst[-back]; 450 if (!back) 451 return; 452 453 if (back == 1) { 454 memset(dst, *src, cnt); 455 } else if (back == 2) { 456 fill16(dst, cnt); 457 } else if (back == 3) { 458 fill24(dst, cnt); 459 } else if (back == 4) { 460 fill32(dst, cnt); 461 } else { 462 if (cnt >= 16) { 463 int blocklen = back; 464 while (cnt > blocklen) { 465 memcpy(dst, src, blocklen); 466 dst += blocklen; 467 cnt -= blocklen; 468 blocklen <<= 1; 469 } 470 memcpy(dst, src, cnt); 471 return; 472 } 473 if (cnt >= 8) { 474 AV_COPY32U(dst, src); 475 AV_COPY32U(dst + 4, src + 4); 476 src += 8; 477 dst += 8; 478 cnt -= 8; 479 } 480 if (cnt >= 4) { 481 AV_COPY32U(dst, src); 482 src += 4; 483 dst += 4; 484 cnt -= 4; 485 } 486 if (cnt >= 2) { 487 AV_COPY16U(dst, src); 488 src += 2; 489 dst += 2; 490 cnt -= 2; 491 } 492 if (cnt) 493 *dst = *src; 494 } 495 } 496 497 void *av_fast_realloc(void *ptr, unsigned int *size, size_t min_size) 498 { 499 size_t max_size; 500 501 if (min_size <= *size) 502 return ptr; 503 504 max_size = atomic_load_explicit(&max_alloc_size, memory_order_relaxed); 505 /* *size is an unsigned, so the real maximum is <= UINT_MAX. */ 506 max_size = FFMIN(max_size, UINT_MAX); 507 508 if (min_size > max_size) { 509 *size = 0; 510 return NULL; 511 } 512 513 min_size = FFMIN(max_size, FFMAX(min_size + min_size / 16 + 32, min_size)); 514 515 ptr = av_realloc(ptr, min_size); 516 /* we could set this to the unmodified min_size but this is safer 517 * if the user lost the ptr and uses NULL now 518 */ 519 if (!ptr) 520 min_size = 0; 521 522 *size = min_size; 523 524 return ptr; 525 } 526 527 static inline void fast_malloc(void *ptr, unsigned int *size, size_t min_size, int zero_realloc) 528 { 529 size_t max_size; 530 void *val; 531 532 memcpy(&val, ptr, sizeof(val)); 533 if (min_size <= *size) { 534 av_assert0(val || !min_size); 535 return; 536 } 537 538 max_size = atomic_load_explicit(&max_alloc_size, memory_order_relaxed); 539 /* *size is an unsigned, so the real maximum is <= UINT_MAX. */ 540 max_size = FFMIN(max_size, UINT_MAX); 541 542 if (min_size > max_size) { 543 av_freep(ptr); 544 *size = 0; 545 return; 546 } 547 min_size = FFMIN(max_size, FFMAX(min_size + min_size / 16 + 32, min_size)); 548 av_freep(ptr); 549 val = zero_realloc ? av_mallocz(min_size) : av_malloc(min_size); 550 memcpy(ptr, &val, sizeof(val)); 551 if (!val) 552 min_size = 0; 553 *size = min_size; 554 return; 555 } 556 557 void av_fast_malloc(void *ptr, unsigned int *size, size_t min_size) 558 { 559 fast_malloc(ptr, size, min_size, 0); 560 } 561 562 void av_fast_mallocz(void *ptr, unsigned int *size, size_t min_size) 563 { 564 fast_malloc(ptr, size, min_size, 1); 565 } 566 567 int av_size_mult(size_t a, size_t b, size_t *r) 568 { 569 return size_mult(a, b, r); 570 }