tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

elf_mem_image.cc (13577B)


      1 // Copyright 2017 The Abseil Authors.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //      https://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 // Allow dynamic symbol lookup in an in-memory Elf image.
     16 //
     17 
     18 #include "absl/debugging/internal/elf_mem_image.h"
     19 
     20 #ifdef ABSL_HAVE_ELF_MEM_IMAGE  // defined in elf_mem_image.h
     21 
     22 #include <string.h>
     23 
     24 #include <cassert>
     25 #include <cstddef>
     26 #include <cstdint>
     27 
     28 #include "absl/base/config.h"
     29 #include "absl/base/internal/raw_logging.h"
     30 
     31 // From binutils/include/elf/common.h (this doesn't appear to be documented
     32 // anywhere else).
     33 //
     34 //   /* This flag appears in a Versym structure.  It means that the symbol
     35 //      is hidden, and is only visible with an explicit version number.
     36 //      This is a GNU extension.  */
     37 //   #define VERSYM_HIDDEN           0x8000
     38 //
     39 //   /* This is the mask for the rest of the Versym information.  */
     40 //   #define VERSYM_VERSION          0x7fff
     41 
// Mask applied to a Versym entry to obtain its version index; it clears the
// VERSYM_HIDDEN bit (0x8000) quoted in the binutils excerpt above. Only the
// mask is defined here because the hidden flag itself is never tested in this
// file.
#define VERSYM_VERSION 0x7fff
     43 
     44 namespace absl {
     45 ABSL_NAMESPACE_BEGIN
     46 namespace debugging_internal {
     47 
     48 namespace {
     49 
     50 #if __SIZEOF_POINTER__ == 4
     51 const int kElfClass = ELFCLASS32;
     52 int ElfBind(const ElfW(Sym) *symbol) { return ELF32_ST_BIND(symbol->st_info); }
     53 int ElfType(const ElfW(Sym) *symbol) { return ELF32_ST_TYPE(symbol->st_info); }
     54 #elif __SIZEOF_POINTER__ == 8
     55 const int kElfClass = ELFCLASS64;
     56 int ElfBind(const ElfW(Sym) *symbol) { return ELF64_ST_BIND(symbol->st_info); }
     57 int ElfType(const ElfW(Sym) *symbol) { return ELF64_ST_TYPE(symbol->st_info); }
     58 #else
     59 const int kElfClass = -1;
     60 int ElfBind(const ElfW(Sym) *) {
     61  ABSL_RAW_LOG(FATAL, "Unexpected word size");
     62  return 0;
     63 }
     64 int ElfType(const ElfW(Sym) *) {
     65  ABSL_RAW_LOG(FATAL, "Unexpected word size");
     66  return 0;
     67 }
     68 #endif
     69 
     70 // Extract an element from one of the ELF tables, cast it to desired type.
     71 // This is just a simple arithmetic and a glorified cast.
     72 // Callers are responsible for bounds checking.
     73 template <typename T>
     74 const T *GetTableElement(const ElfW(Ehdr) * ehdr, ElfW(Off) table_offset,
     75                         ElfW(Word) element_size, size_t index) {
     76  return reinterpret_cast<const T*>(reinterpret_cast<const char *>(ehdr)
     77                                    + table_offset
     78                                    + index * element_size);
     79 }
     80 
     81 }  // namespace
     82 
// Out-of-line definition of the sentinel object. Its value is irrelevant; the
// object exists only so that it has a unique address.
// NOTE(review): kInvalidBase (checked in the constructor) is presumably
// derived from this object's address -- confirm against the header.
const int ElfMemImage::kInvalidBaseSentinel = 0;
     86 
     87 ElfMemImage::ElfMemImage(const void *base) {
     88  ABSL_RAW_CHECK(base != kInvalidBase, "bad pointer");
     89  Init(base);
     90 }
     91 
     92 uint32_t ElfMemImage::GetNumSymbols() const { return num_syms_; }
     93 
     94 const ElfW(Sym) * ElfMemImage::GetDynsym(uint32_t index) const {
     95  ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range");
     96  return dynsym_ + index;
     97 }
     98 
     99 const ElfW(Versym) *ElfMemImage::GetVersym(uint32_t index) const {
    100  ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range");
    101  return versym_ + index;
    102 }
    103 
    104 const ElfW(Phdr) *ElfMemImage::GetPhdr(int index) const {
    105  ABSL_RAW_CHECK(index >= 0 && index < ehdr_->e_phnum, "index out of range");
    106  return GetTableElement<ElfW(Phdr)>(ehdr_, ehdr_->e_phoff, ehdr_->e_phentsize,
    107                                     static_cast<size_t>(index));
    108 }
    109 
    110 const char *ElfMemImage::GetDynstr(ElfW(Word) offset) const {
    111  ABSL_RAW_CHECK(offset < strsize_, "offset out of range");
    112  return dynstr_ + offset;
    113 }
    114 
    115 const void *ElfMemImage::GetSymAddr(const ElfW(Sym) *sym) const {
    116  if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= SHN_LORESERVE) {
    117    // Symbol corresponds to "special" (e.g. SHN_ABS) section.
    118    return reinterpret_cast<const void *>(sym->st_value);
    119  }
    120  ABSL_RAW_CHECK(link_base_ < sym->st_value, "symbol out of range");
    121  return GetTableElement<char>(ehdr_, 0, 1, sym->st_value - link_base_);
    122 }
    123 
    124 const ElfW(Verdef) *ElfMemImage::GetVerdef(int index) const {
    125  ABSL_RAW_CHECK(0 <= index && static_cast<size_t>(index) <= verdefnum_,
    126                 "index out of range");
    127  const ElfW(Verdef) *version_definition = verdef_;
    128  while (version_definition->vd_ndx < index && version_definition->vd_next) {
    129    const char *const version_definition_as_char =
    130        reinterpret_cast<const char *>(version_definition);
    131    version_definition =
    132        reinterpret_cast<const ElfW(Verdef) *>(version_definition_as_char +
    133                                               version_definition->vd_next);
    134  }
    135  return version_definition->vd_ndx == index ? version_definition : nullptr;
    136 }
    137 
    138 const ElfW(Verdaux) *ElfMemImage::GetVerdefAux(
    139    const ElfW(Verdef) *verdef) const {
    140  return reinterpret_cast<const ElfW(Verdaux) *>(verdef+1);
    141 }
    142 
    143 const char *ElfMemImage::GetVerstr(ElfW(Word) offset) const {
    144  ABSL_RAW_CHECK(offset < strsize_, "offset out of range");
    145  return dynstr_ + offset;
    146 }
    147 
// (Re)initializes this object to describe the ELF image mapped at `base`.
//
// All fields are first reset to the "no image" state. If `base` is null the
// function stops there. Otherwise it:
//   1. validates the ELF identification bytes (magic, class, byte order);
//   2. scans the program headers for the first PT_LOAD and a PT_DYNAMIC;
//   3. walks the dynamic section to find the hash table, symbol table, string
//      table, version tables and their sizes;
//   4. derives the symbol count from DT_HASH or, failing that, DT_GNU_HASH.
// Any failed validation fires assert(false) in debug builds; with asserts
// compiled out the object is left (or put back) in the "no image" state.
void ElfMemImage::Init(const void *base) {
  ehdr_      = nullptr;
  dynsym_    = nullptr;
  dynstr_    = nullptr;
  versym_    = nullptr;
  verdef_    = nullptr;
  num_syms_ = 0;
  strsize_   = 0;
  verdefnum_ = 0;
  // Sentinel: PT_LOAD .p_vaddr can't possibly be this.
  link_base_ = ~ElfW(Addr){0};  // NOLINT(readability/braces)
  if (!base) {
    return;
  }
  // Step 1: identification bytes. ehdr_ is still null at every early return
  // in this section, so a rejected image leaves the object empty.
  const char *const base_as_char = reinterpret_cast<const char *>(base);
  if (base_as_char[EI_MAG0] != ELFMAG0 || base_as_char[EI_MAG1] != ELFMAG1 ||
      base_as_char[EI_MAG2] != ELFMAG2 || base_as_char[EI_MAG3] != ELFMAG3) {
    assert(false);
    return;
  }
  // The image's class (32- vs 64-bit) must match this build's pointer width.
  int elf_class = base_as_char[EI_CLASS];
  if (elf_class != kElfClass) {
    assert(false);
    return;
  }
  // The image's data encoding must match the byte order this file was
  // compiled for; each case bails out unless the matching ABSL_IS_*_ENDIAN
  // macro is defined.
  switch (base_as_char[EI_DATA]) {
    case ELFDATA2LSB: {
#ifndef ABSL_IS_LITTLE_ENDIAN
      assert(false);
      return;
#endif
      break;
    }
    case ELFDATA2MSB: {
#ifndef ABSL_IS_BIG_ENDIAN
      assert(false);
      return;
#endif
      break;
    }
    default: {
      assert(false);
      return;
    }
  }

  // Step 2: program headers. From here on ehdr_ is set, so failures must
  // undo it by calling Init(nullptr).
  ehdr_ = reinterpret_cast<const ElfW(Ehdr) *>(base);
  const ElfW(Phdr) *dynamic_program_header = nullptr;
  for (int i = 0; i < ehdr_->e_phnum; ++i) {
    const ElfW(Phdr) *const program_header = GetPhdr(i);
    switch (program_header->p_type) {
      case PT_LOAD:
        // !~x is true only while x is still the all-ones sentinel, so only
        // the first PT_LOAD's p_vaddr is recorded as the link base.
        if (!~link_base_) {
          link_base_ = program_header->p_vaddr;
        }
        break;
      case PT_DYNAMIC:
        dynamic_program_header = program_header;
        break;
    }
  }
  if (!~link_base_ || !dynamic_program_header) {
    assert(false);
    // Mark this image as not present. Can not recur infinitely.
    Init(nullptr);
    return;
  }
  // Difference between where the image is mapped (`base`) and the address it
  // was linked at; added to link-time addresses to get usable pointers.
  ptrdiff_t relocation =
      base_as_char - reinterpret_cast<const char *>(link_base_);
  // Step 3: dynamic section, iterated until the DT_NULL terminator.
  ElfW(Dyn)* dynamic_entry = reinterpret_cast<ElfW(Dyn)*>(
      static_cast<intptr_t>(dynamic_program_header->p_vaddr) + relocation);
  uint32_t *sysv_hash = nullptr;
  uint32_t *gnu_hash = nullptr;
  for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) {
    // `value` is the entry rebased as an address. It is computed for every
    // entry but only meaningful for address-valued tags; the size-valued tags
    // below (DT_VERDEFNUM, DT_STRSZ) read d_un.d_val directly instead.
    const auto value =
        static_cast<intptr_t>(dynamic_entry->d_un.d_val) + relocation;
    switch (dynamic_entry->d_tag) {
      case DT_HASH:
        sysv_hash = reinterpret_cast<uint32_t *>(value);
        break;
      case DT_GNU_HASH:
        gnu_hash = reinterpret_cast<uint32_t *>(value);
        break;
      case DT_SYMTAB:
        dynsym_ = reinterpret_cast<ElfW(Sym) *>(value);
        break;
      case DT_STRTAB:
        dynstr_ = reinterpret_cast<const char *>(value);
        break;
      case DT_VERSYM:
        versym_ = reinterpret_cast<ElfW(Versym) *>(value);
        break;
      case DT_VERDEF:
        verdef_ = reinterpret_cast<ElfW(Verdef) *>(value);
        break;
      case DT_VERDEFNUM:
        verdefnum_ = static_cast<size_t>(dynamic_entry->d_un.d_val);
        break;
      case DT_STRSZ:
        strsize_ = static_cast<size_t>(dynamic_entry->d_un.d_val);
        break;
      default:
        // Unrecognized entries explicitly ignored.
        break;
    }
  }
  // At least one hash table and every other table/size collected above are
  // required; a missing or zero value rejects the image.
  if ((!sysv_hash && !gnu_hash) || !dynsym_ || !dynstr_ || !versym_ ||
      !verdef_ || !verdefnum_ || !strsize_) {
    assert(false);  // invalid VDSO
    // Mark this image as not present. Can not recur infinitely.
    Init(nullptr);
    return;
  }
  // Step 4: symbol count. DT_HASH is preferred when both tables exist.
  if (sysv_hash) {
    // The second 32-bit word of the DT_HASH table is used as the symbol count
    // (the nchain field in the SysV hash table layout).
    num_syms_ = sysv_hash[1];
  } else {
    assert(gnu_hash);
    // Compute the number of symbols for DT_GNU_HASH, which is specified by
    // https://sourceware.org/gnu-gabi/program-loading-and-dynamic-linking.txt
    uint32_t nbuckets = gnu_hash[0];
    // The buckets array is located after the header (4 uint32) and the bloom
    // filter (size_t array of gnu_hash[2] elements).
    uint32_t *buckets = gnu_hash + 4 + sizeof(size_t) / 4 * gnu_hash[2];
    // Find the chain of the last non-empty bucket.
    // Scans from the highest bucket downward; idx stays 0 if every bucket is
    // empty (or there are no buckets), which yields num_syms_ == 0.
    uint32_t idx = 0;
    for (uint32_t i = nbuckets; i > 0;) {
      idx = buckets[--i];
      if (idx != 0) break;
    }
    if (idx != 0) {
      // Find the last element of the chain, which has an odd value.
      // Add one to get the number of symbols.
      // `chain` is biased by gnu_hash[1] so it can be indexed directly by
      // symbol index; the post-increment leaves idx one past the chain's
      // final symbol.
      uint32_t *chain = buckets + nbuckets - gnu_hash[1];
      while (chain[idx++] % 2 == 0) {
      }
    }
    num_syms_ = idx;
  }
}
    287 
    288 bool ElfMemImage::LookupSymbol(const char *name,
    289                               const char *version,
    290                               int type,
    291                               SymbolInfo *info_out) const {
    292  for (const SymbolInfo& info : *this) {
    293    if (strcmp(info.name, name) == 0 && strcmp(info.version, version) == 0 &&
    294        ElfType(info.symbol) == type) {
    295      if (info_out) {
    296        *info_out = info;
    297      }
    298      return true;
    299    }
    300  }
    301  return false;
    302 }
    303 
    304 bool ElfMemImage::LookupSymbolByAddress(const void *address,
    305                                        SymbolInfo *info_out) const {
    306  for (const SymbolInfo& info : *this) {
    307    const char *const symbol_start =
    308        reinterpret_cast<const char *>(info.address);
    309    const char *const symbol_end = symbol_start + info.symbol->st_size;
    310    if (symbol_start <= address && address < symbol_end) {
    311      if (info_out) {
    312        // Client wants to know details for that symbol (the usual case).
    313        if (ElfBind(info.symbol) == STB_GLOBAL) {
    314          // Strong symbol; just return it.
    315          *info_out = info;
    316          return true;
    317        } else {
    318          // Weak or local. Record it, but keep looking for a strong one.
    319          *info_out = info;
    320        }
    321      } else {
    322        // Client only cares if there is an overlapping symbol.
    323        return true;
    324      }
    325    }
    326  }
    327  return false;
    328 }
    329 
    330 ElfMemImage::SymbolIterator::SymbolIterator(const void *const image,
    331                                            uint32_t index)
    332    : index_(index), image_(image) {}
    333 
    334 const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const {
    335  return &info_;
    336 }
    337 
    338 const ElfMemImage::SymbolInfo& ElfMemImage::SymbolIterator::operator*() const {
    339  return info_;
    340 }
    341 
    342 bool ElfMemImage::SymbolIterator::operator==(const SymbolIterator &rhs) const {
    343  return this->image_ == rhs.image_ && this->index_ == rhs.index_;
    344 }
    345 
    346 bool ElfMemImage::SymbolIterator::operator!=(const SymbolIterator &rhs) const {
    347  return !(*this == rhs);
    348 }
    349 
    350 ElfMemImage::SymbolIterator &ElfMemImage::SymbolIterator::operator++() {
    351  this->Update(1);
    352  return *this;
    353 }
    354 
    355 ElfMemImage::SymbolIterator ElfMemImage::begin() const {
    356  SymbolIterator it(this, 0);
    357  it.Update(0);
    358  return it;
    359 }
    360 
    361 ElfMemImage::SymbolIterator ElfMemImage::end() const {
    362  return SymbolIterator(this, GetNumSymbols());
    363 }
    364 
    365 void ElfMemImage::SymbolIterator::Update(uint32_t increment) {
    366  const ElfMemImage *image = reinterpret_cast<const ElfMemImage *>(image_);
    367  ABSL_RAW_CHECK(image->IsPresent() || increment == 0, "");
    368  if (!image->IsPresent()) {
    369    return;
    370  }
    371  index_ += increment;
    372  if (index_ >= image->GetNumSymbols()) {
    373    index_ = image->GetNumSymbols();
    374    return;
    375  }
    376  const ElfW(Sym)    *symbol = image->GetDynsym(index_);
    377  const ElfW(Versym) *version_symbol = image->GetVersym(index_);
    378  ABSL_RAW_CHECK(symbol && version_symbol, "");
    379  const char *const symbol_name = image->GetDynstr(symbol->st_name);
    380 #if defined(__NetBSD__)
    381  const int version_index = version_symbol->vs_vers & VERSYM_VERSION;
    382 #else
    383  const ElfW(Versym) version_index = version_symbol[0] & VERSYM_VERSION;
    384 #endif
    385  const ElfW(Verdef) *version_definition = nullptr;
    386  const char *version_name = "";
    387  if (symbol->st_shndx == SHN_UNDEF) {
    388    // Undefined symbols reference DT_VERNEED, not DT_VERDEF, and
    389    // version_index could well be greater than verdefnum_, so calling
    390    // GetVerdef(version_index) may trigger assertion.
    391  } else {
    392    version_definition = image->GetVerdef(version_index);
    393  }
    394  if (version_definition) {
    395    // I am expecting 1 or 2 auxiliary entries: 1 for the version itself,
    396    // optional 2nd if the version has a parent.
    397    ABSL_RAW_CHECK(
    398        version_definition->vd_cnt == 1 || version_definition->vd_cnt == 2,
    399        "wrong number of entries");
    400    const ElfW(Verdaux) *version_aux = image->GetVerdefAux(version_definition);
    401    version_name = image->GetVerstr(version_aux->vda_name);
    402  }
    403  info_.name    = symbol_name;
    404  info_.version = version_name;
    405  info_.address = image->GetSymAddr(symbol);
    406  info_.symbol  = symbol;
    407 }
    408 
    409 }  // namespace debugging_internal
    410 ABSL_NAMESPACE_END
    411 }  // namespace absl
    412 
    413 #endif  // ABSL_HAVE_ELF_MEM_IMAGE