elf_mem_image.cc (13577B)
1 // Copyright 2017 The Abseil Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Allow dynamic symbol lookup in an in-memory Elf image. 16 // 17 18 #include "absl/debugging/internal/elf_mem_image.h" 19 20 #ifdef ABSL_HAVE_ELF_MEM_IMAGE // defined in elf_mem_image.h 21 22 #include <string.h> 23 24 #include <cassert> 25 #include <cstddef> 26 #include <cstdint> 27 28 #include "absl/base/config.h" 29 #include "absl/base/internal/raw_logging.h" 30 31 // From binutils/include/elf/common.h (this doesn't appear to be documented 32 // anywhere else). 33 // 34 // /* This flag appears in a Versym structure. It means that the symbol 35 // is hidden, and is only visible with an explicit version number. 36 // This is a GNU extension. */ 37 // #define VERSYM_HIDDEN 0x8000 38 // 39 // /* This is the mask for the rest of the Versym information. */ 40 // #define VERSYM_VERSION 0x7fff 41 42 #define VERSYM_VERSION 0x7fff 43 44 namespace absl { 45 ABSL_NAMESPACE_BEGIN 46 namespace debugging_internal { 47 48 namespace { 49 50 #if __SIZEOF_POINTER__ == 4 51 const int kElfClass = ELFCLASS32; 52 int ElfBind(const ElfW(Sym) *symbol) { return ELF32_ST_BIND(symbol->st_info); } 53 int ElfType(const ElfW(Sym) *symbol) { return ELF32_ST_TYPE(symbol->st_info); } 54 #elif __SIZEOF_POINTER__ == 8 55 const int kElfClass = ELFCLASS64; 56 int ElfBind(const ElfW(Sym) *symbol) { return ELF64_ST_BIND(symbol->st_info); } 57 int ElfType(const ElfW(Sym) *symbol) { return ELF64_ST_TYPE(symbol->st_info); } 58 #else 59 const int kElfClass = -1; 60 int ElfBind(const ElfW(Sym) *) { 61 ABSL_RAW_LOG(FATAL, "Unexpected word size"); 62 return 0; 63 } 64 int ElfType(const ElfW(Sym) *) { 65 ABSL_RAW_LOG(FATAL, "Unexpected word size"); 66 return 0; 67 } 68 #endif 69 70 // Extract an element from one of the ELF tables, cast it to desired type. 71 // This is just a simple arithmetic and a glorified cast. 72 // Callers are responsible for bounds checking. 73 template <typename T> 74 const T *GetTableElement(const ElfW(Ehdr) * ehdr, ElfW(Off) table_offset, 75 ElfW(Word) element_size, size_t index) { 76 return reinterpret_cast<const T*>(reinterpret_cast<const char *>(ehdr) 77 + table_offset 78 + index * element_size); 79 } 80 81 } // namespace 82 83 // The value of this variable doesn't matter; it's used only for its 84 // unique address. 85 const int ElfMemImage::kInvalidBaseSentinel = 0; 86 87 ElfMemImage::ElfMemImage(const void *base) { 88 ABSL_RAW_CHECK(base != kInvalidBase, "bad pointer"); 89 Init(base); 90 } 91 92 uint32_t ElfMemImage::GetNumSymbols() const { return num_syms_; } 93 94 const ElfW(Sym) * ElfMemImage::GetDynsym(uint32_t index) const { 95 ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range"); 96 return dynsym_ + index; 97 } 98 99 const ElfW(Versym) *ElfMemImage::GetVersym(uint32_t index) const { 100 ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range"); 101 return versym_ + index; 102 } 103 104 const ElfW(Phdr) *ElfMemImage::GetPhdr(int index) const { 105 ABSL_RAW_CHECK(index >= 0 && index < ehdr_->e_phnum, "index out of range"); 106 return GetTableElement<ElfW(Phdr)>(ehdr_, ehdr_->e_phoff, ehdr_->e_phentsize, 107 static_cast<size_t>(index)); 108 } 109 110 const char *ElfMemImage::GetDynstr(ElfW(Word) offset) const { 111 ABSL_RAW_CHECK(offset < strsize_, "offset out of range"); 112 return dynstr_ + offset; 113 } 114 115 const void *ElfMemImage::GetSymAddr(const ElfW(Sym) *sym) const { 116 if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= SHN_LORESERVE) { 117 // Symbol corresponds to "special" (e.g. SHN_ABS) section. 118 return reinterpret_cast<const void *>(sym->st_value); 119 } 120 ABSL_RAW_CHECK(link_base_ < sym->st_value, "symbol out of range"); 121 return GetTableElement<char>(ehdr_, 0, 1, sym->st_value - link_base_); 122 } 123 124 const ElfW(Verdef) *ElfMemImage::GetVerdef(int index) const { 125 ABSL_RAW_CHECK(0 <= index && static_cast<size_t>(index) <= verdefnum_, 126 "index out of range"); 127 const ElfW(Verdef) *version_definition = verdef_; 128 while (version_definition->vd_ndx < index && version_definition->vd_next) { 129 const char *const version_definition_as_char = 130 reinterpret_cast<const char *>(version_definition); 131 version_definition = 132 reinterpret_cast<const ElfW(Verdef) *>(version_definition_as_char + 133 version_definition->vd_next); 134 } 135 return version_definition->vd_ndx == index ? version_definition : nullptr; 136 } 137 138 const ElfW(Verdaux) *ElfMemImage::GetVerdefAux( 139 const ElfW(Verdef) *verdef) const { 140 return reinterpret_cast<const ElfW(Verdaux) *>(verdef+1); 141 } 142 143 const char *ElfMemImage::GetVerstr(ElfW(Word) offset) const { 144 ABSL_RAW_CHECK(offset < strsize_, "offset out of range"); 145 return dynstr_ + offset; 146 } 147 148 void ElfMemImage::Init(const void *base) { 149 ehdr_ = nullptr; 150 dynsym_ = nullptr; 151 dynstr_ = nullptr; 152 versym_ = nullptr; 153 verdef_ = nullptr; 154 num_syms_ = 0; 155 strsize_ = 0; 156 verdefnum_ = 0; 157 // Sentinel: PT_LOAD .p_vaddr can't possibly be this. 158 link_base_ = ~ElfW(Addr){0}; // NOLINT(readability/braces) 159 if (!base) { 160 return; 161 } 162 const char *const base_as_char = reinterpret_cast<const char *>(base); 163 if (base_as_char[EI_MAG0] != ELFMAG0 || base_as_char[EI_MAG1] != ELFMAG1 || 164 base_as_char[EI_MAG2] != ELFMAG2 || base_as_char[EI_MAG3] != ELFMAG3) { 165 assert(false); 166 return; 167 } 168 int elf_class = base_as_char[EI_CLASS]; 169 if (elf_class != kElfClass) { 170 assert(false); 171 return; 172 } 173 switch (base_as_char[EI_DATA]) { 174 case ELFDATA2LSB: { 175 #ifndef ABSL_IS_LITTLE_ENDIAN 176 assert(false); 177 return; 178 #endif 179 break; 180 } 181 case ELFDATA2MSB: { 182 #ifndef ABSL_IS_BIG_ENDIAN 183 assert(false); 184 return; 185 #endif 186 break; 187 } 188 default: { 189 assert(false); 190 return; 191 } 192 } 193 194 ehdr_ = reinterpret_cast<const ElfW(Ehdr) *>(base); 195 const ElfW(Phdr) *dynamic_program_header = nullptr; 196 for (int i = 0; i < ehdr_->e_phnum; ++i) { 197 const ElfW(Phdr) *const program_header = GetPhdr(i); 198 switch (program_header->p_type) { 199 case PT_LOAD: 200 if (!~link_base_) { 201 link_base_ = program_header->p_vaddr; 202 } 203 break; 204 case PT_DYNAMIC: 205 dynamic_program_header = program_header; 206 break; 207 } 208 } 209 if (!~link_base_ || !dynamic_program_header) { 210 assert(false); 211 // Mark this image as not present. Can not recur infinitely. 212 Init(nullptr); 213 return; 214 } 215 ptrdiff_t relocation = 216 base_as_char - reinterpret_cast<const char *>(link_base_); 217 ElfW(Dyn)* dynamic_entry = reinterpret_cast<ElfW(Dyn)*>( 218 static_cast<intptr_t>(dynamic_program_header->p_vaddr) + relocation); 219 uint32_t *sysv_hash = nullptr; 220 uint32_t *gnu_hash = nullptr; 221 for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) { 222 const auto value = 223 static_cast<intptr_t>(dynamic_entry->d_un.d_val) + relocation; 224 switch (dynamic_entry->d_tag) { 225 case DT_HASH: 226 sysv_hash = reinterpret_cast<uint32_t *>(value); 227 break; 228 case DT_GNU_HASH: 229 gnu_hash = reinterpret_cast<uint32_t *>(value); 230 break; 231 case DT_SYMTAB: 232 dynsym_ = reinterpret_cast<ElfW(Sym) *>(value); 233 break; 234 case DT_STRTAB: 235 dynstr_ = reinterpret_cast<const char *>(value); 236 break; 237 case DT_VERSYM: 238 versym_ = reinterpret_cast<ElfW(Versym) *>(value); 239 break; 240 case DT_VERDEF: 241 verdef_ = reinterpret_cast<ElfW(Verdef) *>(value); 242 break; 243 case DT_VERDEFNUM: 244 verdefnum_ = static_cast<size_t>(dynamic_entry->d_un.d_val); 245 break; 246 case DT_STRSZ: 247 strsize_ = static_cast<size_t>(dynamic_entry->d_un.d_val); 248 break; 249 default: 250 // Unrecognized entries explicitly ignored. 251 break; 252 } 253 } 254 if ((!sysv_hash && !gnu_hash) || !dynsym_ || !dynstr_ || !versym_ || 255 !verdef_ || !verdefnum_ || !strsize_) { 256 assert(false); // invalid VDSO 257 // Mark this image as not present. Can not recur infinitely. 258 Init(nullptr); 259 return; 260 } 261 if (sysv_hash) { 262 num_syms_ = sysv_hash[1]; 263 } else { 264 assert(gnu_hash); 265 // Compute the number of symbols for DT_GNU_HASH, which is specified by 266 // https://sourceware.org/gnu-gabi/program-loading-and-dynamic-linking.txt 267 uint32_t nbuckets = gnu_hash[0]; 268 // The buckets array is located after the header (4 uint32) and the bloom 269 // filter (size_t array of gnu_hash[2] elements). 270 uint32_t *buckets = gnu_hash + 4 + sizeof(size_t) / 4 * gnu_hash[2]; 271 // Find the chain of the last non-empty bucket. 272 uint32_t idx = 0; 273 for (uint32_t i = nbuckets; i > 0;) { 274 idx = buckets[--i]; 275 if (idx != 0) break; 276 } 277 if (idx != 0) { 278 // Find the last element of the chain, which has an odd value. 279 // Add one to get the number of symbols. 280 uint32_t *chain = buckets + nbuckets - gnu_hash[1]; 281 while (chain[idx++] % 2 == 0) { 282 } 283 } 284 num_syms_ = idx; 285 } 286 } 287 288 bool ElfMemImage::LookupSymbol(const char *name, 289 const char *version, 290 int type, 291 SymbolInfo *info_out) const { 292 for (const SymbolInfo& info : *this) { 293 if (strcmp(info.name, name) == 0 && strcmp(info.version, version) == 0 && 294 ElfType(info.symbol) == type) { 295 if (info_out) { 296 *info_out = info; 297 } 298 return true; 299 } 300 } 301 return false; 302 } 303 304 bool ElfMemImage::LookupSymbolByAddress(const void *address, 305 SymbolInfo *info_out) const { 306 for (const SymbolInfo& info : *this) { 307 const char *const symbol_start = 308 reinterpret_cast<const char *>(info.address); 309 const char *const symbol_end = symbol_start + info.symbol->st_size; 310 if (symbol_start <= address && address < symbol_end) { 311 if (info_out) { 312 // Client wants to know details for that symbol (the usual case). 313 if (ElfBind(info.symbol) == STB_GLOBAL) { 314 // Strong symbol; just return it. 315 *info_out = info; 316 return true; 317 } else { 318 // Weak or local. Record it, but keep looking for a strong one. 319 *info_out = info; 320 } 321 } else { 322 // Client only cares if there is an overlapping symbol. 323 return true; 324 } 325 } 326 } 327 return false; 328 } 329 330 ElfMemImage::SymbolIterator::SymbolIterator(const void *const image, 331 uint32_t index) 332 : index_(index), image_(image) {} 333 334 const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const { 335 return &info_; 336 } 337 338 const ElfMemImage::SymbolInfo& ElfMemImage::SymbolIterator::operator*() const { 339 return info_; 340 } 341 342 bool ElfMemImage::SymbolIterator::operator==(const SymbolIterator &rhs) const { 343 return this->image_ == rhs.image_ && this->index_ == rhs.index_; 344 } 345 346 bool ElfMemImage::SymbolIterator::operator!=(const SymbolIterator &rhs) const { 347 return !(*this == rhs); 348 } 349 350 ElfMemImage::SymbolIterator &ElfMemImage::SymbolIterator::operator++() { 351 this->Update(1); 352 return *this; 353 } 354 355 ElfMemImage::SymbolIterator ElfMemImage::begin() const { 356 SymbolIterator it(this, 0); 357 it.Update(0); 358 return it; 359 } 360 361 ElfMemImage::SymbolIterator ElfMemImage::end() const { 362 return SymbolIterator(this, GetNumSymbols()); 363 } 364 365 void ElfMemImage::SymbolIterator::Update(uint32_t increment) { 366 const ElfMemImage *image = reinterpret_cast<const ElfMemImage *>(image_); 367 ABSL_RAW_CHECK(image->IsPresent() || increment == 0, ""); 368 if (!image->IsPresent()) { 369 return; 370 } 371 index_ += increment; 372 if (index_ >= image->GetNumSymbols()) { 373 index_ = image->GetNumSymbols(); 374 return; 375 } 376 const ElfW(Sym) *symbol = image->GetDynsym(index_); 377 const ElfW(Versym) *version_symbol = image->GetVersym(index_); 378 ABSL_RAW_CHECK(symbol && version_symbol, ""); 379 const char *const symbol_name = image->GetDynstr(symbol->st_name); 380 #if defined(__NetBSD__) 381 const int version_index = version_symbol->vs_vers & VERSYM_VERSION; 382 #else 383 const ElfW(Versym) version_index = version_symbol[0] & VERSYM_VERSION; 384 #endif 385 const ElfW(Verdef) *version_definition = nullptr; 386 const char *version_name = ""; 387 if (symbol->st_shndx == SHN_UNDEF) { 388 // Undefined symbols reference DT_VERNEED, not DT_VERDEF, and 389 // version_index could well be greater than verdefnum_, so calling 390 // GetVerdef(version_index) may trigger assertion. 391 } else { 392 version_definition = image->GetVerdef(version_index); 393 } 394 if (version_definition) { 395 // I am expecting 1 or 2 auxiliary entries: 1 for the version itself, 396 // optional 2nd if the version has a parent. 397 ABSL_RAW_CHECK( 398 version_definition->vd_cnt == 1 || version_definition->vd_cnt == 2, 399 "wrong number of entries"); 400 const ElfW(Verdaux) *version_aux = image->GetVerdefAux(version_definition); 401 version_name = image->GetVerstr(version_aux->vda_name); 402 } 403 info_.name = symbol_name; 404 info_.version = version_name; 405 info_.address = image->GetSymAddr(symbol); 406 info_.symbol = symbol; 407 } 408 409 } // namespace debugging_internal 410 ABSL_NAMESPACE_END 411 } // namespace absl 412 413 #endif // ABSL_HAVE_ELF_MEM_IMAGE