tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

symFileManager.py (13403B)


      1 # This Source Code Form is subject to the terms of the Mozilla Public
      2 # License, v. 2.0. If a copy of the MPL was not distributed with this
      3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      4 import itertools
      5 import os
      6 import re
      7 import threading
      8 import time
      9 from bisect import bisect
     10 
     11 from mozlog import get_proxy_logger
     12 
# Module-wide logger, obtained from mozlog under the "profiler" name.
LOG = get_proxy_logger("profiler")

# Libraries to keep prefetched: SymFileManager.PrefetchRecentSymbolFiles()
# looks for recently modified symbol directories of each name listed here.
PREFETCHED_LIBS = ["xul.pdb", "firefox.pdb"]
     17 
     18 
     19 class SymbolInfo:
     20    def __init__(self, addressMap):
     21        self.sortedAddresses = sorted(addressMap.keys())
     22        self.sortedSymbols = [addressMap[address] for address in self.sortedAddresses]
     23        self.entryCount = len(self.sortedAddresses)
     24 
     25    # TODO: Add checks for address < funcEnd ?
     26    def Lookup(self, address):
     27        nearest = bisect(self.sortedAddresses, address) - 1
     28        if nearest < 0:
     29            return None
     30        return self.sortedSymbols[nearest]
     31 
     32    def GetEntryCount(self):
     33        return self.entryCount
     34 
     35 
     36 # Singleton for .sym / .nmsym file cache management
     37 
     38 
     39 class SymFileManager:
     40    """This class fetches symbols from files and caches the results.
     41 
     42    options (obj)
     43        symbolPaths : dictionary
     44            Paths to .SYM files, expressed internally as a mapping of app or platform
     45            names to directories. App & OS names from requests are converted to
     46            all-uppercase internally
     47            e.g. { "FIREFOX": "/tmp/path" }
     48        maxCacheEntries : number
     49            Maximum number of symbol files to keep in memory
     50        prefetchInterval : number
     51            Frequency of checking for recent symbols to cache (in hours)
     52        prefetchThreshold : number
     53            Oldest file age to prefetch (in hours)
     54        prefetchMaxSymbolsPerLib : (number)
     55            Maximum number of library versions to pre-fetch per library
     56    """
     57 
     58    sCache = {}
     59    sCacheCount = 0
     60    sCacheLock = threading.Lock()
     61    sMruSymbols = []
     62 
     63    sOptions = {}
     64    sCallbackTimer = None
     65 
     66    def __init__(self, options):
     67        self.sOptions = options
     68 
     69    def GetLibSymbolMap(self, libName, breakpadId, symbolSources):
     70        # Empty lib name means client couldn't associate frame with any lib
     71        if libName == "":
     72            return None
     73 
     74        # Check cache first
     75        libSymbolMap = None
     76        self.sCacheLock.acquire()
     77        try:
     78            if libName in self.sCache and breakpadId in self.sCache[libName]:
     79                libSymbolMap = self.sCache[libName][breakpadId]
     80                self.UpdateMruList(libName, breakpadId)
     81        finally:
     82            self.sCacheLock.release()
     83 
     84        if libSymbolMap is None:
     85            LOG.debug("Need to fetch PDB file for " + libName + " " + breakpadId)
     86 
     87            # Guess the name of the .sym or .nmsym file on disk
     88            if libName[-4:] == ".pdb":
     89                symFileNameWithoutExtension = re.sub(r"\.[^\.]+$", "", libName)
     90            else:
     91                symFileNameWithoutExtension = libName
     92 
     93            # Look in the symbol dirs for this .sym or .nmsym file
     94            for extension, source in itertools.product(
     95                [".sym", ".nmsym"], symbolSources
     96            ):
     97                symFileName = symFileNameWithoutExtension + extension
     98                pathSuffix = (
     99                    os.sep + libName + os.sep + breakpadId + os.sep + symFileName
    100                )
    101                path = self.sOptions["symbolPaths"][source] + pathSuffix
    102                libSymbolMap = self.FetchSymbolsFromFile(path)
    103                if libSymbolMap:
    104                    break
    105 
    106            if not libSymbolMap:
    107                LOG.debug("No matching sym files, tried " + str(symbolSources))
    108                return None
    109 
    110            LOG.debug(
    111                "Storing libSymbolMap under [" + libName + "][" + breakpadId + "]"
    112            )
    113            self.sCacheLock.acquire()
    114            try:
    115                self.MaybeEvict(libSymbolMap.GetEntryCount())
    116                if libName not in self.sCache:
    117                    self.sCache[libName] = {}
    118                self.sCache[libName][breakpadId] = libSymbolMap
    119                self.sCacheCount += libSymbolMap.GetEntryCount()
    120                self.UpdateMruList(libName, breakpadId)
    121                LOG.debug(
    122                    str(self.sCacheCount)
    123                    + " symbols in cache after fetching symbol file"
    124                )
    125            finally:
    126                self.sCacheLock.release()
    127 
    128        return libSymbolMap
    129 
    130    def FetchSymbolsFromFile(self, path):
    131        try:
    132            symFile = open(path)
    133        except Exception as e:
    134            LOG.debug("Error opening file " + path + ": " + str(e))
    135            return None
    136 
    137        LOG.debug("Parsing SYM file at " + path)
    138 
    139        try:
    140            symbolMap = {}
    141            lineNum = 0
    142            publicCount = 0
    143            funcCount = 0
    144            if path.endswith(".sym"):
    145                for line in symFile:
    146                    lineNum += 1
    147                    if line[0:7] == "PUBLIC ":
    148                        line = line.rstrip()
    149                        fields = line.split(" ")
    150                        if len(fields) < 4:
    151                            LOG.debug("Line " + str(lineNum) + " is messed")
    152                            continue
    153                        if fields[1] == "m":
    154                            address = int(fields[2], 16)
    155                            symbolMap[address] = " ".join(fields[4:])
    156                        else:
    157                            address = int(fields[1], 16)
    158                            symbolMap[address] = " ".join(fields[3:])
    159                        publicCount += 1
    160                    elif line[0:5] == "FUNC ":
    161                        line = line.rstrip()
    162                        fields = line.split(" ")
    163                        if len(fields) < 5:
    164                            LOG.debug("Line " + str(lineNum) + " is messed")
    165                            continue
    166                        if fields[1] == "m":
    167                            address = int(fields[2], 16)
    168                            symbolMap[address] = " ".join(fields[5:])
    169                        else:
    170                            address = int(fields[1], 16)
    171                            symbolMap[address] = " ".join(fields[4:])
    172                        funcCount += 1
    173            elif path.endswith(".nmsym"):
    174                addressLength = 0
    175                for line in symFile:
    176                    lineNum += 1
    177                    if line.startswith(" "):
    178                        continue
    179                    if addressLength == 0:
    180                        addressLength = line.find(" ")
    181                    address = int(line[0:addressLength], 16)
    182                    # Some lines have the form
    183                    # "address space letter space symbol",
    184                    # some have the form "address space symbol".
    185                    # The letter has a meaning, but we ignore it.
    186                    if line[addressLength + 2] == " ":
    187                        symbol = line[addressLength + 3 :].rstrip()
    188                    else:
    189                        symbol = line[addressLength + 1 :].rstrip()
    190                    symbolMap[address] = symbol
    191                    publicCount += 1
    192        except Exception:
    193            LOG.error("Error parsing SYM file " + path)
    194            return None
    195 
    196        logString = "Found " + str(len(symbolMap)) + " unique entries from "
    197        logString += (
    198            str(publicCount) + " PUBLIC lines, " + str(funcCount) + " FUNC lines"
    199        )
    200        LOG.debug(logString)
    201 
    202        return SymbolInfo(symbolMap)
    203 
    204    def PrefetchRecentSymbolFiles(self):
    205        """This method runs in a loop. Use the options "prefetchThreshold" to adjust"""
    206 
    207        LOG.info("Prefetching recent symbol files")
    208        # Schedule next timer callback
    209        interval = self.sOptions["prefetchInterval"] * 60 * 60
    210        self.sCallbackTimer = threading.Timer(interval, self.PrefetchRecentSymbolFiles)
    211        self.sCallbackTimer.start()
    212 
    213        thresholdTime = time.time() - self.sOptions["prefetchThreshold"] * 60 * 60
    214        symDirsToInspect = {}
    215        for pdbName in PREFETCHED_LIBS:
    216            symDirsToInspect[pdbName] = []
    217            topLibPath = self.sOptions["symbolPaths"]["FIREFOX"] + os.sep + pdbName
    218 
    219            try:
    220                symbolDirs = os.listdir(topLibPath)
    221                for symbolDir in symbolDirs:
    222                    candidatePath = topLibPath + os.sep + symbolDir
    223                    mtime = os.path.getmtime(candidatePath)
    224                    if mtime > thresholdTime:
    225                        symDirsToInspect[pdbName].append((mtime, candidatePath))
    226            except Exception as e:
    227                LOG.error("Error while pre-fetching: " + str(e))
    228 
    229            LOG.info(
    230                "Found "
    231                + str(len(symDirsToInspect[pdbName]))
    232                + " new "
    233                + pdbName
    234                + " recent dirs"
    235            )
    236 
    237            # Only prefetch the most recent N entries
    238            symDirsToInspect[pdbName].sort(reverse=True)
    239            symDirsToInspect[pdbName] = symDirsToInspect[pdbName][
    240                : self.sOptions["prefetchMaxSymbolsPerLib"]
    241            ]
    242 
    243        # Don't fetch symbols already in cache.
    244        # Ideally, mutex would be held from check to insert in self.sCache,
    245        # but we don't want to hold the lock during I/O. This won't cause
    246        # inconsistencies.
    247        self.sCacheLock.acquire()
    248        try:
    249            for pdbName in symDirsToInspect:
    250                for mtime, symbolDirPath in symDirsToInspect[pdbName]:
    251                    pdbId = os.path.basename(symbolDirPath)
    252                    if pdbName in self.sCache and pdbId in self.sCache[pdbName]:
    253                        symDirsToInspect[pdbName].remove((mtime, symbolDirPath))
    254        finally:
    255            self.sCacheLock.release()
    256 
    257        # Read all new symbol files in at once
    258        fetchedSymbols = {}
    259        fetchedCount = 0
    260        for pdbName in symDirsToInspect:
    261            # The corresponding symbol file name ends with .sym
    262            symFileName = re.sub(r"\.[^\.]+$", ".sym", pdbName)
    263 
    264            for mtime, symbolDirPath in symDirsToInspect[pdbName]:
    265                pdbId = os.path.basename(symbolDirPath)
    266                symbolFilePath = symbolDirPath + os.sep + symFileName
    267                symbolInfo = self.FetchSymbolsFromFile(symbolFilePath)
    268                if symbolInfo:
    269                    # Stop if the prefetched items are bigger than the cache
    270                    if (
    271                        fetchedCount + symbolInfo.GetEntryCount()
    272                        > self.sOptions["maxCacheEntries"]
    273                    ):
    274                        break
    275                    fetchedSymbols[(pdbName, pdbId)] = symbolInfo
    276                    fetchedCount += symbolInfo.GetEntryCount()
    277                else:
    278                    LOG.error("Couldn't fetch .sym file symbols for " + symbolFilePath)
    279                    continue
    280 
    281        # Insert new symbols into global symbol cache
    282        self.sCacheLock.acquire()
    283        try:
    284            # Make room for the new symbols
    285            self.MaybeEvict(fetchedCount)
    286 
    287            for pdbName, pdbId in fetchedSymbols:
    288                if pdbName not in self.sCache:
    289                    self.sCache[pdbName] = {}
    290 
    291                if pdbId in self.sCache[pdbName]:
    292                    continue
    293 
    294                newSymbolFile = fetchedSymbols[(pdbName, pdbId)]
    295                self.sCache[pdbName][pdbId] = newSymbolFile
    296                self.sCacheCount += newSymbolFile.GetEntryCount()
    297 
    298                # Move new symbols to front of MRU list to give them a chance
    299                self.UpdateMruList(pdbName, pdbId)
    300 
    301        finally:
    302            self.sCacheLock.release()
    303 
    304        LOG.info("Finished prefetching recent symbol files")
    305 
    306    def UpdateMruList(self, pdbName, pdbId):
    307        libId = (pdbName, pdbId)
    308        if libId in self.sMruSymbols:
    309            self.sMruSymbols.remove(libId)
    310        self.sMruSymbols.insert(0, libId)
    311 
    312    def MaybeEvict(self, freeEntriesNeeded):
    313        maxCacheSize = self.sOptions["maxCacheEntries"]
    314        LOG.debug(
    315            "Cache occupancy before MaybeEvict: "
    316            + str(self.sCacheCount)
    317            + "/"
    318            + str(maxCacheSize)
    319        )
    320 
    321        if (
    322            self.sCacheCount == 0
    323            or self.sCacheCount + freeEntriesNeeded <= maxCacheSize
    324        ):
    325            # No need to lock mutex here, this doesn't need to be 100%
    326            return
    327 
    328        # If adding the new entries would exceed the max cache size,
    329        # evict so that cache is at 70% capacity after new entries added
    330        numOldEntriesAfterEvict = max(0, (0.70 * maxCacheSize) - freeEntriesNeeded)
    331        numToEvict = self.sCacheCount - numOldEntriesAfterEvict
    332 
    333        # Evict symbols until evict quota is met, starting with least recently
    334        # used
    335        for pdbName, pdbId in reversed(self.sMruSymbols):
    336            if numToEvict <= 0:
    337                break
    338 
    339            evicteeCount = self.sCache[pdbName][pdbId].GetEntryCount()
    340 
    341            del self.sCache[pdbName][pdbId]
    342            self.sCacheCount -= evicteeCount
    343            self.sMruSymbols.pop()
    344 
    345            numToEvict -= evicteeCount
    346 
    347        LOG.debug(
    348            "Cache occupancy after MaybeEvict: "
    349            + str(self.sCacheCount)
    350            + "/"
    351            + str(maxCacheSize)
    352        )