symFileManager.py (13403B)
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import itertools
import os
import re
import threading
import time
from bisect import bisect

from mozlog import get_proxy_logger

LOG = get_proxy_logger("profiler")

# Libraries to keep prefetched
PREFETCHED_LIBS = ["xul.pdb", "firefox.pdb"]


class SymbolInfo:
    """Sorted address -> symbol table built from one parsed symbol file.

    addressMap is a dictionary mapping integer start addresses to symbol
    names. The addresses are sorted once at construction time so that
    Lookup() can use a binary search.
    """

    def __init__(self, addressMap):
        self.sortedAddresses = sorted(addressMap.keys())
        self.sortedSymbols = [addressMap[address] for address in self.sortedAddresses]
        self.entryCount = len(self.sortedAddresses)

    # TODO: Add checks for address < funcEnd ?
    def Lookup(self, address):
        """Return the symbol with the greatest start address <= address.

        Returns None when address is lower than every known start address.
        No upper bound is checked, so an address past the end of the last
        symbol still resolves to that last symbol.
        """
        nearest = bisect(self.sortedAddresses, address) - 1
        if nearest < 0:
            return None
        return self.sortedSymbols[nearest]

    def GetEntryCount(self):
        """Return the number of distinct addresses in this table."""
        return self.entryCount


# Singleton for .sym / .nmsym file cache management


class SymFileManager:
    """This class fetches symbols from files and caches the results.

    options (obj)
      symbolPaths : dictionary
        Paths to .SYM files, expressed internally as a mapping of app or platform
        names to directories. App & OS names from requests are converted to
        all-uppercase internally
          e.g. { "FIREFOX": "/tmp/path" }
      maxCacheEntries : number
        Maximum number of symbol files to keep in memory
      prefetchInterval : number
        Frequency of checking for recent symbols to cache (in hours)
      prefetchThreshold : number
        Oldest file age to prefetch (in hours)
      prefetchMaxSymbolsPerLib : (number)
        Maximum number of library versions to pre-fetch per library
    """

    # NOTE: sCache and sMruSymbols are class-level objects that are mutated
    # in place, so they are shared by every instance. sCacheCount is rebound
    # through `self`, which makes it per-instance after the first update.
    # This is only consistent while the class is used as a singleton.

    # libName -> {breakpadId -> SymbolInfo}
    sCache = {}
    # Sum of GetEntryCount() over every SymbolInfo currently in sCache
    sCacheCount = 0
    # Guards sCache, sCacheCount and sMruSymbols
    sCacheLock = threading.Lock()
    # (libName, breakpadId) tuples, most recently used first
    sMruSymbols = []

    sOptions = {}
    sCallbackTimer = None

    def __init__(self, options):
        self.sOptions = options

    def GetLibSymbolMap(self, libName, breakpadId, symbolSources):
        """Return the SymbolInfo for (libName, breakpadId), or None.

        The cache is consulted first. On a miss, every combination of
        extension (".sym", then ".nmsym") and entry of symbolSources is tried
        in order, looking for
        <symbolPaths[source]>/<libName>/<breakpadId>/<symbol file name>.
        The first file that parses successfully is cached and returned.

        Returns None when libName is empty or no symbol file could be loaded.
        Raises KeyError when a source is not a key of options["symbolPaths"].
        """
        # Empty lib name means client couldn't associate frame with any lib
        if libName == "":
            return None

        # Check cache first
        libSymbolMap = None
        with self.sCacheLock:
            if libName in self.sCache and breakpadId in self.sCache[libName]:
                libSymbolMap = self.sCache[libName][breakpadId]
                self.UpdateMruList(libName, breakpadId)

        if libSymbolMap is not None:
            return libSymbolMap

        LOG.debug("Need to fetch PDB file for " + libName + " " + breakpadId)

        # Guess the name of the .sym or .nmsym file on disk
        if libName.endswith(".pdb"):
            symFileNameWithoutExtension = libName[: -len(".pdb")]
        else:
            symFileNameWithoutExtension = libName

        # Look in the symbol dirs for this .sym or .nmsym file.
        # NOTE(review): libName and breakpadId are concatenated into the path
        # unchecked; confirm that callers sanitize request-supplied values.
        for extension, source in itertools.product([".sym", ".nmsym"], symbolSources):
            symFileName = symFileNameWithoutExtension + extension
            pathSuffix = os.sep + libName + os.sep + breakpadId + os.sep + symFileName
            path = self.sOptions["symbolPaths"][source] + pathSuffix
            libSymbolMap = self.FetchSymbolsFromFile(path)
            if libSymbolMap:
                break

        if not libSymbolMap:
            LOG.debug("No matching sym files, tried " + str(symbolSources))
            return None

        LOG.debug("Storing libSymbolMap under [" + libName + "][" + breakpadId + "]")
        with self.sCacheLock:
            self.MaybeEvict(libSymbolMap.GetEntryCount())
            if libName not in self.sCache:
                self.sCache[libName] = {}
            self.sCache[libName][breakpadId] = libSymbolMap
            self.sCacheCount += libSymbolMap.GetEntryCount()
            self.UpdateMruList(libName, breakpadId)
            LOG.debug(
                str(self.sCacheCount) + " symbols in cache after fetching symbol file"
            )

        return libSymbolMap

    def FetchSymbolsFromFile(self, path):
        """Parse the symbol file at path into a SymbolInfo.

        Files ending in ".sym" are scanned for "PUBLIC " and "FUNC " records;
        files ending in ".nmsym" are read as "address [letter] symbol" lines.
        Any other extension yields an empty SymbolInfo.

        Returns None (after logging) when the file cannot be opened or when
        any error occurs while reading or parsing it.
        """
        try:
            symFile = open(path)
        except Exception as e:
            LOG.debug("Error opening file " + path + ": " + str(e))
            return None

        LOG.debug("Parsing SYM file at " + path)

        try:
            # The context manager closes the file on success and on error.
            with symFile:
                symbolMap = {}
                lineNum = 0
                publicCount = 0
                funcCount = 0
                if path.endswith(".sym"):
                    for line in symFile:
                        lineNum += 1
                        if line[0:7] == "PUBLIC ":
                            line = line.rstrip()
                            fields = line.split(" ")
                            if len(fields) < 4:
                                LOG.debug("Line " + str(lineNum) + " is messed")
                                continue
                            # An optional "m" flag after the record type
                            # shifts the address and name by one field.
                            if fields[1] == "m":
                                address = int(fields[2], 16)
                                symbolMap[address] = " ".join(fields[4:])
                            else:
                                address = int(fields[1], 16)
                                symbolMap[address] = " ".join(fields[3:])
                            publicCount += 1
                        elif line[0:5] == "FUNC ":
                            line = line.rstrip()
                            fields = line.split(" ")
                            if len(fields) < 5:
                                LOG.debug("Line " + str(lineNum) + " is messed")
                                continue
                            if fields[1] == "m":
                                address = int(fields[2], 16)
                                symbolMap[address] = " ".join(fields[5:])
                            else:
                                address = int(fields[1], 16)
                                symbolMap[address] = " ".join(fields[4:])
                            funcCount += 1
                elif path.endswith(".nmsym"):
                    # Width of the address column, taken from the first line
                    # that does not start with a space.
                    addressLength = 0
                    for line in symFile:
                        lineNum += 1
                        if line.startswith(" "):
                            continue
                        if addressLength == 0:
                            addressLength = line.find(" ")
                        address = int(line[0:addressLength], 16)
                        # Some lines have the form
                        # "address space letter space symbol",
                        # some have the form "address space symbol".
                        # The letter has a meaning, but we ignore it.
                        if line[addressLength + 2] == " ":
                            symbol = line[addressLength + 3 :].rstrip()
                        else:
                            symbol = line[addressLength + 1 :].rstrip()
                        symbolMap[address] = symbol
                        publicCount += 1
        except Exception:
            LOG.error("Error parsing SYM file " + path)
            return None

        logString = "Found " + str(len(symbolMap)) + " unique entries from "
        logString += (
            str(publicCount) + " PUBLIC lines, " + str(funcCount) + " FUNC lines"
        )
        LOG.debug(logString)

        return SymbolInfo(symbolMap)

    def PrefetchRecentSymbolFiles(self):
        """Load recently modified symbol files for PREFETCHED_LIBS into the cache.

        Each call first schedules the next call on a threading.Timer that
        fires after options["prefetchInterval"] hours, so calling this once
        starts a repeating cycle. Only directories under
        symbolPaths["FIREFOX"] modified within the last
        options["prefetchThreshold"] hours are considered, limited to the
        options["prefetchMaxSymbolsPerLib"] most recent ones per library.
        """

        LOG.info("Prefetching recent symbol files")
        # Schedule next timer callback
        interval = self.sOptions["prefetchInterval"] * 60 * 60
        self.sCallbackTimer = threading.Timer(interval, self.PrefetchRecentSymbolFiles)
        self.sCallbackTimer.start()

        thresholdTime = time.time() - self.sOptions["prefetchThreshold"] * 60 * 60
        symDirsToInspect = {}
        for pdbName in PREFETCHED_LIBS:
            symDirsToInspect[pdbName] = []
            topLibPath = self.sOptions["symbolPaths"]["FIREFOX"] + os.sep + pdbName

            try:
                symbolDirs = os.listdir(topLibPath)
                for symbolDir in symbolDirs:
                    candidatePath = topLibPath + os.sep + symbolDir
                    mtime = os.path.getmtime(candidatePath)
                    if mtime > thresholdTime:
                        symDirsToInspect[pdbName].append((mtime, candidatePath))
            except Exception as e:
                LOG.error("Error while pre-fetching: " + str(e))

            LOG.info(
                "Found "
                + str(len(symDirsToInspect[pdbName]))
                + " new "
                + pdbName
                + " recent dirs"
            )

            # Only prefetch the most recent N entries
            symDirsToInspect[pdbName].sort(reverse=True)
            symDirsToInspect[pdbName] = symDirsToInspect[pdbName][
                : self.sOptions["prefetchMaxSymbolsPerLib"]
            ]

        # Don't fetch symbols already in cache.
        # Ideally, mutex would be held from check to insert in self.sCache,
        # but we don't want to hold the lock during I/O. This won't cause
        # inconsistencies.
        # The candidate lists are rebuilt rather than edited in place:
        # removing items from a list while iterating over it skips the
        # element that follows each removal.
        with self.sCacheLock:
            for pdbName in symDirsToInspect:
                cachedIds = self.sCache.get(pdbName, {})
                symDirsToInspect[pdbName] = [
                    (mtime, symbolDirPath)
                    for mtime, symbolDirPath in symDirsToInspect[pdbName]
                    if os.path.basename(symbolDirPath) not in cachedIds
                ]

        # Read all new symbol files in at once
        fetchedSymbols = {}
        fetchedCount = 0
        for pdbName in symDirsToInspect:
            # The corresponding symbol file name ends with .sym
            symFileName = re.sub(r"\.[^\.]+$", ".sym", pdbName)

            for _mtime, symbolDirPath in symDirsToInspect[pdbName]:
                pdbId = os.path.basename(symbolDirPath)
                symbolFilePath = symbolDirPath + os.sep + symFileName
                symbolInfo = self.FetchSymbolsFromFile(symbolFilePath)
                if symbolInfo:
                    # Stop if the prefetched items are bigger than the cache
                    if (
                        fetchedCount + symbolInfo.GetEntryCount()
                        > self.sOptions["maxCacheEntries"]
                    ):
                        break
                    fetchedSymbols[(pdbName, pdbId)] = symbolInfo
                    fetchedCount += symbolInfo.GetEntryCount()
                else:
                    LOG.error("Couldn't fetch .sym file symbols for " + symbolFilePath)

        # Insert new symbols into global symbol cache
        with self.sCacheLock:
            # Make room for the new symbols
            self.MaybeEvict(fetchedCount)

            for (pdbName, pdbId), newSymbolFile in fetchedSymbols.items():
                if pdbName not in self.sCache:
                    self.sCache[pdbName] = {}

                # Another thread may have cached it while we were reading
                if pdbId in self.sCache[pdbName]:
                    continue

                self.sCache[pdbName][pdbId] = newSymbolFile
                self.sCacheCount += newSymbolFile.GetEntryCount()

                # Move new symbols to front of MRU list to give them a chance
                self.UpdateMruList(pdbName, pdbId)

        LOG.info("Finished prefetching recent symbol files")

    def UpdateMruList(self, pdbName, pdbId):
        """Move (pdbName, pdbId) to the front of the MRU list.

        The entry is inserted if it is not already present. The callers in
        this class invoke this while holding sCacheLock.
        """
        libId = (pdbName, pdbId)
        if libId in self.sMruSymbols:
            self.sMruSymbols.remove(libId)
        self.sMruSymbols.insert(0, libId)

    def MaybeEvict(self, freeEntriesNeeded):
        """Evict least recently used symbol files to fit freeEntriesNeeded.

        Nothing is evicted when the cache is empty or when the new entries
        fit within options["maxCacheEntries"]. Otherwise entries are removed
        from the tail of the MRU list until the cache would sit at roughly
        70% of capacity once the new entries are added.

        This method does not take sCacheLock itself; the callers in this
        class invoke it with the lock already held.
        """
        maxCacheSize = self.sOptions["maxCacheEntries"]
        LOG.debug(
            "Cache occupancy before MaybeEvict: "
            + str(self.sCacheCount)
            + "/"
            + str(maxCacheSize)
        )

        if (
            self.sCacheCount == 0
            or self.sCacheCount + freeEntriesNeeded <= maxCacheSize
        ):
            return

        # If adding the new entries would exceed the max cache size,
        # evict so that cache is at 70% capacity after new entries added
        numOldEntriesAfterEvict = max(0, (0.70 * maxCacheSize) - freeEntriesNeeded)
        numToEvict = self.sCacheCount - numOldEntriesAfterEvict

        # Evict symbols until evict quota is met, starting with least recently
        # used (the tail of the MRU list)
        while numToEvict > 0 and self.sMruSymbols:
            pdbName, pdbId = self.sMruSymbols[-1]
            evicteeCount = self.sCache[pdbName][pdbId].GetEntryCount()

            del self.sCache[pdbName][pdbId]
            # Don't keep empty per-library dicts around forever
            if not self.sCache[pdbName]:
                del self.sCache[pdbName]
            self.sCacheCount -= evicteeCount
            self.sMruSymbols.pop()

            numToEvict -= evicteeCount

        LOG.debug(
            "Cache occupancy after MaybeEvict: "
            + str(self.sCacheCount)
            + "/"
            + str(maxCacheSize)
        )