stackanalysis.py (13291B)
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.


# There seem to be sometimes identical events recorded twice by telemetry
def sanitize(rows):
    """Drop consecutive duplicate telemetry events.

    Two adjacent rows are considered duplicates when client_id, session_id
    and seq are all equal. Rows must already be ordered so that duplicates
    are adjacent.

    Args:
        rows: iterable of event dicts with "client_id", "session_id", "seq".

    Returns:
        A new list with consecutive duplicates removed.
    """
    newrows = []
    prev_key = None
    for row in rows:
        key = (row["client_id"], row["session_id"], row["seq"])
        if key != prev_key:
            newrows.append(row)
            prev_key = key

    return newrows


# Given a set of rows, find all distinct build ids
def extractBuildIDs(rows):
    """Count how often each build id occurs in rows.

    Args:
        rows: iterable of event dicts with a "build_id" key.

    Returns:
        dict mapping build_id -> occurrence count.
    """
    buildids = {}
    for row in rows:
        # Avoid shadowing the builtin `id`; dict.get collapses the
        # first-seen/already-seen branches into one statement.
        bid = row["build_id"]
        buildids[bid] = buildids.get(bid, 0) + 1
    return buildids


# Given a set of build ids and rows, enrich each row by an hg link.
# Relies on the result of utils.fetchBuildRevisions in buildids.
def constructHGLinks(buildids, rows):
    """Enrich each row with a "location" hg link.

    Args:
        buildids: dict mapping build_id -> hg revision URL prefix
            (the result of utils.fetchBuildRevisions).
        rows: event dicts with "build_id", "source_file", "source_line"
            (source_line is expected to be a string, it is concatenated).

    Side effect: sets row["location"] on every row.
    """
    for row in rows:
        bid = row["build_id"]
        # Fall back to the raw build id as prefix when we have no
        # revision URL for it (keeps the two original branches as one).
        prefix = buildids.get(bid, bid)
        row["location"] = prefix + "/" + row["source_file"] + "#l" + row["source_line"]


# Module-global registries shared by the topmost-frame detection below:
# topmost_stackframes: set of (location, result) tuples that start a stack.
# delta_frames: per frame-pair timing statistics, see addFrameDelta.
topmost_stackframes = set()
delta_frames = {}


def isTopmostFrame(frame):
    """Return True if (location, result) of frame is a known topmost frame."""
    return (frame["location"], frame["result"]) in topmost_stackframes


def addTopmostFrame(frame):
    """Register frame as topmost and mark the row itself via frame["topmost"]."""
    if not isTopmostFrame(frame):
        # print("Found new topmost frame {}.".format(frame))
        topmost_stackframes.add((frame["location"], frame["result"]))
    frame["topmost"] = True


def addFrameDelta(frame1, frame2):
    """Accumulate the event-timestamp distance between two adjacent frames.

    Only frame pairs from the same client and session are considered.
    Statistics are keyed by "loc2:res2-loc1:res1" in the module-global
    delta_frames dict; frame2 is remembered as the candidate that may later
    be promoted to a topmost frame (see checkAverageFrameTimeDeltas).
    """
    if frame1["client_id"] != frame2["client_id"]:
        return
    if frame1["session_id"] != frame2["session_id"]:
        return

    fkey = "{}:{}-{}:{}".format(
        frame2["location"], frame2["result"], frame1["location"], frame1["result"]
    )
    if fkey not in delta_frames:
        delta_frames[fkey] = {
            "delta_sum": 0,
            "delta_cnt": 0,
            "prev_row": frame1,
            "candidate": frame2,
        }

    fdelta = delta_frames[fkey]
    etv1 = frame1["event_timestamp"]
    etv2 = frame2["event_timestamp"]
    # Only well-ordered integer timestamps contribute to the average.
    if isinstance(etv1, int) and isinstance(etv2, int) and etv2 > etv1:
        delta = etv2 - etv1
        fdelta["delta_sum"] = fdelta["delta_sum"] + delta
        fdelta["delta_cnt"] = fdelta["delta_cnt"] + 1
def checkAverageFrameTimeDeltas(rows, max_delta):
    """Promote frames to topmost when the average time gap before them is large.

    First pass: feed adjacent frame pairs (same chain, complete sessions,
    not already topmost) into addFrameDelta, which fills the module-global
    delta_frames. Second pass: every candidate whose average delta exceeds
    max_delta becomes a topmost frame.

    Must run after row["session_complete"] has been set, i.e. at the end of
    collectTopmostFrames.
    """
    # print("checkAverageFrameTimeDeltas")
    prev_row = None
    for row in rows:
        # Known topmost frames and rows of incomplete sessions never pair
        # up with a predecessor; they also break the current chain.
        if "topmost" in row or not row["session_complete"]:
            prev_row = None
            continue

        if prev_row:
            addFrameDelta(prev_row, row)
        prev_row = row

    for fd in delta_frames:
        # NOTE(review): `sum` shadows the builtin inside this loop body.
        sum = delta_frames[fd]["delta_sum"]
        cnt = delta_frames[fd]["delta_cnt"]
        if cnt > 0 and (sum / cnt) > max_delta:
            # print(delta_frames[fd])
            addTopmostFrame(delta_frames[fd]["candidate"])


# A topmost frame is considered to initiate a new raw stack. We collect all
# candidates before we actually apply them. This implies, that we should run
# this function on a "large enough" sample of rows to be more accurate.
# As a side effect, we mark all rows that are part of a "complete" session
# (a session, that started within our data scope).
def collectTopmostFrames(rows):
    """Detect stack-starting ("topmost") frames in an ordered row sample.

    A frame is registered as topmost when
      - a new client/session/thread/context begins (or seq restarts at 1), or
      - severity returns to ERROR right after a non-ERROR frame, or
      - it directly follows a severity downgrade, or
      - the average time distance to its predecessor is too large
        (delegated to checkAverageFrameTimeDeltas at the end).

    Side effects: fills the module-global topmost_stackframes / delta_frames
    and sets row["session_complete"] on every row.
    """
    prev_cid = "unset"
    prev_sid = "unset"
    prev_tid = "unset"
    prev_ctx = "unset"
    prev_sev = "ERROR"
    session_complete = False
    after_severity_downgrade = False
    for row in rows:
        cid = row["client_id"]
        sid = row["session_id"]
        # "seq" packs the thread id into the upper 32 bits and the running
        # sequence number into the lower 32 bits.
        tid = row["seq"] >> 32  # thread_id
        ctx = row["context"]
        seq = row["seq"] & 0x00000000FFFFFFFF  # seq
        sev = row["severity"]

        # If we have a new session, ensure it is complete from start,
        # otherwise we will ignore it entirely.
        if cid != prev_cid or sid != prev_sid or tid != prev_tid:
            if seq == 1:
                session_complete = True
            else:
                session_complete = False
        row["session_complete"] = session_complete
        if session_complete:
            # If we change client, session, thread or context, we can be sure to have
            # a new topmost frame.
            if (
                seq == 1
                or cid != prev_cid
                or sid != prev_sid
                or tid != prev_tid
                or ctx != prev_ctx
            ):
                addTopmostFrame(row)
                after_severity_downgrade = False
            # We do not expect a non-error to be ever upgraded to an error
            elif sev == "ERROR" and prev_sev != "ERROR":
                addTopmostFrame(row)
                after_severity_downgrade = False
            # If we just had a severity downgrade, we assume that we wanted
            # to break the error propagation after this point and split, too
            elif after_severity_downgrade:
                addTopmostFrame(row)
                after_severity_downgrade = False
            elif prev_sev == "ERROR" and sev != "ERROR":
                after_severity_downgrade = True

        prev_cid = cid
        prev_sid = sid
        prev_tid = tid
        prev_ctx = ctx
        prev_sev = sev

    # Should be ms. We've seen quite some runtime between stackframes in the
    # wild. We might want to consider to make this configurable. In general
    # we prefer local context over letting slip through some topmost frame
    # unrecognized, assuming that fixing the issues one by one they will
    # uncover them succesively. This is achieved by a rather high delta value.
    max_avg_delta = 200
    checkAverageFrameTimeDeltas(rows, max_avg_delta)


def getFrameKey(frame):
    """Identity of a single frame for stack hashing: location plus result."""
    return "{}.{}|".format(frame["location"], frame["result"])


def getStackKey(stack):
    """Hash of the concatenated frame keys; equal stacks get equal keys.

    NOTE(review): str hash() is randomized per process (PYTHONHASHSEED), so
    stack keys are only comparable within one run — confirm that is enough
    for all callers.
    """
    stack_key = ""
    for frame in stack["frames"]:
        stack_key += getFrameKey(frame)
    return hash(stack_key)


# A "raw stack" is a list of frames, that:
# - share the same build_id (implicitely through location)
# - share the same client_id
# - share the same session_id
# - has a growing sequence number
# - stops at the first downgrade of severity from ERROR to else
# - XXX: contains each location at most once (no recursion)
# - appears to be in a reasonable short timeframe
# Calculates also a hash key to identify identical stacks
def collectRawStacks(rows):
    """Cut the ordered rows into raw stacks at topmost-frame boundaries.

    Runs collectTopmostFrames first, then starts a new stack whenever a
    topmost frame is encountered, assigning incremental stack ids and a
    stack_key hash to each finished stack.

    Returns:
        list of stack dicts ("stack_id", "client_id", "session_id",
        "submit_timeabs", "context", "frames", "stack_key").
    """
    collectTopmostFrames(rows)
    raw_stacks = []
    # Placeholder stack that absorbs frames seen before the first topmost
    # frame. NOTE(review): if the very first row is NOT topmost, this
    # placeholder (stack_id "unset", dummy frame) is later appended to
    # raw_stacks once a topmost frame arrives — confirm intentional.
    stack = {
        "stack_id": "unset",
        "client_id": "unset",
        "session_id": "unset",
        "submit_timeabs": "unset",
        "frames": [{"location": "unset"}],
    }
    stack_id = 1
    first = True
    for row in rows:
        if isTopmostFrame(row):
            if not first:
                stack["stack_key"] = getStackKey(stack)
                raw_stacks.append(stack)
                stack_id += 1
            stack = {
                "stack_id": stack_id,
                "client_id": row["client_id"],
                "session_id": row["session_id"],
                "submit_timeabs": row["submit_timeabs"],
                "context": row["context"],
                "frames": [],
            }

        stack["frames"].append({
            "location": row["location"],
            "source_file": row["source_file"],
            "source_line": row["source_line"],
            "seq": row["seq"],
            "severity": row["severity"],
            "result": row["result"],
        })
        first = False

    # NOTE(review): the stack still being built when the loop ends is never
    # appended, so frames after the last topmost frame are dropped — confirm
    # this is the intended "incomplete tail" behavior.
    return raw_stacks


# Merge all stacks that have the same hash key and count occurrences.
# Relies on the ordering per client_id/session_id for correct counting.
def mergeEqualStacks(raw_stacks):
    """Merge stacks with identical stack_key and count their occurrences.

    Counts hits per merged stack plus distinct clients and sessions.
    Relies on raw_stacks being ordered by client_id/session_id: a new
    client/session is detected by comparison with the previously seen one,
    so unordered input would over-count.

    Returns:
        list of merged stack dicts (with "hit_count", "client_count",
        "session_count" added), sorted by hit_count descending.
    """
    merged_stacks = {}
    last_client_id = "none"
    last_session_id = "none"
    for stack in raw_stacks:
        stack_key = stack["stack_key"]
        merged_stack = stack
        if stack_key in merged_stacks:
            merged_stack = merged_stacks[stack_key]
            if stack["client_id"] != last_client_id:
                last_client_id = stack["client_id"]
                merged_stack["client_count"] += 1
            if stack["session_id"] != last_session_id:
                last_session_id = stack["session_id"]
                merged_stack["session_count"] += 1
            merged_stack["hit_count"] += 1
        else:
            # First occurrence: this stack becomes the merge target.
            merged_stack["client_count"] = 1
            last_client_id = merged_stack["client_id"]
            merged_stack["session_count"] = 1
            last_session_id = merged_stack["session_id"]
            merged_stack["hit_count"] = 1
            merged_stacks[stack_key] = merged_stack

    merged_list = list(merged_stacks.values())
    merged_list.sort(key=lambda x: x.get("hit_count"), reverse=True)
    return merged_list


# Split the list of stacks into:
# - aborted (has at least one frame with NS_ERROR_ABORT)
# - info/warning (has at least one frame with that severity)
# - error (has only error frames)
def filterStacksForPropagation(
    all_stacks, error_stacks, warn_stacks, info_stacks, abort_stacks
):
    """Distribute all_stacks into the four given output lists (appended).

    Priority per stack: abort > info > warning > error. Uses short-circuit
    any() instead of materializing filtered lists.
    """
    for stack in all_stacks:
        frames = stack["frames"]
        if any(f["result"] == "NS_ERROR_ABORT" for f in frames):
            abort_stacks.append(stack)
        elif any(f["severity"] == "INFO" for f in frames):
            info_stacks.append(stack)
        elif any(f["severity"] == "WARNING" for f in frames):
            warn_stacks.append(stack)
        else:
            error_stacks.append(stack)


# Bugzilla comment markup
def printStacks(stacks):
    """Render merged stacks as a Bugzilla-style markup table string."""
    row_format = "{} | {} | {} | {} | {}\n"
    out = ""
    out += row_format.format("Clients", "Sessions", "Hits", "Anchor (Context)", "Stack")
    out += row_format.format("-------", "--------", "----", "----------------", "-----")
    for stack in stacks:
        # join() produces the same " <- "-separated chain as the original
        # accumulation loop, without quadratic string concatenation.
        framestr = " <- ".join(
            "[{}#{}:{}]({})".format(
                frame["source_file"],
                frame["source_line"],
                frame["result"],
                frame["location"],
            )
            for frame in stack["frames"]
        )
        out += row_format.format(
            stack["client_count"],
            stack["session_count"],
            stack["hit_count"],
            "{} ({})".format(stack["frames"][0]["anchor"], stack["context"]),
            framestr,
        )

    return out


def groupStacksForAnchors(stacks):
    """Group stacks by the anchor of their first frame.

    Returns:
        dict mapping anchor name -> {"anchor": name, "stacks": [stack, ...]}.
    """
    anchors = {}
    for stack in stacks:
        anchor_name = stack["frames"][0]["anchor"]
        group = anchors.setdefault(anchor_name, {"anchor": anchor_name, "stacks": []})
        group["stacks"].append(stack)
    return anchors


"""
def getSummaryForAnchor(anchor):
    return "[QM_TRY] Errors in function {}".format(anchor)


def searchBugForAnchor(bugzilla_key, anchor):
    summary = getSummaryForAnchor(anchor)
    bug_url = "https://bugzilla.mozilla.org/rest/bug?" \
        "summary={}&api_key={}".format(summary, bugzilla_key)
    return requests.get(url=bug_url).json()["bugs"]


def createBugForAnchor(bugzilla_key, anchor):
    summary = getSummaryForAnchor(anchor)
    bug_url = "https://bugzilla.mozilla.org/rest/bug?" \
        "Bugzilla_api_key={}".format(bugzilla_key)
    body = {
        "product" : "Core",
        "component" : "Storage: Quota Manager",
        "version" : "unspecified",
        "summary" : summary,
        "description" : "This bug collects errors reported by QM_TRY"
        "macros for function {}.".format(anchor),
    }
    resp = requests.post(url=bug_url, json=body)
    if resp.status_code != 200:
        print(resp)
        return 0
    id = resp.json()["id"]
    print("Added new bug {}:".format(id))
    return id


def ensureBugForAnchor(bugzilla_key, anchor):
    buglist = searchBugForAnchor(bugzilla_key, anchor)
    if (len(buglist) > 0):
        id = buglist[0]["id"]
        print("Found existing bug {}:".format(id))
        return id
    return createBugForAnchor(bugzilla_key, anchor)


def addCommentForAnchor(bugzilla_key, anchor, stacks):
    id = ensureBugForAnchor(bugzilla_key, anchor)
    if (id <= 0):
        print("Unable to create a bug for {}.".format(anchor))
        return
    comment = printStacks(stacks)
    print("")
    print("Add comment to bug {}:".format(id))
    print(comment)


def addCommentsForStacks(bugzilla_key, stacks):
    anchors = groupStacksForAnchors(stacks)
    for anchor in anchors:
        addCommentForAnchor(bugzilla_key, anchors[anchor]["anchor"], anchors[anchor]["stacks"])
"""