data_renewal_generate.py (6232B)
#!/usr/bin/env python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.

"""
A script to help generate the telemetry renewal csv and request template.
This script also modifies metrics.yaml to mark telemetry entries that are
about to expire.
"""

import csv
import json
import os
import sys

# NOTE: PyYAML (`yaml`, `yaml.loader.FullLoader`) is imported inside
# `_load_metrics` so that the helpers in this module stay importable on
# machines that do not have PyYAML installed.

METRICS_FILENAME = "../app/metrics.yaml"
NEW_METRICS_FILENAME = "../app/metrics_new.yaml"
GLEAN_DICTIONARY_PREFIX = "https://dictionary.telemetry.mozilla.org/apps/fenix/metrics/"

# This is to make sure we only write headers for the csv file once
write_header = True
# The number of soon-to-expire telemetry entries detected
total_count = 0

USAGE = """usage: ./{script_name} future_fenix_version_number"""

# list of values that we care about
_KEY_FILTER = [
    "type",
    "description",
    "bugs",
    "data_reviews",
    "expires",
]

# Keys that never describe a metric or a category and are skipped outright.
_SKIPPED_KEYS = {"$schema", "no_lint", "disabled"}


def _first_or_blank(entries):
    """Return the first item of a list/tuple, "" if it is empty, else *entries*."""
    if isinstance(entries, (list, tuple)):
        return entries[0] if entries else ""
    return entries


def _write_renewal_header(renewal):
    """Write the one-time preamble of the data-renewal request template."""
    renewal.write("# Request for Data Collection Renewal\n")
    renewal.write("### Renew for 1 year\n")
    renewal.write("Total: TBD\n")
    renewal.write("———\n")


def _write_renewal_entry(renewal, name, data_review):
    """Write the questionnaire section for the metric called *name*."""
    renewal.write("`" + name + "`:\n")
    renewal.write(
        "1) Provide a link to the initial Data Collection Review Request for this collection.\n"
    )
    renewal.write(" - " + str(data_review) + "\n")
    renewal.write("\n")
    renewal.write("2) When will this collection now expire?\n")
    renewal.write(" - TBD\n")
    renewal.write("\n")
    renewal.write("3) Why was the initial period of collection insufficient?\n")
    renewal.write(" - TBD\n")
    renewal.write("\n")
    renewal.write("———\n")


def response(last_key, content, expire_version, writer, renewal):
    """Walk the parsed metrics tree and report every metric that needs renewal.

    Keys of *content* are visited in order.  A nested mapping is descended
    into unless it carries an ``expires`` value that is ``"never"`` or greater
    than *expire_version*.  As soon as a ``type`` key is reached, *content* is
    treated as a metric definition: one csv row and one renewal-template
    section are emitted for it and the walk of this mapping stops.

    Args:
        last_key: dotted path of *content*; every level is prefixed with "."
            (the root is ""), and the leading dot is stripped for output.
        content: mapping loaded from metrics.yaml.  It is not modified.
        expire_version: metrics whose ``expires`` is less than or equal to
            this version are reported.
        writer: object with a ``writerow`` method (e.g. ``csv.writer``).
        renewal: writable text stream receiving the renewal request template.

    Side effects:
        Increments the module-level ``total_count`` for each reported metric
        and clears ``write_header`` once the csv/template headers are written.
    """
    global write_header
    global total_count
    for key, value in content.items():
        if key in _SKIPPED_KEYS:
            continue

        # Only mappings can carry an "expires" entry.  Testing strings, lists
        # or numbers here would either do a substring search or raise.
        is_mapping = isinstance(value, dict)
        if is_mapping and "expires" in value:
            expires = value["expires"]
            if expires == "never" or expires > expire_version:
                continue

        if key == "type":
            total_count += 1
            name = last_key.lstrip(".")

            result = {
                "#": total_count,
                "name": name,
                "glean dictionary": GLEAN_DICTIONARY_PREFIX + name.replace(".", "_"),
            }
            # Always emit the same columns in the same order so that every
            # row lines up with the header, whatever the metric defines.
            for field in _KEY_FILTER:
                result[field] = content.get(field, "")
            result["bugs"] = _first_or_blank(result["bugs"])
            result["data_reviews"] = _first_or_blank(result["data_reviews"])

            # add columns for product to fill out, these should always be
            # added at the end
            result["keep(Y/N)"] = ""
            result["new expiry version"] = ""
            result["reason to extend"] = ""

            # output data-renewal request template
            if write_header:
                writer.writerow(list(result.keys()))
                write_header = False
                _write_renewal_header(renewal)

            writer.writerow(list(result.values()))
            _write_renewal_entry(renewal, name, result["data_reviews"])
            return

        if is_mapping:
            response(last_key + "." + key, value, expire_version, writer, renewal)


def _mark_expiring_lines(lines, current_version):
    """Append a renewal TODO marker to every expiring ``expires:`` line.

    Returns a ``(marked_lines, count)`` tuple where *count* is the number of
    lines whose version is less than or equal to *current_version*.
    """
    prefix = "expires: "
    marked = []
    count = 0
    for line in lines:
        stripped = line.lstrip(" ")
        if stripped.startswith(prefix) and not stripped.startswith("expires: never"):
            version = int(stripped[len(prefix):])
            if version <= current_version:
                count += 1
                line = (
                    line.rstrip("\n")
                    + " /* TODO <"
                    + str(count)
                    + "> require renewal */\n"
                )
        marked.append(line)
    return marked, count


def _load_metrics(text):
    """Parse the metrics.yaml *text* into plain JSON-compatible objects."""
    import yaml
    from yaml.loader import FullLoader

    # NOTE(review): metrics.yaml is a file from this repository, so FullLoader
    # is acceptable here; do not point this at untrusted input.
    data = yaml.load(text, Loader=FullLoader)
    # Round-trip through json to normalise the YAML objects.
    return json.loads(json.dumps(data))


def main(argv=None):
    """Generate the renewal csv/template and mark metrics.yaml.

    Args:
        argv: command-line arguments without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        The process exit status: 0 on success, 1 on a usage error.
    """
    global write_header
    global total_count

    args = sys.argv[1:] if argv is None else argv
    try:
        arg1 = args[0]
        current_version = int(arg1)
    except (IndexError, ValueError):
        print(USAGE.format(script_name=os.path.basename(sys.argv[0])))
        print("usage is to include argument of the form `100`")
        return 1

    with open(METRICS_FILENAME, encoding="utf-8") as f:
        lines = f.readlines()

    # parse metrics.yaml to json
    content = _load_metrics("".join(lines))
    csv_filename = arg1 + "_expiry_list.csv"
    renewal_filename = arg1 + "_renewal_request.txt"

    # remove files created by last run if they exist
    for path, label in (
        (csv_filename, "csv"),
        (renewal_filename, "renewal request template"),
        (NEW_METRICS_FILENAME, "metrics yaml"),
    ):
        if os.path.exists(path):
            print("remove old " + label + " file")
            os.remove(path)

    write_header = True
    total_count = 0
    # newline="" is what the csv module requires for files it writes to.
    with open(csv_filename, "w", newline="", encoding="utf-8") as data_file:
        with open(renewal_filename, "w", encoding="utf-8") as renewal_file:
            response(
                "", content, current_version, csv.writer(data_file), renewal_file
            )
    print("Completed")
    print("Total count: " + str(total_count))

    # Go through the metrics.yaml file to mark expired telemetry
    marked_lines, verify_count = _mark_expiring_lines(lines, current_version)
    with open(NEW_METRICS_FILENAME, "w", encoding="utf-8") as f2:
        f2.writelines(marked_lines)

    print("\n==============================")
    if total_count != verify_count:
        print("!!! Count check failed !!!")
    else:
        print("Count check passed")
    print("==============================")

    # The input handle is closed by now, so the swap also works on platforms
    # that refuse to delete an open file.
    os.replace(NEW_METRICS_FILENAME, METRICS_FILENAME)
    return 0


if __name__ == "__main__":
    sys.exit(main())