snapshot-hack.py (3610B)
#!/usr/bin/python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import subprocess
import sys
import threading
import urllib.request
from urllib.parse import urlparse, urlunparse

# This script interposes between APT and its HTTP method. APT sends queries on
# stdin, and expects responses on stdout. We intercept those and change the
# snapshot.debian.org URLs it requests on the fly, if the equivalent URLs
# exist on deb.debian.org.

URI_HEADER = "URI: "
SNAPSHOT_HOST = "snapshot.debian.org"
MIRROR_HOST = "deb.debian.org"
# Upper bound, in seconds, on the HEAD probe sent to the mirror. Without one, a
# stalled connection would block the request loop (and therefore APT) forever.
HEAD_TIMEOUT = 30


def url_exists(url, timeout=HEAD_TIMEOUT):
    """Return True if a HEAD request for *url* is answered with HTTP 200.

    Any failure (malformed URL, network error, HTTP error status, timeout)
    yields False, so that the caller simply keeps using the original URL.

    Args:
        url: The URL to probe.
        timeout: Maximum number of seconds to wait for the server.
    """
    try:
        req = urllib.request.Request(url, method="HEAD")
        # The context manager closes the connection once the status is read.
        with urllib.request.urlopen(req, timeout=timeout) as response:
            return response.getcode() == 200
    except Exception:
        # Deliberately broad: this probe is best-effort, and any problem just
        # means "do not rewrite the request".
        return False


def rewrite_snapshot_url(url):
    """Return the deb.debian.org equivalent of a snapshot .deb URL.

    The url is expected to be of the form
    http://snapshot.debian.org/archive/section/yymmddThhmmssZ/... and to end
    with ".deb". Returns None when *url* does not have that shape (other host,
    other file type, too few path components, or unparsable), in which case it
    must be left alone.
    """
    try:
        url_parts = urlparse(url)
        hostname = url_parts.hostname
    except ValueError:
        # urlparse rejects some malformed netlocs (e.g. broken IPv6 literals).
        return None
    if hostname != SNAPSHOT_HOST or not url_parts.path.endswith(".deb"):
        return None

    # urlparse().path always starts with a / so path_parts is expected to look
    # like ["", "archive", "section", "yymmddThhmmssZ", ...].
    path_parts = url_parts.path.split("/")
    if len(path_parts) < 5 or path_parts[1] != "archive":
        return None

    # Drop "archive" and "yymmddThhmmssZ" to create an url on deb.debian.org.
    mirror_path = "/".join(path_parts[:1] + path_parts[2:3] + path_parts[4:])
    return urlunparse(url_parts._replace(netloc=MIRROR_HOST, path=mirror_path))


def write_and_flush(fh, data):
    """Write *data* to *fh* and flush immediately so the peer sees it at once."""
    fh.write(data)
    fh.flush()


def output_handler(proc, url_mapping, lock):
    """Relay the HTTP method's output to our stdout.

    URI headers that mention a rewritten URL are translated back to the URL
    APT originally asked for; every other line is forwarded unchanged.

    Args:
        proc: The subprocess whose stdout is read line by line.
        url_mapping: Dict mapping rewritten URLs to the original URLs.
        lock: Lock guarding access to url_mapping.
    """
    for line in proc.stdout:
        if line.startswith(URI_HEADER):
            url = line[len(URI_HEADER):].rstrip()
            # APT expects back the original url it requested.
            with lock:
                original_url = url_mapping.get(url, None)
            if original_url:
                write_and_flush(sys.stdout, line.replace(url, original_url))
                continue
        write_and_flush(sys.stdout, line)


def main():
    """Run the real APT HTTP method and proxy stdin/stdout through it.

    Lines read from stdin are forwarded to the method. URI headers pointing at
    a .deb on snapshot.debian.org are redirected to deb.debian.org when the
    file exists there, because deb.debian.org will be much faster.
    """
    proc = subprocess.Popen(
        ["/usr/lib/apt/methods/http"],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        text=True,
    )
    url_mapping = {}
    lock = threading.Lock()
    output_thread = threading.Thread(
        target=output_handler, args=(proc, url_mapping, lock), daemon=True
    )
    output_thread.start()

    while True:
        try:
            line = sys.stdin.readline()
        except KeyboardInterrupt:
            # When apt cuts the connection, we receive a KeyboardInterrupt.
            break
        if not line:
            break

        if line.startswith(URI_HEADER):
            url = line[len(URI_HEADER):].rstrip()
            modified_url = rewrite_snapshot_url(url)
            # Hopefully, most files will be available on deb.debian.org.
            if modified_url and url_exists(modified_url):
                # Record the mapping before forwarding the request, so the
                # output thread can always translate the response back.
                with lock:
                    url_mapping[modified_url] = url
                write_and_flush(proc.stdin, line.replace(url, modified_url))
                continue
        write_and_flush(proc.stdin, line)

    proc.stdin.close()
    output_thread.join()
    # Reap the child so it does not linger as a zombie.
    proc.wait()


if __name__ == "__main__":
    main()