checksums.py (5030B)
#!/usr/bin/python
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import hashlib
import logging
import os
import sys
from optparse import OptionParser

logger = logging.getLogger("checksums.py")

# The single stream handler installed by setup_logging(). It is kept at module
# level so that repeated calls reconfigure it instead of stacking duplicates.
_stream_handler = None


def digest_file(filename, digest, chunk_size=131072):
    """Return the hexadecimal checksum of the file at 'filename'.

    'filename' is a string path to a file that is opened (in binary mode)
    and read by this function. The checksum algorithm is named by 'digest'
    and must be one accepted by hashlib.new(); an unsupported name makes
    hashlib.new() raise ValueError. The file is read in blocks of
    'chunk_size' bytes, which defaults to 128 KiB (131072 bytes), so large
    files are never held in memory in full.
    """
    # Kept as an assert so the exception type seen by existing callers does
    # not change.
    assert not os.path.isdir(filename), "this function only works with files"

    logger.debug("Creating new %s object", digest)
    h = hashlib.new(digest)
    with open(filename, "rb") as f:
        while True:
            data = f.read(chunk_size)
            if not data:
                logger.debug("Finished reading in file")
                break
            h.update(data)
    checksum = h.hexdigest()
    logger.debug("Hash for %s is %s", filename, checksum)
    return checksum


def process_files(dirs, output_filename, digests):
    """Write checksums for every file found under the directories in 'dirs'.

    Each directory in 'dirs' is walked recursively. Sub-directories and
    files are visited in sorted order so the output is reproducible. Every
    file is read once per algorithm in 'digests' (a list of algorithm names
    accepted by hashlib.new()), and one line per file and algorithm is
    written to the file named by 'output_filename'. The path written in the
    output file is relative to the directory from 'dirs' under which the
    file was found. The output file is closed before returning; nothing is
    returned.

    If the output file itself lives inside one of the scanned directories
    it is skipped: it is being written while the scan runs, so its checksum
    and size would be meaningless.

    The output file is written in the format:
        <hash> <algorithm> <filesize> <filepath>
    Example:
        d1fa09a<snip>e4220 sha1 14250744 firefox-4.0b6pre.en-US.mac64.dmg
    """

    if os.path.exists(output_filename):
        logger.debug('Overwriting existing checksums file "%s"', output_filename)
    else:
        logger.debug('Creating a new checksums file "%s"', output_filename)

    # Resolve symlinks on both sides of the comparison so the output file is
    # recognised no matter how the scanned directory was spelled.
    output_path = os.path.realpath(output_filename)

    with open(output_filename, "w") as output:
        for d in dirs:
            for root, subdirs, files in os.walk(d):
                # Sorting in place steers os.walk()'s top-down traversal.
                subdirs.sort()
                for f in sorted(files):
                    full = os.path.join(root, f)
                    if os.path.realpath(full) == output_path:
                        logger.debug('Skipping output file "%s"', full)
                        continue
                    rel = os.path.relpath(full, d)

                    for digest in digests:
                        checksum = digest_file(full, digest)

                        output.write(
                            "%s %s %s %s\n"
                            % (checksum, digest, os.path.getsize(full), rel)
                        )


def setup_logging(level=logging.DEBUG):
    """Set up the "checksums.py" logger with a stream handler.

    'level' is the logging module level applied to the handler; it defaults
    to DEBUG. The logger itself is always set to DEBUG so that the handler
    level alone decides what is emitted. Calling this function again only
    updates the level of the handler it installed earlier, so messages are
    never emitted twice.

    The output is in the format:
        <level> - <message>
    Example:
        DEBUG - Finished reading in file"""
    global _stream_handler

    log = logging.getLogger("checksums.py")
    log.setLevel(logging.DEBUG)

    if _stream_handler is None:
        _stream_handler = logging.StreamHandler()
        _stream_handler.setFormatter(
            logging.Formatter("%(levelname)s - %(message)s")
        )
    _stream_handler.setLevel(level)

    # Re-attach if it was never added or was removed by someone else.
    if _stream_handler not in log.handlers:
        log.addHandler(_stream_handler)


def main():
    """Parse command line arguments, set up logging and generate a
    checksum file.

    Positional arguments are the directories to scan. The process exits
    with status 1 if a requested digest algorithm is unsupported or if any
    positional argument is not a directory; in both cases the output file
    is left untouched."""
    # Parse command line arguments
    parser = OptionParser()
    parser.add_option(
        "-d",
        "--digest",
        help="checksum algorithm to use",
        action="append",
        dest="digests",
    )
    parser.add_option(
        "-o",
        "--output",
        help="output file to use",
        action="store",
        dest="outfile",
        default="checksums",
    )
    parser.add_option(
        "-v",
        "--verbose",
        help="Be noisy (takes precedence over quiet)",
        action="store_true",
        dest="verbose",
        default=False,
    )
    parser.add_option(
        "-q",
        "--quiet",
        help="Be quiet",
        action="store_true",
        dest="quiet",
        default=False,
    )

    options, args = parser.parse_args()

    # Figure out which logging level to use
    if options.verbose:
        loglevel = logging.DEBUG
    elif options.quiet:
        loglevel = logging.ERROR
    else:
        loglevel = logging.INFO

    # Set up logging
    setup_logging(loglevel)

    # Validate the digest type to use
    if not options.digests:
        options.digests = ["sha1"]

    for name in options.digests:
        try:
            # hashlib.new() raises ValueError for unknown algorithms, and
            # hexdigest() raises TypeError for variable-length digests that
            # need an explicit length, which digest_file() cannot supply.
            hashlib.new(name).hexdigest()
        except (ValueError, TypeError):
            logger.error("%s is not a supported digest algorithm", name)
            sys.exit(1)

    for i in args:
        if not os.path.isdir(i):
            logger.error("%s is not a directory", i)
            sys.exit(1)

    process_files(args, options.outfile, options.digests)


if __name__ == "__main__":
    main()