tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

checksums.py (5030B)


      1 #!/usr/bin/python
      2 # This Source Code Form is subject to the terms of the Mozilla Public
      3 # License, v. 2.0. If a copy of the MPL was not distributed with this
      4 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      5 
      6 import hashlib
      7 import logging
      8 import os
      9 import sys
     10 from optparse import OptionParser
     11 
# Module-level logger used by the functions in this file.  setup_logging()
# looks up the logger of the same name to attach its handler and level.
logger = logging.getLogger("checksums.py")
     13 
     14 
     15 def digest_file(filename, digest, chunk_size=131072):
     16    """Produce a checksum for the file specified by 'filename'.  'filename'
     17    is a string path to a file that is opened and read in this function.  The
     18    checksum algorithm is specified by 'digest' and is a valid OpenSSL
     19    algorithm.  If the digest used is not valid or Python's hashlib doesn't
     20    work, the None object will be returned instead.  The size of blocks
     21    that this function will read from the file object it opens based on
     22    'filename' can be specified by 'chunk_size', which defaults to 1K"""
     23    assert not os.path.isdir(filename), "this function only works with files"
     24 
     25    logger.debug("Creating new %s object" % digest)
     26    h = hashlib.new(digest)
     27    with open(filename, "rb") as f:
     28        while True:
     29            data = f.read(chunk_size)
     30            if not data:
     31                logger.debug("Finished reading in file")
     32                break
     33            h.update(data)
     34    hash = h.hexdigest()
     35    logger.debug("Hash for %s is %s" % (filename, hash))
     36    return hash
     37 
     38 
     39 def process_files(dirs, output_filename, digests):
     40    """This function takes a list of directory names, 'drs'. It will then
     41    compute the checksum for each of the files in these by by opening the files.
     42    Once each file is read and its checksum is computed, this function
     43    will write the information to the file specified by 'output_filename'.
     44    The path written in the output file will have anything specified by 'strip'
     45    removed from the path.  The output file is closed before returning nothing
     46    The algorithm to compute checksums with can be specified by 'digests'
     47    and needs to be a list of valid OpenSSL algorithms.
     48 
     49    The output file is written in the format:
     50        <hash> <algorithm> <filesize> <filepath>
     51    Example:
     52        d1fa09a<snip>e4220 sha1 14250744 firefox-4.0b6pre.en-US.mac64.dmg
     53    """
     54 
     55    if os.path.exists(output_filename):
     56        logger.debug('Overwriting existing checksums file "%s"' % output_filename)
     57    else:
     58        logger.debug('Creating a new checksums file "%s"' % output_filename)
     59    with open(output_filename, "w+") as output:
     60        for d in dirs:
     61            for root, _, files in os.walk(d):
     62                for f in sorted(files):
     63                    full = os.path.join(root, f)
     64                    rel = os.path.relpath(full, d)
     65 
     66                    for digest in digests:
     67                        hash = digest_file(full, digest)
     68 
     69                        output.write(
     70                            "%s %s %s %s\n" % (hash, digest, os.path.getsize(full), rel)
     71                        )
     72 
     73 
     74 def setup_logging(level=logging.DEBUG):
     75    """This function sets up the logging module using a speficiable logging
     76    module logging level.  The default log level is DEBUG.
     77 
     78    The output is in the format:
     79        <level> - <message>
     80    Example:
     81        DEBUG - Finished reading in file"""
     82 
     83    logger = logging.getLogger("checksums.py")
     84    logger.setLevel(logging.DEBUG)
     85    handler = logging.StreamHandler()
     86    handler.setLevel(level)
     87    formatter = logging.Formatter("%(levelname)s - %(message)s")
     88    handler.setFormatter(formatter)
     89    logger.addHandler(handler)
     90 
     91 
     92 def main():
     93    """This is a main function that parses arguments, sets up logging
     94    and generates a checksum file"""
     95    # Parse command line arguments
     96    parser = OptionParser()
     97    parser.add_option(
     98        "-d",
     99        "--digest",
    100        help="checksum algorithm to use",
    101        action="append",
    102        dest="digests",
    103    )
    104    parser.add_option(
    105        "-o",
    106        "--output",
    107        help="output file to use",
    108        action="store",
    109        dest="outfile",
    110        default="checksums",
    111    )
    112    parser.add_option(
    113        "-v",
    114        "--verbose",
    115        help="Be noisy (takes precedence over quiet)",
    116        action="store_true",
    117        dest="verbose",
    118        default=False,
    119    )
    120    parser.add_option(
    121        "-q",
    122        "--quiet",
    123        help="Be quiet",
    124        action="store_true",
    125        dest="quiet",
    126        default=False,
    127    )
    128 
    129    options, args = parser.parse_args()
    130 
    131    # Figure out which logging level to use
    132    if options.verbose:
    133        loglevel = logging.DEBUG
    134    elif options.quiet:
    135        loglevel = logging.ERROR
    136    else:
    137        loglevel = logging.INFO
    138 
    139    # Set up logging
    140    setup_logging(loglevel)
    141 
    142    # Validate the digest type to use
    143    if not options.digests:
    144        options.digests = ["sha1"]
    145 
    146    for i in args:
    147        if not os.path.isdir(i):
    148            logger.error("%s is not a directory" % i)
    149            sys.exit(1)
    150 
    151    process_files(args, options.outfile, options.digests)
    152 
    153 
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()