tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

midl.py (20199B)


      1 # Copyright 2017 The Chromium Authors
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 from __future__ import division
      6 
      7 import array
      8 import difflib
      9 import filecmp
     10 import io
     11 import operator
     12 import os
     13 import posixpath
     14 import re
     15 import shutil
     16 import struct
     17 import subprocess
     18 import sys
     19 import tempfile
     20 import uuid
     21 
     22 from functools import reduce
     23 
     24 
     25 def ZapTimestamp(filename):
     26  contents = open(filename, 'rb').read()
     27  # midl.exe writes timestamp 2147483647 (2^31 - 1) as creation date into its
     28  # outputs, but using the local timezone.  To make the output timezone-
     29  # independent, replace that date with a fixed string of the same length.
     30  # Also blank out the minor version number.
     31  if filename.endswith('.tlb'):
     32    # See https://chromium-review.googlesource.com/c/chromium/src/+/693223 for
     33    # a fairly complete description of the .tlb binary format.
     34    # TLB files start with a 54 byte header. Offset 0x20 stores how many types
     35    # are defined in the file, and the header is followed by that many uint32s.
     36    # After that, 15 section headers appear.  Each section header is 16 bytes,
     37    # starting with offset and length uint32s.
     38    # Section 12 in the file contains custom() data. custom() data has a type
     39    # (int, string, etc).  Each custom data chunk starts with a uint16_t
     40    # describing its type.  Type 8 is string data, consisting of a uint32_t
     41    # len, followed by that many data bytes, followed by 'W' bytes to pad to a
     42    # 4 byte boundary.  Type 0x13 is uint32 data, followed by 4 data bytes,
     43    # followed by two 'W' to pad to a 4 byte boundary.
     44    # The custom block always starts with one string containing "Created by
     45    # MIDL version 8...", followed by one uint32 containing 0x7fffffff,
     46    # followed by another uint32 containing the MIDL compiler version (e.g.
     47    # 0x0801026e for v8.1.622 -- 0x26e == 622).  These 3 fields take 0x54 bytes.
     48    # There might be more custom data after that, but these 3 blocks are always
     49    # there for file-level metadata.
     50    # All data is little-endian in the file.
     51    assert contents[0:8] == b'MSFT\x02\x00\x01\x00'
     52    ntypes, = struct.unpack_from('<I', contents, 0x20)
     53    custom_off, custom_len = struct.unpack_from(
     54        '<II', contents, 0x54 + 4*ntypes + 11*16)
     55    assert custom_len >= 0x54
     56    # First: Type string (0x8), followed by 0x3e characters.
     57    assert contents[custom_off:custom_off + 6] == b'\x08\x00\x3e\x00\x00\x00'
     58    assert re.match(
     59        br'Created by MIDL version 8\.\d\d\.\d{4} '
     60        br'at ... Jan 1. ..:..:.. 2038\n',
     61        contents[custom_off + 6:custom_off + 6 + 0x3e])
     62    # Second: Type uint32 (0x13) storing 0x7fffffff (followed by WW / 0x57 pad)
     63    assert contents[custom_off+6+0x3e:custom_off+6+0x3e+8] == \
     64        b'\x13\x00\xff\xff\xff\x7f\x57\x57'
     65    # Third: Type uint32 (0x13) storing MIDL compiler version.
     66    assert contents[custom_off + 6 + 0x3e + 8:custom_off + 6 + 0x3e + 8 +
     67                    2] == b'\x13\x00'
     68    # Replace "Created by" string with fixed string, and fixed MIDL version with
     69    # 8.1.622 always.
     70    contents = (
     71        contents[0:custom_off + 6] +
     72        b'Created by MIDL version 8.xx.xxxx at a redacted point in time\n' +
     73        # uint32 (0x13) val 0x7fffffff, WW, uint32 (0x13), val 0x0801026e, WW
     74        b'\x13\x00\xff\xff\xff\x7f\x57\x57\x13\x00\x6e\x02\x01\x08\x57\x57' +
     75        contents[custom_off + 0x54:])
     76  else:
     77    contents = re.sub(
     78        br'File created by MIDL compiler version 8\.\d\d\.\d{4} \*/\r\n'
     79        br'/\* at ... Jan 1. ..:..:.. 2038',
     80        br'File created by MIDL compiler version 8.xx.xxxx */\r\n'
     81        br'/* at a redacted point in time', contents)
     82    contents = re.sub(
     83        br'    Oicf, W1, Zp8, env=(.....) \(32b run\), '
     84        br'target_arch=(AMD64|X86) 8\.\d\d\.\d{4}',
     85        br'    Oicf, W1, Zp8, env=\1 (32b run), target_arch=\2 8.xx.xxxx',
     86        contents)
     87    # TODO(thakis): If we need more hacks than these, try to verify checked-in
     88    # outputs when we're using the hermetic toolchain.
     89    # midl.exe older than 8.1.622 omit '//' after #endif, fix that:
     90    contents = contents.replace(b'#endif !_MIDL_USE_GUIDDEF_',
     91                                b'#endif // !_MIDL_USE_GUIDDEF_')
     92    # midl.exe puts the midl version into code in one place.  To have
     93    # predictable output, lie about the midl version if it's not 8.1.622.
     94    # This is unfortunate, but remember that there's beauty too in imperfection.
     95    contents = contents.replace(b'0x801026c, /* MIDL Version 8.1.620 */',
     96                                b'0x801026e, /* MIDL Version 8.1.622 */')
     97  open(filename, 'wb').write(contents)
     98 
     99 
    100 def get_tlb_contents(tlb_file):
    101  # See ZapTimestamp() for a short overview of the .tlb format.
    102  contents = open(tlb_file, 'rb').read()
    103  assert contents[0:8] == b'MSFT\x02\x00\x01\x00'
    104  ntypes, = struct.unpack_from('<I', contents, 0x20)
    105  type_off, type_len = struct.unpack_from('<II', contents, 0x54 + 4*ntypes)
    106 
    107  guid_off, guid_len = struct.unpack_from(
    108      '<II', contents, 0x54 + 4*ntypes + 5*16)
    109  assert guid_len % 24 == 0
    110 
    111  contents = array.array('B', contents)
    112 
    113  return contents, ntypes, type_off, guid_off, guid_len
    114 
    115 
    116 def recreate_guid_hashtable(contents, ntypes, guid_off, guid_len):
    117  # This function is called after changing guids in section 6 (the "guid"
    118  # section). This function recreates the GUID hashtable in section 5. Since the
    119  # hash table uses chaining, it's easiest to recompute it from scratch rather
    120  # than trying to patch it up.
    121  hashtab = [0xffffffff] * (0x80 // 4)
    122  for guidind in range(guid_off, guid_off + guid_len, 24):
    123    guidbytes, typeoff, nextguid = struct.unpack_from(
    124        '<16sII', contents, guidind)
    125    words = struct.unpack('<8H', guidbytes)
    126    # midl seems to use the following simple hash function for GUIDs:
    127    guidhash = reduce(operator.xor, [w for w in words]) % (0x80 // 4)
    128    nextguid = hashtab[guidhash]
    129    struct.pack_into('<I', contents, guidind + 0x14, nextguid)
    130    hashtab[guidhash] = guidind - guid_off
    131  hash_off, hash_len = struct.unpack_from(
    132      '<II', contents, 0x54 + 4*ntypes + 4*16)
    133  for i, hashval in enumerate(hashtab):
    134    struct.pack_into('<I', contents, hash_off + 4*i, hashval)
    135 
    136 
    137 def overwrite_guids_h(h_file, dynamic_guids):
    138  contents = open(h_file, 'rb').read()
    139  for key in dynamic_guids:
    140    contents = re.sub(key, dynamic_guids[key], contents, flags=re.I)
    141  open(h_file, 'wb').write(contents)
    142 
    143 
    144 def get_uuid_format(guid, prefix):
    145  formatted_uuid = b'0x%s,0x%s,0x%s,' % (guid[0:8], guid[9:13], guid[14:18])
    146  formatted_uuid += b'%s0x%s,0x%s' % (prefix, guid[19:21], guid[21:23])
    147  for i in range(24, len(guid), 2):
    148    formatted_uuid += b',0x' + guid[i:i + 2]
    149  return formatted_uuid
    150 
    151 
    152 def get_uuid_format_iid_file(guid):
    153  # Convert from "D0E1CACC-C63C-4192-94AB-BF8EAD0E3B83" to
    154  # 0xD0E1CACC,0xC63C,0x4192,0x94,0xAB,0xBF,0x8E,0xAD,0x0E,0x3B,0x83.
    155  return get_uuid_format(guid, b'')
    156 
    157 
    158 def overwrite_guids_iid(iid_file, dynamic_guids):
    159  contents = open(iid_file, 'rb').read()
    160  for key in dynamic_guids:
    161    contents = re.sub(get_uuid_format_iid_file(key),
    162                      get_uuid_format_iid_file(dynamic_guids[key]),
    163                      contents,
    164                      flags=re.I)
    165  open(iid_file, 'wb').write(contents)
    166 
    167 
    168 def get_uuid_format_proxy_file(guid):
    169  # Convert from "D0E1CACC-C63C-4192-94AB-BF8EAD0E3B83" to
    170  # {0xD0E1CACC,0xC63C,0x4192,{0x94,0xAB,0xBF,0x8E,0xAD,0x0E,0x3B,0x83}}.
    171  return get_uuid_format(guid, b'{')
    172 
    173 
    174 def overwrite_guids_proxy(proxy_file, dynamic_guids):
    175  contents = open(proxy_file, 'rb').read()
    176  for key in dynamic_guids:
    177    contents = re.sub(get_uuid_format_proxy_file(key),
    178                      get_uuid_format_proxy_file(dynamic_guids[key]),
    179                      contents,
    180                      flags=re.I)
    181  open(proxy_file, 'wb').write(contents)
    182 
    183 
    184 def getguid(contents, offset):
    185  # Returns a guid string of the form "D0E1CACC-C63C-4192-94AB-BF8EAD0E3B83".
    186  g0, g1, g2, g3 = struct.unpack_from('<IHH8s', contents, offset)
    187  g3 = b''.join([b'%02X' % g for g in bytearray(g3)])
    188  return b'%08X-%04X-%04X-%s-%s' % (g0, g1, g2, g3[0:4], g3[4:])
    189 
    190 
    191 def setguid(contents, offset, guid):
    192  guid = uuid.UUID(guid.decode('utf-8'))
    193  struct.pack_into('<IHH8s', contents, offset,
    194                   *(guid.fields[0:3] + (guid.bytes[8:], )))
    195 
    196 
    197 def overwrite_guids_tlb(tlb_file, dynamic_guids):
    198  contents, ntypes, type_off, guid_off, guid_len = get_tlb_contents(tlb_file)
    199 
    200  for i in range(0, guid_len, 24):
    201    current_guid = getguid(contents, guid_off + i)
    202    for key in dynamic_guids:
    203      if key.lower() == current_guid.lower():
    204        setguid(contents, guid_off + i, dynamic_guids[key])
    205 
    206  recreate_guid_hashtable(contents, ntypes, guid_off, guid_len)
    207  open(tlb_file, 'wb').write(contents)
    208 
    209 
    210 # Handle multiple guid substitutions, where |dynamic_guids| is of the form
    211 # "PLACEHOLDER-GUID-158428a4-6014-4978-83ba-9fad0dabe791="
    212 # "3d852661-c795-4d20-9b95-5561e9a1d2d9,"
    213 # "PLACEHOLDER-GUID-63B8FFB1-5314-48C9-9C57-93EC8BC6184B="
    214 # "D0E1CACC-C63C-4192-94AB-BF8EAD0E3B83".
    215 #
    216 # Before specifying |dynamic_guids| in the build, the IDL file is first compiled
    217 # with "158428a4-6014-4978-83ba-9fad0dabe791" and
    218 # "63B8FFB1-5314-48C9-9C57-93EC8BC6184B". These are the "replaceable" guids,
    219 # i.e., guids that can be replaced in future builds. The resulting MIDL outputs
    220 # are copied over to src\third_party\win_build_output\.
    221 #
    222 # Then, in the future, any changes to these guids can be accomplished by
    223 # providing |dynamic_guids| of the format above in the build file. These
    224 # "dynamic" guid changes by themselves will not require the MIDL compiler and
    225 # therefore will not require copying output over to
    226 # src\third_party\win_build_output\.
    227 #
    228 # The pre-generated src\third_party\win_build_output\ files are used for
    229 # cross-compiling on other platforms, since the MIDL compiler is Windows-only.
    230 def overwrite_guids(h_file, iid_file, proxy_file, tlb_file, dynamic_guids):
    231  # Fix up GUIDs in .h, _i.c, _p.c, and .tlb.
    232  overwrite_guids_h(h_file, dynamic_guids)
    233  overwrite_guids_iid(iid_file, dynamic_guids)
    234  overwrite_guids_proxy(proxy_file, dynamic_guids)
    235  if tlb_file:
    236    overwrite_guids_tlb(tlb_file, dynamic_guids)
    237 
    238 
    239 # This function removes all occurrences of 'PLACEHOLDER-GUID-' from the
    240 # template, and if |dynamic_guids| is specified, also replaces the guids within
    241 # the file. Finally, it writes the resultant output to the |idl| file.
    242 def generate_idl_from_template(idl_template, dynamic_guids, idl):
    243  contents = open(idl_template, 'rb').read()
    244  contents = re.sub(b'PLACEHOLDER-GUID-', b'', contents, flags=re.I)
    245  if dynamic_guids:
    246    for key in dynamic_guids:
    247      contents = re.sub(key, dynamic_guids[key], contents, flags=re.I)
    248  open(idl, 'wb').write(contents)
    249 
    250 
    251 # This function runs the MIDL compiler with the provided arguments. It creates
    252 # and returns a tuple of |0,midl_output_dir| on success.
    253 def run_midl(args, env_dict):
    254  midl_output_dir = tempfile.mkdtemp()
    255  delete_midl_output_dir = True
    256 
    257  try:
    258    popen = subprocess.Popen(args + ['/out', midl_output_dir],
    259                             shell=True,
    260                             universal_newlines=True,
    261                             env=env_dict,
    262                             stdout=subprocess.PIPE,
    263                             stderr=subprocess.STDOUT)
    264    out, _ = popen.communicate()
    265 
    266    # Filter junk out of stdout, and write filtered versions. Output we want
    267    # to filter is pairs of lines that look like this:
    268    # Processing C:\Program Files (x86)\Microsoft SDKs\...\include\objidl.idl
    269    # objidl.idl
    270    lines = out.splitlines()
    271    prefixes = ('Processing ', '64 bit Processing ')
    272    processing = set(
    273        os.path.basename(x) for x in lines if x.startswith(prefixes))
    274    for line in lines:
    275      if not line.startswith(prefixes) and line not in processing:
    276        print(line)
    277 
    278    if popen.returncode != 0:
    279      return popen.returncode, midl_output_dir
    280 
    281    for f in os.listdir(midl_output_dir):
    282      ZapTimestamp(os.path.join(midl_output_dir, f))
    283 
    284    delete_midl_output_dir = False
    285  finally:
    286    if os.path.exists(midl_output_dir) and delete_midl_output_dir:
    287      shutil.rmtree(midl_output_dir)
    288 
    289  return 0, midl_output_dir
    290 
    291 
    292 # This function adds support for dynamic generation of guids: when values are
    293 # specified as 'uuid5:name', this function will substitute the values with
    294 # generated dynamic guids using the uuid5 function. The uuid5 function generates
    295 # a guid based on the SHA-1 hash of a namespace identifier (which is the guid
    296 # that comes after 'PLACEHOLDER-GUID-') and a name (which is a string, such as a
    297 # version string "87.1.2.3").
    298 #
    299 # For instance, when |dynamic_guid| is of the form:
    300 # "PLACEHOLDER-GUID-158428a4-6014-4978-83ba-9fad0dabe791=uuid5:88.0.4307.0
    301 # ,"
    302 # "PLACEHOLDER-GUID-63B8FFB1-5314-48C9-9C57-93EC8BC6184B=uuid5:88.0.4307.0
    303 # "
    304 #
    305 # "PLACEHOLDER-GUID-158428a4-6014-4978-83ba-9fad0dabe791" would be substituted
    306 # with uuid5("158428a4-6014-4978-83ba-9fad0dabe791", "88.0.4307.0"), which is
    307 # "64700170-AD80-5DE3-924E-2F39D862CFD5". And
    308 # "PLACEHOLDER-GUID-63B8FFB1-5314-48C9-9C57-93EC8BC6184B" would be
    309 # substituted with uuid5("63B8FFB1-5314-48C9-9C57-93EC8BC6184B", "88.0.4307.0"),
    310 # which is "7B6E7538-3C38-5565-BC92-42BCEE268D76".
    311 def uuid5_substitutions(dynamic_guids):
    312  for key, value in dynamic_guids.items():
    313    if value.startswith('uuid5:'):
    314      name = value.split('uuid5:', 1)[1]
    315      assert name
    316      dynamic_guids[key] = str(uuid.uuid5(uuid.UUID(key), name)).upper()
    317 
    318 
    319 def main(arch, gendir, outdir, dynamic_guids, tlb, h, dlldata, iid, proxy,
    320         clang, idl, *flags):
    321  # Copy checked-in outputs to final location.
    322  source = gendir
    323  if os.path.isdir(os.path.join(source, os.path.basename(idl))):
    324    source = os.path.join(source, os.path.basename(idl))
    325  source = os.path.join(source, arch.split('.')[1])  # Append 'x86' or 'x64'.
    326  source = os.path.normpath(source)
    327 
    328  source_exists = True
    329  if not os.path.isdir(source):
    330    source_exists = False
    331    if sys.platform != 'win32':
    332      print('Directory %s needs to be populated from Windows first' % source)
    333      return 1
    334 
    335    # This is a brand new IDL file that does not have outputs under
    336    # third_party\win_build_output\midl. We create an empty directory for now.
    337    os.makedirs(source)
    338 
    339  common_files = [h, iid]
    340  if tlb != 'none':
    341    # Not all projects use tlb files.
    342    common_files += [tlb]
    343  else:
    344    tlb = None
    345 
    346  if dlldata != 'none':
    347    # Not all projects use dlldta files.
    348    common_files += [dlldata]
    349  else:
    350    dlldata = None
    351 
    352  # Not all projects use proxy files
    353  if proxy != 'none':
    354    # Not all projects use proxy files.
    355    common_files += [proxy]
    356  else:
    357    proxy = None
    358 
    359  for source_file in common_files:
    360    file_path = os.path.join(source, source_file)
    361    if not os.path.isfile(file_path):
    362      source_exists = False
    363      if sys.platform != 'win32':
    364        print('File %s needs to be generated from Windows first' % file_path)
    365        return 1
    366 
    367      # Either this is a brand new IDL file that does not have outputs under
    368      # third_party\win_build_output\midl or the file is (unexpectedly) missing.
    369      # We create an empty file for now. The rest of the machinery below will
    370      # then generate the correctly populated file using the MIDL compiler and
    371      # instruct the developer to copy that file under
    372      # third_party\win_build_output\midl.
    373      open(file_path, 'wb').close()
    374    shutil.copy(file_path, outdir)
    375 
    376  if dynamic_guids != 'none':
    377    assert '=' in dynamic_guids
    378    if dynamic_guids.startswith("ignore_proxy_stub,"):
    379      # TODO(ganesh): The custom proxy/stub file ("_p.c") is not generated
    380      # correctly for dynamic IIDs (but correctly if there are only dynamic
    381      # CLSIDs). The proxy/stub lookup functions generated by MIDL.exe within
    382      # "_p.c" rely on a sorted set of vtable lists, which we are not currently
    383      # regenerating. At the moment, no project in Chromium that uses dynamic
    384      # IIDs is relying on the custom proxy/stub file. So for now, if
    385      # |dynamic_guids| is prefixed with "ignore_proxy_stub,", we exclude the
    386      # custom proxy/stub file from the directory comparisons.
    387      common_files.remove(proxy)
    388      dynamic_guids = dynamic_guids.split("ignore_proxy_stub,", 1)[1]
    389    dynamic_guids = re.sub('PLACEHOLDER-GUID-', '', dynamic_guids, flags=re.I)
    390    dynamic_guids = dynamic_guids.split(',')
    391    dynamic_guids = dict(s.split('=') for s in dynamic_guids)
    392    uuid5_substitutions(dynamic_guids)
    393    dynamic_guids_bytes = {
    394        k.encode('utf-8'): v.encode('utf-8')
    395        for k, v in dynamic_guids.items()
    396    }
    397    if source_exists:
    398      overwrite_guids(*(os.path.join(outdir, file) if file else None
    399                        for file in [h, iid, proxy, tlb]),
    400                      dynamic_guids=dynamic_guids_bytes)
    401  else:
    402    dynamic_guids = None
    403 
    404  # On non-Windows, that's all we can do.
    405  if sys.platform != 'win32':
    406    return 0
    407 
    408  idl_template = None
    409  if dynamic_guids:
    410    idl_template = idl
    411 
    412    # posixpath is used here to keep the MIDL-generated files with a uniform
    413    # separator of '/' instead of mixed '/' and '\\'.
    414    idl = posixpath.join(
    415        outdir,
    416        os.path.splitext(os.path.basename(idl_template))[0] + '.idl')
    417 
    418    # |idl_template| can contain one or more occurrences of guids that are
    419    # substituted with |dynamic_guids|, and then MIDL is run on the substituted
    420    # IDL file.
    421    generate_idl_from_template(idl_template, dynamic_guids_bytes, idl)
    422 
    423  # On Windows, run midl.exe on the input and check that its outputs are
    424  # identical to the checked-in outputs (after replacing guids if
    425  # |dynamic_guids| is specified).
    426 
    427  # Read the environment block from the file. This is stored in the format used
    428  # by CreateProcess. Drop last 2 NULs, one for list terminator, one for
    429  # trailing vs. separator.
    430  env_pairs = open(arch).read()[:-2].split('\0')
    431  env_dict = dict([item.split('=', 1) for item in env_pairs])
    432 
    433  # Extract the /D options and send them to the preprocessor.
    434  preprocessor_options = '-E -nologo -Wno-nonportable-include-path'
    435  preprocessor_options += ''.join(
    436      [' ' + flag for flag in flags if flag.startswith('/D')])
    437  args = ['midl', '/nologo'] + list(flags) + (['/tlb', tlb] if tlb else []) + [
    438      '/h', h
    439  ] + (['/dlldata', dlldata] if dlldata else []) + ['/iid', iid] + (
    440      ['/proxy', proxy] if proxy else
    441      []) + ['/cpp_cmd', clang, '/cpp_opt', preprocessor_options, idl]
    442 
    443  returncode, midl_output_dir = run_midl(args, env_dict)
    444  if returncode != 0:
    445    return returncode
    446 
    447  # Now compare the output in midl_output_dir to the copied-over outputs.
    448  _, mismatch, errors = filecmp.cmpfiles(midl_output_dir, outdir, common_files)
    449  assert not errors
    450 
    451  if mismatch:
    452    print('midl.exe output different from files in %s, see %s' %
    453          (outdir, midl_output_dir))
    454    for f in mismatch:
    455      if f.endswith('.tlb'): continue
    456      fromfile = os.path.join(outdir, f)
    457      tofile = os.path.join(midl_output_dir, f)
    458      print(''.join(
    459          difflib.unified_diff(
    460              io.open(fromfile).readlines(),
    461              io.open(tofile).readlines(), fromfile, tofile)))
    462 
    463    if dynamic_guids:
    464      # |idl_template| can contain one or more occurrences of guids prefixed
    465      # with 'PLACEHOLDER-GUID-'. We first remove the extraneous
    466      # 'PLACEHOLDER-GUID-' prefix and then run MIDL on the substituted IDL
    467      # file.
    468      # No guid substitutions are done at this point, because we want to compile
    469      # with the placeholder guids and then instruct the user to copy the output
    470      # over to |source| which is typically src\third_party\win_build_output\.
    471      # In future runs, the placeholder guids in |source| are replaced with the
    472      # guids specified in |dynamic_guids|.
    473      generate_idl_from_template(idl_template, None, idl)
    474      returncode, midl_output_dir = run_midl(args, env_dict)
    475      if returncode != 0:
    476        return returncode
    477 
    478    print('To rebaseline:')
    479    print(r'  copy /y %s\* %s' % (midl_output_dir, source))
    480    return 1
    481 
    482  return 0
    483 
    484 
    485 if __name__ == '__main__':
    486  sys.exit(main(*sys.argv[1:]))