tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ml.py (11861B)


      1 #!/usr/bin/env python3
      2 # Copyright 2018 The Chromium Authors
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 """Wraps ml.exe or ml64.exe and postprocesses the output to be deterministic.
      6 Sets timestamp in .obj file to 0, hence incompatible with link.exe /incremental.
      7 
      8 Use by prefixing the ml(64).exe invocation with this script:
      9    python ml.py ml.exe [args...]"""
     10 
     11 import array
     12 import collections
     13 import struct
     14 import subprocess
     15 import sys
     16 
     17 
     18 class Struct(object):
     19  """A thin wrapper around the struct module that returns a namedtuple"""
     20 
     21  def __init__(self, name, *args):
     22    """Pass the name of the return type, and then an interleaved list of
     23    format strings as used by the struct module and of field names."""
     24    self.fmt = '<' + ''.join(args[0::2])
     25    self.type = collections.namedtuple(name, args[1::2])
     26 
     27  def pack_into(self, buffer, offset, data):
     28    return struct.pack_into(self.fmt, buffer, offset, *data)
     29 
     30  def unpack_from(self, buffer, offset=0):
     31    return self.type(*struct.unpack_from(self.fmt, buffer, offset))
     32 
     33  def size(self):
     34    return struct.calcsize(self.fmt)
     35 
     36 
     37 def Subtract(nt, **kwargs):
     38  """Subtract(nt, f=2) returns a new namedtuple with 2 subtracted from nt.f"""
     39  return nt._replace(**{k: getattr(nt, k) - v for k, v in kwargs.items()})
     40 
     41 
def MakeDeterministic(objdata):
 """Returns a deterministic copy of an object file produced by ml(64).exe.

 Args:
   objdata: the raw contents of the COFF .obj file (anything array.array('b')
     accepts as an initializer, e.g. bytes).

 Returns:
   The rewritten object file as a byte string: the COFF header timestamp is
   zeroed and the .debug$S section (its header, its data, and its symbol
   table entry plus aux entry) is removed, with file offsets, section
   numbers, the symbol count and relocation symbol indices adjusted to match.

 Raises:
   AssertionError: if the input violates one of the layout assumptions listed
     below. These checks are plain `assert`s, so they are skipped when Python
     runs with -O.
 """
 # Takes data produced by ml(64).exe (without any special flags) and
 # 1. Sets the timestamp to 0
 # 2. Strips the .debug$S section (which contains an unwanted absolute path)

 # This makes several assumptions about ml's output:
 # - Section data is in the same order as the corresponding section headers:
 #   section headers preceding the .debug$S section header have their data
 #   preceding the .debug$S section data; likewise for section headers
 #   following the .debug$S section.
 # - The .debug$S section contains only the absolute path to the obj file and
 #   nothing else, in particular there's only a single entry in the symbol
 #   table referring to the .debug$S section.
 # - There are no COFF line number entries.
 # - There's no IMAGE_SYM_CLASS_CLR_TOKEN symbol.
 # These seem to hold in practice; if they stop holding this script needs to
 # become smarter.

 objdata = array.array('b', objdata)  # Writable, e.g. via struct.pack_into.

 # Read coff header (20 bytes, at offset 0).
 COFFHEADER = Struct('COFFHEADER', 'H', 'Machine', 'H', 'NumberOfSections',
                     'I', 'TimeDateStamp', 'I', 'PointerToSymbolTable', 'I',
                     'NumberOfSymbols', 'H', 'SizeOfOptionalHeader', 'H',
                     'Characteristics')
 coff_header = COFFHEADER.unpack_from(objdata)
 assert coff_header.SizeOfOptionalHeader == 0  # Only set for binaries.

 # Read section headers following coff header (40 bytes each), remembering
 # the 0-based index of the one and only .debug$S section.
 SECTIONHEADER = Struct('SECTIONHEADER', '8s', 'Name', 'I', 'VirtualSize', 'I',
                        'VirtualAddress', 'I', 'SizeOfRawData', 'I',
                        'PointerToRawData', 'I', 'PointerToRelocations', 'I',
                        'PointerToLineNumbers', 'H', 'NumberOfRelocations',
                        'H', 'NumberOfLineNumbers', 'I', 'Characteristics')
 section_headers = []
 debug_section_index = -1
 for i in range(0, coff_header.NumberOfSections):
   section_header = SECTIONHEADER.unpack_from(objdata,
                                              offset=COFFHEADER.size() +
                                              i * SECTIONHEADER.size())
   assert not section_header[0].startswith(b'/')  # Support short names only.
   section_headers.append(section_header)

   if section_header.Name == b'.debug$S':
     assert debug_section_index == -1
     debug_section_index = i
 assert debug_section_index != -1

 # File offset of the first byte after the section header table.
 data_start = COFFHEADER.size() + len(section_headers) * SECTIONHEADER.size()

 # Verify the .debug$S section looks like we expect.
 assert section_headers[debug_section_index].Name == b'.debug$S'
 assert section_headers[debug_section_index].VirtualSize == 0
 assert section_headers[debug_section_index].VirtualAddress == 0
 debug_size = section_headers[debug_section_index].SizeOfRawData
 debug_offset = section_headers[debug_section_index].PointerToRawData
 assert section_headers[debug_section_index].PointerToRelocations == 0
 assert section_headers[debug_section_index].PointerToLineNumbers == 0
 assert section_headers[debug_section_index].NumberOfRelocations == 0
 assert section_headers[debug_section_index].NumberOfLineNumbers == 0

 # Make sure sections in front of .debug$S have their data preceding it.
 for header in section_headers[:debug_section_index]:
   assert header.PointerToRawData < debug_offset
   assert header.PointerToRelocations < debug_offset
   assert header.PointerToLineNumbers < debug_offset

 # Make sure sections after .debug$S have their data following it.
 for header in section_headers[debug_section_index + 1:]:
   # Their raw data must start after the .debug$S data, and they must have no
   # relocations or line number records (relocations in these sections are
   # not rewritten further down):
   assert header.PointerToRawData > debug_offset
   assert header.PointerToRelocations == 0
   assert header.PointerToLineNumbers == 0

 # Make sure the first non-empty section's data starts right after the section
 # headers.
 for section_header in section_headers:
   if section_header.PointerToRawData == 0:
     assert section_header.PointerToRelocations == 0
     assert section_header.PointerToLineNumbers == 0
     continue
   assert section_header.PointerToRawData == data_start
   break

 # Make sure the symbol table (and hence, string table) appear after the last
 # section:
 assert (
     coff_header.PointerToSymbolTable >=
     section_headers[-1].PointerToRawData + section_headers[-1].SizeOfRawData)

 # The symbol table contains a symbol for the no-longer-present .debug$S
 # section. If we leave it there, lld-link will complain:
 #
 #    lld-link: error: .debug$S should not refer to non-existent section 5
 #
 # so we need to remove that symbol table entry as well. This shifts symbol
 # entries around and we need to update symbol table indices in:
 # - relocations
 # - line number records (never present)
 # - one aux symbol entry (IMAGE_SYM_CLASS_CLR_TOKEN; not present in ml output)
 SYM = Struct(
     'SYM',
     '8s',
     'Name',
     'I',
     'Value',
     'h',
     'SectionNumber',  # Note: Signed!
     'H',
     'Type',
     'B',
     'StorageClass',
     'B',
     'NumberOfAuxSymbols')
 # Walk the symbol table: find the .debug$S symbol (its index ends up in
 # debug_sym) and renumber, in place, symbols that live in later sections.
 i = 0
 debug_sym = -1
 while i < coff_header.NumberOfSymbols:
   sym_offset = coff_header.PointerToSymbolTable + i * SYM.size()
   sym = SYM.unpack_from(objdata, sym_offset)

   # 107 is IMAGE_SYM_CLASS_CLR_TOKEN, which has aux entry "CLR Token
   # Definition", which contains a symbol index. Check it's never present.
   assert sym.StorageClass != 107

   # Note: sym.SectionNumber is 1-based, debug_section_index is 0-based.
   if sym.SectionNumber - 1 == debug_section_index:
     assert debug_sym == -1, 'more than one .debug$S symbol found'
     debug_sym = i
     # Make sure the .debug$S symbol looks like we expect.
     # In particular, it should have exactly one aux symbol.
     assert sym.Name == b'.debug$S'
     assert sym.Value == 0
     assert sym.Type == 0
     assert sym.StorageClass == 3  # 3 is IMAGE_SYM_CLASS_STATIC.
     assert sym.NumberOfAuxSymbols == 1
   # The .debug$S section itself was handled above, so with a 1-based
   # SectionNumber this matches exactly the sections after .debug$S.
   # Non-positive section numbers never match, as debug_section_index >= 0.
   elif sym.SectionNumber > debug_section_index:
     sym = Subtract(sym, SectionNumber=1)
     SYM.pack_into(objdata, sym_offset, sym)
   # Aux entries occupy symbol table slots too; step over them.
   i += 1 + sym.NumberOfAuxSymbols
 assert debug_sym != -1, '.debug$S symbol not found'

 # Note: Usually the .debug$S section is the last, but for files saying
 # `includelib foo.lib`, like safe_terminate_process.asm in 32-bit builds,
 # this isn't true: .drectve is after .debug$S.

 # Update symbol table indices in relocations.
 # There are a few processor types that have one or two relocation types
 # where SymbolTableIndex has a different meaning, but not for x86.
 # Only sections before .debug$S are visited; sections after it were asserted
 # above to have no relocation data.
 REL = Struct('REL', 'I', 'VirtualAddress', 'I', 'SymbolTableIndex', 'H',
              'Type')
 for header in section_headers[0:debug_section_index]:
   for j in range(0, header.NumberOfRelocations):
     rel_offset = header.PointerToRelocations + j * REL.size()
     rel = REL.unpack_from(objdata, rel_offset)
     assert rel.SymbolTableIndex != debug_sym
     if rel.SymbolTableIndex > debug_sym:
       # Two slots disappear: the .debug$S symbol and its single aux entry.
       rel = Subtract(rel, SymbolTableIndex=2)
       REL.pack_into(objdata, rel_offset, rel)

 # Update symbol table indices in line numbers -- just check they don't exist.
 for header in section_headers:
   assert header.NumberOfLineNumbers == 0

 # Now that all indices are updated, remove the symbol table entry referring to
 # .debug$S and its aux entry. The symbol table lies after all section data
 # (asserted above), so no offset used below is affected by this deletion.
 del objdata[coff_header.PointerToSymbolTable +
             debug_sym * SYM.size():coff_header.PointerToSymbolTable +
             (debug_sym + 2) * SYM.size()]

 # Now we know that it's safe to write out the input data, with just the
 # timestamp overwritten to 0, the .debug$S section header cut out and the
 # .debug$S section's data cut out. Offsets in section headers before
 # .debug$S are decremented by the size of one section header; offsets in
 # section headers after it, and the symbol table offset, are decremented by
 # the size of one section header plus the size of the removed section data.
 # (The COFF spec only requires on-disk sections to be aligned in image files,
 # for obj files it's not required. If that wasn't the case, deleting slices
 # of data would not generally be safe.)

 # Update section offsets and remove .debug$S section data.
 # The headers are rewritten at their current positions here; the .debug$S
 # header itself is only deleted at the very end.
 for i in range(0, debug_section_index):
   header = section_headers[i]
   if header.SizeOfRawData:
     header = Subtract(header, PointerToRawData=SECTIONHEADER.size())
   if header.NumberOfRelocations:
     header = Subtract(header, PointerToRelocations=SECTIONHEADER.size())
   if header.NumberOfLineNumbers:
     header = Subtract(header, PointerToLineNumbers=SECTIONHEADER.size())
   SECTIONHEADER.pack_into(objdata,
                           COFFHEADER.size() + i * SECTIONHEADER.size(),
                           header)
 for i in range(debug_section_index + 1, len(section_headers)):
   header = section_headers[i]
   shift = SECTIONHEADER.size() + debug_size
   if header.SizeOfRawData:
     header = Subtract(header, PointerToRawData=shift)
   if header.NumberOfRelocations:
     header = Subtract(header, PointerToRelocations=shift)
   if header.NumberOfLineNumbers:
     header = Subtract(header, PointerToLineNumbers=shift)
   SECTIONHEADER.pack_into(objdata,
                           COFFHEADER.size() + i * SECTIONHEADER.size(),
                           header)

 del objdata[debug_offset:debug_offset + debug_size]

 # Finally, remove .debug$S section header and update coff header.
 coff_header = coff_header._replace(TimeDateStamp=0)
 coff_header = Subtract(coff_header,
                        NumberOfSections=1,
                        PointerToSymbolTable=SECTIONHEADER.size() + debug_size,
                        NumberOfSymbols=2)
 COFFHEADER.pack_into(objdata, 0, coff_header)

 del objdata[COFFHEADER.size() +
             debug_section_index * SECTIONHEADER.size():COFFHEADER.size() +
             (debug_section_index + 1) * SECTIONHEADER.size()]

 # All done!
 # array.tostring() is the Python 2 spelling of tobytes().
 if sys.version_info.major == 2:
   return objdata.tostring()
 else:
   return objdata.tobytes()
    265 
    266 
    267 def main():
    268  ml_result = subprocess.call(sys.argv[1:])
    269  if ml_result != 0:
    270    return ml_result
    271 
    272  objfile = None
    273  for i in range(1, len(sys.argv)):
    274    if sys.argv[i].startswith('/Fo'):
    275      objfile = sys.argv[i][len('/Fo'):]
    276  assert objfile, 'failed to find ml output'
    277 
    278  with open(objfile, 'rb') as f:
    279    objdata = f.read()
    280  objdata = MakeDeterministic(objdata)
    281  with open(objfile, 'wb') as f:
    282    f.write(objdata)
    283 
    284 
# Run as a script: use main()'s return value as the process exit status.
if __name__ == '__main__':
 sys.exit(main())