tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

convert_dex_profile.py (19618B)


      1 #!/usr/bin/env vpython3
      2 #
      3 # Copyright 2018 The Chromium Authors
      4 # Use of this source code is governed by a BSD-style license that can be
      5 # found in the LICENSE file.
      6 
      7 import argparse
      8 import collections
      9 import functools
     10 import logging
     11 import re
     12 import subprocess
     13 import sys
     14 
# --- Patterns applied to lines of dexdump output (see ProcessDex) ---

# A single-quoted class descriptor such as 'Lfoo/Bar;'. Captures the name
# without the leading 'L' and the trailing ';'.
DEX_CLASS_NAME_RE = re.compile(r'\'L(?P<class_name>[^;]+);\'')
# The first single-quoted token on the line; captures the text between the
# quotes.
DEX_METHOD_NAME_RE = re.compile(r'\'(?P<method_name>[^\']+)\'')
# A single-quoted method signature in type descriptor format, e.g. '(I)V'.
# Captures the parameter list and the return type separately.
DEX_METHOD_TYPE_RE = re.compile( # type descriptor method signature re
    r'\''
    r'\('
    r'(?P<method_params>[^)]*)'
    r'\)'
    r'(?P<method_return_type>[^\']+)'
    r'\'')
# A "line=<number>" entry; captures the decimal line number.
DEX_METHOD_LINE_NR_RE = re.compile(r'line=(?P<line_number>\d+)')

# A method line of a profile: one or more tag characters followed by
# <class descriptor>-><method name>(<params>)<return type>.
PROFILE_METHOD_RE = re.compile(
    r'(?P<tags>[HSP]+)' # tags such as H/S/P
    r'(?P<class_name>L[^;]+;)' # class name in type descriptor format
    r'->(?P<method_name>[^(]+)'
    r'\((?P<method_params>[^)]*)\)'
    r'(?P<method_return_type>.+)')

# A class line of a proguard mapping file: "<original> -> <obfuscated>:".
PROGUARD_CLASS_MAPPING_RE = re.compile(
    r'(?P<original_name>[^ ]+)'
    r' -> '
    r'(?P<obfuscated_name>[^:]+):')
# A method line of a proguard mapping file. Field mappings have no
# parenthesised parameter list and therefore do not match.
PROGUARD_METHOD_MAPPING_RE = re.compile(
    # line_start:line_end: (optional)
    r'((?P<line_start>\d+):(?P<line_end>\d+):)?'
    r'(?P<return_type>[^ ]+)' # original method return type
    # original method class name (if exists)
    r' (?:(?P<original_method_class>[a-zA-Z_\d.$]+)\.)?'
    r'(?P<original_method_name>[^.\(]+)'
    r'\((?P<params>[^\)]*)\)' # original method params
    r'(?:[^ ]*)' # original method line numbers (ignored)
    r' -> '
    r'(?P<obfuscated_name>.+)') # obfuscated method name

# One type in type descriptor format: any number of leading '[' (array
# dimensions) followed by either a class descriptor (captured as class_name)
# or a single primitive type letter (class_name group is None in that case).
TYPE_DESCRIPTOR_RE = re.compile(
    r'(?P<brackets>\[*)'
    r'(?:'
    r'(?P<class_name>L[^;]+;)'
    r'|'
    r'[VZBSCIJFD]'
    r')')

# Maps primitive type names in dot notation to their type descriptor letter.
# The empty string maps to itself so that an empty parameter list converts to
# an empty descriptor list.
DOT_NOTATION_MAP = {
    '': '',
    'boolean': 'Z',
    'byte': 'B',
    'void': 'V',
    'short': 'S',
    'char': 'C',
    'int': 'I',
    'long': 'J',
    'float': 'F',
    'double': 'D'
}
     69 
     70 
     71 @functools.total_ordering
     72 class Method:
     73  def __init__(self, name, class_name, param_types=None, return_type=None):
     74    self.name = name
     75    self.class_name = class_name
     76    self.param_types = param_types
     77    self.return_type = return_type
     78 
     79  def __str__(self):
     80    return '{}->{}({}){}'.format(self.class_name, self.name,
     81        self.param_types or '', self.return_type or '')
     82 
     83  def __repr__(self):
     84    return 'Method<{}->{}({}){}>'.format(self.class_name, self.name,
     85        self.param_types or '', self.return_type or '')
     86 
     87  @staticmethod
     88  def serialize(method):
     89    return (method.class_name, method.name, method.param_types,
     90            method.return_type)
     91 
     92  def __eq__(self, other):
     93    return self.serialize(self) == self.serialize(other)
     94 
     95  def __lt__(self, other):
     96    return self.serialize(self) < self.serialize(other)
     97 
     98  def __hash__(self):
     99    # only hash name and class_name since other fields may not be set yet.
    100    return hash((self.name, self.class_name))
    101 
    102 
    103 class Class:
    104  def __init__(self, name):
    105    self.name = name
    106    self._methods = []
    107 
    108  def AddMethod(self, method, line_numbers):
    109    self._methods.append((method, set(line_numbers)))
    110 
    111  def FindMethodsAtLine(self, method_name, line_start, line_end=None):
    112    """Searches through dex class for a method given a name and line numbers
    113 
    114    The dex maps methods to line numbers, this method, given the a method name
    115    in this class as well as a start line and an optional end line (which act as
    116    hints as to which function in the class is being looked for), returns a list
    117    of possible matches (or none if none are found).
    118 
    119    Args:
    120      method_name: name of method being searched for
    121      line_start: start of hint range for lines in this method
    122      line_end: end of hint range for lines in this method (optional)
    123 
    124    Returns:
    125      A list of Method objects that could match the hints given, or None if no
    126      method is found.
    127    """
    128    found_methods = []
    129    if line_end is None:
    130      hint_lines = set([line_start])
    131    else:
    132      hint_lines = set(range(line_start, line_end+1))
    133 
    134    named_methods = [(method, l) for method, l in self._methods
    135                     if method.name == method_name]
    136 
    137    if len(named_methods) == 1:
    138      return [method for method, l in named_methods]
    139    if len(named_methods) == 0:
    140      return None
    141 
    142    for method, line_numbers in named_methods:
    143      if not hint_lines.isdisjoint(line_numbers):
    144        found_methods.append(method)
    145 
    146    if len(found_methods) > 0:
    147      if len(found_methods) > 1:
    148        logging.warning('ambigous methods in dex %s at lines %s in class "%s"',
    149            found_methods, hint_lines, self.name)
    150      return found_methods
    151 
    152    for method, line_numbers in named_methods:
    153      if (max(hint_lines) >= min(line_numbers)
    154          and min(hint_lines) <= max(line_numbers)):
    155        found_methods.append(method)
    156 
    157    if len(found_methods) > 0:
    158      if len(found_methods) > 1:
    159        logging.warning('ambigous methods in dex %s at lines %s in class "%s"',
    160            found_methods, hint_lines, self.name)
    161      return found_methods
    162    logging.warning(
    163        'No method named "%s" in class "%s" is '
    164        'mapped to lines %s', method_name, self.name, hint_lines)
    165    return None
    166 
    167 
    168 class Profile:
    169  def __init__(self):
    170    # {Method: set(char)}
    171    self._methods = collections.defaultdict(set)
    172    self._classes = []
    173 
    174  def AddMethod(self, method, tags):
    175    for tag in tags:
    176      self._methods[method].add(tag)
    177 
    178  def AddClass(self, cls):
    179    self._classes.append(cls)
    180 
    181  def WriteToFile(self, path):
    182    with open(path, 'w') as output_profile:
    183      for cls in sorted(self._classes):
    184        output_profile.write(cls + '\n')
    185      for method in sorted(self._methods):
    186        tags = sorted(self._methods[method])
    187        line = '{}{}\n'.format(''.join(tags), str(method))
    188        output_profile.write(line)
    189 
    190 
    191 class ProguardMapping:
    192  def __init__(self):
    193    # {Method: set(Method)}
    194    self._method_mapping = collections.defaultdict(set)
    195    # {String: String} String is class name in type descriptor format
    196    self._class_mapping = dict()
    197 
    198  def AddMethodMapping(self, from_method, to_method):
    199    self._method_mapping[from_method].add(to_method)
    200 
    201  def AddClassMapping(self, from_class, to_class):
    202    self._class_mapping[from_class] = to_class
    203 
    204  def GetMethodMapping(self, from_method):
    205    return self._method_mapping.get(from_method)
    206 
    207  def GetClassMapping(self, from_class):
    208    return self._class_mapping.get(from_class, from_class)
    209 
    210  def MapTypeDescriptor(self, type_descriptor):
    211    match = TYPE_DESCRIPTOR_RE.search(type_descriptor)
    212    assert match is not None
    213    class_name = match.group('class_name')
    214    if class_name is not None:
    215      return match.group('brackets') + self.GetClassMapping(class_name)
    216    # just a native type, return as is
    217    return match.group()
    218 
    219  def MapTypeDescriptorList(self, type_descriptor_list):
    220    return TYPE_DESCRIPTOR_RE.sub(
    221        lambda match: self.MapTypeDescriptor(match.group()),
    222        type_descriptor_list)
    223 
    224 
    225 class MalformedLineException(Exception):
    226  def __init__(self, message, line_number):
    227    super().__init__(message)
    228    self.message = message
    229    self.line_number = line_number
    230 
    231  def __str__(self):
    232    return self.message + ' at line {}'.format(self.line_number)
    233 
    234 
class MalformedProguardMappingException(MalformedLineException):
  """Raised when a line of the proguard mapping file cannot be parsed."""
  pass
    237 
    238 
class MalformedProfileException(MalformedLineException):
  """Raised when a line of the input profile cannot be parsed."""
  pass
    241 
    242 
    243 def _RunDexDump(dexdump_path, dex_file_path):
    244  return subprocess.check_output([dexdump_path,
    245                                  dex_file_path]).decode('utf-8').splitlines()
    246 
    247 
    248 def _ReadFile(file_path):
    249  with open(file_path, 'r') as f:
    250    return f.readlines()
    251 
    252 
    253 def _ToTypeDescriptor(dot_notation):
    254  """Parses a dot notation type and returns it in type descriptor format
    255 
    256  eg:
    257  org.chromium.browser.ChromeActivity -> Lorg/chromium/browser/ChromeActivity;
    258  boolean -> Z
    259  int[] -> [I
    260 
    261  Args:
    262    dot_notation: trimmed string with a single type in dot notation format
    263 
    264  Returns:
    265    A string with the type in type descriptor format
    266  """
    267  dot_notation = dot_notation.strip()
    268  prefix = ''
    269  while dot_notation.endswith('[]'):
    270    prefix += '['
    271    dot_notation = dot_notation[:-2]
    272  if dot_notation in DOT_NOTATION_MAP:
    273    return prefix + DOT_NOTATION_MAP[dot_notation]
    274  return prefix + 'L' + dot_notation.replace('.', '/') + ';'
    275 
    276 
    277 def _DotNotationListToTypeDescriptorList(dot_notation_list_string):
    278  """Parses a param list of dot notation format and returns it in type
    279  descriptor format
    280 
    281  eg:
    282  org.chromium.browser.ChromeActivity,boolean,int[] ->
    283      Lorg/chromium/browser/ChromeActivity;Z[I
    284 
    285  Args:
    286    dot_notation_list_string: single string with multiple comma separated types
    287                              in dot notation format
    288 
    289  Returns:
    290    A string with the param list in type descriptor format
    291  """
    292  return ''.join(_ToTypeDescriptor(param) for param in
    293      dot_notation_list_string.split(','))
    294 
    295 
    296 def ProcessDex(dex_dump):
    297  """Parses dexdump output returning a dict of class names to Class objects
    298 
    299  Parses output of the dexdump command on a dex file and extracts information
    300  about classes and their respective methods and which line numbers a method is
    301  mapped to.
    302 
    303  Methods that are not mapped to any line number are ignored and not listed
    304  inside their respective Class objects.
    305 
    306  Args:
    307    dex_dump: An array of lines of dexdump output
    308 
    309  Returns:
    310    A dict that maps from class names in type descriptor format (but without the
    311    surrounding 'L' and ';') to Class objects.
    312  """
    313  # class_name: Class
    314  classes_by_name = {}
    315  current_class = None
    316  current_method = None
    317  reading_positions = False
    318  reading_methods = False
    319  method_line_numbers = []
    320  for line in dex_dump:
    321    line = line.strip()
    322    if line.startswith('Class descriptor'):
    323      # New class started, no longer reading methods.
    324      reading_methods = False
    325      current_class = Class(DEX_CLASS_NAME_RE.search(line).group('class_name'))
    326      classes_by_name[current_class.name] = current_class
    327    elif (line.startswith('Direct methods')
    328          or line.startswith('Virtual methods')):
    329      reading_methods = True
    330    elif reading_methods and line.startswith('name'):
    331      assert current_class is not None
    332      current_method = Method(
    333          DEX_METHOD_NAME_RE.search(line).group('method_name'),
    334          "L" + current_class.name + ";")
    335    elif reading_methods and line.startswith('type'):
    336      assert current_method is not None
    337      match = DEX_METHOD_TYPE_RE.search(line)
    338      current_method.param_types = match.group('method_params')
    339      current_method.return_type = match.group('method_return_type')
    340    elif line.startswith('positions'):
    341      assert reading_methods
    342      reading_positions = True
    343      method_line_numbers = []
    344    elif reading_positions and line.startswith('0x'):
    345      line_number = DEX_METHOD_LINE_NR_RE.search(line).group('line_number')
    346      method_line_numbers.append(int(line_number))
    347    elif reading_positions and line.startswith('locals'):
    348      if len(method_line_numbers) > 0:
    349        current_class.AddMethod(current_method, method_line_numbers)
    350      # finished reading method line numbers
    351      reading_positions = False
    352  return classes_by_name
    353 
    354 
    355 def ProcessProguardMapping(proguard_mapping_lines, dex):
    356  """Parses a proguard mapping file
    357 
    358  This takes proguard mapping file lines and then uses the obfuscated dex to
    359  create a mapping of unobfuscated methods to obfuscated ones and vice versa.
    360 
    361  The dex is used because the proguard mapping file only has the name of the
    362  obfuscated methods but not their signature, thus the dex is read to look up
    363  which method with a specific name was mapped to the lines mentioned in the
    364  proguard mapping file.
    365 
    366  Args:
    367    proguard_mapping_lines: Array of strings, each is a line from the proguard
    368                            mapping file (in order).
    369    dex: a dict of class name (in type descriptor format but without the
    370         enclosing 'L' and ';') to a Class object.
    371  Returns:
    372    Two dicts the first maps from obfuscated methods to a set of non-obfuscated
    373    ones. It also maps the obfuscated class names to original class names, both
    374    in type descriptor format (with the enclosing 'L' and ';')
    375  """
    376  mapping = ProguardMapping()
    377  reverse_mapping = ProguardMapping()
    378  to_be_obfuscated = []
    379  current_class_orig = None
    380  current_class_obfs = None
    381  for index, line in enumerate(proguard_mapping_lines):
    382    if line.strip() == '':
    383      continue
    384    if not line.startswith(' '):
    385      match = PROGUARD_CLASS_MAPPING_RE.search(line)
    386      if match is None:
    387        raise MalformedProguardMappingException(
    388            'Malformed class mapping', index)
    389      current_class_orig = match.group('original_name')
    390      current_class_obfs = match.group('obfuscated_name')
    391      mapping.AddClassMapping(_ToTypeDescriptor(current_class_obfs),
    392                              _ToTypeDescriptor(current_class_orig))
    393      reverse_mapping.AddClassMapping(_ToTypeDescriptor(current_class_orig),
    394                                      _ToTypeDescriptor(current_class_obfs))
    395      continue
    396 
    397    assert current_class_orig is not None
    398    assert current_class_obfs is not None
    399    line = line.strip()
    400    match = PROGUARD_METHOD_MAPPING_RE.search(line)
    401    # check if is a method mapping (we ignore field mappings)
    402    if match is not None:
    403      # check if this line is an inlining by reading ahead 1 line.
    404      if index + 1 < len(proguard_mapping_lines):
    405        next_match = PROGUARD_METHOD_MAPPING_RE.search(
    406            proguard_mapping_lines[index+1].strip())
    407        if (next_match and match.group('line_start') is not None
    408            and next_match.group('line_start') == match.group('line_start')
    409            and next_match.group('line_end') == match.group('line_end')):
    410          continue # This is an inlining, skip
    411 
    412      original_method = Method(
    413          match.group('original_method_name'),
    414          _ToTypeDescriptor(
    415              match.group('original_method_class') or current_class_orig),
    416          _DotNotationListToTypeDescriptorList(match.group('params')),
    417          _ToTypeDescriptor(match.group('return_type')))
    418 
    419      if match.group('line_start') is not None:
    420        obfs_methods = (dex[current_class_obfs.replace('.', '/')]
    421            .FindMethodsAtLine(
    422                match.group('obfuscated_name'),
    423                int(match.group('line_start')),
    424                int(match.group('line_end'))))
    425 
    426        if obfs_methods is None:
    427          continue
    428 
    429        for obfs_method in obfs_methods:
    430          mapping.AddMethodMapping(obfs_method, original_method)
    431          reverse_mapping.AddMethodMapping(original_method, obfs_method)
    432      else:
    433        to_be_obfuscated.append(
    434            (original_method, match.group('obfuscated_name')))
    435 
    436  for original_method, obfuscated_name in to_be_obfuscated:
    437    obfuscated_method = Method(
    438        obfuscated_name,
    439        reverse_mapping.GetClassMapping(original_method.class_name),
    440        reverse_mapping.MapTypeDescriptorList(original_method.param_types),
    441        reverse_mapping.MapTypeDescriptor(original_method.return_type))
    442    mapping.AddMethodMapping(obfuscated_method, original_method)
    443    reverse_mapping.AddMethodMapping(original_method, obfuscated_method)
    444  return mapping, reverse_mapping
    445 
    446 
    447 def ProcessProfile(input_profile, proguard_mapping):
    448  """Parses an android profile and uses the proguard mapping to (de)obfuscate it
    449 
    450  This takes the android profile lines and for each method or class for the
    451  profile, it uses the mapping to either obfuscate or deobfuscate (based on the
    452  provided mapping) and returns a Profile object that stores this information.
    453 
    454  Args:
    455    input_profile: array of lines of the input profile
    456    proguard_mapping: a proguard mapping that would map from the classes and
    457                      methods in the input profile to the classes and methods
    458                      that should be in the output profile.
    459 
    460  Returns:
    461    A Profile object that stores the information (ie list of mapped classes and
    462    methods + tags)
    463  """
    464  profile = Profile()
    465  for index, line in enumerate(input_profile):
    466    line = line.strip()
    467    if line.startswith('L'):
    468      profile.AddClass(proguard_mapping.GetClassMapping(line))
    469      continue
    470    match = PROFILE_METHOD_RE.search(line)
    471    if not match:
    472      raise MalformedProfileException("Malformed line", index)
    473 
    474    method = Method(
    475        match.group('method_name'),
    476        match.group('class_name'),
    477        match.group('method_params'),
    478        match.group('method_return_type'))
    479 
    480    mapped_methods = proguard_mapping.GetMethodMapping(method)
    481    if mapped_methods is None:
    482      logging.warning('No method matching "%s" has been found in the proguard '
    483                      'mapping file', method)
    484      continue
    485 
    486    for original_method in mapped_methods:
    487      profile.AddMethod(original_method, match.group('tags'))
    488 
    489  return profile
    490 
    491 
    492 def ObfuscateProfile(nonobfuscated_profile, dex_file, proguard_mapping,
    493                     dexdump_path, output_filename):
    494  """Helper method for obfuscating a profile.
    495 
    496  Args:
    497    nonobfuscated_profile: a profile with nonobfuscated symbols.
    498    dex_file: path to the dex file matching the mapping.
    499    proguard_mapping: a mapping from nonobfuscated to obfuscated symbols used
    500      in the dex file.
    501    dexdump_path: path to the dexdump utility.
    502    output_filename: output filename in which to write the obfuscated profile.
    503  """
    504  dexinfo = ProcessDex(_RunDexDump(dexdump_path, dex_file))
    505  _, reverse_mapping = ProcessProguardMapping(
    506      _ReadFile(proguard_mapping), dexinfo)
    507  obfuscated_profile = ProcessProfile(
    508      _ReadFile(nonobfuscated_profile), reverse_mapping)
    509  obfuscated_profile.WriteToFile(output_filename)
    510 
    511 
    512 def main(args):
    513  parser = argparse.ArgumentParser()
    514  parser.add_argument(
    515      '--dexdump-path',
    516      required=True,
    517      help='Path to dexdump binary.')
    518  parser.add_argument(
    519      '--dex-path',
    520      required=True,
    521      help='Path to dex file corresponding to the proguard mapping file.')
    522  parser.add_argument(
    523      '--proguard-mapping-path',
    524      required=True,
    525      help='Path to input proguard mapping file corresponding to the dex file.')
    526  parser.add_argument(
    527      '--output-profile-path',
    528      required=True,
    529      help='Path to output profile.')
    530  parser.add_argument(
    531      '--input-profile-path',
    532      required=True,
    533      help='Path to output profile.')
    534  parser.add_argument(
    535      '--verbose',
    536      action='store_true',
    537      default=False,
    538      help='Print verbose output.')
    539  obfuscation = parser.add_mutually_exclusive_group(required=True)
    540  obfuscation.add_argument('--obfuscate', action='store_true',
    541      help='Indicates to output an obfuscated profile given a deobfuscated '
    542     'one.')
    543  obfuscation.add_argument('--deobfuscate', dest='obfuscate',
    544      action='store_false', help='Indicates to output a deobfuscated profile '
    545      'given an obfuscated one.')
    546  options = parser.parse_args(args)
    547 
    548  if options.verbose:
    549    log_level = logging.WARNING
    550  else:
    551    log_level = logging.ERROR
    552  logging.basicConfig(format='%(levelname)s: %(message)s', level=log_level)
    553 
    554  dex = ProcessDex(_RunDexDump(options.dexdump_path, options.dex_path))
    555  proguard_mapping, reverse_proguard_mapping = ProcessProguardMapping(
    556      _ReadFile(options.proguard_mapping_path), dex)
    557  if options.obfuscate:
    558    profile = ProcessProfile(
    559        _ReadFile(options.input_profile_path),
    560        reverse_proguard_mapping)
    561  else:
    562    profile = ProcessProfile(
    563        _ReadFile(options.input_profile_path),
    564        proguard_mapping)
    565  profile.WriteToFile(options.output_profile_path)
    566 
    567 
# Script entry point: pass the command-line arguments (without the program
# name) to main() when this file is executed directly.
if __name__ == '__main__':
  main(sys.argv[1:])