tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

manifest_utils.py (11254B)


      1 # Copyright 2019 The Chromium Authors
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 """Contains common helpers for working with Android manifests."""
      6 
      7 import hashlib
      8 import os
      9 import re
     10 import shlex
     11 import sys
     12 import xml.dom.minidom as minidom
     13 from xml.etree import ElementTree
     14 
     15 from util import build_utils
     16 import action_helpers  # build_utils adds //build to sys.path.
     17 
     18 ANDROID_NAMESPACE = 'http://schemas.android.com/apk/res/android'
     19 TOOLS_NAMESPACE = 'http://schemas.android.com/tools'
     20 DIST_NAMESPACE = 'http://schemas.android.com/apk/distribution'
     21 EMPTY_ANDROID_MANIFEST_PATH = os.path.abspath(
     22    os.path.join(os.path.dirname(__file__), '..', '..', 'AndroidManifest.xml'))
     23 # When normalizing for expectation matching, wrap these tags when they are long
     24 # or else they become very hard to read.
     25 _WRAP_CANDIDATES = (
     26    '<manifest',
     27    '<application',
     28    '<activity',
     29    '<provider',
     30    '<receiver',
     31    '<service',
     32 )
     33 # Don't wrap lines shorter than this.
     34 _WRAP_LINE_LENGTH = 100
     35 
     36 _xml_namespace_initialized = False
     37 
     38 
     39 def _RegisterElementTreeNamespaces():
     40  global _xml_namespace_initialized
     41  if _xml_namespace_initialized:
     42    return
     43  _xml_namespace_initialized = True
     44  ElementTree.register_namespace('android', ANDROID_NAMESPACE)
     45  ElementTree.register_namespace('tools', TOOLS_NAMESPACE)
     46  ElementTree.register_namespace('dist', DIST_NAMESPACE)
     47 
     48 
     49 def NamespacedGet(node, key):
     50  return node.get('{%s}%s' % (ANDROID_NAMESPACE, key))
     51 
     52 
     53 def NamespacedSet(node, key, value):
     54  node.set('{%s}%s' % (ANDROID_NAMESPACE, key), value)
     55 
     56 
     57 def ParseManifest(path):
     58  """Parses an AndroidManifest.xml using ElementTree.
     59 
     60  Registers required namespaces, creates application node if missing, adds any
     61  missing namespaces for 'android', 'tools' and 'dist'.
     62 
     63  Returns tuple of:
     64    doc: Root xml document.
     65    manifest_node: the <manifest> node.
     66    app_node: the <application> node.
     67  """
     68  _RegisterElementTreeNamespaces()
     69  doc = ElementTree.parse(path)
     70  # ElementTree.find does not work if the required tag is the root.
     71  if doc.getroot().tag == 'manifest':
     72    manifest_node = doc.getroot()
     73  else:
     74    manifest_node = doc.find('manifest')
     75  assert manifest_node is not None, 'Manifest is none for path ' + path
     76 
     77  app_node = doc.find('application')
     78  if app_node is None:
     79    app_node = ElementTree.SubElement(manifest_node, 'application')
     80 
     81  return doc, manifest_node, app_node
     82 
     83 
     84 def SaveManifest(doc, path):
     85  with action_helpers.atomic_output(path) as f:
     86    f.write(ElementTree.tostring(doc.getroot(), encoding='UTF-8'))
     87 
     88 
     89 def GetPackage(manifest_node):
     90  return manifest_node.get('package')
     91 
     92 
     93 def SetUsesSdk(manifest_node,
     94               target_sdk_version,
     95               min_sdk_version,
     96               max_sdk_version=None):
     97  uses_sdk_node = manifest_node.find('./uses-sdk')
     98  if uses_sdk_node is None:
     99    uses_sdk_node = ElementTree.SubElement(manifest_node, 'uses-sdk')
    100  NamespacedSet(uses_sdk_node, 'targetSdkVersion', target_sdk_version)
    101  NamespacedSet(uses_sdk_node, 'minSdkVersion', min_sdk_version)
    102  if max_sdk_version:
    103    NamespacedSet(uses_sdk_node, 'maxSdkVersion', max_sdk_version)
    104 
    105 
    106 def SetTargetApiIfUnset(manifest_node, target_sdk_version):
    107  uses_sdk_node = manifest_node.find('./uses-sdk')
    108  if uses_sdk_node is None:
    109    uses_sdk_node = ElementTree.SubElement(manifest_node, 'uses-sdk')
    110  curr_target_sdk_version = NamespacedGet(uses_sdk_node, 'targetSdkVersion')
    111  if curr_target_sdk_version is None:
    112    NamespacedSet(uses_sdk_node, 'targetSdkVersion', target_sdk_version)
    113  return curr_target_sdk_version is None
    114 
    115 
    116 def OverrideMinSdkVersionIfPresent(manifest_node, min_sdk_version):
    117  uses_sdk_node = manifest_node.find('./uses-sdk')
    118  if uses_sdk_node is not None:
    119    NamespacedSet(uses_sdk_node, 'minSdkVersion', min_sdk_version)
    120 
    121 
    122 def _SortAndStripElementTree(root):
    123  # Sort alphabetically with two exceptions:
    124  # 1) Put <application> node last (since it's giant).
    125  # 2) Put android:name before other attributes.
    126  def element_sort_key(node):
    127    if node.tag == 'application':
    128      return 'z'
    129    ret = ElementTree.tostring(node)
    130    # ElementTree.tostring inserts namespace attributes for any that are needed
    131    # for the node or any of its descendants. Remove them so as to prevent a
    132    # change to a child that adds/removes a namespace usage from changing sort
    133    # order.
    134    return re.sub(r' xmlns:.*?".*?"', '', ret.decode('utf8'))
    135 
    136  name_attr = '{%s}name' % ANDROID_NAMESPACE
    137 
    138  def attribute_sort_key(tup):
    139    return ('', '') if tup[0] == name_attr else tup
    140 
    141  def helper(node):
    142    for child in node:
    143      if child.text and child.text.isspace():
    144        child.text = None
    145      helper(child)
    146 
    147    # Sort attributes (requires Python 3.8+).
    148    node.attrib = dict(sorted(node.attrib.items(), key=attribute_sort_key))
    149 
    150    # Sort nodes
    151    node[:] = sorted(node, key=element_sort_key)
    152 
    153  helper(root)
    154 
    155 
    156 def _SplitElement(line):
    157  """Parses a one-line xml node into ('<tag', ['a="b"', ...]], '/>')."""
    158 
    159  # Shlex splits nicely, but removes quotes. Need to put them back.
    160  def restore_quotes(value):
    161    return value.replace('=', '="', 1) + '"'
    162 
    163  # Simplify restore_quotes by separating />.
    164  assert line.endswith('>'), line
    165  end_tag = '>'
    166  if line.endswith('/>'):
    167    end_tag = '/>'
    168  line = line[:-len(end_tag)]
    169 
    170  # Use shlex to avoid having to re-encode &quot;, etc.
    171  parts = shlex.split(line)
    172  start_tag = parts[0]
    173  attrs = parts[1:]
    174 
    175  return start_tag, [restore_quotes(x) for x in attrs], end_tag
    176 
    177 
    178 def _CreateNodeHash(lines):
    179  """Computes a hash (md5) for the first XML node found in |lines|.
    180 
    181  Args:
    182    lines: List of strings containing pretty-printed XML.
    183 
    184  Returns:
    185    Positive 32-bit integer hash of the node (including children).
    186  """
    187  target_indent = lines[0].find('<')
    188  tag_closed = False
    189  for i, l in enumerate(lines[1:]):
    190    cur_indent = l.find('<')
    191    if cur_indent != -1 and cur_indent <= target_indent:
    192      tag_lines = lines[:i + 1]
    193      break
    194    if not tag_closed and 'android:name="' in l:
    195      # To reduce noise of node tags changing, use android:name as the
    196      # basis the hash since they usually unique.
    197      tag_lines = [l]
    198      break
    199    tag_closed = tag_closed or '>' in l
    200  else:
    201    assert False, 'Did not find end of node:\n' + '\n'.join(lines)
    202 
    203  # Insecure and truncated hash as it only needs to be unique vs. its neighbors.
    204  return hashlib.md5(('\n'.join(tag_lines)).encode('utf8')).hexdigest()[:8]
    205 
    206 
    207 def _IsSelfClosing(lines):
    208  """Given pretty-printed xml, returns whether first node is self-closing."""
    209  for l in lines:
    210    idx = l.find('>')
    211    if idx != -1:
    212      return l[idx - 1] == '/'
    213  raise RuntimeError('Did not find end of tag:\n%s' % '\n'.join(lines))
    214 
    215 
    216 def _AddDiffTags(lines):
    217  # When multiple identical tags appear sequentially, XML diffs can look like:
    218  # +  </tag>
    219  # +  <tag>
    220  # rather than:
    221  # +  <tag>
    222  # +  </tag>
    223  # To reduce confusion, add hashes to tags.
    224  # This also ensures changed tags show up with outer <tag> elements rather than
    225  # showing only changed attributes.
    226  hash_stack = []
    227  for i, l in enumerate(lines):
    228    stripped = l.lstrip()
    229    # Ignore non-indented tags and lines that are not the start/end of a node.
    230    if l[0] != ' ' or stripped[0] != '<':
    231      continue
    232    # Ignore self-closing nodes that fit on one line.
    233    if l[-2:] == '/>':
    234      continue
    235    # Ignore <application> since diff tag changes with basically any change.
    236    if stripped.lstrip('</').startswith('application'):
    237      continue
    238 
    239    # Check for the closing tag (</foo>).
    240    if stripped[1] != '/':
    241      cur_hash = _CreateNodeHash(lines[i:])
    242      if not _IsSelfClosing(lines[i:]):
    243        hash_stack.append(cur_hash)
    244    else:
    245      cur_hash = hash_stack.pop()
    246    lines[i] += '  # DIFF-ANCHOR: {}'.format(cur_hash)
    247  assert not hash_stack, 'hash_stack was not empty:\n' + '\n'.join(hash_stack)
    248 
    249 
    250 def NormalizeManifest(manifest_contents, version_code_offset,
    251                      library_version_offset):
    252  _RegisterElementTreeNamespaces()
    253  # This also strips comments and sorts node attributes alphabetically.
    254  root = ElementTree.fromstring(manifest_contents)
    255  package = GetPackage(root)
    256 
    257  app_node = root.find('application')
    258  if app_node is not None:
    259    # android:debuggable is added when !is_official_build. Strip it out to avoid
    260    # expectation diffs caused by not adding is_official_build. Play store
    261    # blocks uploading apps with it set, so there's no risk of it slipping in.
    262    debuggable_name = '{%s}debuggable' % ANDROID_NAMESPACE
    263    if debuggable_name in app_node.attrib:
    264      del app_node.attrib[debuggable_name]
    265 
    266    version_code = NamespacedGet(root, 'versionCode')
    267    if version_code and version_code_offset:
    268      version_code = int(version_code) - int(version_code_offset)
    269      NamespacedSet(root, 'versionCode', f'OFFSET={version_code}')
    270    version_name = NamespacedGet(root, 'versionName')
    271    if version_name:
    272      version_name = re.sub(r'\d+', '#', version_name)
    273      NamespacedSet(root, 'versionName', version_name)
    274 
    275    # Trichrome's static library version number is updated daily. To avoid
    276    # frequent manifest check failures, we remove the exact version number
    277    # during normalization.
    278    for node in app_node:
    279      if node.tag in ['uses-static-library', 'static-library']:
    280        version = NamespacedGet(node, 'version')
    281        if version and library_version_offset:
    282          version = int(version) - int(library_version_offset)
    283          NamespacedSet(node, 'version', f'OFFSET={version}')
    284 
    285  # We also remove the exact package name (except the one at the root level)
    286  # to avoid noise during manifest comparison.
    287  def blur_package_name(node):
    288    for key in node.keys():
    289      node.set(key, node.get(key).replace(package, '$PACKAGE'))
    290 
    291    for child in node:
    292      blur_package_name(child)
    293 
    294  # We only blur the package names of non-root nodes because they generate a lot
    295  # of diffs when doing manifest checks for upstream targets. We still want to
    296  # have 1 piece of package name not blurred just in case the package name is
    297  # mistakenly changed.
    298  for child in root:
    299    blur_package_name(child)
    300 
    301  _SortAndStripElementTree(root)
    302 
    303  # Fix up whitespace/indentation.
    304  dom = minidom.parseString(ElementTree.tostring(root))
    305  out_lines = []
    306  for l in dom.toprettyxml(indent='  ').splitlines():
    307    if not l or l.isspace():
    308      continue
    309    if len(l) > _WRAP_LINE_LENGTH and any(x in l for x in _WRAP_CANDIDATES):
    310      indent = ' ' * l.find('<')
    311      start_tag, attrs, end_tag = _SplitElement(l)
    312      out_lines.append('{}{}'.format(indent, start_tag))
    313      for attribute in attrs:
    314        out_lines.append('{}    {}'.format(indent, attribute))
    315      out_lines[-1] += '>'
    316      # Heuristic: Do not allow multi-line tags to be self-closing since these
    317      # can generally be allowed to have nested elements. When diffing, it adds
    318      # noise if the base file is self-closing and the non-base file is not
    319      # self-closing.
    320      if end_tag == '/>':
    321        out_lines.append('{}{}>'.format(indent, start_tag.replace('<', '</')))
    322    else:
    323      out_lines.append(l)
    324 
    325  # Make output more diff-friendly.
    326  _AddDiffTags(out_lines)
    327 
    328  return '\n'.join(out_lines) + '\n'