tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

http_header_util.py (7028B)


      1 # Copyright 2011, Google Inc.
      2 # All rights reserved.
      3 #
      4 # Redistribution and use in source and binary forms, with or without
      5 # modification, are permitted provided that the following conditions are
      6 # met:
      7 #
      8 #     * Redistributions of source code must retain the above copyright
      9 # notice, this list of conditions and the following disclaimer.
     10 #     * Redistributions in binary form must reproduce the above
     11 # copyright notice, this list of conditions and the following disclaimer
     12 # in the documentation and/or other materials provided with the
     13 # distribution.
     14 #     * Neither the name of Google Inc. nor the names of its
     15 # contributors may be used to endorse or promote products derived from
     16 # this software without specific prior written permission.
     17 #
     18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 """Utilities for parsing and formatting headers that follow the grammar defined
     30 in HTTP RFC http://www.ietf.org/rfc/rfc2616.txt.
     31 """
     32 
     33 from __future__ import absolute_import
     34 import six.moves.urllib.parse
     35 
# Characters that may not appear in a token: the "separators" set of the HTTP
# RFC grammar (including SP and HT). consume_token() stops at any of these and
# quote_if_necessary() switches to quoted-string form when it sees one.
_SEPARATORS = '()<>@,;:\\"/[]?={} \t'
     37 
     38 
     39 def _is_char(c):
     40    """Returns true iff c is in CHAR as specified in HTTP RFC."""
     41 
     42    return ord(c) <= 127
     43 
     44 
     45 def _is_ctl(c):
     46    """Returns true iff c is in CTL as specified in HTTP RFC."""
     47 
     48    return ord(c) <= 31 or ord(c) == 127
     49 
     50 
     51 class ParsingState(object):
     52    def __init__(self, data):
     53        self.data = data
     54        self.head = 0
     55 
     56 
     57 def peek(state, pos=0):
     58    """Peeks the character at pos from the head of data."""
     59 
     60    if state.head + pos >= len(state.data):
     61        return None
     62 
     63    return state.data[state.head + pos]
     64 
     65 
     66 def consume(state, amount=1):
     67    """Consumes specified amount of bytes from the head and returns the
     68    consumed bytes. If there's not enough bytes to consume, returns None.
     69    """
     70 
     71    if state.head + amount > len(state.data):
     72        return None
     73 
     74    result = state.data[state.head:state.head + amount]
     75    state.head = state.head + amount
     76    return result
     77 
     78 
     79 def consume_string(state, expected):
     80    """Given a parsing state and a expected string, consumes the string from
     81    the head. Returns True if consumed successfully. Otherwise, returns
     82    False.
     83    """
     84 
     85    pos = 0
     86 
     87    for c in expected:
     88        if c != peek(state, pos):
     89            return False
     90        pos += 1
     91 
     92    consume(state, pos)
     93    return True
     94 
     95 
     96 def consume_lws(state):
     97    """Consumes a LWS from the head. Returns True if any LWS is consumed.
     98    Otherwise, returns False.
     99 
    100    LWS = [CRLF] 1*( SP | HT )
    101    """
    102 
    103    original_head = state.head
    104 
    105    consume_string(state, '\r\n')
    106 
    107    pos = 0
    108 
    109    while True:
    110        c = peek(state, pos)
    111        if c == ' ' or c == '\t':
    112            pos += 1
    113        else:
    114            if pos == 0:
    115                state.head = original_head
    116                return False
    117            else:
    118                consume(state, pos)
    119                return True
    120 
    121 
    122 def consume_lwses(state):
    123    r"""Consumes \*LWS from the head."""
    124 
    125    while consume_lws(state):
    126        pass
    127 
    128 
    129 def consume_token(state):
    130    """Consumes a token from the head. Returns the token or None if no token
    131    was found.
    132    """
    133 
    134    pos = 0
    135 
    136    while True:
    137        c = peek(state, pos)
    138        if c is None or c in _SEPARATORS or _is_ctl(c) or not _is_char(c):
    139            if pos == 0:
    140                return None
    141 
    142            return consume(state, pos)
    143        else:
    144            pos += 1
    145 
    146 
    147 def consume_token_or_quoted_string(state):
    148    """Consumes a token or a quoted-string, and returns the token or unquoted
    149    string. If no token or quoted-string was found, returns None.
    150    """
    151 
    152    original_head = state.head
    153 
    154    if not consume_string(state, '"'):
    155        return consume_token(state)
    156 
    157    result = []
    158 
    159    expect_quoted_pair = False
    160 
    161    while True:
    162        if not expect_quoted_pair and consume_lws(state):
    163            result.append(' ')
    164            continue
    165 
    166        c = consume(state)
    167        if c is None:
    168            # quoted-string is not enclosed with double quotation
    169            state.head = original_head
    170            return None
    171        elif expect_quoted_pair:
    172            expect_quoted_pair = False
    173            if _is_char(c):
    174                result.append(c)
    175            else:
    176                # Non CHAR character found in quoted-pair
    177                state.head = original_head
    178                return None
    179        elif c == '\\':
    180            expect_quoted_pair = True
    181        elif c == '"':
    182            return ''.join(result)
    183        elif _is_ctl(c):
    184            # Invalid character %r found in qdtext
    185            state.head = original_head
    186            return None
    187        else:
    188            result.append(c)
    189 
    190 
    191 def quote_if_necessary(s):
    192    """Quotes arbitrary string into quoted-string."""
    193 
    194    quote = False
    195    if s == '':
    196        return '""'
    197 
    198    result = []
    199    for c in s:
    200        if c == '"' or c in _SEPARATORS or _is_ctl(c) or not _is_char(c):
    201            quote = True
    202 
    203        if c == '"' or _is_ctl(c):
    204            result.append('\\' + c)
    205        else:
    206            result.append(c)
    207 
    208    if quote:
    209        return '"' + ''.join(result) + '"'
    210    else:
    211        return ''.join(result)
    212 
    213 
    214 def parse_uri(uri):
    215    """Parse absolute URI then return host, port and resource."""
    216 
    217    parsed = six.moves.urllib.parse.urlsplit(uri)
    218    if parsed.scheme != 'wss' and parsed.scheme != 'ws':
    219        # |uri| must be a relative URI.
    220        # TODO(toyoshim): Should validate |uri|.
    221        return None, None, uri
    222 
    223    if parsed.hostname is None:
    224        return None, None, None
    225 
    226    port = None
    227    try:
    228        port = parsed.port
    229    except ValueError:
    230        # The port property cause ValueError on invalid null port descriptions
    231        # like 'ws://host:INVALID_PORT/path', where the assigned port is not
    232        # *DIGIT. For python 3.6 and later, ValueError also raises when
    233        # assigning invalid port numbers such as 'ws://host:-1/path'. Earlier
    234        # versions simply return None and ignore invalid port attributes.
    235        return None, None, None
    236 
    237    if port is None:
    238        if parsed.scheme == 'ws':
    239            port = 80
    240        else:
    241            port = 443
    242 
    243    path = parsed.path
    244    if not path:
    245        path += '/'
    246    if parsed.query:
    247        path += '?' + parsed.query
    248    if parsed.fragment:
    249        path += '#' + parsed.fragment
    250 
    251    return parsed.hostname, port, path
    252 
    253 
    254 # vi:sts=4 sw=4 et