tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

util.py (13653B)


      1 # Copyright 2011, Google Inc.
      2 # All rights reserved.
      3 #
      4 # Redistribution and use in source and binary forms, with or without
      5 # modification, are permitted provided that the following conditions are
      6 # met:
      7 #
      8 #     * Redistributions of source code must retain the above copyright
      9 # notice, this list of conditions and the following disclaimer.
     10 #     * Redistributions in binary form must reproduce the above
     11 # copyright notice, this list of conditions and the following disclaimer
     12 # in the documentation and/or other materials provided with the
     13 # distribution.
     14 #     * Neither the name of Google Inc. nor the names of its
     15 # contributors may be used to endorse or promote products derived from
     16 # this software without specific prior written permission.
     17 #
     18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 """WebSocket utilities."""
     30 
     31 from __future__ import absolute_import
     32 import array
     33 import errno
     34 import logging
     35 import os
     36 import re
     37 import six
     38 from six.moves import map
     39 from six.moves import range
     40 import socket
     41 import struct
     42 import zlib
     43 
     44 try:
     45    from mod_pywebsocket import fast_masking
     46 except ImportError:
     47    pass
     48 
     49 
     50 def prepend_message_to_exception(message, exc):
     51    """Prepend message to the exception."""
     52    exc.args = (message + str(exc), )
     53    return
     54 
     55 
     56 def __translate_interp(interp, cygwin_path):
     57    """Translate interp program path for Win32 python to run cygwin program
     58    (e.g. perl).  Note that it doesn't support path that contains space,
     59    which is typically true for Unix, where #!-script is written.
     60    For Win32 python, cygwin_path is a directory of cygwin binaries.
     61 
     62    Args:
     63      interp: interp command line
     64      cygwin_path: directory name of cygwin binary, or None
     65    Returns:
     66      translated interp command line.
     67    """
     68    if not cygwin_path:
     69        return interp
     70    m = re.match('^[^ ]*/([^ ]+)( .*)?', interp)
     71    if m:
     72        cmd = os.path.join(cygwin_path, m.group(1))
     73        return cmd + m.group(2)
     74    return interp
     75 
     76 
     77 def get_script_interp(script_path, cygwin_path=None):
     78    r"""Get #!-interpreter command line from the script.
     79 
     80    It also fixes command path.  When Cygwin Python is used, e.g. in WebKit,
     81    it could run "/usr/bin/perl -wT hello.pl".
     82    When Win32 Python is used, e.g. in Chromium, it couldn't.  So, fix
     83    "/usr/bin/perl" to "<cygwin_path>\perl.exe".
     84 
     85    Args:
     86      script_path: pathname of the script
     87      cygwin_path: directory name of cygwin binary, or None
     88    Returns:
     89      #!-interpreter command line, or None if it is not #!-script.
     90    """
     91    fp = open(script_path)
     92    line = fp.readline()
     93    fp.close()
     94    m = re.match('^#!(.*)', line)
     95    if m:
     96        return __translate_interp(m.group(1), cygwin_path)
     97    return None
     98 
     99 
    100 def hexify(s):
    101    return ' '.join(['%02x' % x for x in six.iterbytes(s)])
    102 
    103 
    104 def get_class_logger(o):
    105    """Return the logging class information."""
    106    return logging.getLogger('%s.%s' %
    107                             (o.__class__.__module__, o.__class__.__name__))
    108 
    109 
    110 def pack_byte(b):
    111    """Pack an integer to network-ordered byte"""
    112    return struct.pack('!B', b)
    113 
    114 
    115 class NoopMasker(object):
    116    """A NoOp masking object.
    117 
    118    This has the same interface as RepeatedXorMasker but just returns
    119    the string passed in without making any change.
    120    """
    121    def __init__(self):
    122        """NoOp."""
    123        pass
    124 
    125    def mask(self, s):
    126        """NoOp."""
    127        return s
    128 
    129 
    130 class RepeatedXorMasker(object):
    131    """A masking object that applies XOR on the string.
    132 
    133    Applies XOR on the byte string given to mask method with the masking bytes
    134    given to the constructor repeatedly. This object remembers the position
    135    in the masking bytes the last mask method call ended and resumes from
    136    that point on the next mask method call.
    137    """
    138    def __init__(self, masking_key):
    139        self._masking_key = masking_key
    140        self._masking_key_index = 0
    141 
    142    def _mask_using_swig(self, s):
    143        """Perform the mask via SWIG."""
    144        masked_data = fast_masking.mask(s, self._masking_key,
    145                                        self._masking_key_index)
    146        self._masking_key_index = ((self._masking_key_index + len(s)) %
    147                                   len(self._masking_key))
    148        return masked_data
    149 
    150    def _mask_using_array(self, s):
    151        """Perform the mask via python."""
    152        if isinstance(s, six.text_type):
    153            raise Exception(
    154                'Masking Operation should not process unicode strings')
    155 
    156        result = bytearray(s)
    157 
    158        # Use temporary local variables to eliminate the cost to access
    159        # attributes
    160        masking_key = [c for c in six.iterbytes(self._masking_key)]
    161        masking_key_size = len(masking_key)
    162        masking_key_index = self._masking_key_index
    163 
    164        for i in range(len(result)):
    165            result[i] ^= masking_key[masking_key_index]
    166            masking_key_index = (masking_key_index + 1) % masking_key_size
    167 
    168        self._masking_key_index = masking_key_index
    169 
    170        return bytes(result)
    171 
    172    if 'fast_masking' in globals():
    173        mask = _mask_using_swig
    174    else:
    175        mask = _mask_using_array
    176 
    177 
    178 # By making wbits option negative, we can suppress CMF/FLG (2 octet) and
    179 # ADLER32 (4 octet) fields of zlib so that we can use zlib module just as
    180 # deflate library. DICTID won't be added as long as we don't set dictionary.
    181 # LZ77 window of 32K will be used for both compression and decompression.
    182 # For decompression, we can just use 32K to cover any window size. For
    183 # compression, we use 32K so receivers must use 32K.
    184 #
    185 # Compression level is Z_DEFAULT_COMPRESSION. We don't have to match level
    186 # to decode.
    187 #
    188 # See zconf.h, deflate.cc, inflate.cc of zlib library, and zlibmodule.c of
    189 # Python. See also RFC1950 (ZLIB 3.3).
    190 
    191 
    192 class _Deflater(object):
    193    def __init__(self, window_bits):
    194        self._logger = get_class_logger(self)
    195 
    196        # Using the smallest window bits of 9 for generating input frames.
    197        # On WebSocket spec, the smallest window bit is 8. However, zlib does
    198        # not accept window_bit = 8.
    199        #
    200        # Because of a zlib deflate quirk, back-references will not use the
    201        # entire range of 1 << window_bits, but will instead use a restricted
    202        # range of (1 << window_bits) - 262. With an increased window_bits = 9,
    203        # back-references will be within a range of 250. These can still be
    204        # decompressed with window_bits = 8 and the 256-byte window used there.
    205        #
    206        # Similar disscussions can be found in https://crbug.com/691074
    207        window_bits = max(window_bits, 9)
    208 
    209        self._compress = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
    210                                          zlib.DEFLATED, -window_bits)
    211 
    212    def compress(self, bytes):
    213        compressed_bytes = self._compress.compress(bytes)
    214        self._logger.debug('Compress input %r', bytes)
    215        self._logger.debug('Compress result %r', compressed_bytes)
    216        return compressed_bytes
    217 
    218    def compress_and_flush(self, bytes):
    219        compressed_bytes = self._compress.compress(bytes)
    220        compressed_bytes += self._compress.flush(zlib.Z_SYNC_FLUSH)
    221        self._logger.debug('Compress input %r', bytes)
    222        self._logger.debug('Compress result %r', compressed_bytes)
    223        return compressed_bytes
    224 
    225    def compress_and_finish(self, bytes):
    226        compressed_bytes = self._compress.compress(bytes)
    227        compressed_bytes += self._compress.flush(zlib.Z_FINISH)
    228        self._logger.debug('Compress input %r', bytes)
    229        self._logger.debug('Compress result %r', compressed_bytes)
    230        return compressed_bytes
    231 
    232 
    233 class _Inflater(object):
    234    def __init__(self, window_bits):
    235        self._logger = get_class_logger(self)
    236        self._window_bits = window_bits
    237 
    238        self._unconsumed = b''
    239 
    240        self.reset()
    241 
    242    def decompress(self, size):
    243        if not (size == -1 or size > 0):
    244            raise Exception('size must be -1 or positive')
    245 
    246        data = b''
    247 
    248        while True:
    249            data += self._decompress.decompress(self._unconsumed,
    250                                                max(0, size - len(data)))
    251            self._unconsumed = self._decompress.unconsumed_tail
    252            if self._decompress.unused_data:
    253                # Encountered a last block (i.e. a block with BFINAL = 1) and
    254                # found a new stream (unused_data). We cannot use the same
    255                # zlib.Decompress object for the new stream. Create a new
    256                # Decompress object to decompress the new one.
    257                #
    258                # It's fine to ignore unconsumed_tail if unused_data is not
    259                # empty.
    260                self._unconsumed = self._decompress.unused_data
    261                self.reset()
    262                if size >= 0 and len(data) == size:
    263                    # data is filled. Don't call decompress again.
    264                    break
    265                else:
    266                    # Re-invoke Decompress.decompress to try to decompress all
    267                    # available bytes before invoking read which blocks until
    268                    # any new byte is available.
    269                    continue
    270            else:
    271                # Here, since unused_data is empty, even if unconsumed_tail is
    272                # not empty, bytes of requested length are already in data. We
    273                # don't have to "continue" here.
    274                break
    275 
    276        if data:
    277            self._logger.debug('Decompressed %r', data)
    278        return data
    279 
    280    def append(self, data):
    281        self._logger.debug('Appended %r', data)
    282        self._unconsumed += data
    283 
    284    def reset(self):
    285        self._logger.debug('Reset')
    286        self._decompress = zlib.decompressobj(-self._window_bits)
    287 
    288 
    289 # Compresses/decompresses given octets using the method introduced in RFC1979.
    290 
    291 
    292 class _RFC1979Deflater(object):
    293    """A compressor class that applies DEFLATE to given byte sequence and
    294    flushes using the algorithm described in the RFC1979 section 2.1.
    295    """
    296    def __init__(self, window_bits, no_context_takeover):
    297        self._deflater = None
    298        if window_bits is None:
    299            window_bits = zlib.MAX_WBITS
    300        self._window_bits = window_bits
    301        self._no_context_takeover = no_context_takeover
    302 
    303    def filter(self, bytes, end=True, bfinal=False):
    304        if self._deflater is None:
    305            self._deflater = _Deflater(self._window_bits)
    306 
    307        if bfinal:
    308            result = self._deflater.compress_and_finish(bytes)
    309            # Add a padding block with BFINAL = 0 and BTYPE = 0.
    310            result = result + pack_byte(0)
    311            self._deflater = None
    312            return result
    313 
    314        result = self._deflater.compress_and_flush(bytes)
    315        if end:
    316            # Strip last 4 octets which is LEN and NLEN field of a
    317            # non-compressed block added for Z_SYNC_FLUSH.
    318            result = result[:-4]
    319 
    320        if self._no_context_takeover and end:
    321            self._deflater = None
    322 
    323        return result
    324 
    325 
    326 class _RFC1979Inflater(object):
    327    """A decompressor class a la RFC1979.
    328 
    329    A decompressor class for byte sequence compressed and flushed following
    330    the algorithm described in the RFC1979 section 2.1.
    331    """
    332    def __init__(self, window_bits=zlib.MAX_WBITS):
    333        self._inflater = _Inflater(window_bits)
    334 
    335    def filter(self, bytes):
    336        # Restore stripped LEN and NLEN field of a non-compressed block added
    337        # for Z_SYNC_FLUSH.
    338        self._inflater.append(bytes + b'\x00\x00\xff\xff')
    339        return self._inflater.decompress(-1)
    340 
    341 
    342 class DeflateSocket(object):
    343    """A wrapper class for socket object to intercept send and recv to perform
    344    deflate compression and decompression transparently.
    345    """
    346 
    347    # Size of the buffer passed to recv to receive compressed data.
    348    _RECV_SIZE = 4096
    349 
    350    def __init__(self, socket):
    351        self._socket = socket
    352 
    353        self._logger = get_class_logger(self)
    354 
    355        self._deflater = _Deflater(zlib.MAX_WBITS)
    356        self._inflater = _Inflater(zlib.MAX_WBITS)
    357 
    358    def recv(self, size):
    359        """Receives data from the socket specified on the construction up
    360        to the specified size. Once any data is available, returns it even
    361        if it's smaller than the specified size.
    362        """
    363 
    364        # TODO(tyoshino): Allow call with size=0. It should block until any
    365        # decompressed data is available.
    366        if size <= 0:
    367            raise Exception('Non-positive size passed')
    368        while True:
    369            data = self._inflater.decompress(size)
    370            if len(data) != 0:
    371                return data
    372 
    373            read_data = self._socket.recv(DeflateSocket._RECV_SIZE)
    374            if not read_data:
    375                return b''
    376            self._inflater.append(read_data)
    377 
    378    def sendall(self, bytes):
    379        self.send(bytes)
    380 
    381    def send(self, bytes):
    382        self._socket.sendall(self._deflater.compress_and_flush(bytes))
    383        return len(bytes)
    384 
    385 
    386 # vi:sts=4 sw=4 et