http_header_util.py (7028B)
# Copyright 2011, Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""Utilities for parsing and formatting headers that follow the grammar defined
in HTTP RFC http://www.ietf.org/rfc/rfc2616.txt.

The parsing helpers operate on a ParsingState, which pairs the text being
parsed with a cursor (``head``). Every ``consume_*`` function either advances
the cursor past what it recognized, or leaves the cursor where it was and
reports failure.
"""

from __future__ import absolute_import

try:
    import six.moves.urllib.parse
except ImportError:
    # six is only a Python 2/3 compatibility shim; when it is not installed
    # use the standard library module it would have re-exported.
    import urllib.parse
    _urlsplit = urllib.parse.urlsplit
else:
    _urlsplit = six.moves.urllib.parse.urlsplit

# The "separators" production of the HTTP RFC. A token must not contain any
# of these characters.
_SEPARATORS = '()<>@,;:\\"/[]?={} \t'


def _is_char(c):
    """Returns true iff c is in CHAR as specified in HTTP RFC."""

    return ord(c) <= 127


def _is_ctl(c):
    """Returns true iff c is in CTL as specified in HTTP RFC."""

    return ord(c) <= 31 or ord(c) == 127


class ParsingState(object):
    """Holds the text being parsed and the current read position.

    Attributes are documented inline in __init__.
    """

    def __init__(self, data):
        # The complete text to parse.
        self.data = data
        # Index into data of the next character that has not been consumed.
        self.head = 0


def peek(state, pos=0):
    """Peeks the character at pos from the head of data.

    Returns None when head + pos is at or beyond the end of data. The head is
    never moved.
    """

    index = state.head + pos
    if index >= len(state.data):
        return None

    return state.data[index]


def consume(state, amount=1):
    """Consumes specified amount of bytes from the head and returns the
    consumed bytes. If there's not enough bytes to consume, returns None
    and leaves the head untouched.
    """

    end = state.head + amount
    if end > len(state.data):
        return None

    result = state.data[state.head:end]
    state.head = end
    return result


def consume_string(state, expected):
    """Given a parsing state and a expected string, consumes the string from
    the head. Returns True if consumed successfully. Otherwise, returns
    False and leaves the head untouched.
    """

    # Verify the whole string by peeking first so that nothing is consumed
    # on a partial match.
    for pos, c in enumerate(expected):
        if c != peek(state, pos):
            return False

    consume(state, len(expected))
    return True


def consume_lws(state):
    """Consumes a LWS from the head. Returns True if any LWS is consumed.
    Otherwise, returns False and leaves the head untouched.

    LWS = [CRLF] 1*( SP | HT )
    """

    original_head = state.head

    # The leading CRLF is optional; the result is deliberately ignored.
    consume_string(state, '\r\n')

    pos = 0
    while peek(state, pos) in (' ', '\t'):
        pos += 1

    if pos == 0:
        # At least one SP or HT is mandatory. Undo the optional CRLF that may
        # have been consumed above.
        state.head = original_head
        return False

    consume(state, pos)
    return True


def consume_lwses(state):
    r"""Consumes \*LWS from the head."""

    while consume_lws(state):
        pass


def consume_token(state):
    """Consumes a token from the head. Returns the token or None if no token
    was found.

    A token is the longest non-empty run of CHARs that are neither CTLs nor
    separators.
    """

    pos = 0

    while True:
        c = peek(state, pos)
        if c is None or c in _SEPARATORS or _is_ctl(c) or not _is_char(c):
            # Reached the end of data or a character that cannot be part of
            # a token.
            if pos == 0:
                return None

            return consume(state, pos)

        pos += 1


def consume_token_or_quoted_string(state):
    """Consumes a token or a quoted-string, and returns the token or unquoted
    string. If no token or quoted-string was found, returns None.

    For a quoted-string, the surrounding double quotes are dropped, each
    quoted-pair is replaced by the character it escapes, and each LWS is
    replaced by a single space. When the quoted-string is malformed, the head
    is restored to where it was on entry.
    """

    original_head = state.head

    if not consume_string(state, '"'):
        return consume_token(state)

    result = []

    # True right after a backslash: the next character is taken literally.
    expect_quoted_pair = False

    while True:
        # LWS is only folded outside of a quoted-pair, so that an escaped
        # SP or HT is preserved as is.
        if not expect_quoted_pair and consume_lws(state):
            result.append(' ')
            continue

        c = consume(state)
        if c is None:
            # quoted-string is not enclosed with double quotation
            state.head = original_head
            return None
        elif expect_quoted_pair:
            expect_quoted_pair = False
            if _is_char(c):
                result.append(c)
            else:
                # Non CHAR character found in quoted-pair
                state.head = original_head
                return None
        elif c == '\\':
            expect_quoted_pair = True
        elif c == '"':
            return ''.join(result)
        elif _is_ctl(c):
            # Invalid (CTL) character found in qdtext
            state.head = original_head
            return None
        else:
            result.append(c)


def quote_if_necessary(s):
    """Quotes arbitrary string into quoted-string.

    Returns s unchanged when it is a valid token. Otherwise returns s wrapped
    in double quotes, with every double quote, backslash and CTL character
    escaped as a quoted-pair. The empty string is returned as '""'.
    """

    if s == '':
        return '""'

    needs_quotes = False
    escaped = []
    for c in s:
        if c in _SEPARATORS or _is_ctl(c) or not _is_char(c):
            # c cannot appear in a token. Note that both '"' and '\\' are
            # separators, so anything escaped below is always quoted too.
            needs_quotes = True

        if c == '"' or c == '\\' or _is_ctl(c):
            # The backslash must be escaped as well; otherwise
            # consume_token_or_quoted_string would read it as the start of a
            # quoted-pair and silently drop it.
            escaped.append('\\' + c)
        else:
            escaped.append(c)

    body = ''.join(escaped)
    if needs_quotes:
        return '"' + body + '"'
    return body


def parse_uri(uri):
    """Parse absolute URI then return host, port and resource.

    Returns a (host, port, resource) tuple:
      - For a ws:// or wss:// URI, the host name, the port (80 for ws and 443
        for wss when not given explicitly) and the resource, i.e. the path
        (defaulting to '/') followed by '?query' and '#fragment' if present.
      - (None, None, uri) when the scheme is neither ws nor wss; uri is then
        treated as a relative URI and returned as is.
      - (None, None, None) when the URI cannot be parsed, has no host, or has
        an invalid port.
    """

    try:
        parsed = _urlsplit(uri)
    except ValueError:
        # urlsplit rejects malformed authorities, e.g. unbalanced IPv6
        # brackets as in 'ws://[::1/path'. Report these like any other
        # invalid URI rather than letting the exception escape.
        return None, None, None

    if parsed.scheme != 'wss' and parsed.scheme != 'ws':
        # |uri| must be a relative URI.
        # TODO(toyoshim): Should validate |uri|.
        return None, None, uri

    if parsed.hostname is None:
        return None, None, None

    port = None
    try:
        port = parsed.port
    except ValueError:
        # The port property cause ValueError on invalid null port descriptions
        # like 'ws://host:INVALID_PORT/path', where the assigned port is not
        # *DIGIT. For python 3.6 and later, ValueError also raises when
        # assigning invalid port numbers such as 'ws://host:-1/path'. Earlier
        # versions simply return None and ignore invalid port attributes.
        return None, None, None

    if port is None:
        # Fall back to the default port of the scheme.
        if parsed.scheme == 'ws':
            port = 80
        else:
            port = 443

    path = parsed.path
    if not path:
        path += '/'
    if parsed.query:
        path += '?' + parsed.query
    if parsed.fragment:
        path += '#' + parsed.fragment

    return parsed.hostname, port, path


# vi:sts=4 sw=4 et