url.py (21846B)
# -*- coding: utf-8 -*-
"""Generate HTML test documents for URL-valued attributes.

Running this script writes ``*-novalid.html``, ``*-haswarn.html`` and
``*-isvalid.html`` files below ``<ccdir>/html/``, built from the URL tables
defined in this module.  Files are opened for writing only, so the target
directories must already exist.
"""
import os

# Two directory levels above this file; every output path is relative to it.
ccdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# based on https://github.com/web-platform-tests/wpt/blob/275544eab54a0d0c7f74ccc2baae9711293d8908/url/urltestdata.txt
invalid = {
    "scheme-trailing-space": "a: foo.com",
    "scheme-trailing-tab": "a:\tfoo.com",
    "scheme-trailing-newline": "a:\nfoo.com",
    "scheme-trailing-cr": "a:\rfoo.com",
    "scheme-http-no-slash": "http:foo.com",
    "scheme-http-no-slash-colon": "http::@c:29",
    "scheme-http-no-slash-square-bracket": "http:[61:27]/:foo",
    "scheme-http-backslash": "http:\\\\foo.com\\",
    "scheme-http-single-slash": "http:/example.com/",
    "scheme-ftp-single-slash": "ftp:/example.com/",
    "scheme-https-single-slash": "https:/example.com/",
    "scheme-data-single-slash": "data:/example.com/",
    "scheme-ftp-no-slash": "ftp:example.com/",
    "scheme-https-no-slash": "https:example.com/",
    "userinfo-password-bad-chars": "http://&a:foo(b]c@d:2/",
    "userinfo-username-contains-at-sign": "http://::@c@d:2",
    "userinfo-backslash": "http://a\\b:c\\d@foo.com",
    "host-space": "http://example .org",
    "host-tab": "http://example\t.org",
    "host-newline": "http://example.\norg",
    "host-cr": "http://example.\rorg",
    "host-square-brackets-port-contains-colon": "http://[1::2]:3:4",
    "port-999999": "http://f:999999/c",
    "port-single-letter": "http://f:b/c",
    "port-multiple-letters": "http://f:fifty-two/c",
    "port-leading-colon": "http://2001::1",
    "port-leading-colon-bracket-colon": "http://2001::1]:80",
    "path-leading-backslash-at-sign": "http://foo.com/\\@",
    "path-leading-colon-backslash": ":\\",
    "path-leading-colon-chars-backslash": ":foo.com\\",
    "path-relative-square-brackets": "[61:24:74]:98",
    "fragment-contains-hash": "http://foo/path#f#g",
    "path-percent-encoded-malformed": "http://example.com/foo/%2e%2",
    "path-bare-percent-sign": "http://example.com/foo%",
    "path-u0091": "http://example.com/foo\u0091",
    "userinfo-username-contains-pile-of-poo": "http://💩:foo@example.com",
    "userinfo-password-contains-pile-of-poo": "http://foo:💩@example.com",
    "host-hostname-in-brackets": "http://[www.google.com]/",
    "host-empty": "http://",
    "host-empty-with-userinfo": "http://user:pass@/",
    "port-leading-dash": "http://foo:-80/",
    "host-empty-userinfo-empty": "http://@/www.example.com",
    "host-invalid-unicode": "http://\ufdd0zyx.com",
    "host-invalid-unicode-percent-encoded": "http://%ef%b7%90zyx.com",
    "host-double-percent-encoded": "http://\uff05\uff14\uff11.com",
    "host-double-percent-encoded-percent-encoded": "http://%ef%bc%85%ef%bc%94%ef%bc%91.com",
    "host-u0000-percent-encoded": "http://\uff05\uff10\uff10.com",
    "host-u0000-percent-encoded-percent-encoded": "http://%ef%bc%85%ef%bc%90%ef%bc%90.com",
}
# Snapshot taken before the file: URLs are merged into ``invalid`` below, so
# ``invalid_absolute`` never receives them.
invalid_absolute = invalid.copy()

invalid_url_code_points = {
    "fragment-backslash": "#\\",
    "fragment-leading-space": "http://f:21/b# e",
    "path-contains-space": "/a/ /c",
    "path-leading-space": "http://f:21/ b",
    "path-tab": "http://example.com/foo\tbar",
    "path-trailing-space": "http://f:21/b ?",
    "port-cr": "http://f:\r/c",
    "port-newline": "http://f:\n/c",
    "port-space": "http://f: /c",
    "port-tab": "http://f:\t/c",
    "query-leading-space": "http://f:21/b? d",
    "query-trailing-space": "http://f:21/b?d #",
}
invalid.update(invalid_url_code_points)
invalid_absolute.update(invalid_url_code_points)

valid_absolute = {
    "scheme-private": "a:foo.com",
    "scheme-private-slash": "foo:/",
    "scheme-private-slash-slash": "foo://",
    "scheme-private-path": "foo:/bar.com/",
    "scheme-private-path-leading-slashes-only": "foo://///////",
    "scheme-private-path-leading-slashes-chars": "foo://///////bar.com/",
    "scheme-private-path-leading-slashes-colon-slashes": "foo:////://///",
    "scheme-private-single-letter": "c:/foo",
    "scheme-private-single-slash": "madeupscheme:/example.com/",
    "scheme-file-single-slash": "file:/example.com/",
    "scheme-ftps-single-slash": "ftps:/example.com/",
    "scheme-gopher-single-slash": "gopher:/example.com/",
    "scheme-ws-single-slash": "ws:/example.com/",
    "scheme-wss-single-slash": "wss:/example.com/",
    "scheme-javascript-single-slash": "javascript:/example.com/",
    "scheme-mailto-single-slash": "mailto:/example.com/",
    "scheme-private-no-slash": "madeupscheme:example.com/",
    "scheme-ftps-no-slash": "ftps:example.com/",
    "scheme-gopher-no-slash": "gopher:example.com/",
    "scheme-wss-no-slash": "wss:example.com/",
    "scheme-mailto-no-slash": "mailto:example.com/",
    "scheme-data-no-slash": "data:text/plain,foo",
    "userinfo": "http://user:pass@foo:21/bar;par?b#c",
    "host-ipv6": "http://[2001::1]",
    "host-ipv6-port": "http://[2001::1]:80",
    "port-none-but-colon": "http://f:/c",
    "port-0": "http://f:0/c",
    "port-00000000000000": "http://f:00000000000000/c",
    "port-00000000000000000000080": "http://f:00000000000000000000080/c",
    "userinfo-host-port-path": "http://a:b@c:29/d",
    "userinfo-username-non-alpha": "http://foo.com:b@d/",
    "query-contains-question-mark": "http://foo/abcd?efgh?ijkl",
    "fragment-contains-question-mark": "http://foo/abcd#foo?bar",
    "path-percent-encoded-dot": "http://example.com/foo/%2e",
    "path-percent-encoded-space": "http://example.com/%20foo",
    "path-non-ascii": "http://example.com/\u00C2\u00A9zbar",
    "path-percent-encoded-multiple": "http://example.com/foo%41%7a",
    "path-percent-encoded-u0091": "http://example.com/foo%91",
    "path-percent-encoded-u0000": "http://example.com/foo%00",
    "path-percent-encoded-mixed-case": "http://example.com/%3A%3a%3C%3c",
    "path-unicode-han": "http://example.com/\u4F60\u597D\u4F60\u597D",
    "path-uFEFF": "http://example.com/\uFEFF/foo",
    "path-u202E-u202D": "http://example.com/\u202E/foo/\u202D/bar",
    "host-is-pile-of-poo": "http://💩",
    "path-contains-pile-of-poo": "http://example.com/foo/💩",
    "query-contains-pile-of-poo": "http://example.com/foo?💩",
    "fragment-contains-pile-of-poo": "http://example.com/foo#💩",
    "host-192.0x00A80001": "http://192.0x00A80001",
    "userinfo-username-contains-percent-encoded": "http://%25DOMAIN:foobar@foodomain.com",
    "userinfo-empty": "http://@www.example.com",
    "userinfo-user-empty": "http://:b@www.example.com",
    "userinfo-password-empty": "http://a:@www.example.com",
    "host-exotic-whitespace": "http://GOO\u200b\u2060\ufeffgoo.com",
    "host-exotic-dot": "http://www.foo\u3002bar.com",
    "host-fullwidth": "http://\uff27\uff4f.com",
    "host-idn-unicode-han": "http://\u4f60\u597d\u4f60\u597d",
    "host-IP-address-broken": "http://192.168.0.257/",
}
valid = valid_absolute.copy()

valid_relative = {
    "scheme-schemeless-relative": "//foo/bar",
    "path-slash-only-relative": "/",
    "path-simple-relative": "/a/b/c",
    "path-percent-encoded-slash-relative": "/a%2fc",
    "path-percent-encoded-slash-plus-slashes-relative": "/a/%2f/c",
    "query-empty-no-path-relative": "?",
    "fragment-empty-hash-only-no-path-relative": "#",
    "fragment-slash-relative": "#/",
    "fragment-semicolon-question-mark-relative": "#;?",
    "fragment-non-ascii-relative": "#\u03B2",
}
valid.update(valid_relative)
# Relative URLs are valid in general but invalid where an absolute URL is
# required, hence they are added to ``invalid_absolute``.
invalid_absolute.update(valid_relative)

# NOTE(review): "path-contains-pile-of-poo" is also a key of ``valid_absolute``;
# the update() below therefore replaces that entry in ``valid`` (it survives
# only in ``valid_absolute``).  Confirm whether one of the keys should be
# renamed so both URLs are exercised.
valid_relative_colon_dot = {
    "scheme-none-relative": "foo.com",
    "path-colon-relative": ":",
    "path-leading-colon-letter-relative": ":a",
    "path-leading-colon-chars-relative": ":foo.com",
    "path-leading-colon-slash-relative": ":/",
    "path-leading-colon-hash-relative": ":#",
    "path-leading-colon-number-relative": ":23",
    "path-slash-colon-number-relative": "/:23",
    "path-leading-colon-colon-relative": "::",
    "path-colon-colon-number-relative": "::23",
    "path-starts-with-pile-of-poo": "💩http://foo",
    "path-contains-pile-of-poo": "http💩//:foo",
    "path-slash-pile-of-poo": "/💩",
}
valid.update(valid_relative_colon_dot)

invalid_file = {
    "scheme-file-backslash": "file:c:\\foo\\bar.html",
    "scheme-file-single-slash-c-bar": "file:/C|/foo/bar",
    "scheme-file-slash-slash-abc-bar": "file://abc|/foo/bar",
    "scheme-file-triple-slash-c-bar": "file:///C|/foo/bar",
}
invalid.update(invalid_file)

valid_file = {
    "scheme-file-uppercase": "File://foo/bar.html",
    "scheme-file-slash-slash-c-bar": "file://C|/foo/bar",
    "scheme-file-host-included": "file://server/foo/bar",
    "scheme-file-host-empty": "file:///foo/bar.txt",
    "scheme-file-scheme-only": "file:",
    "scheme-file-slash-only": "file:/",
    "scheme-file-slash-slash-only": "file://",
    "scheme-file-slash-slash-slash-only": "file:///",
    "scheme-file-no-slash": "file:test",
}
valid.update(valid_file)
valid_absolute.update(valid_file)

warnings = {
    "scheme-data-contains-fragment": "data:text/html,test#test",
}

# Each entry is "<element> <attribute>", split on whitespace by the writers.
element_attribute_pairs = [
    "a href",
    # "a ping", space-separated list of URLs; tested elsewhere
    "area href",
    # "area ping", space-separated list of URLs; tested elsewhere
    "audio src",
    "base href",
    "blockquote cite",
    "button formaction",
    "del cite",
    "embed src",
    "form action",
    "iframe src",
    "img src",  # srcset is tested elsewhere
    "input formaction",  # type=submit, type=image
    "input src",  # type=image
    "input value",  # type=url
    "ins cite",
    "link href",
    "object data",
    "q cite",
    "script src",
    "source src",
    "track src",
    "video poster",
    "video src",
]

template = "<!DOCTYPE html>\n<meta charset=utf-8>\n"


def _write_file(relative_path, *chunks):
    """Write the text *chunks*, in order, to ``relative_path`` below ``ccdir``.

    The file is always encoded as UTF-8: ``template`` declares
    ``<meta charset=utf-8>`` and several test URLs are non-ASCII, so relying
    on the locale's default encoding could corrupt the output or raise
    ``UnicodeEncodeError``.  The ``with`` block closes the file even when a
    write fails.
    """
    with open(os.path.join(ccdir, relative_path), 'w', encoding='utf-8') as f:
        f.writelines(chunks)


def _element_pairs():
    """Return an iterator of ``[element, attribute]`` lists from ``element_attribute_pairs``."""
    return (pair.split() for pair in element_attribute_pairs)


def _variants(el, attr):
    """Return the ``(slot, markup_format)`` variants for an element/attribute.

    ``slot`` is the path fragment below ``html/elements/`` that identifies
    the test target (``<el>/<attr>``, or ``input/type-...`` for ``input``).
    ``markup_format`` is a ``%``-format string with the named fields ``el``,
    ``attr`` and ``url``; render it with ``_render``.  ``input formaction``
    yields two variants (``type=submit`` and ``type=image``), every other
    pair yields one.
    """
    slot = "%s/%s" % (el, attr)
    if el == "area":
        return [(slot, '<map name=foo><%(el)s %(attr)s="%(url)s" alt></map>')]
    if el in ("base", "embed"):
        # Void elements: no end tag.
        return [(slot, '<%(el)s %(attr)s="%(url)s">')]
    if el == "img":
        return [(slot, '<%(el)s %(attr)s="%(url)s" alt>')]
    if el == "input":
        if attr == "src":
            return [("input/type-image-src",
                     '<%(el)s type=image alt="foo" %(attr)s="%(url)s">')]
        if attr == "formaction":
            return [
                ("input/type-submit-formaction",
                 '<%(el)s type=submit %(attr)s="%(url)s">'),
                ("input/type-image-formaction",
                 '<%(el)s type=image alt="foo" %(attr)s="%(url)s">'),
            ]
        if attr == "value":
            return [("input/type-url-value",
                     '<%(el)s type=url %(attr)s="%(url)s">')]
        # Any other input attribute falls through to the generic form below.
    if el == "link":
        return [(slot, '<%(el)s %(attr)s="%(url)s" rel=help>')]
    if el in ("source", "track"):
        return [(slot, '<video><%(el)s %(attr)s="%(url)s"></video>')]
    return [(slot, '<%(el)s %(attr)s="%(url)s"></%(el)s>')]


def _render(markup_format, el, attr, url):
    """Fill a ``_variants`` markup format with the given element, attribute and URL."""
    return markup_format % {"el": el, "attr": attr, "url": url}


def write_novalid_files():
    """Write one ``*-novalid.html`` file per invalid URL and test target.

    Covers every pair in ``element_attribute_pairs`` (URLs from ``invalid``),
    microdata ``itemid`` (``invalid``), and microdata ``itemtype`` plus
    ``<input type=url value>`` (``invalid_absolute``).
    """
    for el, attr in _element_pairs():
        # Only the input/value target uses a label other than the attribute name.
        label = "value attribute" if (el == "input" and attr == "value") else attr
        variants = _variants(el, attr)
        for desc, url in invalid.items():
            head = template + '<title>invalid %s: %s</title>\n' % (label, desc)
            for slot, markup_format in variants:
                _write_file(
                    "html/elements/%s/%s-novalid.html" % (slot, desc),
                    head,
                    _render(markup_format, el, attr, url) + '\n',
                )
    for desc, url in invalid.items():
        _write_file(
            "html/microdata/itemid/%s-novalid.html" % desc,
            template + '<title>invalid itemid: %s</title>\n' % desc,
            '<div itemid="%s" itemtype="http://foo" itemscope></div>\n' % url,
        )
    for desc, url in invalid_absolute.items():
        _write_file(
            "html/microdata/itemtype/%s-novalid.html" % desc,
            template + '<title>invalid itemtype: %s</title>\n' % desc,
            '<div itemtype="%s" itemscope></div>\n' % url,
        )
        # Rewrites the files produced for "input value" above and adds the
        # cases that exist only in ``invalid_absolute``.
        _write_file(
            "html/elements/input/type-url-value/%s-novalid.html" % desc,
            template + '<title>invalid value attribute: %s</title>\n' % desc,
            '<input type=url value="%s">\n' % url,
        )


def write_haswarn_files():
    """Write one ``*-haswarn.html`` file per URL in ``warnings`` and test target.

    Covers every pair in ``element_attribute_pairs`` plus microdata
    ``itemtype`` and ``itemid``.
    """
    for el, attr in _element_pairs():
        variants = _variants(el, attr)
        for desc, url in warnings.items():
            # The title names the attribute for every element.  (The generic
            # fallback used to interpolate the URL here, unlike all the
            # other targets.)
            head = template + '<title>%s warning: %s</title>\n' % (attr, desc)
            for slot, markup_format in variants:
                _write_file(
                    "html/elements/%s/%s-haswarn.html" % (slot, desc),
                    head,
                    _render(markup_format, el, attr, url) + '\n',
                )
    for desc, url in warnings.items():
        # These go directly into html/microdata/ with an "itemtype-" /
        # "itemid-" file-name prefix, unlike the -novalid files, which use
        # per-attribute subdirectories.
        _write_file(
            "html/microdata/itemtype-%s-haswarn.html" % desc,
            template + '<title>warning: %s</title>\n' % desc,
            '<div itemtype="%s" itemscope></div>\n' % url,
        )
        _write_file(
            "html/microdata/itemid-%s-haswarn.html" % desc,
            template + '<title>warning: %s</title>\n' % desc,
            '<div itemid="%s" itemtype="http://foo" itemscope></div>\n' % url,
        )


def _write_listing(relative_path, title, line_format, cases):
    """Write one file holding *title* and one ``line_format % (url, desc)`` line per case."""
    _write_file(
        relative_path,
        template + '<title>%s</title>\n' % title,
        *[line_format % (url, desc) for desc, url in cases.items()]
    )


def write_isvalid_files():
    """Write the ``*-isvalid.html`` files.

    For each pair in ``element_attribute_pairs`` (except ``base`` and
    ``input value``, which are handled separately) a single file lists every
    URL in ``valid``, one element per line with the case name in a trailing
    comment.  ``base href`` gets one file per URL; ``meta`` refresh and
    microdata ``itemid`` use ``valid``; microdata ``itemtype`` and
    ``<input type=url value>`` use ``valid_absolute``.
    """
    for el, attr in _element_pairs():
        if el in ("base", "html") or (el == "input" and attr == "value"):
            continue
        head = template + '<title>valid %s</title>\n' % attr
        for slot, markup_format in _variants(el, attr):
            chunks = [head]
            for desc, url in valid.items():
                chunks.append(
                    _render(markup_format, el, attr, url)
                    + ('<!-- %s -->\n' % desc)
                )
            if el == "a" and attr == "href":
                chunks.append('<a href=""></a><!-- empty-href -->\n')
            _write_file("html/elements/%s-isvalid.html" % slot, *chunks)
    # One file per URL for <base href>.
    for desc, url in valid.items():
        _write_file(
            "html/elements/base/href/%s-isvalid.html" % desc,
            template + '<title>valid href: %s</title>\n' % desc,
            '<base href="%s">\n' % url,
        )
    _write_listing(
        "html/elements/meta/refresh-isvalid.html",
        'valid meta refresh',
        '<meta http-equiv=refresh content="0; URL=%s"><!-- %s -->\n',
        valid,
    )
    _write_listing(
        "html/microdata/itemid-isvalid.html",
        'valid itemid',
        '<div itemid="%s" itemtype="http://foo" itemscope></div><!-- %s -->\n',
        valid,
    )
    _write_listing(
        "html/microdata/itemtype-isvalid.html",
        'valid itemtype',
        '<div itemtype="%s" itemscope></div><!-- %s -->\n',
        valid_absolute,
    )
    _write_listing(
        "html/elements/input/type-url-value-isvalid.html",
        'valid value attribute',
        '<input type=url value="%s"><!-- %s -->\n',
        valid_absolute,
    )


write_novalid_files()
write_haswarn_files()
write_isvalid_files()
# vim: ts=4:sw=4