tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

protocol-server.py (14138B)


      1 # protocol-server
      2 #
      3 # a reference implementation of the Web Annotation Protocol
      4 #
# Developed by Benjamin Young (@bigbluehat) and Shane McCarron (@halindrome).
      6 # Sponsored by Spec-Ops (https://spec-ops.io)
      7 
      8 import os
      9 import sys
     10 
     11 here = os.path.abspath(os.path.dirname(__file__))
     12 repo_root = os.path.abspath(os.path.join(here, os.pardir, os.pardir))
     13 
     14 sys.path.insert(0, os.path.join(repo_root, "tools"))
     15 sys.path.insert(0, os.path.join(repo_root, "tools", "six"))
     16 sys.path.insert(0, os.path.join(repo_root, "tools", "html5lib"))
     17 sys.path.insert(0, os.path.join(repo_root, "tools", "wptserve"))
     18 sys.path.insert(0, os.path.join(repo_root, "tools", "pywebsocket", "src"))
     19 sys.path.insert(0, os.path.join(repo_root, "tools", "py"))
     20 sys.path.insert(0, os.path.join(repo_root, "tools", "pytest"))
     21 sys.path.insert(0, os.path.join(repo_root, "tools", "webdriver"))
     22 
     23 import hashlib
     24 import json
     25 import urlparse
     26 import uuid
     27 
     28 import wptserve
     29 
     30 myprotocol = 'http'
     31 myhost = 'localhost'
     32 port = 8080
     33 doc_root = os.path.join(repo_root, "annotation-protocol", "files", "")
     34 container_path = doc_root + 'annotations/'
     35 
     36 URIroot = myprotocol + '://' + myhost + ':{0}'.format(port)
     37 
     38 per_page = 10
     39 
     40 MEDIA_TYPE = 'application/ld+json; profile="http://www.w3.org/ns/anno.jsonld"'
     41 # Prefer header variants
     42 PREFER_MINIMAL_CONTAINER = "http://www.w3.org/ns/ldp#PreferMinimalContainer"
     43 PREFER_CONTAINED_IRIS = "http://www.w3.org/ns/oa#PreferContainedIRIs"
     44 PREFER_CONTAINED_DESCRIPTIONS = \
     45        "http://www.w3.org/ns/oa#PreferContainedDescriptions"
     46 
     47 
     48 # dictionary for annotations that we create on the fly
     49 tempAnnotations = {}
     50 
     51 def extract_preference(prefer):
     52    """Extracts the parameters from a Prefer header's value
     53    >>> extract_preferences('return=representation;include="http://www.w3.org/ns/ldp#PreferMinimalContainer http://www.w3.org/ns/oa#PreferContainedIRIs"')
     54    {"return": "representation", "include": ["http://www.w3.org/ns/ldp#PreferMinimalContainer", "http://www.w3.org/ns/oa#PreferContainedIRIs"]}
     55    """
     56    obj = {}
     57    if prefer:
     58        params = prefer.split(';')
     59        for p in params:
     60            key, value = p.split('=')
     61            obj[key] = value.strip('"').split(' ')
     62    return obj
     63 
     64 
     65 def dump_json(obj):
     66    return json.dumps(obj, indent=4, sort_keys=True)
     67 
     68 def add_cors_headers(resp):
     69    headers_file = doc_root + 'annotations/cors.headers'
     70    resp.headers.update(load_headers_from_file(headers_file))
     71 
     72 def load_headers_from_file(path):
     73    headers = []
     74    with open(path, 'r') as header_file:
     75        data = header_file.read()
     76        headers = [tuple(item.strip() for item in line.split(":", 1))
     77                   for line in data.splitlines() if line]
     78    return headers
     79 
     80 def annotation_files():
     81    files = []
     82    for file in os.listdir(container_path):
     83        if file.endswith('.jsonld') or file.endswith('.json'):
     84            files.append(file)
     85    for item in list(tempAnnotations.keys()):
     86        files.append(item)
     87    return files
     88 
     89 
     90 def annotation_iris(skip=0):
     91    iris = []
     92    for filename in annotation_files():
     93        iris.append(URIroot + '/annotations/' + filename)
     94    return iris[skip:][:per_page]
     95 
     96 
     97 def annotations(skip=0):
     98    annotations = []
     99    files = annotation_files()
    100    for file in files:
    101        if file.startswith("temp-"):
    102            annotations.append(json.loads(tempAnnotations[file]))
    103        else:
    104            with open(container_path + file, 'r') as annotation:
    105                annotations.append(json.load(annotation))
    106    return annotations
    107 
    108 
    109 def total_annotations():
    110    return len(annotation_files())
    111 
    112 
    113 @wptserve.handlers.handler
    114 def collection_get(request, response):
    115    """Annotation Collection handler. NOTE: This also routes paging requests"""
    116 
    117    # Paginate if requested
    118    qs = urlparse.parse_qs(request.url_parts.query)
    119    if 'page' in qs:
    120        return page(request, response)
    121 
    122    # stub collection
    123    collection_json = {
    124      "@context": [
    125        "http://www.w3.org/ns/anno.jsonld",
    126        "http://www.w3.org/ns/ldp.jsonld"
    127      ],
    128      "id": URIroot + "/annotations/",
    129      "type": ["BasicContainer", "AnnotationCollection"],
    130      "total": 0,
    131      "label": "A Container for Web Annotations",
    132      "first": URIroot + "/annotations/?page=0"
    133    }
    134 
    135    last_page = (total_annotations() / per_page) - 1
    136    collection_json['last'] = URIroot + "/annotations/?page={0}".format(last_page)
    137 
    138    # Default Container format SHOULD be PreferContainedDescriptions
    139    preference = extract_preference(request.headers.get('Prefer'))
    140    if 'include' in preference:
    141        preference = preference['include']
    142    else:
    143        preference = None
    144 
    145    collection_json['total'] = total_annotations()
    146    # TODO: calculate last page and add it's page number
    147 
    148    if (qs.get('iris') and qs.get('iris')[0] is '1') \
    149            or (preference and PREFER_CONTAINED_IRIS in preference):
    150        return_iris = True
    151    else:
    152        return_iris = False
    153 
    154    # only PreferContainedIRIs has unqiue content
    155    if return_iris:
    156        collection_json['id'] += '?iris=1'
    157        collection_json['first'] += '&iris=1'
    158        collection_json['last'] += '&iris=1'
    159 
    160    if preference and PREFER_MINIMAL_CONTAINER not in preference:
    161        if return_iris:
    162            collection_json['first'] = annotation_iris()
    163        else:
    164            collection_json['first'] = annotations()
    165 
    166    collection_headers_file = doc_root + 'annotations/collection.headers'
    167    add_cors_headers(response)
    168    response.headers.update(load_headers_from_file(collection_headers_file))
    169    # this one's unique per request
    170    response.headers.set('Content-Location', collection_json['id'])
    171    return dump_json(collection_json)
    172 
    173 
    174 @wptserve.handlers.handler
    175 def collection_head(request, response):
    176    container_path = doc_root + request.request_path
    177    if os.path.isdir(container_path):
    178        response.status = 200
    179    else:
    180        response.status = 404
    181 
    182    add_cors_headers(response)
    183    headers_file = doc_root + 'annotations/collection.headers'
    184    for header, value in load_headers_from_file(headers_file):
    185        response.headers.append(header, value)
    186 
    187    response.content = None
    188 
    189 
    190 @wptserve.handlers.handler
    191 def collection_options(request, response):
    192    container_path = doc_root + request.request_path
    193    if os.path.isdir(container_path):
    194        response.status = 200
    195    else:
    196        response.status = 404
    197 
    198    add_cors_headers(response)
    199    headers_file = doc_root + 'annotations/collection.options.headers'
    200    for header, value in load_headers_from_file(headers_file):
    201        response.headers.append(header, value)
    202 
    203 def page(request, response):
    204    page_json = {
    205      "@context": "http://www.w3.org/ns/anno.jsonld",
    206      "id": URIroot + "/annotations/",
    207      "type": "AnnotationPage",
    208      "partOf": {
    209        "id": URIroot + "/annotations/",
    210        "total": 42023
    211      },
    212      "next": URIroot + "/annotations/",
    213      "items": [
    214      ]
    215    }
    216 
    217    add_cors_headers(response)
    218    headers_file = doc_root + 'annotations/collection.headers'
    219    response.headers.update(load_headers_from_file(headers_file))
    220 
    221    qs = urlparse.parse_qs(request.url_parts.query)
    222    page_num = int(qs.get('page')[0])
    223    page_json['id'] += '?page={0}'.format(page_num)
    224 
    225    total = total_annotations()
    226    so_far = (per_page * (page_num+1))
    227    remaining = total - so_far
    228 
    229    if page_num != 0:
    230        page_json['prev'] = URIroot + '/annotations/?page={0}'.format(page_num-1)
    231 
    232    page_json['partOf']['total'] = total
    233 
    234    if remaining > per_page:
    235        page_json['next'] += '?page={0}'.format(page_num+1)
    236    else:
    237        del page_json['next']
    238 
    239    if qs.get('iris') and qs.get('iris')[0] is '1':
    240        page_json['items'] = annotation_iris(so_far)
    241        page_json['id'] += '&iris=1'
    242        if 'prev' in page_json:
    243            page_json['prev'] += '&iris=1'
    244        if 'next' in page_json:
    245            page_json['next'] += '&iris=1'
    246    else:
    247        page_json['items'] = annotations(so_far)
    248 
    249    return dump_json(page_json)
    250 
    251 
    252 @wptserve.handlers.handler
    253 def annotation_get(request, response):
    254    """Individual Annotations"""
    255    requested_file = doc_root + request.request_path[1:]
    256    base = os.path.basename( requested_file )
    257 
    258    headers_file = doc_root + 'annotations/annotation.headers'
    259 
    260    if base.startswith("temp-") and tempAnnotations[base]:
    261        response.headers.update(load_headers_from_file(headers_file))
    262        response.headers.set('Etag', hashlib.sha1(base).hexdigest())
    263        data = dump_json(tempAnnotations[base])
    264        if data != "" :
    265            response.content = data
    266            response.status = 200
    267        else:
    268            response.content = ""
    269            response.status = 404
    270    elif os.path.isfile(requested_file):
    271        response.headers.update(load_headers_from_file(headers_file))
    272        # Calculate ETag using Apache httpd's default method (more or less)
    273        # http://www.askapache.info//2.3/mod/core.html#fileetag
    274        statinfo = os.stat(requested_file)
    275        etag = "{0}{1}{2}".format(statinfo.st_ino, statinfo.st_mtime,
    276                                  statinfo.st_size)
    277        # obfuscate so we don't leak info; hexdigest for string compatibility
    278        response.headers.set('Etag', hashlib.sha1(etag).hexdigest())
    279 
    280        with open(requested_file, 'r') as data_file:
    281            data = data_file.read()
    282        response.content = data
    283        response.status = 200
    284    else:
    285        response.content = 'Not Found'
    286        response.status = 404
    287 
    288    add_cors_headers(response)
    289 
    290 
    291 @wptserve.handlers.handler
    292 def annotation_head(request, response):
    293    requested_file = doc_root + request.request_path[1:]
    294    base = os.path.basename(requested_file)
    295 
    296    headers_file = doc_root + 'annotations/annotation.options.headers'
    297 
    298    if base.startswith("temp-") and tempAnnotations[base]:
    299        response.status = 200
    300        response.headers.update(load_headers_from_file(headers_file))
    301    elif os.path.isfile(requested_file):
    302        response.status = 200
    303        response.headers.update(load_headers_from_file(headers_file))
    304    else:
    305        response.status = 404
    306 
    307    add_cors_headers(response)
    308 
    309 @wptserve.handlers.handler
    310 def annotation_options(request, response):
    311    requested_file = doc_root + request.request_path[1:]
    312    base = os.path.basename(requested_file)
    313 
    314    headers_file = doc_root + 'annotations/annotation.options.headers'
    315 
    316    if base.startswith("temp-") and tempAnnotations[base]:
    317        response.status = 200
    318        response.headers.update(load_headers_from_file(headers_file))
    319    elif os.path.isfile(requested_file):
    320        response.status = 200
    321        response.headers.update(load_headers_from_file(headers_file))
    322    else:
    323        response.status = 404
    324 
    325    add_cors_headers(response)
    326 
    327 def create_annotation(body):
    328    # TODO: verify media type is JSON of some kind (at least)
    329    incoming = json.loads(body)
    330    id = "temp-"+str(uuid.uuid4())
    331    if 'id' in incoming:
    332        incoming['canonical'] = incoming['id']
    333    incoming['id'] = URIroot + '/annotations/' + id
    334 
    335    return incoming
    336 
    337 
    338 @wptserve.handlers.handler
    339 def annotation_post(request, response):
    340    incoming = create_annotation(request.body)
    341    newID = incoming['id']
    342    key = os.path.basename(newID)
    343 
    344    print("post:" + newID)
    345    print("post:" + key)
    346 
    347    tempAnnotations[key] = dump_json(incoming)
    348 
    349    headers_file = doc_root + 'annotations/annotation.headers'
    350    response.headers.update(load_headers_from_file(headers_file))
    351    response.headers.append('Location', newID)
    352    add_cors_headers(response)
    353    response.content = dump_json(incoming)
    354    response.status = 201
    355 
    356 @wptserve.handlers.handler
    357 def annotation_put(request, response):
    358    incoming = create_annotation(request.body)
    359 
    360    # remember it in our local cache too
    361    # tempAnnotations[request.request_path[1:]] = dump_jason(incoming)
    362    newID = incoming['id']
    363    key = os.path.basename(newID)
    364 
    365    print("put:" + newID)
    366    print("put:" + key)
    367 
    368    tempAnnotations[key] = dump_json(incoming)
    369 
    370    headers_file = doc_root + 'annotations/annotation.headers'
    371    response.headers.update(load_headers_from_file(headers_file))
    372    response.headers.append('Location', incoming['id'])
    373    add_cors_headers(response)
    374    response.content = dump_json(incoming)
    375    response.status = 200
    376 
    377 
    378 @wptserve.handlers.handler
    379 def annotation_delete(request, response):
    380    base = os.path.basename(request.request_path[1:])
    381    requested_file = doc_root + request.request_path[1:]
    382 
    383    add_cors_headers(response)
    384 
    385    headers_file = doc_root + 'annotations/annotation.headers'
    386 
    387    try:
    388        if base.startswith("temp-"):
    389            if tempAnnotations[base]:
    390                del tempAnnotations[base]
    391        else:
    392            os.remove(requested_file)
    393        response.headers.update(load_headers_from_file(headers_file))
    394        response.status = 204
    395        response.content = ''
    396    except OSError:
    397        response.status = 404
    398        response.content = 'Not Found'
    399 
    400 if __name__ == '__main__':
    401    print('http://' + myhost + ':{0}/'.format(port))
    402    print('container URI is http://' + myhost + ':{0}/'.format(port) + "/annotations/")
    403    print('example annotation URI is http://' + myhost + ':{0}/'.format(port) + "/annotations/anno1.json")
    404 
    405    routes = [
    406        ("GET", "", wptserve.handlers.file_handler),
    407        ("GET", "index.html", wptserve.handlers.file_handler),
    408 
    409        # container/collection responses
    410        ("HEAD", "annotations/", collection_head),
    411        ("OPTIONS", "annotations/", collection_options),
    412        ("GET", "annotations/", collection_get),
    413 
    414        # create annotations in the collection
    415        ("POST", "annotations/", annotation_post),
    416 
    417        # single annotation responses
    418        ("HEAD", "annotations/*", annotation_head),
    419        ("OPTIONS", "annotations/*", annotation_options),
    420        ("GET", "annotations/*", annotation_get),
    421        ("PUT", "annotations/*", annotation_put),
    422        ("DELETE", "annotations/*", annotation_delete)
    423    ]
    424 
    425    httpd = wptserve.server.WebTestHttpd(host=myhost, bind_hostname=myhost, port=port, doc_root=doc_root,
    426                                         routes=routes)
    427    httpd.start(block=True)