protocol-server.py (14138B)
# protocol-server
#
# a reference implementation of the Web Annotation Protocol
#
# Developed by Benjamin Young (@bigbluehat) and Shane McCarron (@halindrome).
# Sponsored by Spec-Ops (https://spec-ops.io)

import os
import sys

here = os.path.abspath(os.path.dirname(__file__))
repo_root = os.path.abspath(os.path.join(here, os.pardir, os.pardir))

# Make the bundled tool packages importable before importing wptserve below.
sys.path.insert(0, os.path.join(repo_root, "tools"))
sys.path.insert(0, os.path.join(repo_root, "tools", "six"))
sys.path.insert(0, os.path.join(repo_root, "tools", "html5lib"))
sys.path.insert(0, os.path.join(repo_root, "tools", "wptserve"))
sys.path.insert(0, os.path.join(repo_root, "tools", "pywebsocket", "src"))
sys.path.insert(0, os.path.join(repo_root, "tools", "py"))
sys.path.insert(0, os.path.join(repo_root, "tools", "pytest"))
sys.path.insert(0, os.path.join(repo_root, "tools", "webdriver"))

import hashlib
import json
import uuid

try:
    import urlparse
except ImportError:  # Python 3 moved this module to urllib.parse
    from urllib import parse as urlparse

import wptserve

myprotocol = 'http'
myhost = 'localhost'
port = 8080
doc_root = os.path.join(repo_root, "annotation-protocol", "files", "")
container_path = doc_root + 'annotations/'

URIroot = myprotocol + '://' + myhost + ':{0}'.format(port)

# number of annotations served per page
per_page = 10

MEDIA_TYPE = 'application/ld+json; profile="http://www.w3.org/ns/anno.jsonld"'
# Prefer header variants
PREFER_MINIMAL_CONTAINER = "http://www.w3.org/ns/ldp#PreferMinimalContainer"
PREFER_CONTAINED_IRIS = "http://www.w3.org/ns/oa#PreferContainedIRIs"
PREFER_CONTAINED_DESCRIPTIONS = \
    "http://www.w3.org/ns/oa#PreferContainedDescriptions"


# dictionary for annotations that we create on the fly;
# maps "temp-<uuid>" keys to the annotation serialized as a JSON string
tempAnnotations = {}


def extract_preference(prefer):
    """Extract the parameters from a Prefer header's value.

    The value is split on ';' into ``key=value`` parameters. Each value has
    surrounding double quotes removed and is split on whitespace, so every
    key maps to a list of strings. Parameters without an '=' or without a
    key are ignored.

    Example: the header value
    ``return=representation;include="http://www.w3.org/ns/ldp#PreferMinimalContainer http://www.w3.org/ns/oa#PreferContainedIRIs"``
    yields ``"return"`` mapped to ``["representation"]`` and ``"include"``
    mapped to the list of the two IRIs.

    :param prefer: the raw header value, or None/empty when absent
    :returns: dict mapping parameter names to lists of strings
    """
    obj = {}
    if prefer:
        for param in prefer.split(';'):
            # partition() keeps any further '=' characters inside the value
            key, sep, value = param.partition('=')
            key = key.strip()
            if not sep or not key:
                continue
            obj[key] = value.strip().strip('"').split()
    return obj


def dump_json(obj):
    """Serialize ``obj`` as indented JSON with sorted keys."""
    return json.dumps(obj, indent=4, sort_keys=True)


def add_cors_headers(resp):
    """Add the headers listed in annotations/cors.headers to ``resp``."""
    headers_file = doc_root + 'annotations/cors.headers'
    resp.headers.update(load_headers_from_file(headers_file))


def load_headers_from_file(path):
    """Read ``Name: value`` lines from ``path``.

    Lines that contain no ':' (including blank lines) or have an empty name
    are skipped so that every returned item is a proper pair.

    :param path: path of the headers file
    :returns: list of ``(name, value)`` tuples, whitespace-stripped
    """
    headers = []
    with open(path, 'r') as header_file:
        for line in header_file.read().splitlines():
            name, sep, value = line.partition(':')
            name = name.strip()
            if sep and name:
                headers.append((name, value.strip()))
    return headers


def annotation_files():
    """List annotation names: .jsonld/.json files in the container
    directory followed by the keys of the in-memory annotations."""
    files = [name for name in os.listdir(container_path)
             if name.endswith(('.jsonld', '.json'))]
    # list() takes a snapshot of the keys before extending
    files.extend(list(tempAnnotations.keys()))
    return files


def annotation_iris(skip=0):
    """Return the IRIs of up to ``per_page`` annotations after skipping the
    first ``skip`` entries of ``annotation_files()``."""
    names = annotation_files()[skip:][:per_page]
    return [URIroot + '/annotations/' + name for name in names]


def annotations(skip=0):
    """Return the parsed JSON of up to ``per_page`` annotations after
    skipping the first ``skip`` entries of ``annotation_files()``.

    In-memory annotations are decoded from their stored JSON string; all
    other names are read from the container directory.
    """
    loaded = []
    for name in annotation_files()[skip:][:per_page]:
        stored = tempAnnotations.get(name)
        if stored is not None:
            loaded.append(json.loads(stored))
        else:
            with open(container_path + name, 'r') as annotation:
                loaded.append(json.load(annotation))
    return loaded


def total_annotations():
    """Return the number of annotations currently available."""
    return len(annotation_files())


def _last_page_number(total):
    """Return the zero-based number of the last page for ``total`` items
    (0 when there are no items)."""
    if total <= 0:
        return 0
    return (total - 1) // per_page


@wptserve.handlers.handler
def collection_get(request, response):
    """Annotation Collection handler.

    NOTE: This also routes paging requests (any request with a ``page``
    query parameter is answered by ``page()``).

    :returns: the collection serialized as a JSON string
    """

    # Paginate if requested
    qs = urlparse.parse_qs(request.url_parts.query)
    if 'page' in qs:
        return page(request, response)

    total = total_annotations()

    # stub collection
    collection_json = {
        "@context": [
            "http://www.w3.org/ns/anno.jsonld",
            "http://www.w3.org/ns/ldp.jsonld"
        ],
        "id": URIroot + "/annotations/",
        "type": ["BasicContainer", "AnnotationCollection"],
        "total": total,
        "label": "A Container for Web Annotations",
        "first": URIroot + "/annotations/?page=0",
        "last": URIroot + "/annotations/?page={0}".format(
            _last_page_number(total))
    }

    # Default Container format SHOULD be PreferContainedDescriptions
    # `preference` is the list of include IRIs, or None when not supplied
    preference = extract_preference(request.headers.get('Prefer')).get('include')

    iris_requested = qs.get('iris', [None])[0] == '1'
    return_iris = iris_requested or bool(
        preference and PREFER_CONTAINED_IRIS in preference)

    # only PreferContainedIRIs has unique content
    if return_iris:
        collection_json['id'] += '?iris=1'
        collection_json['first'] += '&iris=1'
        collection_json['last'] += '&iris=1'

    # embed the first page unless a minimal container was asked for
    if preference and PREFER_MINIMAL_CONTAINER not in preference:
        if return_iris:
            collection_json['first'] = annotation_iris()
        else:
            collection_json['first'] = annotations()

    collection_headers_file = doc_root + 'annotations/collection.headers'
    add_cors_headers(response)
    response.headers.update(load_headers_from_file(collection_headers_file))
    # this one's unique per request
    response.headers.set('Content-Location', collection_json['id'])
    return dump_json(collection_json)


@wptserve.handlers.handler
def collection_head(request, response):
    """HEAD on the container: 200 if the requested path is a directory
    under doc_root, else 404; collection headers are sent, no body."""
    requested_dir = doc_root + request.request_path
    if os.path.isdir(requested_dir):
        response.status = 200
    else:
        response.status = 404

    add_cors_headers(response)
    headers_file = doc_root + 'annotations/collection.headers'
    for header, value in load_headers_from_file(headers_file):
        response.headers.append(header, value)

    response.content = None


@wptserve.handlers.handler
def collection_options(request, response):
    """OPTIONS on the container: 200 if the requested path is a directory
    under doc_root, else 404; sends the collection OPTIONS headers."""
    requested_dir = doc_root + request.request_path
    if os.path.isdir(requested_dir):
        response.status = 200
    else:
        response.status = 404

    add_cors_headers(response)
    headers_file = doc_root + 'annotations/collection.options.headers'
    for header, value in load_headers_from_file(headers_file):
        response.headers.append(header, value)


def page(request, response):
    """Build one AnnotationPage for the ``page`` query parameter.

    Page numbers are zero-based; page N holds the items starting at offset
    ``per_page * N``. With ``iris=1`` the items are IRIs, otherwise full
    annotation descriptions. A missing, non-numeric or negative page number
    sets status 400.

    :returns: the page serialized as a JSON string, or 'Bad Request'
    """
    add_cors_headers(response)
    headers_file = doc_root + 'annotations/collection.headers'
    response.headers.update(load_headers_from_file(headers_file))

    qs = urlparse.parse_qs(request.url_parts.query)
    try:
        page_num = int(qs.get('page')[0])
    except (TypeError, ValueError, IndexError):
        page_num = -1
    if page_num < 0:
        response.status = 400
        return 'Bad Request'

    total = total_annotations()
    skip = per_page * page_num

    page_json = {
        "@context": "http://www.w3.org/ns/anno.jsonld",
        "id": URIroot + "/annotations/" + '?page={0}'.format(page_num),
        "type": "AnnotationPage",
        "partOf": {
            "id": URIroot + "/annotations/",
            "total": total
        },
        "items": [
        ]
    }

    if page_num != 0:
        page_json['prev'] = URIroot + '/annotations/?page={0}'.format(page_num-1)

    # a next page exists only when items remain beyond the end of this one
    if total > skip + per_page:
        page_json['next'] = URIroot + '/annotations/?page={0}'.format(page_num+1)

    if qs.get('iris', [None])[0] == '1':
        page_json['items'] = annotation_iris(skip)
        page_json['id'] += '&iris=1'
        if 'prev' in page_json:
            page_json['prev'] += '&iris=1'
        if 'next' in page_json:
            page_json['next'] += '&iris=1'
    else:
        page_json['items'] = annotations(skip)

    return dump_json(page_json)


@wptserve.handlers.handler
def annotation_get(request, response):
    """Individual Annotations.

    Serves an in-memory ("temp-") annotation if one is stored under the
    requested name, otherwise the file of that name under doc_root;
    responds 404 when neither exists.
    """
    requested_file = doc_root + request.request_path[1:]
    base = os.path.basename(requested_file)

    headers_file = doc_root + 'annotations/annotation.headers'

    stored = tempAnnotations.get(base)
    if base.startswith("temp-") and stored:
        response.headers.update(load_headers_from_file(headers_file))
        response.headers.set('Etag',
                             hashlib.sha1(base.encode('utf-8')).hexdigest())
        # the stored value is already serialized JSON; send it unchanged
        response.content = stored
        response.status = 200
    elif os.path.isfile(requested_file):
        response.headers.update(load_headers_from_file(headers_file))
        # Calculate ETag using Apache httpd's default method (more or less)
        # http://www.askapache.info//2.3/mod/core.html#fileetag
        statinfo = os.stat(requested_file)
        etag = "{0}{1}{2}".format(statinfo.st_ino, statinfo.st_mtime,
                                  statinfo.st_size)
        # obfuscate so we don't leak info; hexdigest for string compatibility
        response.headers.set('Etag',
                             hashlib.sha1(etag.encode('utf-8')).hexdigest())

        with open(requested_file, 'r') as data_file:
            data = data_file.read()
        response.content = data
        response.status = 200
    else:
        response.content = 'Not Found'
        response.status = 404

    add_cors_headers(response)


def _annotation_probe(request, response):
    """Shared HEAD/OPTIONS logic for a single annotation: 200 plus the
    annotation OPTIONS headers when it exists, 404 otherwise."""
    requested_file = doc_root + request.request_path[1:]
    base = os.path.basename(requested_file)

    headers_file = doc_root + 'annotations/annotation.options.headers'

    is_temp = base.startswith("temp-") and bool(tempAnnotations.get(base))
    if is_temp or os.path.isfile(requested_file):
        response.status = 200
        response.headers.update(load_headers_from_file(headers_file))
    else:
        response.status = 404

    add_cors_headers(response)


@wptserve.handlers.handler
def annotation_head(request, response):
    """HEAD on a single annotation."""
    # NOTE(review): this sends annotation.options.headers, same as OPTIONS;
    # confirm whether annotation.headers was intended for HEAD.
    _annotation_probe(request, response)


@wptserve.handlers.handler
def annotation_options(request, response):
    """OPTIONS on a single annotation."""
    _annotation_probe(request, response)


def create_annotation(body):
    """Parse ``body`` as a JSON annotation and assign it a new server IRI.

    Any incoming ``id`` is preserved as ``canonical``; ``id`` is replaced by
    a fresh ``<URIroot>/annotations/temp-<uuid4>`` IRI.

    :param body: the request body containing a JSON object
    :returns: the annotation as a dict
    :raises ValueError: if the body is not valid JSON or not a JSON object
    """
    # TODO: verify media type is JSON of some kind (at least)
    incoming = json.loads(body)
    if not isinstance(incoming, dict):
        raise ValueError("annotation must be a JSON object")
    new_name = "temp-" + str(uuid.uuid4())
    if 'id' in incoming:
        incoming['canonical'] = incoming['id']
    incoming['id'] = URIroot + '/annotations/' + new_name

    return incoming


def _store_annotation(request, response, label, success_status):
    """Shared POST/PUT logic: create an annotation from the request body,
    keep it in ``tempAnnotations`` and echo it back with a Location header.
    A body that cannot be parsed as a JSON object yields status 400."""
    try:
        incoming = create_annotation(request.body)
    except (TypeError, ValueError):
        add_cors_headers(response)
        response.content = 'Bad Request'
        response.status = 400
        return

    newID = incoming['id']
    key = os.path.basename(newID)

    print(label + ":" + newID)
    print(label + ":" + key)

    serialized = dump_json(incoming)
    tempAnnotations[key] = serialized

    headers_file = doc_root + 'annotations/annotation.headers'
    response.headers.update(load_headers_from_file(headers_file))
    response.headers.append('Location', newID)
    add_cors_headers(response)
    response.content = serialized
    response.status = success_status


@wptserve.handlers.handler
def annotation_post(request, response):
    """Create a new annotation in the collection; responds 201."""
    _store_annotation(request, response, "post", 201)


@wptserve.handlers.handler
def annotation_put(request, response):
    """Store the submitted annotation; responds 200."""
    # NOTE(review): the annotation is stored under a freshly generated id,
    # not under the requested path - confirm whether PUT should replace the
    # annotation at request.request_path instead.
    _store_annotation(request, response, "put", 200)


@wptserve.handlers.handler
def annotation_delete(request, response):
    """Delete an annotation: an in-memory one if stored under the requested
    name, otherwise the file under doc_root. Responds 204 on success and
    404 when nothing could be removed."""
    base = os.path.basename(request.request_path[1:])
    requested_file = doc_root + request.request_path[1:]

    add_cors_headers(response)

    headers_file = doc_root + 'annotations/annotation.headers'

    try:
        if base in tempAnnotations:
            del tempAnnotations[base]
        else:
            os.remove(requested_file)
    except (KeyError, OSError):
        response.status = 404
        response.content = 'Not Found'
    else:
        response.headers.update(load_headers_from_file(headers_file))
        response.status = 204
        response.content = ''


if __name__ == '__main__':
    print('http://' + myhost + ':{0}/'.format(port))
    print('container URI is http://' + myhost + ':{0}'.format(port) + "/annotations/")
    print('example annotation URI is http://' + myhost + ':{0}'.format(port) + "/annotations/anno1.json")

    routes = [
        ("GET", "", wptserve.handlers.file_handler),
        ("GET", "index.html", wptserve.handlers.file_handler),

        # container/collection responses
        ("HEAD", "annotations/", collection_head),
        ("OPTIONS", "annotations/", collection_options),
        ("GET", "annotations/", collection_get),

        # create annotations in the collection
        ("POST", "annotations/", annotation_post),

        # single annotation responses
        ("HEAD", "annotations/*", annotation_head),
        ("OPTIONS", "annotations/*", annotation_options),
        ("GET", "annotations/*", annotation_get),
        ("PUT", "annotations/*", annotation_put),
        ("DELETE", "annotations/*", annotation_delete)
    ]

    httpd = wptserve.server.WebTestHttpd(host=myhost, bind_hostname=myhost, port=port, doc_root=doc_root,
                                         routes=routes)
    httpd.start(block=True)