# vocab_tester.py
# Author: Rob Sanderson (azaroth42@gmail.com)
# License: Apache2
# Last Modified: 2016-09-02
"""Smoke tests for the Web Annotation JSON-LD context, frame and ontology.

Running this module fetches the context, an example annotation, the frame
and the ontology over the network, then runs six checks in order and
prints a message for each failure it detects.
"""

import contextlib
import json
import urllib

from rdflib import ConjunctiveGraph, URIRef
from pyld import jsonld
from pyld.jsonld import compact, expand, frame, from_rdf, to_rdf, JsonLdProcessor

try:
    # Python 3 location of urlopen.
    from urllib.request import urlopen
except ImportError:
    # Python 2 location of urlopen.
    from urllib import urlopen

# Stop code from looking up the contexts online for every operation
docCache = {}


def fetch(url):
    """Retrieve ``url`` and return its body.

    The handle is always closed, even if reading fails.  When the body is
    not already a ``str`` (bytes on Python 3) it is decoded as UTF-8, so
    the result is a native string on either Python version.
    """
    with contextlib.closing(urlopen(url)) as fh:
        data = fh.read()
    if not isinstance(data, str):
        data = data.decode("utf-8")
    return data


def load_document_and_cache(url, options=None):
    """Document loader for PyLD that fetches each URL at most once.

    ``options`` is ignored.  NOTE(review): it is accepted because newer
    PyLD releases appear to call loaders as ``loader(url, options)`` --
    confirm against the installed PyLD version.

    Returns a dict with the keys ``contextUrl``, ``documentUrl`` (both
    ``None``) and ``document`` (the fetched body).
    """
    if url in docCache:
        return docCache[url]

    doc = {
        'contextUrl': None,
        'documentUrl': None,
        'document': fetch(url),
    }
    docCache[url] = doc
    return doc


jsonld.set_document_loader(load_document_and_cache)


class Validator(object):
    """Helpers for moving annotations between JSON-LD and RDF.

    Several methods read the module-level ``context``, ``context_js`` and
    ``frame_js`` globals, which are assigned further down in this script.
    """

    def __init__(self):
        # Maps the short type names used in the JSON to the compact IRIs
        # substituted for them before the data is handed to rdflib.
        self.rdflib_class_map = {
            "Annotation": "oa:Annotation",
            "Dataset": "dctypes:Dataset",
            "Image": "dctypes:StillImage",
            "Video": "dctypes:MovingImage",
            "Audio": "dctypes:Sound",
            "Text": "dctypes:Text",
            "TextualBody": "oa:TextualBody",
            "ResourceSelection": "oa:ResourceSelection",
            "SpecificResource": "oa:SpecificResource",
            "FragmentSelector": "oa:FragmentSelector",
            "CssSelector": "oa:CssSelector",
            "XPathSelector": "oa:XPathSelector",
            "TextQuoteSelector": "oa:TextQuoteSelector",
            "TextPositionSelector": "oa:TextPositionSelector",
            "DataPositionSelector": "oa:DataPositionSelector",
            "SvgSelector": "oa:SvgSelector",
            "RangeSelector": "oa:RangeSelector",
            "TimeState": "oa:TimeState",
            "HttpState": "oa:HttpRequestState",
            "CssStylesheet": "oa:CssStyle",
            "Choice": "oa:Choice",
            "Composite": "oa:Composite",
            "List": "oa:List",
            "Independents": "oa:Independents",
            "Person": "foaf:Person",
            "Software": "as:Application",
            "Organization": "foaf:Organization",
            "AnnotationCollection": "as:OrderedCollection",
            "AnnotationPage": "as:OrderedCollectionPage",
            "Audience": "schema:Audience"
        }

    def _apply_nested(self, value, transform):
        """Apply ``transform`` to every dict found in ``value``.

        Dicts are passed to ``transform``; lists are walked item by item
        (recursively); any other value is returned unchanged.
        """
        if isinstance(value, dict):
            return transform(value)
        if isinstance(value, list):
            return [self._apply_nested(item, transform) for item in value]
        return value

    def _clean_bnode_ids(self, js):
        """Return a copy of ``js`` without blank-node ``id`` entries.

        An ``id`` whose value starts with ``_:`` is dropped.  Nested dicts
        are cleaned too, including dicts held inside lists.
        """
        new = {}
        for (k, v) in js.items():
            if k == 'id' and hasattr(v, 'startswith') and v.startswith("_:"):
                continue
            new[k] = self._apply_nested(v, self._clean_bnode_ids)
        return new

    def _mk_rdflib_jsonld(self, js):
        """Return a copy of ``js`` with ``type`` values rewritten for rdflib.

        Each ``type`` value found in ``rdflib_class_map`` is replaced by
        its mapped compact IRI.  Values that are not in the map are kept
        as they are rather than dropped, so no type information is lost.
        Nested dicts are processed too, including dicts inside lists.
        """
        # rdflib's json-ld implementation needs the types pre-processed to
        # make it work: recurse the structure looking for types, and
        # replace them.
        class_map = self.rdflib_class_map
        new = {}
        for (k, v) in js.items():
            if k == 'type':
                if isinstance(v, list):
                    new['type'] = [class_map.get(i, i) for i in v]
                else:
                    new['type'] = class_map.get(v, v)
            else:
                new[k] = self._apply_nested(v, self._mk_rdflib_jsonld)
        return new

    def json_to_rdf(self, js, fmt=None):
        """Parse the JSON-LD dict ``js`` with rdflib.

        Returns the populated ``ConjunctiveGraph`` when ``fmt`` is falsy,
        otherwise the graph serialized in the rdflib format ``fmt``.
        """
        d2 = self._mk_rdflib_jsonld(js)
        g = ConjunctiveGraph()
        g.parse(data=json.dumps(d2), format='json-ld')
        if fmt:
            return g.serialize(format=fmt)
        return g

    def rdf_to_jsonld(self, rdf, fmt):
        """Convert serialized RDF to framed, compacted JSON-LD.

        ``rdf`` is parsed by rdflib using format ``fmt``, re-serialized as
        JSON-LD, framed with the module-level ``frame_js`` and compacted
        against the module-level ``context_js``.
        """
        g = ConjunctiveGraph()
        g.parse(data=rdf, format=fmt)
        out = g.serialize(format='json-ld')

        j2 = json.loads(out)
        j2 = {"@context": context_js, "@graph": j2}
        framed = frame(j2, frame_js)
        out = compact(framed, context_js)
        # Recursive cleaning of blank node ids is currently disabled:
        # out = self._clean_bnode_ids(out)
        return out

    def compact_and_clean(self, js):
        """Compact ``js`` and hoist a single-node ``@graph`` to the top.

        The result's ``@context`` is set to the module-level ``context``
        URL.  If ``@graph`` is a dict, or a list holding exactly one item,
        that node's entries are copied onto the top-level object and
        ``@graph`` is removed.  A ``@graph`` with any other shape is left
        in place instead of raising.
        """
        newjs = compact(js, context_js)
        newjs['@context'] = context
        if "@graph" in newjs:
            graph = newjs['@graph']
            if isinstance(graph, list) and len(graph) == 1:
                graph = graph[0]
            if isinstance(graph, dict):
                for k, v in graph.items():
                    newjs[k] = v
                del newjs['@graph']
        return newjs


validator = Validator()

example = "https://raw.githubusercontent.com/w3c/web-annotation/gh-pages/model/wd2/examples/correct/anno4.json"
example_ttl = "https://raw.githubusercontent.com/w3c/web-annotation/gh-pages/vocab/wd/examples/correct/anno1.ttl"
context = "http://www.w3.org/ns/anno.jsonld"
frameURI = "https://raw.githubusercontent.com/w3c/web-annotation/gh-pages/jsonld/annotation_frame.jsonld"
# ontology = "https://www.w3.org/ns/oa.ttl"
ontology = "https://raw.githubusercontent.com/w3c/web-annotation/gh-pages/vocab/wd/ontology/oa.ttl"

data = fetch(context)
context_js = json.loads(data)
data = fetch(example)
example_js = json.loads(data)
data = fetch(frameURI)
frame_js = json.loads(data)

# Test1: JSON-LD context document can be parsed without errors by JSON-LD validators
# Context document is parsable if it can be loaded and used to expand the example
try:
    expanded = expand(example_js, context_js)
except Exception:
    print("Context is invalid, failed Test 1")


# Test2: JSON-LD context document can be used to convert JSON-LD serialized Annotations into RDF triples.
try:
    jsonld_nq = to_rdf(example_js, {"base": "http://example.org/", "format": "application/nquads"})
except Exception:
    print("Cannot use context to convert JSON-LD to NQuads")


# Test3: Graphs produced are isomorphic
# (If Test2 failed, jsonld_nq is undefined and this test reports failure too.)
try:
    rl_g = validator.json_to_rdf(example_js)
    g = ConjunctiveGraph()
    js_g = g.parse(data=jsonld_nq, format="nt")
    rl_g_nq = rl_g.serialize(format="nquads")
    assert(len(rl_g.store) == len(js_g.store))
    assert(rl_g.isomorphic(js_g))
except Exception:
    print("Different triples from two parsers, or non-isomorphic graphs")


# Test4: The graphs produced can be converted back into JSON-LD without loss of information
try:
    js = validator.rdf_to_jsonld(jsonld_nq, "nt")
    js2 = validator.compact_and_clean(js)
    assert(js2 == example_js)
except Exception:
    print("Failed to recompact parsed data")
    raise


# Test5: ontology documents can be parsed without errors by validators
# Any parse error propagates and aborts the script.
g = ConjunctiveGraph().parse(ontology, format="turtle")


# Test6: ontology is internally consistent with respect to domains, ranges, etc

# step 1: find all the classes.
rdftype = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
rdfsdomain = URIRef("http://www.w3.org/2000/01/rdf-schema#domain")
rdfsrange = URIRef("http://www.w3.org/2000/01/rdf-schema#range")
# rdfs:Resource is defined in the RDF Schema namespace, not rdf-syntax-ns.
rdfsresource = URIRef("http://www.w3.org/2000/01/rdf-schema#Resource")
rdfssco = URIRef("http://www.w3.org/2000/01/rdf-schema#subClassOf")
asColl = URIRef("http://www.w3.org/ns/activitystreams#OrderedCollection")
skosConcept = URIRef("http://www.w3.org/2004/02/skos/core#Concept")

otherClasses = [asColl, skosConcept]
classes = list(g.subjects(rdftype, URIRef("http://www.w3.org/2000/01/rdf-schema#Class")))
props = list(g.subjects(rdftype, URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#Property")))

# Set copy of ``classes`` so the membership tests below are O(1).
known_classes = set(classes)

# An unknown domain aborts the run.  It is raised explicitly rather than
# asserted so the check still happens under ``python -O``.
for p in props:
    for d in g.objects(p, rdfsdomain):
        if d not in known_classes:
            raise AssertionError("Found inconsistent property: %s has unknown domain" % p)

for p in props:
    for r in g.objects(p, rdfsrange):
        if r not in known_classes and not str(r).startswith("http://www.w3.org/2001/XMLSchema#") and \
                not r == rdfsresource:
            print("Found inconsistent property: %s has unknown range" % p)

for c in classes:
    for parent in g.objects(c, rdfssco):
        if parent not in known_classes and parent not in otherClasses:
            print("Found inconsistent class: %s has unknown superClass" % c)


print("Done.")