tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

vocab_tester.py (8327B)


      1 # Author: Rob Sanderson (azaroth42@gmail.com)
      2 # License: Apache2
      3 # Last Modified: 2016-09-02
      4 
      5 import json
      6 from rdflib import ConjunctiveGraph, URIRef
      7 from pyld import jsonld
      8 from pyld.jsonld import compact, expand, frame, from_rdf, to_rdf, JsonLdProcessor
      9 import urllib
     10 
     11 # Stop code from looking up the contexts online for every operation
     12 docCache = {}
     13 
     14 def fetch(url):
     15    fh = urllib.urlopen(url)
     16    data = fh.read()
     17    fh.close()
     18    return data
     19 
     20 def load_document_and_cache(url):
     21    if docCache.has_key(url):
     22        return docCache[url]
     23 
     24    doc = {
     25        'contextUrl': None,
     26        'documentUrl': None,
     27        'document': ''
     28    }
     29    data = fetch(url)
     30    doc['document'] = data;
     31    docCache[url] = doc
     32    return doc
     33 
     34 jsonld.set_document_loader(load_document_and_cache)
     35 
     36 class Validator(object):
     37 
     38    def __init__(self):
     39 
     40        self.rdflib_class_map = {
     41            "Annotation":           "oa:Annotation",
     42            "Dataset":              "dctypes:Dataset",
     43            "Image":                "dctypes:StillImage",
     44            "Video":                "dctypes:MovingImage",
     45            "Audio":                "dctypes:Sound",
     46            "Text":                 "dctypes:Text",
     47            "TextualBody":          "oa:TextualBody",
     48            "ResourceSelection":    "oa:ResourceSelection",
     49            "SpecificResource":     "oa:SpecificResource",
     50            "FragmentSelector":     "oa:FragmentSelector",
     51            "CssSelector":          "oa:CssSelector",
     52            "XPathSelector":        "oa:XPathSelector",
     53            "TextQuoteSelector":    "oa:TextQuoteSelector",
     54            "TextPositionSelector": "oa:TextPositionSelector",
     55            "DataPositionSelector": "oa:DataPositionSelector",
     56            "SvgSelector":          "oa:SvgSelector",
     57            "RangeSelector":        "oa:RangeSelector",
     58            "TimeState":            "oa:TimeState",
     59            "HttpState":            "oa:HttpRequestState",
     60            "CssStylesheet":        "oa:CssStyle",
     61            "Choice":               "oa:Choice",
     62            "Composite":            "oa:Composite",
     63            "List":                 "oa:List",
     64            "Independents":         "oa:Independents",
     65            "Person":               "foaf:Person",
     66            "Software":             "as:Application",
     67            "Organization":         "foaf:Organization",
     68            "AnnotationCollection": "as:OrderedCollection",
     69            "AnnotationPage":       "as:OrderedCollectionPage",
     70            "Audience":             "schema:Audience"
     71        }
     72 
     73 
     74    def _clean_bnode_ids(self, js):
     75        new = {}
     76        for (k,v) in js.items():
     77            if k == 'id' and v.startswith("_:"):
     78                continue
     79            elif type(v) == dict:
     80                # recurse
     81                res = self._clean_bnode_ids(v)
     82                new[k] = res
     83            else:
     84                new[k] = v
     85        return new
     86 
     87    def _mk_rdflib_jsonld(self, js):
     88        # rdflib's json-ld implementation sucks
     89        # Pre-process to make it work
     90        # recurse the structure looking for types, and replacing them.
     91        new = {}
     92        for (k,v) in js.items():
     93            if k == 'type':
     94                if type(v) == list:
     95                    nl = []
     96                    for i in v:
     97                        if self.rdflib_class_map.has_key(i):
     98                            nl.append(self.rdflib_class_map[i])
     99                    new['type'] = nl
    100                else:
    101                    if self.rdflib_class_map.has_key(v):
    102                        new['type'] = self.rdflib_class_map[v]
    103            elif type(v) == dict:
    104                # recurse
    105                res = self._mk_rdflib_jsonld(v)
    106                new[k] = res
    107            else:
    108                new[k] = v
    109        return new
    110 
    111    def json_to_rdf(self, js, fmt=None):
    112        d2 = self._mk_rdflib_jsonld(js)
    113        js = json.dumps(d2)
    114        g = ConjunctiveGraph()
    115        g.parse(data=js, format='json-ld')
    116        if fmt:
    117            out = g.serialize(format=fmt)
    118            return out
    119        else:
    120            return g
    121 
    122    def rdf_to_jsonld(self, rdf, fmt):
    123 
    124        g = ConjunctiveGraph()
    125        g.parse(data=rdf, format=fmt)
    126        out = g.serialize(format='json-ld')
    127 
    128        j2 = json.loads(out)
    129        j2 = {"@context": context_js, "@graph": j2}
    130        framed = frame(j2, frame_js)
    131        out = compact(framed, context_js)
    132        # recursively clean blank node ids
    133        #out = self._clean_bnode_ids(out)
    134        return out
    135 
    136    def compact_and_clean(self, js):
    137        newjs = compact(js, context_js)
    138        newjs['@context'] = context
    139        if newjs.has_key("@graph"):
    140            for k,v in newjs['@graph'].items():
    141                newjs[k] = v
    142            del newjs['@graph']
    143        return newjs
    144 
    145 validator = Validator()
    146 
    147 example = "https://raw.githubusercontent.com/w3c/web-annotation/gh-pages/model/wd2/examples/correct/anno4.json"
    148 example_ttl = "https://raw.githubusercontent.com/w3c/web-annotation/gh-pages/vocab/wd/examples/correct/anno1.ttl"
    149 context = "http://www.w3.org/ns/anno.jsonld"
    150 frameURI = "https://raw.githubusercontent.com/w3c/web-annotation/gh-pages/jsonld/annotation_frame.jsonld"
    151 # ontology = "https://www.w3.org/ns/oa.ttl"
    152 ontology = "https://raw.githubusercontent.com/w3c/web-annotation/gh-pages/vocab/wd/ontology/oa.ttl"
    153 
    154 data = fetch(context)
    155 context_js = json.loads(data)
    156 data = fetch(example)
    157 example_js = json.loads(data)
    158 data = fetch(frameURI)
    159 frame_js = json.loads(data)
    160 
    161 # Test1:  JSON-LD context document can be parsed without errors by JSON-LD validators
    162 # Context document is parsable if it can be loaded and used to expand the example
    163 try:
    164    expanded = expand(example_js, context_js)
    165 except:
    166    print("Context is invalid, failed Test 1")
    167 
    168 
    169 # Test2: JSON-LD context document can be used to convert JSON-LD serialized Annotations into RDF triples.
    170 try:
    171    jsonld_nq = to_rdf(example_js, {"base": "http://example.org/", "format": "application/nquads"})
    172 except:
    173    print("Cannot use context to convert JSON-LD to NQuads")
    174 
    175 
    176 # Test3: Graphs produced are isomorphic
    177 try:
    178    rl_g = validator.json_to_rdf(example_js)
    179    g = ConjunctiveGraph()
    180    js_g = g.parse(data=jsonld_nq, format="nt")
    181    rl_g_nq = rl_g.serialize(format="nquads")
    182    assert(len(rl_g.store) == len(js_g.store))
    183    assert(rl_g.isomorphic(js_g))
    184 except:
    185    print("Different triples from two parsers, or non-isomorphic graphs")
    186 
    187 
    188 # Test4: The graphs produced can be converted back into JSON-LD without loss of information
    189 try:
    190    js = validator.rdf_to_jsonld(jsonld_nq, "nt")
    191    js2 = validator.compact_and_clean(js)
    192    assert(js2 == example_js)
    193 except:
    194    print("Failed to recompact parsed data")
    195    raise
    196 
    197 
    198 # Test5: ontology documents can be parsed without errors by validators
    199 try:
    200    g = ConjunctiveGraph().parse(ontology, format="turtle")
    201 except:
    202    raise
    203 
    204 
    205 # Test6: ontology is internally consistent with respect to domains, ranges, etc
    206 
    207 # step 1: find all the classes.
    208 rdftype = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
    209 rdfsdomain = URIRef("http://www.w3.org/2000/01/rdf-schema#domain")
    210 rdfsrange = URIRef("http://www.w3.org/2000/01/rdf-schema#range")
    211 rdfsresource = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#Resource")
    212 rdfssco = URIRef("http://www.w3.org/2000/01/rdf-schema#subClassOf")
    213 asColl = URIRef("http://www.w3.org/ns/activitystreams#OrderedCollection")
    214 skosConcept = URIRef("http://www.w3.org/2004/02/skos/core#Concept")
    215 
    216 otherClasses = [asColl, skosConcept]
    217 classes = list(g.subjects(rdftype, URIRef("http://www.w3.org/2000/01/rdf-schema#Class")))
    218 props = list(g.subjects(rdftype, URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#Property")))
    219 
    220 for p in props:
    221    domains = list(g.objects(p, rdfsdomain))
    222    for d in domains:
    223        assert(d in classes)
    224 
    225 for p in props:
    226    ranges = list(g.objects(p, rdfsrange))
    227    for r in ranges:
    228        if not r in classes and not str(r).startswith("http://www.w3.org/2001/XMLSchema#") and \
    229            not r == rdfsresource:
    230            print("Found inconsistent property: %s has unknown range" % p)
    231 
    232 for c in classes:
    233    parents = list(g.objects(c, rdfssco))
    234    for p in parents:
    235        if not p in classes and not p in otherClasses:
    236            print("Found inconsistent class: %s has unknown superClass" % c)
    237 
    238 
    239 print("Done.")