-
Notifications
You must be signed in to change notification settings - Fork 31
/
Copy pathprov.py
115 lines (94 loc) · 3.89 KB
/
prov.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/env python
# SPDX-FileCopyrightText: 2022 Albert Meroño, Rinke Hoekstra, Carlos Martínez
#
# SPDX-License-Identifier: MIT
# prov.py: class generating grlc related W3C prov triples
from rdflib import Graph, URIRef, Namespace, RDF, Literal
from datetime import datetime
from subprocess import check_output
# grlc modules
import grlc.static as static
import grlc.glogging as glogging
# Module-level logger for this file, obtained through grlc's logging helper.
glogger = glogging.getGrlcLogger(__name__)
class grlcPROV:
    """Record provenance of the grlc specification constructed.

    The provenance is kept in an rdflib graph (``self.prov_g``) that
    describes three resources using the W3C PROV vocabulary:

    * ``self.agent``    -- the server (``http://<SERVER_NAME>``),
    * ``self.entity_d`` -- the generated spec
      (``http://<SERVER_NAME>/api/<user>/<repo>/spec``),
    * ``self.activity`` -- the generation activity (spec URI + ``-activity``).
    """

    def __init__(self, user, repo):
        """Create the provenance graph and record everything known up front.

        Keyword arguments:
        user -- Github user.
        repo -- Github repo.
        """
        self.user = user
        self.repo = repo

        self.prov_g = Graph()
        prov_uri = URIRef("http://www.w3.org/ns/prov#")
        self.prov = Namespace(prov_uri)
        self.prov_g.bind("prov", self.prov)

        self.agent = URIRef("http://{}".format(static.SERVER_NAME))
        self.entity_d = URIRef(
            "http://{}/api/{}/{}/spec".format(static.SERVER_NAME, self.user, self.repo)
        )
        self.activity = URIRef(self.entity_d + "-activity")

        self.init_prov_graph()

    def init_prov_graph(self):
        """Initialize PROV graph with all we know at the start of the recording.

        First tries to ingest the repository provenance produced by the
        git2prov command-line tool; this step is best-effort and any failure
        is logged and ignored. Afterwards the agent, entity and activity
        triples are added, together with the activity start time.
        """
        try:
            # Use git2prov to get prov on the repo
            repo_prov = check_output(
                [
                    "node_modules/git2prov/bin/git2prov",
                    "https://github.com/{}/{}/".format(self.user, self.repo),
                    "PROV-O",
                ]
            ).decode("utf-8")

            # Skip any text preceding the first "@" (presumably the first
            # Turtle "@prefix" directive). str.find returns -1 when there is
            # no match, which would otherwise slice off everything but the
            # last character, so treat that case as a failure instead.
            turtle_start = repo_prov.find("@")
            if turtle_start < 0:
                raise ValueError("Git2PROV output contains no '@' directive")

            glogger.debug("Ingesting Git2PROV output into RDF graph")
            # Parse straight from memory: avoids a shared, fixed-name
            # temporary file in the working directory.
            self.prov_g.parse(data=repo_prov[turtle_start:], format="turtle")
        except Exception as e:
            # Deliberately broad: repository provenance is optional, so any
            # subprocess or parsing failure must not abort spec generation.
            glogger.error(e)
            glogger.error("Couldn't parse Git2PROV graph, continuing without repo PROV")

        self.prov_g.add((self.agent, RDF.type, self.prov.Agent))
        self.prov_g.add((self.entity_d, RDF.type, self.prov.Entity))
        self.prov_g.add((self.activity, RDF.type, self.prov.Activity))

        # entity_d
        self.prov_g.add((self.entity_d, self.prov.wasGeneratedBy, self.activity))
        self.prov_g.add((self.entity_d, self.prov.wasAttributedTo, self.agent))
        # later: entity_d generated at time (when we know the end time)

        # activity
        self.prov_g.add((self.activity, self.prov.wasAssociatedWith, self.agent))
        self.prov_g.add(
            (self.activity, self.prov.startedAtTime, Literal(datetime.now()))
        )
        # later: activity used entity_o_1 ... entity_o_n
        # later: activity endedAtTime (when we know the end time)

    def add_used_entity(self, entity_uri):
        """Add the provided URI as a used entity by the logged activity.

        Keyword arguments:
        entity_uri -- URI of the entity; it is typed as prov:Entity and
                      linked to the activity through prov:used.
        """
        entity_o = URIRef(entity_uri)
        self.prov_g.add((entity_o, RDF.type, self.prov.Entity))
        self.prov_g.add((self.activity, self.prov.used, entity_o))

    def end_prov_graph(self):
        """Finalize prov recording with end time.

        The same timestamp is used for the entity's prov:generatedAtTime and
        the activity's prov:endedAtTime.
        """
        endTime = Literal(datetime.now())
        self.prov_g.add((self.entity_d, self.prov.generatedAtTime, endTime))
        self.prov_g.add((self.activity, self.prov.endedAtTime, endTime))

    def log_prov_graph(self):
        """Log provenance graph so far (Turtle serialization, debug level)."""
        glogger.debug("Spec generation provenance graph:")
        glogger.debug(self.prov_g.serialize(format="turtle"))

    def serialize(self, format):
        """Serialize provenance graph in the specified format.

        Keyword arguments:
        format -- serialization format name, passed unchanged to the
                  graph's ``serialize`` method (e.g. "turtle").
        """
        return self.prov_g.serialize(format=format)