1 |
\documentclass[11pt,a4paper]{ivoa} |
2 |
\input tthdefs |
3 |
|
4 |
\usepackage[utf8]{inputenc} |
5 |
\usepackage{booktabs, tabulary} % for nicer tables |
6 |
|
7 |
% make the text in pdf properly searchable |
8 |
\usepackage{lmodern} |
9 |
|
10 |
% use listings for including text files and code snippets |
11 |
\usepackage{listings} |
12 |
|
13 |
\title{IVOA Provenance Data Model} |
14 |
|
15 |
\ivoagroup{DM} |
16 |
|
17 |
\author{Kristin Riebe} |
18 |
\author{Mathieu Servillat} |
19 |
\author{François Bonnarel} |
20 |
\author{Mireille Louys} |
21 |
\author{Florian Rothmaier} |
22 |
\author{Michèle Sanguillon} |
23 |
\author{IVOA Data Model Working Group} |
24 |
|
25 |
\editor{Kristin Riebe} |
26 |
\editor{Mathieu Servillat} |
27 |
|
28 |
% \previousversion[????URL????]{????Funny Label????} |
29 |
\previousversion[http://www.ivoa.net/documents/ProvenanceDM/20161121/]{WD-ProvenanceDM-1.0-20161121.pdf} |
30 |
\previousversion[http://volute.g-vo.org/svn/trunk/projects/dm/provenance/description/ProvDM-0.2-20160428.pdf]{ProvDM-0.2-20160428.pdf} |
31 |
\previousversion[http://volute.g-vo.org/svn/trunk/projects/dm/provenance/description/ProvDM-0.1-20141008.pdf]{ProvDM-0.1-20141008.pdf} |
32 |
|
33 |
|
34 |
% own definitions |
35 |
% Named colors used by the helper macros in this preamble.
% NOTE(review): \definecolor needs color/xcolor, which is not loaded explicitly
% here -- presumably provided by the ivoa class; confirm.
% todocolor: pale yellow, background of the \TODO box.
\definecolor{todocolor}{rgb}{1,1,0.8} |
36 |
% darkred: title bar and frame of the \warning box.
\definecolor{darkred}{rgb}{0.6,0,0} |
37 |
% rose: background of the \warning text box.
\definecolor{rose}{rgb}{1.0,0.88,0.88} |
38 |
% darkgrey: text color of \note remarks.
\definecolor{darkgrey}{rgb}{0.35,0.35,0.35} |
39 |
%\newcommand{\TODO}[1]{% |
40 |
% \noindent% |
41 |
% \textcolor{todocolor}{\sffamily [\textbf{TODO:} #1]}% |
42 |
%} |
43 |
|
44 |
% \TODO{<text>}: editorial to-do marker.
% Typesets <text> without paragraph indentation inside a box with todocolor
% background; the box content is a \parbox of width 0.85\linewidth set in
% sans-serif, starting with a bold "TODO:" line followed by <text>.
% 2pt of vertical space is requested after the box.
\newcommand{\TODO}[1]{% |
45 |
\noindent% |
46 |
\colorbox{todocolor}{% |
47 |
\parbox{0.85\linewidth}{\sffamily \textbf{TODO:}\\ |
48 |
#1} |
49 |
}% |
50 |
\vspace{2pt} |
51 |
|
52 |
} |
53 |
|
54 |
% \note{<text>}: inline editorial remark.
% Typesets, without paragraph indentation and in darkgrey, a sans-serif
% "Note:" label followed by <text> in emphasized style.
\newcommand{\note}[1]{% |
55 |
\noindent% |
56 |
\textcolor{darkgrey}{{\sffamily Note:} \emph{#1}}% |
57 |
} |
58 |
|
59 |
|
60 |
% \paragraphlb{<title>}: a \paragraph heading followed by an empty \mbox and a
% forced line break, so that the text after the heading starts on a new line
% instead of running in behind the title.
\newcommand{\paragraphlb}[1]{\paragraph{#1}\mbox{}\\} % paragraph with line break |
61 |
|
62 |
% Padding between box content and box edge, set to 5pt for the whole document;
% \colorbox and \fcolorbox (used by \TODO and \warning) use this length.
\setlength{\fboxsep}{5pt} |
63 |
%\setlength{\fboxrule}{1.5pt} |
64 |
% \warning{<text>}: highlighted warning box.
% Layout, wrapped in an outer \parbox of full \linewidth:
%   1. a darkred bar (inner width 0.7\linewidth) with the word "Warning" in
%      white, large, sans-serif type;
%   2. a line break pulled up by 1pt (\\[-1pt]) so the two boxes touch;
%   3. a rose box with darkred frame containing <text>; its inner width is
%      0.7\linewidth minus twice the frame thickness (\fboxrule), to make up
%      for the frame that the title bar does not have.
% One \baselineskip of vertical space is requested before and after.
% NOTE(review): the width 0.7\linewidth-2\fboxrule is length arithmetic, which
% plain \parbox does not support; this presumably relies on the calc package
% being loaded (e.g. by the ivoa class) -- confirm.
\newcommand{\warning}[1]{% |
65 |
\vspace{\baselineskip} |
66 |
\noindent |
67 |
\parbox{\linewidth}{% |
68 |
\colorbox{darkred}{% |
69 |
\parbox{0.7\linewidth}{\large \sffamily \textcolor{white}{Warning}}% |
70 |
}\\[-1pt] |
71 |
\noindent% |
72 |
\fcolorbox{darkred}{rose}{% |
73 |
\parbox{0.7\linewidth-2\fboxrule}{#1}% |
74 |
}% |
75 |
}% |
76 |
\vspace{\baselineskip} |
77 |
}% |
78 |
|
79 |
% for nicer tables: |
80 |
% Stretch table rows to 1.3 times their default height (applies to every
% tabular-like environment in the document).
\renewcommand{\arraystretch}{1.3} |
81 |
% \head{<text>}: markup for table column headings, currently bold text.
\newcommand{\head}[1]{\textbf{#1}} |
82 |
|
83 |
|
84 |
% \class{<name>}: semantic markup for data model class names (currently
% emphasized text). Class names go through this one macro so that their style
% can be changed later in a single place.
85 |
\newcommand{\class}[1]{\emph{#1}} |
86 |
|
87 |
\begin{document} |
88 |
% Column type Y: a ragged-right variant of the X column type
% (\arraybackslash restores \\ as the row terminator after \raggedright).
% NOTE(review): X is defined by tabularx, which is not loaded explicitly in
% this preamble, and the tables in this file use only l, L and p columns --
% presumably Y is used by one of the \input files; confirm.
\newcolumntype{Y}{>{\raggedright\arraybackslash}X} |
89 |
|
90 |
\begin{abstract} |
91 |
This document describes how provenance information for astronomical datasets |
92 |
%(with the focus on observational data) |
93 |
can be modeled, stored and exchanged within |
94 |
the astronomical community in a standardized way. |
95 |
We follow the definition of provenance as proposed by the W3C\footnote{\url{https://www.w3.org/TR/prov-overview/}}, i.e. that provenance is information about entities, activities, and people involved in producing a piece of data or thing, which can be used to form assessments about its quality, reliability or trustworthiness. |
96 |
Such provenance information in astronomy is important to enable any scientist to trace back |
97 |
the origin of a dataset (e.g. an image, spectrum, catalog or single points in a |
98 |
spectral energy distribution diagram or a light curve), learn about the people and |
99 |
organizations involved in a project and assess the quality of the dataset as well |
100 |
as the usefulness of the dataset for her own scientific work. |
101 |
\end{abstract} |
102 |
|
103 |
|
104 |
\section*{Acknowledgments} |
105 |
|
106 |
This document has been developed in part with support from the German |
107 |
Astrophysical Virtual Observatory, funded by BMBF Bewilligungsnummer 05A14BAD and 05A08VHA. |
108 |
The Provenance Working Group acknowledges support from the ASTERICS Project, funded by the European Commission (project 653477). |
109 |
|
110 |
We thank the following people for fruitful discussions (in alphabetical order): |
111 |
Markus Demleitner, Harry Enke, Jochen Klar, Gerard Lemson, Markus Nullmeier |
112 |
and Adrian Partl. |
113 |
|
114 |
|
115 |
|
116 |
\section*{Conformance-related definitions} |
117 |
|
118 |
The words ``MUST'', ``SHALL'', ``SHOULD'', ``MAY'', ``RECOMMENDED'', and |
119 |
``OPTIONAL'' (in upper or lower case) used in this document are to be |
120 |
interpreted as described in IETF standard, \citet{std:RFC2119}. |
121 |
|
122 |
The \emph{Virtual Observatory (VO)} is |
123 |
a general term for a collection of federated resources that can be used |
124 |
to conduct astronomical research, education, and outreach. |
125 |
The \href{http://www.ivoa.net}{International |
126 |
Virtual Observatory Alliance (IVOA)} is a global |
127 |
collaboration of separately funded projects to develop standards and |
128 |
infrastructure that enable VO applications. |
129 |
|
130 |
|
131 |
\section{Introduction} |
132 |
\input{intro-general} |
133 |
\input{intro-requirements} |
134 |
\input{intro-VOarchitecture} |
135 |
\input{intro-previousefforts} |
136 |
|
137 |
\section{The provenance data model} |
138 |
\input{datamodel-description} |
139 |
|
140 |
|
141 |
%\section{Applying provenance -- Interactions with other Data models}\label{sec:dmlinks} |
142 |
\section{Links to other data models}\label{sec:dmlinks} |
143 |
%In this section we discuss how the Provenance Data Model interacts with |
144 |
%classes and attributes from other VO data models (especially DatasetDM). |
145 |
%(e.g. DatasetDM, SpectralDM (share some same classes), SimDM) |
146 |
%and how provenance information can be stored. |
147 |
|
148 |
The Provenance Data Model can be applied without making links to any other |
149 |
IVOA data model classes. For example, when the data is not yet published, provenance information |
150 |
can be stored already, but a DatasetDM-description for the data may not yet exist. |
151 |
But if there are data models implemented for the datasets, then it is |
152 |
very useful to connect the classes and attributes of the different models, |
153 |
which we are going to discuss in this Section. These links help to avoid |
154 |
unnecessary repetitions in the metadata of datasets, and also offer the possibility |
155 |
to derive some basic provenance information from existing data model classes automatically. |
156 |
|
157 |
|
158 |
\subsection{Links with Dataset/ObsCore Model} |
159 |
Entities and their descriptions in the Provenance Data Model |
160 |
are tightly linked to the \class{Dataset}-class in the DatasetDM/ObsCore Data Model, as well as to |
161 |
InputDataset and OutputDataset in the Simulation Data Model \citep[SimDM,][]{std:SimDM}. |
162 |
Table~\ref{tab:datasetmapping} maps classes and attributes from the Dataset Data Model |
163 |
to concepts in the Provenance Data Model. |
164 |
|
165 |
|
166 |
%\begin{figure}[h] |
167 |
%\centering |
168 |
%\includegraphics[width=\textwidth]{../datamodel-diagrams/classes-relations-dms} |
169 |
%\caption{Links between Agent and Party, Entity and Dataset.} |
170 |
%\label{fig:class-relations-dm} |
171 |
%\end{figure} |
172 |
% --> a similar figure is already given in the sections on entity and agent. |
173 |
|
174 |
\begin{table}[h] |
175 |
\small |
176 |
\tymax 0.5\textwidth |
177 |
\begin{tabulary}{1.0\textwidth}{@{}lLp{4cm}@{}} |
178 |
\toprule |
179 |
\head{Dataset DM} & \head{Provenance DM} & \head{Comment}\\ |
180 |
\midrule |
181 |
DataID.title & Entity.label & title of the dataset\\ |
182 |
DataID.collection & HadMember.collectionId & link to the collection to which the dataset belongs\\ |
183 |
DataID.creator & Agent.name & name of agent\\ |
184 |
DataID.creatorDID & AlternateOf.entityId & id for the dataset given by the creator\\ |
185 |
DataID.ObservationID & WasGeneratedBy.activityId & identifier to everything describing the observation; maybe it belongs to entity?\\ |
186 |
DataID.date & WasGeneratedBy.time & date and time when the dataset was completely created\\ |
187 |
Curation.PublisherDID & Entity.id & unique identifier for the dataset assigned by the publisher\\ |
188 |
Curation.PublisherID & Agent.id & link to the publisher (role: publisher, type: organization/astronomer private collection)\\ |
189 |
Curation.Publisher & Agent.name & name of the publisher\\ |
190 |
Curation.Date & Entity.releaseDate & release date of the dataset\\ |
191 |
Curation.Version & Entity.version & version of the dataset\\ |
192 |
Curation.Rights & Entity.access & access rights to the dataset; one of [...]\\ |
193 |
Curation.Reference & Entity.link & link to publication\\ |
194 |
Curation.Contact & Agent.Id or name? & link to Agent with role contact\\ |
195 |
DataProductType & EntityDescription.dataproduct\_type & type of a dataproduct/entity\\ |
196 |
DataProductSubType & EntityDescription.dataproduct\_subtype & subtype of a dataproduct/entity\\ |
197 |
ObsDataset.calibLevel & EntityDescription.level & (output) calibration level, integer between 0 and 3\\ |
198 |
\bottomrule |
199 |
\end{tabulary} |
200 |
\caption{Mapping between attributes from \class{Dataset}-classes from Dataset Metadata Model to classes in ProvenanceDM.} |
201 |
\label{tab:datasetmapping} |
202 |
\end{table} |
203 |
|
204 |
|
205 |
The \class{Agent} class, which is used for defining responsible persons and |
206 |
organizations, is similar to the \class{Party} class in the Dataset Metadata Model and SimDM. |
207 |
|
208 |
\subsection{Links with Simulation Data Model} |
209 |
In SimDM one also encounters a normalization similar to our split-up of descriptions from |
210 |
actual data instances and executions of processes: the SimDM class ``experiment'' |
211 |
is a type of \class{Activity} and its general, reusable description is called a ``protocol'', |
212 |
which can be considered as a type of this model's \class{ActivityDescription}. |
213 |
More direct mappings between classes and attributes of both models are given in Table~\ref{tab:simdmmapping}. |
214 |
|
215 |
\begin{table}[h] |
216 |
\small |
217 |
\tymax 0.5\textwidth |
218 |
\begin{tabulary}{1.0\textwidth}{@{}lLp{4cm}@{}} |
219 |
\toprule |
220 |
\head{Simulation DM} & \head{Provenance DM} & \head{Comment}\\ |
221 |
\midrule |
222 |
Experiment & Activity & \\ |
223 |
Experiment.name & Activity.label & human readable name; name attribute in SimDM is inherited from Resource-class\\ |
224 |
Experiment.executionTime & Activity.endTime & end time of the execution of an experiment/activity \\ |
225 |
Experiment.protocol & Activity.description\_ref & reference to the protocol or description class \\ |
226 |
Protocol & ActivityDescription & \\ |
227 |
Protocol.name & ActivityDescription.label & human readable name\\ |
228 |
Protocol.referenceURL & ActivityDescription.doculink & reference to a webpage describing it\\ |
229 |
% add Protocol.code, Protocol.version? |
230 |
ParameterSetting & Parameter & value of an (input) parameter\\ |
231 |
InputParameter & ParameterDescription & description of an (input) parameter\\ |
232 |
Party & Agent & responsible person or organization\\ |
233 |
Party.name & Agent.label & name of the agent \\ |
234 |
Contact & WasAssociatedWith/WasAttributedTo & \\ |
235 |
Contact.role & WasAssociatedWith.role/ WasAttributedTo.role & role which the agent/party had for a certain experiment/protocol or activity/entity\\ |
236 |
Contact.party & WasAssociatedWith.agent, WasAttributedTo.agent & reference to the agent/party \\ |
237 |
|
238 |
|
239 |
\bottomrule |
240 |
\end{tabulary} |
241 |
\caption{Mapping between classes and attributes from SimDM to classes/attributes in ProvenanceDM.} |
242 |
\label{tab:simdmmapping} |
243 |
\end{table} |
244 |
|
245 |
|
246 |
|
247 |
|
248 |
\subsection{Further links to data models} |
249 |
More similarities and links to other data models will be detailed in future |
250 |
versions of this working draft. |
251 |
|
252 |
|
253 |
\section{Accessing provenance information} |
254 |
\input{provaccess} |
255 |
|
256 |
\section{Discussion} |
257 |
\input{discussion} |
258 |
|
259 |
\section{Implementations of the data model for specific use cases}\label{sec:usecases-implementations} |
260 |
\input{usecases-implementations} |
261 |
|
262 |
|
263 |
\appendix |
264 |
\section{Changes from Previous Versions} |
265 |
% No previous versions yet. |
266 |
% these would be subsections "Changes from v. WD-..." |
267 |
% Use itemize environments. |
268 |
\subsection{Changes from WD-ProvenanceDM-1.0-20161121} |
269 |
\begin{itemize} |
270 |
\item More explanations on links to data models in Section~\ref{sec:dmlinks}. |
271 |
\item Introduced subsections for Section~\ref{sec:dmlinks}, added table with SimDM-links. |
272 |
\item Renamed \emph{docuLink} to \emph{doculink} |
273 |
\item Avoid double-meaning of \emph{description} by splitting it up into: |
274 |
\begin{itemize} |
275 |
\item \emph{description\_ref}: a foreign key, reference to a description class |
276 |
(which could be located at a URL as well) |
277 |
\item \emph{annotation}: free text description |
278 |
\end{itemize} |
279 |
\item Applied similar naming scheme to \emph{Parameter} and \emph{ParameterDescription}-classes |
280 |
\item Renamed \emph{Agent.name} to \emph{Agent.label}, so that each class has an id and a label. |
281 |
\item Renamed Section~\ref{sec:usecases-implementations} to stress that it deals with implementations. |
282 |
\item Added links to provn and votable-serialization for HiPS-use case, added first part of provn as example in the HiPS-use case section. |
283 |
\item Corrected attribute names in Table~\ref{tab:datasetmapping}. |
284 |
|
285 |
\end{itemize} |
286 |
|
287 |
|
288 |
\section{Implementation details}\label{sec:implementation-details} |
289 |
In this section we will give more details on the classes and attributes which were used |
290 |
in implementations for each use case. This may need to go into a separate document, so it can |
291 |
be updated without affecting this standard. |
292 |
|
293 |
TBD. |
294 |
|
295 |
|
296 |
\bibliography{ivoatex/ivoabib,prov-refs} |
297 |
|
298 |
|
299 |
\end{document} |