Skip to content

Commit

Permalink
Add schema for an "object stream" dataset description
Browse files Browse the repository at this point in the history
This is conceptually already quite close to what we need to get at --
for serializing datasets, starting from their Git repository
representation.

This is only a minimal sketch at this point, but it demos what the
recently found duality of semantic-defining and structure-defining
classes would look like in this context.

There are a bunch of open questions that need more consideration.
Something like:

- Should we make the SE-level classes define `exact_mappings` to their
  ontology counterparts, or use discovery as shown in
  #37?

- What kind of value do we use for `meta_type`? CURIEs? Or plain class
  names?

- ...
  • Loading branch information
mih committed Feb 17, 2024
1 parent f5964b9 commit b95ff73
Show file tree
Hide file tree
Showing 17 changed files with 209 additions and 3 deletions.
9 changes: 7 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ build/context.jsonld: src/linkml/schemas/ontology.yaml
build/linkml-docs: \
build/linkml-docs/ontology \
build/linkml-docs/data-access \
build/linkml-docs/datalad-dataset-components \
build/linkml-docs/datalad-dataset-version
# build/linkml-docs/git-provenance-schema
build/linkml-docs/%: src/linkml/schemas/%.yaml src/extra-docs/%-schema
Expand All @@ -33,11 +34,11 @@ build/linkml-docs/%: src/linkml/schemas/%.yaml src/extra-docs/%-schema
--diagram-type er_diagram \
--metadata \
--format markdown \
--example-directory src/examples/$$(basename $@)-schema \
--example-directory src/examples/$* \
-d $$([ "$*" = "ontology" ] && echo $@ || echo $@-schema) \
$<
# try to inject any extra-docs (if any exist)
-cp -r src/extra-docs/$$(basename $@)-schema/*.md $@
-cp -r src/extra-docs/$*-schema/*.md $@

build/mkdocs-site: build/linkml-docs src/extra-docs/*.md
# top-level content
Expand All @@ -49,6 +50,7 @@ check: check-models check-validation
# add additional schemas to lint here
check-models: \
check-model-data-access \
check-model-datalad-dataset-components \
check-model-datalad-dataset-version \
check-model-ontology
# check-model-git-provenance
Expand Down Expand Up @@ -80,6 +82,8 @@ check-model-%: src/linkml/schemas/%.yaml
check-validation: \
convert-examples-data-access \
check-validation-data-access \
convert-examples-datalad-dataset-components \
check-validation-datalad-dataset-components \
convert-examples-datalad-dataset-version \
check-validation-datalad-dataset-version \
convert-examples-ontology
Expand All @@ -100,6 +104,7 @@ check-invalid-validation-%: tests/%-schema/validation src/linkml/schemas/%.yaml

convert-examples: \
convert-examples-data-access \
convert-examples-datalad-dataset-components \
convert-examples-datalad-dataset-version \
convert-examples-ontology
# convert-examples-git-provenance
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"components": [
{
"meta_id": "datalad:0b76362c-aa27-11ee-be29-b3b123281259",
"meta_type": "dlccs:DataladDatasetSE",
"uuid": "0b76362c-aa27-11ee-be29-b3b123281259"
}
],
"@type": "ContainerSE"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
@prefix datalad: <https://concepts.datalad.org/namespace/dataset-uuid/> .
@prefix dlccs: <https://concepts.datalad.org/schemas/datalad-dataset-components/> .
@prefix dlco: <https://concepts.datalad.org/ontology/> .
@prefix ns1: <https://concepts.datalad.org/schemas/datalad-dataset-components/:> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

datalad:0b76362c-aa27-11ee-be29-b3b123281259 dlco:meta_type "dlccs:DataladDatasetSE"^^xsd:anyURI ;
dlco:uuid "0b76362c-aa27-11ee-be29-b3b123281259"^^<http://purl.obolibrary.org/obo/NCIT_C54100> .

[] a dlccs:Container ;
ns1:components datalad:0b76362c-aa27-11ee-be29-b3b123281259 .


Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
components:
- meta_id: datalad:0b76362c-aa27-11ee-be29-b3b123281259
meta_type: dlccs:DataladDatasetSE
uuid: 0b76362c-aa27-11ee-be29-b3b123281259
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"components": [
{
"meta_id": "datalad:0b76362c-aa27-11ee-be29-b3b123281259",
"meta_type": "dlccs:DataladDatasetSE",
"uuid": "0b76362c-aa27-11ee-be29-b3b123281259"
},
{
"meta_id": "gitsha:558275f650574389dcbbf7cd8ab5046482473fc8",
"meta_type": "dlccs:DataladDatasetVersionSE",
"is_version_of": "datalad:0b76362c-aa27-11ee-be29-b3b123281259"
}
],
"@type": "ContainerSE"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
@prefix DCAT: <http://www.w3.org/ns/dcat#> .
@prefix datalad: <https://concepts.datalad.org/namespace/dataset-uuid/> .
@prefix dlccs: <https://concepts.datalad.org/schemas/datalad-dataset-components/> .
@prefix dlco: <https://concepts.datalad.org/ontology/> .
@prefix gitsha: <https://concepts.datalad.org/namespace/gitsha/> .
@prefix ns1: <https://concepts.datalad.org/schemas/datalad-dataset-components/:> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

gitsha:558275f650574389dcbbf7cd8ab5046482473fc8 DCAT:isVersionOf datalad:0b76362c-aa27-11ee-be29-b3b123281259 ;
dlco:meta_type "dlccs:DataladDatasetVersionSE"^^xsd:anyURI .

datalad:0b76362c-aa27-11ee-be29-b3b123281259 dlco:meta_type "dlccs:DataladDatasetSE"^^xsd:anyURI ;
dlco:uuid "0b76362c-aa27-11ee-be29-b3b123281259"^^<http://purl.obolibrary.org/obo/NCIT_C54100> .

[] a dlccs:Container ;
ns1:components datalad:0b76362c-aa27-11ee-be29-b3b123281259,
gitsha:558275f650574389dcbbf7cd8ab5046482473fc8 .


Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
components:
- meta_id: datalad:0b76362c-aa27-11ee-be29-b3b123281259
meta_type: dlccs:DataladDatasetSE
uuid: 0b76362c-aa27-11ee-be29-b3b123281259
- meta_id: gitsha:558275f650574389dcbbf7cd8ab5046482473fc8
meta_type: dlccs:DataladDatasetVersionSE
is_version_of: datalad:0b76362c-aa27-11ee-be29-b3b123281259
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"meta_id": "datalad:0b76362c-aa27-11ee-be29-b3b123281259",
"meta_type": "dlccs:DataladDatasetSE",
"uuid": "0b76362c-aa27-11ee-be29-b3b123281259",
"@type": "DataladDatasetSE"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
@prefix datalad: <https://concepts.datalad.org/namespace/dataset-uuid/> .
@prefix dlco: <https://concepts.datalad.org/ontology/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

datalad:0b76362c-aa27-11ee-be29-b3b123281259 dlco:meta_type "dlccs:DataladDatasetSE"^^xsd:anyURI ;
dlco:uuid "0b76362c-aa27-11ee-be29-b3b123281259"^^<http://purl.obolibrary.org/obo/NCIT_C54100> .


Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
meta_id: datalad:0b76362c-aa27-11ee-be29-b3b123281259
meta_type: dlccs:DataladDatasetSE
uuid: 0b76362c-aa27-11ee-be29-b3b123281259
Empty file.
1 change: 1 addition & 0 deletions src/extra-docs/index.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
- [DataLad concepts ontology](ontology/)
- [DataLad dataset components schema](datalad-dataset-components-schema/)
- [DataLad dataset version schema](datalad-dataset-version-schema/)
- [Data access schema](data-access-schema/)
- [Git provenance schema](git-provenance-schema/)
2 changes: 2 additions & 0 deletions src/linkml/ontology/datalad.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,5 @@ classes:
distinguishing feature between a DataLad dataset and
a plain Git or git-annex repository branch.
required: true
close_mappings:
- dlco:Dataset
98 changes: 98 additions & 0 deletions src/linkml/schemas/datalad-dataset-components.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Schema header: identity, human-readable documentation, and the CURIE
# prefixes used by this schema and by the example/validation data.
id: https://concepts.datalad.org/schemas/datalad-dataset-components
name: datalad-dataset-components
title: Schema for any DataLad dataset component
description: |
  This specification defines a schema for describing a DataLad dataset
  via a flat list of linked metadata objects. Each metadata object
  has a simple structure with minimal property nesting.
  Each object is of type `ComponentSE`, a class that has two required slots:
  `meta_id` and `meta_type`. The former must be a unique object identifier,
  and the latter must identify a subclass of `ComponentSE` that the object
  represents. Both values must be given as CURIEs.
  Nesting of objects is avoided, unless a DataLad dataset contains insufficient
  information to guarantee the availability of a unique identifier for a given
  entity.
  Subclasses of `ComponentSE` represent all recognized dataset components.
  These classes, within this schema, only define the nature of the employed
  identifier, and the linkage to other dataset components. All other
  properties are defined by a corresponding ontology class that is applied
  as a "mixin".
  An additional `ContainerSE` class is provided. It has a single slot
  `components`, and can be used to represent a list of components for formats
  and use cases that require a root class/instance.
prefixes:
  annex: https://concepts.datalad.org/namespace/annex-uuid/
  datalad: https://concepts.datalad.org/namespace/dataset-uuid/
  DCAT: http://www.w3.org/ns/dcat#
  dcterms: http://purl.org/dc/terms/
  dlco: https://concepts.datalad.org/ontology/
  dlccs: https://concepts.datalad.org/schemas/datalad-dataset-components/
  gitsha: https://concepts.datalad.org/namespace/gitsha/
  linkml: https://w3id.org/linkml/
  prov: http://www.w3.org/ns/prov#
  spdx: http://spdx.org/rdf/terms#
# Elements defined in this schema without an explicit URI (e.g. the
# `components` attribute) must resolve under `dlccs:`. Without this
# declaration they are emitted under a malformed namespace ending in
# "datalad-dataset-components/:" in generated RDF.
default_prefix: dlccs

imports:
- ../ontology/meta_utils
- ../ontology/datalad
- ../ontology/git-annex

# Structure-defining ("SE", schema element) classes. They only pin down the
# identifier format and the linkage between components; all other
# properties come from the ontology classes applied as mixins.
classes:
  # Root class for formats and use cases that need a single top-level
  # instance.
  ContainerSE:
    class_uri: dlccs:Container
    description: >-
      A container for dataset component objects.
    tree_root: true
    attributes:
      components:
        description: >-
          Component list.
        multivalued: true
        inlined_as_list: true
        range: ComponentSE

  # Common base: every component carries `meta_id` and a mandatory
  # `meta_type`.
  ComponentSE:
    class_uri: dlccs:Component
    description: >-
      Base class for any recognized dataset component type. This class
      should never be used directly, only its subclasses.
    slots:
      - meta_id
      - meta_type
    slot_usage:
      meta_type:
        required: true

  DataladDatasetVersionSE:
    class_uri: dlccs:DataladDatasetVersionSE
    is_a: ComponentSE
    description: >-
      Schema element for a `DataladDatasetVersion`.
    comments:
      - The required identifier format is `gitsha:<gitsha>`.
    mixins:
      - DataladDatasetVersion
    slot_usage:
      # Link to the dataset by identifier reference rather than by nesting
      # the dataset object.
      is_version_of:
        inlined: false
        range: DataladDatasetSE
      # NOTE(review): `gitsha` is presumably a slot provided by the
      # `DataladDatasetVersion` mixin -- confirm against the ontology.
      meta_id:
        equals_expression: "gitsha:{gitsha}"

  DataladDatasetSE:
    class_uri: dlccs:DataladDatasetSE
    is_a: ComponentSE
    description: >-
      Schema element for a `DataladDataset`.
    see_also:
      - dlco:DataladDataset
    comments:
      - The required identifier format is `datalad:<dataset-uuid>`.
    mixins:
      - DataladDataset
    slot_usage:
      # The identifier is built from the `uuid` slot value.
      meta_id:
        equals_expression: "datalad:{uuid}"
2 changes: 1 addition & 1 deletion src/linkml/schemas/ontology.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
id: https://concepts.datalad.org/schemas/ontology
id: https://concepts.datalad.org/ontology
name: datalad-concepts-ontology
title: DataLad Concepts Ontology (DLCO)
description: |-
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
components:
- meta_id: datalad:0b76362c-aa27-11ee-be29-b3b123281259
meta_type: dlccs:DataladDataset
#meta_type: DataladDatasetSE
uuid: 0b76362c-aa27-11ee-be29-b3b123281259
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Validation configuration for the datalad-dataset-components schema.
schema: src/linkml/schemas/datalad-dataset-components.yaml
# Class that the listed data sources are validated as.
target_class: ContainerSE
# Example documents to validate.
data_sources:
- src/examples/datalad-dataset-components/ContainerSE-DataladDataset-minimal.yaml
- src/examples/datalad-dataset-components/ContainerSE-DataladDatasetVersion-linkage.yaml
plugins:
JsonschemaValidationPlugin:
# NOTE(review): presumably rejects properties not declared by the schema -- confirm against the LinkML validator docs.
closed: true
include_range_class_descendants: false
# Enabled without options (empty value).
RecommendedSlotsPlugin:

0 comments on commit b95ff73

Please sign in to comment.