guile-rdf/rdf/rdf.scm

rdf.scm

1
;;;; Copyright (C) 2020 Julien Lepiller <julien@lepiller.eu>
2
;;;; 
3
;;;; This library is free software; you can redistribute it and/or
4
;;;; modify it under the terms of the GNU Lesser General Public
5
;;;; License as published by the Free Software Foundation; either
6
;;;; version 3 of the License, or (at your option) any later version.
7
;;;; 
8
;;;; This library is distributed in the hope that it will be useful,
9
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
10
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
;;;; Lesser General Public License for more details.
12
;;;; 
13
;;;; You should have received a copy of the GNU Lesser General Public
14
;;;; License along with this library; if not, write to the Free Software
15
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
;;;; 
17
18
(define-module (rdf rdf)
  #:use-module (srfi srfi-9)
  #:use-module (ice-9 match)
  #:export (;; Datatypes.
            rdf-datatype make-rdf-datatype rdf-datatype?
            rdf-datatype-iris rdf-datatype-description
            rdf-datatype-lexical? rdf-datatype-value?
            rdf-datatype-lexical->value rdf-datatype-value->lexical

            ;; Datasets.
            rdf-dataset make-rdf-dataset rdf-dataset?
            rdf-dataset-default-graph rdf-dataset-named-graphs

            ;; Triples.
            rdf-triple make-rdf-triple rdf-triple?
            rdf-triple-subject rdf-triple-predicate rdf-triple-object

            ;; Literals.
            rdf-literal make-rdf-literal rdf-literal?
            rdf-literal-lexical-form rdf-literal-type rdf-literal-langtag

            ;; Blank nodes and graphs.
            blank-node? rdf-graph?
            merge-graphs))
56
57
;; From the specification:
;;   Datatypes are used with RDF literals to represent values such as strings,
;;   numbers and dates.  A datatype consists of a lexical space, a value space
;;   and a lexical-to-value mapping, and is denoted by one or more IRIs.
;;
;;   The lexical space of a datatype is a set of Unicode [UNICODE] strings.
;;
;;   The lexical-to-value mapping of a datatype is a set of pairs whose first
;;   element belongs to the lexical space, and the second element belongs to the
;;   value space of the datatype.  Each member of the lexical space is paired
;;   with exactly one value, and is a lexical representation of that value.  The
;;   mapping can be seen as a function from the lexical space to the value space.
;;
;; In addition to the specification, we introduce value->lexical, a canonical
;; function to map values to the lexical space.  An important property is that
;; for any val, (value? val) implies:
;;           (equal? (lexical->value (value->lexical val)) val)
;;
;; We also introduce a list of IRIs that denote this type, as more than one
;; IRI can denote a type.  This is set to a list of IRIs, but may be changed
;; to a function to denote a set in the future.
;;
;; We also introduce a description, a text that helps humans understand the
;; purpose of the datatype.
81
82
(define-record-type rdf-datatype
  (make-rdf-datatype iris description
                     lexical? value?
                     lexical->value value->lexical)
  rdf-datatype?
  ;; List of IRIs that denote this datatype.
  (iris rdf-datatype-iris)
  ;; Text that helps humans understand the purpose of the datatype.
  (description rdf-datatype-description)
  ;; Presumably a predicate recognising members of the lexical space
  ;; (the record itself does not constrain what is stored here).
  (lexical? rdf-datatype-lexical?)
  ;; Predicate on values: for any val accepted by it,
  ;; (lexical->value (value->lexical val)) is expected to be equal? to val.
  (value? rdf-datatype-value?)
  ;; The lexical-to-value mapping, as a function.
  (lexical->value rdf-datatype-lexical->value)
  ;; Canonical function mapping values to the lexical space.
  (value->lexical rdf-datatype-value->lexical))
91
92
;; From the specification:
;;   An RDF dataset is a collection of RDF graphs, and comprises:
;;
;;   * Exactly one default graph, being an RDF graph.  The default graph does
;;     not have a name and MAY be empty.
;;   * Zero or more named graphs.  Each named graph is a pair consisting of an
;;     IRI or a blank node (the graph name), and an RDF graph.  Graph names are
;;     unique within an RDF dataset.
;;
;; We represent named graphs with an association list whose keys are IRIs or
;; blank nodes, and whose values are RDF graphs.
103
104
(define-record-type rdf-dataset
  (make-rdf-dataset default-graph named-graphs)
  rdf-dataset?
  ;; The single, unnamed default graph of the dataset.
  (default-graph rdf-dataset-default-graph)
  ;; Association list from graph names (IRIs or blank nodes) to RDF graphs.
  (named-graphs rdf-dataset-named-graphs))
109
110
;; From the specification:
;;   An RDF triple consists of three components:
;;
;;   * the subject, which is an IRI or a blank node
;;   * the predicate, which is an IRI
;;   * the object, which is an IRI, a literal or a blank node
116
117
(define-record-type rdf-triple
  (make-rdf-triple subject predicate object)
  rdf-triple?
  ;; The constructor stores the three components as given; it does not check
  ;; that they are of the kinds the specification requires.
  (subject rdf-triple-subject)       ; an IRI or a blank node
  (predicate rdf-triple-predicate)   ; an IRI
  (object rdf-triple-object))        ; an IRI, a literal or a blank node
123
124
;; From the specification:
;;   A literal in an RDF graph consists of two or three elements:
;;
;;   * a lexical form, being a Unicode [UNICODE] string, which SHOULD be in
;;     Normal Form C [NFC],
;;   * a datatype IRI, being an IRI identifying a datatype that determines how
;;     the lexical form maps to a literal value, and
;;   * if and only if the datatype IRI is `http://www.w3.org/1999/02/22-rdf-syntax-ns#langString`,
;;     a non-empty language tag as defined by [BCP47].  The language tag MUST
;;     be well-formed according to section 2.2.9 of [BCP47].
134
135
(define-record-type rdf-literal
  (make-rdf-literal lexical-form datatype language-tag)
  rdf-literal?
  ;; The lexical form of the literal.
  (lexical-form rdf-literal-lexical-form)
  ;; The accessors below are named after the identifiers listed in this
  ;; module's #:export clause (rdf-literal-type and rdf-literal-langtag);
  ;; any other name would leave the exported bindings undefined.
  ;;
  ;; The datatype IRI of the literal.
  (datatype     rdf-literal-type)
  ;; The language tag of the literal; only meaningful for langString literals.
  (language-tag rdf-literal-langtag))
141
142
;; From the specification:
;;   Blank nodes are disjoint from IRIs and literals.  Otherwise, the set of
;;   possible blank nodes is arbitrary.  RDF makes no reference to any internal
;;   structure of blank nodes.
;;
;; Here, we will use integers as blank nodes.
148
149
(define (blank-node? node)
  ;; Blank nodes are represented by integers, so the test is just integer?.
  (integer? node))
150
151
;; From the specification:
;;   An RDF graph is a set of RDF triples.
;;
;; We represent a graph as a list of RDF triples.
155
156
(define (rdf-graph? graph)
  ;; A graph is a proper list whose elements all satisfy rdf-triple?.
  ;; The empty list is a valid (empty) graph.
  (and (list? graph)
       (let loop ((rest graph))
         (or (null? rest)
             (and (rdf-triple? (car rest))
                  (loop (cdr rest)))))))
158
159
(define (last-blank g)
  "Return the biggest blank node identifier used as a subject or an object in
the graph g, or 0 when g has no blank node with an identifier above 0."
  ;; Contribution of one node to the maximum: its identifier when it is a
  ;; blank node, 0 (the starting value of the maximum) otherwise.
  (define (blank-id node)
    (if (blank-node? node) node 0))
  (let loop ((triples g) (biggest 0))
    (match triples
      (() biggest)
      ;; (triple . rest) is used rather than (triple rest ...): the ellipsis
      ;; form checks that the whole tail is a proper list on every iteration,
      ;; which makes the traversal quadratic in the size of the graph.
      ((triple . rest)
       (loop rest
             (max biggest
                  (blank-id (rdf-triple-subject triple))
                  (blank-id (rdf-triple-object triple))))))))
172
173
(define (rename-blanks g num)
  "Return a copy of the graph g whose blank nodes are replaced by fresh
identifiers.  The first blank node met is renamed to num + 1, the next unseen
one to num + 2, and so on; every occurrence of a given blank node receives the
same new identifier.  Subjects and objects that are not blank nodes, and all
predicates, are kept as they are.  Triples come out in the reverse order of g."
  ;; Rename a single node.  Returns three values: the node to store in the new
  ;; triple, the updated renaming table (an association list from old to new
  ;; identifiers) and the last identifier handed out so far.
  (define (rename node renamings num)
    (cond
      ;; Only blank nodes consume an identifier or a table entry; IRIs and
      ;; literals pass through untouched.
      ((not (blank-node? node))
       (values node renamings num))
      ;; Already renamed: reuse the identifier chosen earlier.
      ((assoc-ref renamings node)
       => (lambda (new) (values new renamings num)))
      ;; First occurrence: allocate the next identifier and remember it.
      (else
        (let ((new (+ num 1)))
          (values new (cons (cons node new) renamings) new)))))
  (let loop ((triples g) (renamings '()) (num num) (result '()))
    (match triples
      (() result)
      ;; (triple . rest) is used rather than (triple rest ...): the ellipsis
      ;; form checks that the whole tail is a proper list on every iteration,
      ;; which makes the traversal quadratic in the size of the graph.
      ((triple . rest)
       ;; The subject is renamed before the object, so that the table built
       ;; for the subject is visible when the object is processed.
       (call-with-values
         (lambda () (rename (rdf-triple-subject triple) renamings num))
         (lambda (subject renamings num)
           (call-with-values
             (lambda () (rename (rdf-triple-object triple) renamings num))
             (lambda (object renamings num)
               (loop rest renamings num
                     (cons (make-rdf-triple subject
                                            (rdf-triple-predicate triple)
                                            object)
                           result))))))))))
210
211
(define (merge-graphs g1 g2)
  "Return the merge of the graphs g1 and g2: the triples of g1 followed by the
triples of g2 with their blank nodes renamed.  The renaming starts after the
biggest blank node identifier of g1, so that blank nodes of g2 are not
confused with blank nodes of g1 that happen to carry the same identifier."
  (let* ((offset (last-blank g1))
         (renamed (rename-blanks g2 offset)))
    (append g1 renamed)))
215