guile-rdf/rdf/rdf.scm

rdf.scm

1
;;;; Copyright (C) 2020 Julien Lepiller <julien@lepiller.eu>
2
;;;; 
3
;;;; This library is free software; you can redistribute it and/or
4
;;;; modify it under the terms of the GNU Lesser General Public
5
;;;; License as published by the Free Software Foundation; either
6
;;;; version 3 of the License, or (at your option) any later version.
7
;;;; 
8
;;;; This library is distributed in the hope that it will be useful,
9
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
10
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
;;;; Lesser General Public License for more details.
12
;;;; 
13
;;;; You should have received a copy of the GNU Lesser General Public
14
;;;; License along with this library; if not, write to the Free Software
15
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
;;;; 
17
18
(define-module (rdf rdf)
19
  #:use-module (srfi srfi-1)
20
  #:use-module (srfi srfi-9)
21
  #:use-module (ice-9 match)
22
  #:export (rdf-datatype
23
            make-rdf-datatype
24
            rdf-datatype?
25
            rdf-datatype-iris
26
            rdf-datatype-description
27
            rdf-datatype-lexical?
28
            rdf-datatype-value?
29
            rdf-datatype-lexical->value
30
            rdf-datatype-value->lexical
31
32
            rdf-dataset
33
            make-rdf-dataset
34
            rdf-dataset?
35
            rdf-dataset-default-graph
36
            rdf-dataset-named-graphs
37
38
            rdf-triple
39
            make-rdf-triple
40
            rdf-triple?
41
            rdf-triple-subject
42
            rdf-triple-predicate
43
            rdf-triple-object
44
45
            rdf-literal
46
            make-rdf-literal
47
            rdf-literal?
48
            rdf-literal-lexical-form
49
            rdf-literal-type
50
            rdf-literal-langtag
51
52
            blank-node?
53
            rdf-graph?
54
55
            merge-graphs
56
            rdf-isomorphic?))
57
58
;; From the specification:
59
;;   Datatypes are used with RDF literals to represent values such as strings,
60
;;   numbers and dates.  A datatype consists of a lexical space, a value space
61
;;   and a lexical-to-value mapping, and is denoted by one or more IRIs.
62
;;
63
;;   The lexical space of a datatype is a set of Unicode [UNICODE] strings.
64
;;
65
;;   The lexical-to-value mapping of a datatype is a set of pairs whose first
66
;;   element belongs to the lexical space, and the second element belongs to the
67
;;   value space of the datatype.  Each member of the lexical space is paired
68
;;   with exactly one value, and is a lexical representation of that value.  The
69
;;   mapping can be seen as a function from the lexical space to the value space.
70
;;
71
;; In addition to the specification, we introduce value->lexical, a canonical
72
;; function to map values to the lexical space.  An important property is that
73
;;   for any val, (value? val) implies:
74
;;           (equal? (lexical->value (value->lexical val)) val)
75
;;
76
;; We also introduce a list of IRIs that denote this type, as more than one
77
;; IRI can denote a type.  This is set to a list of IRIs, but may be changed
78
;; to a function to denote a set in the future.
79
;;
80
;; We also introduce a description, a text that helps humans understand the
81
;; purpose of the datatype.
82
83
(define-record-type rdf-datatype
84
  (make-rdf-datatype iris description lexical? value? lexical->value value->lexical)
85
  rdf-datatype?
86
  (iris           rdf-datatype-iris)
87
  (description    rdf-datatype-description)
88
  (lexical?       rdf-datatype-lexical?)
89
  (value?         rdf-datatype-value?)
90
  (lexical->value rdf-datatype-lexical->value)
91
  (value->lexical rdf-datatype-value->lexical))
92
93
;; From the specification:
94
;;   An RDF dataset is a collection of RDF graphs, and comprises:
95
;;
96
;;   * Exactly one default graph, being an RDF graph.  The default graph does
97
;;     not have a name and MAY be empty.
98
;;   * Zero or more named graphs.  Each named graph is a pair consisting of an
99
;;     IRI or a blank node (the graph name), and an RDF graph.  Graph names are
100
;;     unique within an RDF dataset.
101
;;
102
;; We represent named graphs with a association list whose keys are IRIs or
103
;; blank nodes, and values are RDF graphs.
104
105
(define-record-type rdf-dataset
106
  (make-rdf-dataset default-graph named-graphs)
107
  rdf-dataset?
108
  (default-graph rdf-dataset-default-graph)
109
  (named-graphs  rdf-dataset-named-graphs))
110
111
;; From the specification:
112
;;   An RDF triple consists of three components:
113
;;
114
;;   * the subject, which is an IRI or a blank node
115
;;   * the predicate, which is an IRI
116
;;   * the object, which is an IRI, a literal or a blank node
117
118
(define-record-type rdf-triple
119
  (make-rdf-triple subject predicate object)
120
  rdf-triple?
121
  (subject   rdf-triple-subject)
122
  (predicate rdf-triple-predicate)
123
  (object    rdf-triple-object))
124
125
;; From the specification:
126
;;   A literal in an RDF graph consists of two or three elements:
127
;;
128
;;   * a lexical form, being a Unicode [UNICODE] string, which SHOULD be in
129
;;     Normal Form C [NFC],
130
;;   * a datatype IRI, being an IRI identifying a datatype that determines how
131
;;     the lexical form maps to a literal value, and
132
;;   * if and only if the datatype IRI is `http://www.w3.org/1999/02/22-rdf-syntax-ns#langString`,
133
;;     a non-empty language tag as defined by [BCP47].  The language tag MUST
134
;;     be well-formed according to section 2.2.9 of [BCP47].
135
136
(define-record-type rdf-literal
137
  (make-rdf-literal lexical-form datatype language-tag)
138
  rdf-literal?
139
  (lexical-form rdf-literal-lexical-form)
140
  (datatype     rdf-literal-datatype)
141
  (language-tag rdf-literal-language-tag))
142
143
;; From the specification:
144
;;   Blank nodes are disjoint from IRIs and literals.  Otherwise, the set of
145
;;   possible blank nodes is arbitrary.  RDF makes no reference to any internal
146
;;   structure of blank nodes.
147
;;
148
;; Here, we will use integers as blank nodes
149
150
(define blank-node? integer?)
151
152
;; From the specification:
153
;;   An RDF graph is a set of RDF triples.
154
;;
155
;; We represent a graph as a list of RDF triples
156
157
(define (rdf-graph? graph)
158
  (and (list? graph) (null? (filter (lambda (t) (not (rdf-triple? t))) graph))))
159
160
(define (last-blank g)
161
  "Retun the biggest blank node identifier in g"
162
  (let loop ((g g) (m 0))
163
    (match g
164
      ('() m)
165
      ((triple g ...)
166
       (loop g (max m
167
                   (if (blank-node? (rdf-triple-subject triple))
168
                       (rdf-triple-subject triple)
169
                       0)
170
                   (if (blank-node? (rdf-triple-object triple))
171
                       (rdf-triple-object triple)
172
                       0)))))))
173
174
(define (rename-blanks g num)
175
  "Return the same graph, but blank nodes are renamed from num"
176
  (let loop ((g g) (renamings '()) (num num) (result '()))
177
    (match g
178
      ('() result)
179
      ((triple g ...)
180
       (let* ((subject (rdf-triple-subject triple))
181
              (num (if (and (blank-node? subject)
182
                            (assoc-ref renamings subject))
183
                       num
184
                       (+ num 1)))
185
              (renamings
186
                (if (and (blank-node? subject)
187
                         (assoc-ref renamings subject))
188
                    renamings
189
                    (cons (cons subject num) renamings)))
190
              (subject
191
                (if (blank-node? subject)
192
                    (assoc-ref renamings subject)
193
                    subject))
194
              (predicate (rdf-triple-predicate triple))
195
              (object (rdf-triple-object triple))
196
              (num (if (and (blank-node? object)
197
                            (assoc-ref renamings object))
198
                       num
199
                       (+ num 1)))
200
              (renamings
201
                (if (and (blank-node? object)
202
                         (assoc-ref renamings object))
203
                    renamings
204
                    (cons (cons object num) renamings)))
205
              (object
206
                (if (blank-node? object)
207
                    (assoc-ref renamings object)
208
                    object)))
209
           (loop g renamings num (cons (make-rdf-triple subject predicate object)
210
                                       result)))))))
211
212
(define (merge-graphs g1 g2)
213
  "Merge two graphs g1 and g2.  This is the same as append, but we need to make
214
sure we rename blank nodes, or some nodes will be merged when they shouldn't."
215
  (append g1 (rename-blanks g2 (last-blank g1))))
216
217
;; Next, a predicate on isomorphisms between two graphs.  Two graphs are isomorphic
218
;; when each triple has a corresponding triple in the other graph.
219
;;
220
;; To take blank nodes into account, there needs to be a mapping from blank nodes
221
;; of the first graph to blank nodes of the other graph in order to prove
222
;; isomorphism.
223
;;
224
;; First, we compare the two graphs and find possible constraints on that mapping.
225
;; for instance, if one graph has (_:1, p, o) and the other (_:2, p, o), then
226
;; a possible constraint is that _:1 maps to _:2. If the other graph also has
227
;; (_:3, p, o) then maybe _:1 actually maps to _:3.
228
;;
229
;; Constraints are either "none" (no constraint), "equiv" (a mapping between two
230
;; blank node identifiers), "or" (a disjunction) or "and" (a conjunction).
231
;; By comparing the triples of the first graph, we create an conjunction between
232
;; the constraints collected from each triple. The constraints of a triple is
233
;; a disjunction between every case where it matches a triple from the other graph.
234
;; That creates zero, one or two constraints (depending on the number of blank
235
;; nodes).
236
;;
237
;; These constraints are transformed in a normal form, as a list of lists of
238
;; conjunctions. Each list is a candidate mapping. sat? is used to evaluate the
239
;; candidate mapping and ensure it is an isomorphism between the two sets of
240
;; blank nodes. For every sat? equivalences, we check that the mapping actually
241
;; maps triples of g1 to triples of g2, and its reverse mapping maps triples of
242
;; g2 to triples of g1. Whenever one mapping works, the two graphs are equivalent.
243
;; If no mapping works, the two graphs are not equivalent.
244
245
(define (sat? equivalences)
246
  "Return whether the set of equivalences satisfies the condition that it represents
247
an isomorphism between two blank node sets: for every equality, check that the
248
first component is always associated to the same second component, and that the
249
second component is always associated with the first."
250
  (match equivalences
251
    ('() #t)
252
    (((first . second) equivalences ...)
253
     (if (and (null? (filter
254
                       (lambda (eq)
255
                         (and (equal? (car eq) first)
256
                              (not (equal? (cdr eq) second))))
257
                       equivalences))
258
              (null? (filter
259
                       (lambda (eq)
260
                         (and (not (equal? (car eq) first))
261
                              (equal? (cdr eq) second)))
262
                       equivalences)))
263
         (sat? equivalences)
264
         #f))))
265
266
(define (merge-joins l1 l2)
267
  (cond
268
    ((null? l1) l2)
269
    ((null? l2) l1)
270
    (else
271
      (fold
272
        (lambda (e1 res)
273
          (append
274
            (map (lambda (e2)
275
                   (append e1 e2))
276
                 l2)
277
            res))
278
        '()
279
        l1))))
280
281
(define (to-disjunctions constraints)
282
  (match constraints
283
    (('equiv b1 b2) (list (list (cons b1 b2))))
284
    ('none (list (list)))
285
    (('or e1 e2)
286
     (cond
287
       ((equal? e2 'bot)
288
        (to-disjunctions e1))
289
       ((equal? e1 'bot)
290
        (to-disjunctions e2))
291
       (else
292
         (let ((e1 (to-disjunctions e1))
293
               (e2 (to-disjunctions e2)))
294
           (append e1 e2)))))
295
    (('and e1 e2)
296
     (cond
297
       ((equal? e1 'bot)
298
        'bot)
299
       ((equal? e2 'bot)
300
        'bot)
301
       (else
302
         (let ((e1 (to-disjunctions e1))
303
               (e2 (to-disjunctions e2)))
304
           (merge-joins e1 e2)))))))
305
306
(define (generate-triple-constraints t1 t2)
307
  (match t1
308
    (($ rdf-triple s1 p1 o1)
309
     (match t2
310
       (($ rdf-triple s2 p2 o2)
311
        (if (and (or (equal? s1 s2) (and (blank-node? s1) (blank-node? s2)))
312
                 (equal? p1 p2)
313
                 (or (equal? o1 o2) (and (blank-node? o1) (blank-node? o2))))
314
            (list 'and
315
                  (if (blank-node? s1)
316
                      (list 'equiv s1 s2)
317
                      'none)
318
                  (if (blank-node? o1)
319
                      (list 'equiv o1 o2)
320
                      'none))
321
            #f))))))
322
323
(define (generate-constraints t1 g2)
324
  (match g2
325
    ('() 'bot)
326
    ((t2 g2 ...)
327
     (let ((c (generate-triple-constraints t1 t2)))
328
       (if c
329
         (list 'or c (generate-constraints t1 g2))
330
         (generate-constraints t1 g2))))))
331
332
(define (reverse-mapping mapping)
333
  (let loop ((mapping mapping) (result '()))
334
  (match mapping
335
    ('() result)
336
    (((first . second) mapping ...)
337
     (loop mapping (cons (cons second first) result))))))
338
339
(define (validate-mapping mapping g1 g2)
340
  (match g1
341
    ('() #t)
342
    ((t1 g1 ...)
343
     (and (not (null? (filter
344
                        (lambda (t2)
345
                          (let ((s1 (rdf-triple-subject t1))
346
                                (s2 (rdf-triple-subject t2))
347
                                (p1 (rdf-triple-predicate t1))
348
                                (p2 (rdf-triple-predicate t2))
349
                                (o1 (rdf-triple-object t1))
350
                                (o2 (rdf-triple-object t2)))
351
                            (and
352
                              (if (blank-node? s1)
353
                                  (equal? (assoc-ref mapping s1) s2)
354
                                  (equal? s1 s2))
355
                              (equal? p1 p2)
356
                              (if (blank-node? o1)
357
                                  (equal? (assoc-ref mapping o1) o2)
358
                                  (equal? o1 o2)))))
359
                        g2)))
360
          (validate-mapping mapping g1 g2)))))
361
362
(define (rdf-isomorphic? g1 g2)
363
  "Compare two graphs and return whether they are isomorph."
364
  (let* ((constraints (fold (lambda (t constraints)
365
                              (list 'and (generate-constraints t g2) constraints))
366
                            'none g1))
367
         (disjunctions (to-disjunctions constraints)))
368
    (let loop ((disjunctions disjunctions))
369
      (match (filter sat? disjunctions)
370
        ('() (and (null? g1) (null? g2)))
371
        ((mapping disjunctions ...)
372
         (if (and (validate-mapping mapping g1 g2)
373
                  (validate-mapping (reverse-mapping mapping) g2 g1))
374
           #t
375
           (loop disjunctions)))))))
376