guile-rdf/rdf/rdf.scm

rdf.scm

1
;;;; Copyright (C) 2020 Julien Lepiller <julien@lepiller.eu>
2
;;;; 
3
;;;; This library is free software; you can redistribute it and/or
4
;;;; modify it under the terms of the GNU Lesser General Public
5
;;;; License as published by the Free Software Foundation; either
6
;;;; version 3 of the License, or (at your option) any later version.
7
;;;; 
8
;;;; This library is distributed in the hope that it will be useful,
9
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
10
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
;;;; Lesser General Public License for more details.
12
;;;; 
13
;;;; You should have received a copy of the GNU Lesser General Public
14
;;;; License along with this library; if not, write to the Free Software
15
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
;;;; 
17
18
(define-module (rdf rdf)
19
  #:use-module (srfi srfi-1)
20
  #:use-module (srfi srfi-9)
21
  #:use-module (sxml simple)
22
  #:use-module (ice-9 match)
23
  #:export (rdf-datatype
24
            make-rdf-datatype
25
            rdf-datatype?
26
            rdf-datatype-iris
27
            rdf-datatype-description
28
            rdf-datatype-lexical?
29
            rdf-datatype-value?
30
            rdf-datatype-lexical->value
31
            rdf-datatype-value->lexical
32
33
            rdf:langString
34
            rdf:XMLLiteral
35
36
            rdf-dataset
37
            make-rdf-dataset
38
            rdf-dataset?
39
            rdf-dataset-default-graph
40
            rdf-dataset-named-graphs
41
42
            rdf-triple
43
            make-rdf-triple
44
            rdf-triple?
45
            rdf-triple-subject
46
            rdf-triple-predicate
47
            rdf-triple-object
48
49
            rdf-literal
50
            make-rdf-literal
51
            rdf-literal?
52
            rdf-literal-lexical-form
53
            rdf-literal-type
54
            rdf-literal-langtag
55
56
            blank-node?
57
            rdf-graph?
58
59
            merge-graphs
60
            rdf-isomorphic?
61
            recognize))
62
63
;; From the specification:
64
;;   Datatypes are used with RDF literals to represent values such as strings,
65
;;   numbers and dates.  A datatype consists of a lexical space, a value space
66
;;   and a lexical-to-value mapping, and is denoted by one or more IRIs.
67
;;
68
;;   The lexical space of a datatype is a set of Unicode [UNICODE] strings.
69
;;
70
;;   The lexical-to-value mapping of a datatype is a set of pairs whose first
71
;;   element belongs to the lexical space, and the second element belongs to the
72
;;   value space of the datatype.  Each member of the lexical space is paired
73
;;   with exactly one value, and is a lexical representation of that value.  The
74
;;   mapping can be seen as a function from the lexical space to the value space.
75
;;
76
;; In addition to the specification, we introduce value->lexical, a canonical
77
;; function to map values to the lexical space.  An important property is that
78
;;   for any val, (value? val) implies:
79
;;           (equal? (lexical->value (value->lexical val)) val)
80
;;
81
;; We also introduce a list of IRIs that denote this type, as more than one
82
;; IRI can denote a type.  This is set to a list of IRIs, but may be changed
83
;; to a function to denote a set in the future.
84
;;
85
;; We also introduce a description, a text that helps humans understand the
86
;; purpose of the datatype.
87
88
(define-record-type rdf-datatype
89
  (make-rdf-datatype iris description lexical? value? lexical->value value->lexical)
90
  rdf-datatype?
91
  (iris           rdf-datatype-iris)
92
  (description    rdf-datatype-description)
93
  (lexical?       rdf-datatype-lexical?)
94
  (value?         rdf-datatype-value?)
95
  (lexical->value rdf-datatype-lexical->value)
96
  (value->lexical rdf-datatype-value->lexical))
97
98
(define rdf:langString
99
  (make-rdf-datatype
100
    '("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString")
101
    "A literal is a language-tagged string if the third element is present.
102
Lexical representations of language tags MAY be converted to lower case.  The
103
value space of language tags is always in lower case."
104
    string?
105
    string?
106
    string-downcase
107
    identity))
108
109
(define rdf:XMLLiteral
110
  (make-rdf-datatype
111
    '("http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral")
112
    "RDF provides for XML content as a possible literal value.  Such content
113
is indicated in an RDF graph using a literal whose datatype is set to
114
rdf:XMLLiteral.  This datatype is defined as non-normative because it depends
115
on [DOM4], a specification that has not yet reached W3C Recommendation status."
116
    (lambda (l)
117
      (false-if-exception (xml->sxml l)))
118
    (lambda (v)
119
      (false-if-exception (sxml->xml v)))
120
    xml->sxml
121
    sxml->xml))
122
123
;; From the specification:
124
;;   An RDF dataset is a collection of RDF graphs, and comprises:
125
;;
126
;;   * Exactly one default graph, being an RDF graph.  The default graph does
127
;;     not have a name and MAY be empty.
128
;;   * Zero or more named graphs.  Each named graph is a pair consisting of an
129
;;     IRI or a blank node (the graph name), and an RDF graph.  Graph names are
130
;;     unique within an RDF dataset.
131
;;
132
;; We represent named graphs with a association list whose keys are IRIs or
133
;; blank nodes, and values are RDF graphs.
134
135
(define-record-type rdf-dataset
136
  (make-rdf-dataset default-graph named-graphs)
137
  rdf-dataset?
138
  (default-graph rdf-dataset-default-graph)
139
  (named-graphs  rdf-dataset-named-graphs))
140
141
;; From the specification:
142
;;   An RDF triple consists of three components:
143
;;
144
;;   * the subject, which is an IRI or a blank node
145
;;   * the predicate, which is an IRI
146
;;   * the object, which is an IRI, a literal or a blank node
147
148
(define-record-type rdf-triple
149
  (make-rdf-triple subject predicate object)
150
  rdf-triple?
151
  (subject   rdf-triple-subject)
152
  (predicate rdf-triple-predicate)
153
  (object    rdf-triple-object))
154
155
;; From the specification:
156
;;   A literal in an RDF graph consists of two or three elements:
157
;;
158
;;   * a lexical form, being a Unicode [UNICODE] string, which SHOULD be in
159
;;     Normal Form C [NFC],
160
;;   * a datatype IRI, being an IRI identifying a datatype that determines how
161
;;     the lexical form maps to a literal value, and
162
;;   * if and only if the datatype IRI is `http://www.w3.org/1999/02/22-rdf-syntax-ns#langString`,
163
;;     a non-empty language tag as defined by [BCP47].  The language tag MUST
164
;;     be well-formed according to section 2.2.9 of [BCP47].
165
166
(define-record-type rdf-literal
167
  (make-rdf-literal lexical-form type language-tag)
168
  rdf-literal?
169
  (lexical-form rdf-literal-lexical-form)
170
  (type         rdf-literal-type)
171
  (language-tag rdf-literal-language-tag))
172
173
;; From the specification:
174
;;   Blank nodes are disjoint from IRIs and literals.  Otherwise, the set of
175
;;   possible blank nodes is arbitrary.  RDF makes no reference to any internal
176
;;   structure of blank nodes.
177
;;
178
;; Here, we will use integers as blank nodes
179
180
(define blank-node? integer?)
181
182
;; From the specification:
183
;;   An RDF graph is a set of RDF triples.
184
;;
185
;; We represent a graph as a list of RDF triples
186
187
(define (rdf-graph? graph)
188
  (and (list? graph) (null? (filter (lambda (t) (not (rdf-triple? t))) graph))))
189
190
;; The following is for a merging procedure, where we rename blank nodes to ensure
191
;; we are not merging blank nodes that have the same name
192
193
(define (last-blank g)
194
  "Retun the biggest blank node identifier in g"
195
  (let loop ((g g) (m 0))
196
    (match g
197
      ('() m)
198
      ((triple g ...)
199
       (loop g (max m
200
                   (if (blank-node? (rdf-triple-subject triple))
201
                       (rdf-triple-subject triple)
202
                       0)
203
                   (if (blank-node? (rdf-triple-object triple))
204
                       (rdf-triple-object triple)
205
                       0)))))))
206
207
(define (rename-blanks g num)
208
  "Return the same graph, but blank nodes are renamed from num"
209
  (let loop ((g g) (renamings '()) (num num) (result '()))
210
    (match g
211
      ('() result)
212
      ((triple g ...)
213
       (let* ((subject (rdf-triple-subject triple))
214
              (num (if (and (blank-node? subject)
215
                            (assoc-ref renamings subject))
216
                       num
217
                       (+ num 1)))
218
              (renamings
219
                (if (and (blank-node? subject)
220
                         (assoc-ref renamings subject))
221
                    renamings
222
                    (cons (cons subject num) renamings)))
223
              (subject
224
                (if (blank-node? subject)
225
                    (assoc-ref renamings subject)
226
                    subject))
227
              (predicate (rdf-triple-predicate triple))
228
              (object (rdf-triple-object triple))
229
              (num (if (and (blank-node? object)
230
                            (assoc-ref renamings object))
231
                       num
232
                       (+ num 1)))
233
              (renamings
234
                (if (and (blank-node? object)
235
                         (assoc-ref renamings object))
236
                    renamings
237
                    (cons (cons object num) renamings)))
238
              (object
239
                (if (blank-node? object)
240
                    (assoc-ref renamings object)
241
                    object)))
242
           (loop g renamings num (cons (make-rdf-triple subject predicate object)
243
                                       result)))))))
244
245
(define (merge-graphs g1 g2)
246
  "Merge two graphs g1 and g2.  This is the same as append, but we need to make
247
sure we rename blank nodes, or some nodes will be merged when they shouldn't."
248
  (append g1 (rename-blanks g2 (last-blank g1))))
249
250
;; Next, a predicate on isomorphisms between two graphs.  Two graphs are isomorphic
251
;; when each triple has a corresponding triple in the other graph.
252
;;
253
;; To take blank nodes into account, there needs to be a mapping from blank nodes
254
;; of the first graph to blank nodes of the other graph in order to prove
255
;; isomorphism.
256
;;
257
;; First, we compare the two graphs and find possible constraints on that mapping.
258
;; for instance, if one graph has (_:1, p, o) and the other (_:2, p, o), then
259
;; a possible constraint is that _:1 maps to _:2. If the other graph also has
260
;; (_:3, p, o) then maybe _:1 actually maps to _:3.
261
;;
262
;; Constraints are either "none" (no constraint), "equiv" (a mapping between two
263
;; blank node identifiers), "or" (a disjunction) or "and" (a conjunction).
264
;; By comparing the triples of the first graph, we create an conjunction between
265
;; the constraints collected from each triple. The constraints of a triple is
266
;; a disjunction between every case where it matches a triple from the other graph.
267
;; That creates zero, one or two constraints (depending on the number of blank
268
;; nodes).
269
;;
270
;; These constraints are transformed in a normal form, as a list of lists of
271
;; conjunctions. Each list is a candidate mapping. sat? is used to evaluate the
272
;; candidate mapping and ensure it is an isomorphism between the two sets of
273
;; blank nodes. For every sat? equivalences, we check that the mapping actually
274
;; maps triples of g1 to triples of g2, and its reverse mapping maps triples of
275
;; g2 to triples of g1. Whenever one mapping works, the two graphs are equivalent.
276
;; If no mapping works, the two graphs are not equivalent.
277
278
(define (sat? equivalences)
279
  "Return whether the set of equivalences satisfies the condition that it represents
280
an isomorphism between two blank node sets: for every equality, check that the
281
first component is always associated to the same second component, and that the
282
second component is always associated with the first."
283
  (match equivalences
284
    ('() #t)
285
    (((first . second) equivalences ...)
286
     (if (and (null? (filter
287
                       (lambda (eq)
288
                         (and (equal? (car eq) first)
289
                              (not (equal? (cdr eq) second))))
290
                       equivalences))
291
              (null? (filter
292
                       (lambda (eq)
293
                         (and (not (equal? (car eq) first))
294
                              (equal? (cdr eq) second)))
295
                       equivalences)))
296
         (sat? equivalences)
297
         #f))))
298
299
(define (merge-joins l1 l2)
300
  (cond
301
    ((null? l1) l2)
302
    ((null? l2) l1)
303
    (else
304
      (fold
305
        (lambda (e1 res)
306
          (append
307
            (map (lambda (e2)
308
                   (append e1 e2))
309
                 l2)
310
            res))
311
        '()
312
        l1))))
313
314
(define (to-disjunctions constraints)
315
  (match constraints
316
    (('equiv b1 b2) (list (list (cons b1 b2))))
317
    ('none (list (list)))
318
    (('or e1 e2)
319
     (cond
320
       ((equal? e2 'bot)
321
        (to-disjunctions e1))
322
       ((equal? e1 'bot)
323
        (to-disjunctions e2))
324
       (else
325
         (let ((e1 (to-disjunctions e1))
326
               (e2 (to-disjunctions e2)))
327
           (append e1 e2)))))
328
    (('and e1 e2)
329
     (cond
330
       ((equal? e1 'bot)
331
        'bot)
332
       ((equal? e2 'bot)
333
        'bot)
334
       (else
335
         (let ((e1 (to-disjunctions e1))
336
               (e2 (to-disjunctions e2)))
337
           (merge-joins e1 e2)))))))
338
339
(define (generate-triple-constraints t1 t2)
340
  (match t1
341
    (($ rdf-triple s1 p1 o1)
342
     (match t2
343
       (($ rdf-triple s2 p2 o2)
344
        (if (and (or (equal? s1 s2) (and (blank-node? s1) (blank-node? s2)))
345
                 (equal? p1 p2)
346
                 (or (equal? o1 o2) (and (blank-node? o1) (blank-node? o2))))
347
            (list 'and
348
                  (if (blank-node? s1)
349
                      (list 'equiv s1 s2)
350
                      'none)
351
                  (if (blank-node? o1)
352
                      (list 'equiv o1 o2)
353
                      'none))
354
            #f))))))
355
356
(define (generate-constraints t1 g2)
357
  (match g2
358
    ('() 'bot)
359
    ((t2 g2 ...)
360
     (let ((c (generate-triple-constraints t1 t2)))
361
       (if c
362
         (list 'or c (generate-constraints t1 g2))
363
         (generate-constraints t1 g2))))))
364
365
(define (reverse-mapping mapping)
366
  (let loop ((mapping mapping) (result '()))
367
  (match mapping
368
    ('() result)
369
    (((first . second) mapping ...)
370
     (loop mapping (cons (cons second first) result))))))
371
372
(define (validate-mapping mapping g1 g2)
373
  (match g1
374
    ('() #t)
375
    ((t1 g1 ...)
376
     (and (not (null? (filter
377
                        (lambda (t2)
378
                          (let ((s1 (rdf-triple-subject t1))
379
                                (s2 (rdf-triple-subject t2))
380
                                (p1 (rdf-triple-predicate t1))
381
                                (p2 (rdf-triple-predicate t2))
382
                                (o1 (rdf-triple-object t1))
383
                                (o2 (rdf-triple-object t2)))
384
                            (and
385
                              (if (blank-node? s1)
386
                                  (equal? (assoc-ref mapping s1) s2)
387
                                  (equal? s1 s2))
388
                              (equal? p1 p2)
389
                              (if (blank-node? o1)
390
                                  (equal? (assoc-ref mapping o1) o2)
391
                                  (equal? o1 o2)))))
392
                        g2)))
393
          (validate-mapping mapping g1 g2)))))
394
395
(define (rdf-isomorphic? g1 g2)
396
  "Compare two graphs and return whether they are isomorph."
397
  (let* ((constraints (fold (lambda (t constraints)
398
                              (list 'and (generate-constraints t g2) constraints))
399
                            'none g1))
400
         (disjunctions (to-disjunctions constraints)))
401
    (let loop ((disjunctions disjunctions))
402
      (match (filter sat? disjunctions)
403
        ('() (and (null? g1) (null? g2)))
404
        ((mapping disjunctions ...)
405
         (if (and (validate-mapping mapping g1 g2)
406
                  (validate-mapping (reverse-mapping mapping) g2 g1))
407
           #t
408
           (loop disjunctions)))))))
409
410
;; Recognizing datatypes is a transformation on the graph to add the proper
411
;; datatype to literals, and replace IRIs that represent a datatype with the
412
;; datatype it represents.  This is useful for some entailment regimes, such
413
;; as the RDF or RDFS entailment regimes.
414
415
(define (recognize-data d datatypes)
416
  (match d
417
    ((? string? iri) 
418
     (let loop ((datatypes datatypes))
419
       (if (null? datatypes)
420
           iri
421
           (if (member iri (rdf-datatype-iris (car datatypes)))
422
               (car datatypes)
423
               (loop (cdr datatypes))))))
424
    (($ rdf-literal literal-form type langtag)
425
     (let loop ((datatypes datatypes))
426
       (if (null? datatypes)
427
           (make-rdf-literal literal-form type langtag)
428
           (if (member type (rdf-datatype-iris (car datatypes)))
429
               (make-rdf-literal literal-form (car datatypes) langtag)
430
               (loop (cdr datatypes))))))
431
    (_ d)))
432
433
(define (recognize-triple t datatypes)
434
  (match t
435
    (($ rdf-triple subject predicate object)
436
     (make-rdf-triple
437
       (recognize-data subject datatypes)
438
       predicate
439
       (recognize-data object datatypes)))))
440
441
(define (recognize graph datatypes)
442
  (match graph
443
    (() '())
444
    ((t graph ...)
445
     (cons
446
       (recognize-triple t datatypes)
447
       (recognize graph datatypes)))))
448
449