guile-rdf/rdf/rdf.scm

rdf.scm

1
;;;; Copyright (C) 2020 Julien Lepiller <julien@lepiller.eu>
2
;;;; 
3
;;;; This library is free software; you can redistribute it and/or
4
;;;; modify it under the terms of the GNU Lesser General Public
5
;;;; License as published by the Free Software Foundation; either
6
;;;; version 3 of the License, or (at your option) any later version.
7
;;;; 
8
;;;; This library is distributed in the hope that it will be useful,
9
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
10
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
;;;; Lesser General Public License for more details.
12
;;;; 
13
;;;; You should have received a copy of the GNU Lesser General Public
14
;;;; License along with this library; if not, write to the Free Software
15
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
;;;; 
17
18
(define-module (rdf rdf)
19
  #:use-module (srfi srfi-1)
20
  #:use-module (srfi srfi-9)
21
  #:use-module (sxml simple)
22
  #:use-module (ice-9 match)
23
  #:export (rdf-datatype
24
            make-rdf-datatype
25
            rdf-datatype?
26
            rdf-datatype-iris
27
            rdf-datatype-description
28
            rdf-datatype-lexical?
29
            rdf-datatype-value?
30
            rdf-datatype-lexical->value
31
            rdf-datatype-value->lexical
32
33
            rdf:langString
34
            rdf:XMLLiteral
35
36
            rdf-vocabulary
37
            make-rdf-vocabulary
38
            rdf-vocabulary?
39
            rdf-vocabulary-datatypes
40
            rdf-vocabulary-order
41
            rdf-vocabulary-compatible?
42
43
            rdf-dataset
44
            make-rdf-dataset
45
            rdf-dataset?
46
            rdf-dataset-default-graph
47
            rdf-dataset-named-graphs
48
49
            rdf-triple
50
            make-rdf-triple
51
            rdf-triple?
52
            rdf-triple-subject
53
            rdf-triple-predicate
54
            rdf-triple-object
55
56
            rdf-literal
57
            make-rdf-literal
58
            rdf-literal?
59
            rdf-literal-lexical-form
60
            rdf-literal-type
61
            rdf-literal-langtag
62
63
            blank-node?
64
            rdf-graph?
65
66
            merge-graphs
67
            rdf-isomorphic?
68
            recognize))
69
70
;; From the specification:
71
;;   Datatypes are used with RDF literals to represent values such as strings,
72
;;   numbers and dates.  A datatype consists of a lexical space, a value space
73
;;   and a lexical-to-value mapping, and is denoted by one or more IRIs.
74
;;
75
;;   The lexical space of a datatype is a set of Unicode [UNICODE] strings.
76
;;
77
;;   The lexical-to-value mapping of a datatype is a set of pairs whose first
78
;;   element belongs to the lexical space, and the second element belongs to the
79
;;   value space of the datatype.  Each member of the lexical space is paired
80
;;   with exactly one value, and is a lexical representation of that value.  The
81
;;   mapping can be seen as a function from the lexical space to the value space.
82
;;
83
;; In addition to the specification, we introduce value->lexical, a canonical
84
;; function to map values to the lexical space.  An important property is that
85
;;   for any val, (value? val) implies:
86
;;           (equal? (lexical->value (value->lexical val)) val)
87
;;
88
;; We also introduce a list of IRIs that denote this type, as more than one
89
;; IRI can denote a type.  This is set to a list of IRIs, but may be changed
90
;; to a function to denote a set in the future.
91
;;
92
;; We also introduce a description, a text that helps humans understand the
93
;; purpose of the datatype.
94
95
(define-record-type rdf-datatype
96
  (make-rdf-datatype iris description lexical? value? lexical->value value->lexical)
97
  rdf-datatype?
98
  (iris           rdf-datatype-iris)
99
  (description    rdf-datatype-description)
100
  (lexical?       rdf-datatype-lexical?)
101
  (value?         rdf-datatype-value?)
102
  (lexical->value rdf-datatype-lexical->value)
103
  (value->lexical rdf-datatype-value->lexical))
104
105
(define rdf:langString
106
  (make-rdf-datatype
107
    '("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString")
108
    "A literal is a language-tagged string if the third element is present.
109
Lexical representations of language tags MAY be converted to lower case.  The
110
value space of language tags is always in lower case."
111
    string?
112
    string?
113
    string-downcase
114
    identity))
115
116
(define rdf:XMLLiteral
117
  (make-rdf-datatype
118
    '("http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral")
119
    "RDF provides for XML content as a possible literal value.  Such content
120
is indicated in an RDF graph using a literal whose datatype is set to
121
rdf:XMLLiteral.  This datatype is defined as non-normative because it depends
122
on [DOM4], a specification that has not yet reached W3C Recommendation status."
123
    (lambda (l)
124
      (false-if-exception (xml->sxml l)))
125
    (lambda (v)
126
      (false-if-exception (sxml->xml v)))
127
    xml->sxml
128
    sxml->xml))
129
130
;; In addition to the specification, we define a vocabulary, which will be
131
;; passed to entailments that need one.
132
;;
133
;; datatypes: a list of <rdf-datatype> records.
134
;; order: a procedure that takes two arguments and returns whether the value
135
;;        space of the firts is included in the value space of the second
136
;; compatible?: a procedure that takes two arguments and returns whether the
137
;;              intersection of their value space is not empty
138
139
(define-record-type rdf-vocabulary
140
  (make-rdf-vocabulary datatypes order compatible?)
141
  rdf-vocabulary?
142
  (datatypes   rdf-vocabulary-datatypes)
143
  (order       rdf-vocabulary-order)
144
  (compatible? rdf-vocabulary-compatible?))
145
146
;; From the specification:
147
;;   An RDF dataset is a collection of RDF graphs, and comprises:
148
;;
149
;;   * Exactly one default graph, being an RDF graph.  The default graph does
150
;;     not have a name and MAY be empty.
151
;;   * Zero or more named graphs.  Each named graph is a pair consisting of an
152
;;     IRI or a blank node (the graph name), and an RDF graph.  Graph names are
153
;;     unique within an RDF dataset.
154
;;
155
;; We represent named graphs with a association list whose keys are IRIs or
156
;; blank nodes, and values are RDF graphs.
157
158
(define-record-type rdf-dataset
159
  (make-rdf-dataset default-graph named-graphs)
160
  rdf-dataset?
161
  (default-graph rdf-dataset-default-graph)
162
  (named-graphs  rdf-dataset-named-graphs))
163
164
;; From the specification:
165
;;   An RDF triple consists of three components:
166
;;
167
;;   * the subject, which is an IRI or a blank node
168
;;   * the predicate, which is an IRI
169
;;   * the object, which is an IRI, a literal or a blank node
170
171
(define-record-type rdf-triple
172
  (make-rdf-triple subject predicate object)
173
  rdf-triple?
174
  (subject   rdf-triple-subject)
175
  (predicate rdf-triple-predicate)
176
  (object    rdf-triple-object))
177
178
;; From the specification:
179
;;   A literal in an RDF graph consists of two or three elements:
180
;;
181
;;   * a lexical form, being a Unicode [UNICODE] string, which SHOULD be in
182
;;     Normal Form C [NFC],
183
;;   * a datatype IRI, being an IRI identifying a datatype that determines how
184
;;     the lexical form maps to a literal value, and
185
;;   * if and only if the datatype IRI is `http://www.w3.org/1999/02/22-rdf-syntax-ns#langString`,
186
;;     a non-empty language tag as defined by [BCP47].  The language tag MUST
187
;;     be well-formed according to section 2.2.9 of [BCP47].
188
189
(define-record-type rdf-literal
190
  (make-rdf-literal lexical-form type language-tag)
191
  rdf-literal?
192
  (lexical-form rdf-literal-lexical-form)
193
  (type         rdf-literal-type)
194
  (language-tag rdf-literal-language-tag))
195
196
;; From the specification:
197
;;   Blank nodes are disjoint from IRIs and literals.  Otherwise, the set of
198
;;   possible blank nodes is arbitrary.  RDF makes no reference to any internal
199
;;   structure of blank nodes.
200
;;
201
;; Here, we will use integers as blank nodes
202
203
(define blank-node? integer?)
204
205
;; From the specification:
206
;;   An RDF graph is a set of RDF triples.
207
;;
208
;; We represent a graph as a list of RDF triples
209
210
(define (rdf-graph? graph)
211
  (and (list? graph) (null? (filter (lambda (t) (not (rdf-triple? t))) graph))))
212
213
;; The following is for a merging procedure, where we rename blank nodes to ensure
214
;; we are not merging blank nodes that have the same name
215
216
(define (last-blank g)
217
  "Retun the biggest blank node identifier in g"
218
  (let loop ((g g) (m 0))
219
    (match g
220
      ('() m)
221
      ((triple g ...)
222
       (loop g (max m
223
                   (if (blank-node? (rdf-triple-subject triple))
224
                       (rdf-triple-subject triple)
225
                       0)
226
                   (if (blank-node? (rdf-triple-object triple))
227
                       (rdf-triple-object triple)
228
                       0)))))))
229
230
(define (rename-blanks g num)
231
  "Return the same graph, but blank nodes are renamed from num"
232
  (let loop ((g g) (renamings '()) (num num) (result '()))
233
    (match g
234
      ('() result)
235
      ((triple g ...)
236
       (let* ((subject (rdf-triple-subject triple))
237
              (num (if (and (blank-node? subject)
238
                            (assoc-ref renamings subject))
239
                       num
240
                       (+ num 1)))
241
              (renamings
242
                (if (and (blank-node? subject)
243
                         (assoc-ref renamings subject))
244
                    renamings
245
                    (cons (cons subject num) renamings)))
246
              (subject
247
                (if (blank-node? subject)
248
                    (assoc-ref renamings subject)
249
                    subject))
250
              (predicate (rdf-triple-predicate triple))
251
              (object (rdf-triple-object triple))
252
              (num (if (and (blank-node? object)
253
                            (assoc-ref renamings object))
254
                       num
255
                       (+ num 1)))
256
              (renamings
257
                (if (and (blank-node? object)
258
                         (assoc-ref renamings object))
259
                    renamings
260
                    (cons (cons object num) renamings)))
261
              (object
262
                (if (blank-node? object)
263
                    (assoc-ref renamings object)
264
                    object)))
265
           (loop g renamings num (cons (make-rdf-triple subject predicate object)
266
                                       result)))))))
267
268
(define (merge-graphs g1 g2)
269
  "Merge two graphs g1 and g2.  This is the same as append, but we need to make
270
sure we rename blank nodes, or some nodes will be merged when they shouldn't."
271
  (append g1 (rename-blanks g2 (last-blank g1))))
272
273
;; Next, a predicate on isomorphisms between two graphs.  Two graphs are isomorphic
274
;; when each triple has a corresponding triple in the other graph.
275
;;
276
;; To take blank nodes into account, there needs to be a mapping from blank nodes
277
;; of the first graph to blank nodes of the other graph in order to prove
278
;; isomorphism.
279
;;
280
;; First, we compare the two graphs and find possible constraints on that mapping.
281
;; for instance, if one graph has (_:1, p, o) and the other (_:2, p, o), then
282
;; a possible constraint is that _:1 maps to _:2. If the other graph also has
283
;; (_:3, p, o) then maybe _:1 actually maps to _:3.
284
;;
285
;; Constraints are either "none" (no constraint), "equiv" (a mapping between two
286
;; blank node identifiers), "or" (a disjunction) or "and" (a conjunction).
287
;; By comparing the triples of the first graph, we create an conjunction between
288
;; the constraints collected from each triple. The constraints of a triple is
289
;; a disjunction between every case where it matches a triple from the other graph.
290
;; That creates zero, one or two constraints (depending on the number of blank
291
;; nodes).
292
;;
293
;; These constraints are transformed in a normal form, as a list of lists of
294
;; conjunctions. Each list is a candidate mapping. sat? is used to evaluate the
295
;; candidate mapping and ensure it is an isomorphism between the two sets of
296
;; blank nodes. For every sat? equivalences, we check that the mapping actually
297
;; maps triples of g1 to triples of g2, and its reverse mapping maps triples of
298
;; g2 to triples of g1. Whenever one mapping works, the two graphs are equivalent.
299
;; If no mapping works, the two graphs are not equivalent.
300
301
(define (sat? equivalences)
302
  "Return whether the set of equivalences satisfies the condition that it represents
303
an isomorphism between two blank node sets: for every equality, check that the
304
first component is always associated to the same second component, and that the
305
second component is always associated with the first."
306
  (match equivalences
307
    ('() #t)
308
    (((first . second) equivalences ...)
309
     (if (and (null? (filter
310
                       (lambda (eq)
311
                         (and (equal? (car eq) first)
312
                              (not (equal? (cdr eq) second))))
313
                       equivalences))
314
              (null? (filter
315
                       (lambda (eq)
316
                         (and (not (equal? (car eq) first))
317
                              (equal? (cdr eq) second)))
318
                       equivalences)))
319
         (sat? equivalences)
320
         #f))))
321
322
(define (merge-joins l1 l2)
323
  (cond
324
    ((null? l1) l2)
325
    ((null? l2) l1)
326
    (else
327
      (fold
328
        (lambda (e1 res)
329
          (append
330
            (map (lambda (e2)
331
                   (append e1 e2))
332
                 l2)
333
            res))
334
        '()
335
        l1))))
336
337
(define (to-disjunctions constraints)
338
  (match constraints
339
    (('equiv b1 b2) (list (list (cons b1 b2))))
340
    ('none (list (list)))
341
    (('or e1 e2)
342
     (cond
343
       ((equal? e2 'bot)
344
        (to-disjunctions e1))
345
       ((equal? e1 'bot)
346
        (to-disjunctions e2))
347
       (else
348
         (let ((e1 (to-disjunctions e1))
349
               (e2 (to-disjunctions e2)))
350
           (append e1 e2)))))
351
    (('and e1 e2)
352
     (cond
353
       ((equal? e1 'bot)
354
        'bot)
355
       ((equal? e2 'bot)
356
        'bot)
357
       (else
358
         (let ((e1 (to-disjunctions e1))
359
               (e2 (to-disjunctions e2)))
360
           (merge-joins e1 e2)))))))
361
362
(define (generate-triple-constraints t1 t2)
363
  (match t1
364
    (($ rdf-triple s1 p1 o1)
365
     (match t2
366
       (($ rdf-triple s2 p2 o2)
367
        (if (and (or (equal? s1 s2) (and (blank-node? s1) (blank-node? s2)))
368
                 (equal? p1 p2)
369
                 (or (equal? o1 o2) (and (blank-node? o1) (blank-node? o2))))
370
            (list 'and
371
                  (if (blank-node? s1)
372
                      (list 'equiv s1 s2)
373
                      'none)
374
                  (if (blank-node? o1)
375
                      (list 'equiv o1 o2)
376
                      'none))
377
            #f))))))
378
379
(define (generate-constraints t1 g2)
380
  (match g2
381
    ('() 'bot)
382
    ((t2 g2 ...)
383
     (let ((c (generate-triple-constraints t1 t2)))
384
       (if c
385
         (list 'or c (generate-constraints t1 g2))
386
         (generate-constraints t1 g2))))))
387
388
(define (reverse-mapping mapping)
389
  (let loop ((mapping mapping) (result '()))
390
  (match mapping
391
    ('() result)
392
    (((first . second) mapping ...)
393
     (loop mapping (cons (cons second first) result))))))
394
395
(define (validate-mapping mapping g1 g2)
396
  (match g1
397
    ('() #t)
398
    ((t1 g1 ...)
399
     (and (not (null? (filter
400
                        (lambda (t2)
401
                          (let ((s1 (rdf-triple-subject t1))
402
                                (s2 (rdf-triple-subject t2))
403
                                (p1 (rdf-triple-predicate t1))
404
                                (p2 (rdf-triple-predicate t2))
405
                                (o1 (rdf-triple-object t1))
406
                                (o2 (rdf-triple-object t2)))
407
                            (and
408
                              (if (blank-node? s1)
409
                                  (equal? (assoc-ref mapping s1) s2)
410
                                  (equal? s1 s2))
411
                              (equal? p1 p2)
412
                              (if (blank-node? o1)
413
                                  (equal? (assoc-ref mapping o1) o2)
414
                                  (equal? o1 o2)))))
415
                        g2)))
416
          (validate-mapping mapping g1 g2)))))
417
418
(define (rdf-isomorphic? g1 g2)
419
  "Compare two graphs and return whether they are isomorph."
420
  (let* ((constraints (fold (lambda (t constraints)
421
                              (list 'and (generate-constraints t g2) constraints))
422
                            'none g1))
423
         (disjunctions (to-disjunctions constraints)))
424
    (let loop ((disjunctions disjunctions))
425
      (match (filter sat? disjunctions)
426
        ('() (and (null? g1) (null? g2)))
427
        ((mapping disjunctions ...)
428
         (if (and (validate-mapping mapping g1 g2)
429
                  (validate-mapping (reverse-mapping mapping) g2 g1))
430
           #t
431
           (loop disjunctions)))))))
432
433
;; Recognizing datatypes is a transformation on the graph to add the proper
434
;; datatype to literals, and replace IRIs that represent a datatype with the
435
;; datatype it represents.  This is useful for some entailment regimes, such
436
;; as the RDF or RDFS entailment regimes.
437
438
(define (recognize-data d datatypes)
439
  (match d
440
    ((? string? iri) 
441
     (let loop ((datatypes datatypes))
442
       (if (null? datatypes)
443
           iri
444
           (if (member iri (rdf-datatype-iris (car datatypes)))
445
               (car datatypes)
446
               (loop (cdr datatypes))))))
447
    (($ rdf-literal literal-form type langtag)
448
     (let loop ((datatypes datatypes))
449
       (if (null? datatypes)
450
           (make-rdf-literal literal-form type langtag)
451
           (if (member type (rdf-datatype-iris (car datatypes)))
452
               (make-rdf-literal literal-form (car datatypes) langtag)
453
               (loop (cdr datatypes))))))
454
    (_ d)))
455
456
(define (recognize-triple t datatypes)
457
  (match t
458
    (($ rdf-triple subject predicate object)
459
     (make-rdf-triple
460
       (recognize-data subject datatypes)
461
       predicate
462
       (recognize-data object datatypes)))))
463
464
(define (recognize graph vocabulary)
465
  (match graph
466
    (() '())
467
    ((t graph ...)
468
     (cons
469
       (recognize-triple t (rdf-vocabulary-datatypes vocabulary))
470
       (recognize graph vocabulary)))))
471
472