guile-rdf/rdf/rdf.scm

rdf.scm

1
;;;; Copyright (C) 2020 Julien Lepiller <julien@lepiller.eu>
2
;;;; 
3
;;;; This library is free software; you can redistribute it and/or
4
;;;; modify it under the terms of the GNU Lesser General Public
5
;;;; License as published by the Free Software Foundation; either
6
;;;; version 3 of the License, or (at your option) any later version.
7
;;;; 
8
;;;; This library is distributed in the hope that it will be useful,
9
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
10
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
;;;; Lesser General Public License for more details.
12
;;;; 
13
;;;; You should have received a copy of the GNU Lesser General Public
14
;;;; License along with this library; if not, write to the Free Software
15
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
;;;; 
17
18
(define-module (rdf rdf)
19
  #:use-module (srfi srfi-1)
20
  #:use-module (srfi srfi-9)
21
  #:use-module (sxml simple)
22
  #:use-module (ice-9 match)
23
  #:export (rdf-datatype
24
            make-rdf-datatype
25
            rdf-datatype?
26
            rdf-datatype-iris
27
            rdf-datatype-description
28
            rdf-datatype-lexical?
29
            rdf-datatype-value?
30
            rdf-datatype-lexical->value
31
            rdf-datatype-value->lexical
32
33
            rdf:langString
34
            rdf:XMLLiteral
35
36
            rdf-vocabulary
37
            make-rdf-vocabulary
38
            rdf-vocabulary?
39
            rdf-vocabulary-datatypes
40
            rdf-vocabulary-order
41
            rdf-vocabulary-compatible?
42
43
            rdf-dataset
44
            make-rdf-dataset
45
            rdf-dataset?
46
            rdf-dataset-default-graph
47
            rdf-dataset-named-graphs
48
49
            rdf-triple
50
            make-rdf-triple
51
            rdf-triple?
52
            rdf-triple-subject
53
            rdf-triple-predicate
54
            rdf-triple-object
55
56
            rdf-literal
57
            make-rdf-literal
58
            rdf-literal?
59
            rdf-literal-lexical-form
60
            rdf-literal-type
61
            rdf-literal-langtag
62
63
            blank-node?
64
            rdf-graph?
65
66
            merge-graphs
67
            rdf-isomorphic?
68
            rdf-dataset-isomorphic?
69
            recognize))
70
71
;; From the specification:
72
;;   Datatypes are used with RDF literals to represent values such as strings,
73
;;   numbers and dates.  A datatype consists of a lexical space, a value space
74
;;   and a lexical-to-value mapping, and is denoted by one or more IRIs.
75
;;
76
;;   The lexical space of a datatype is a set of Unicode [UNICODE] strings.
77
;;
78
;;   The lexical-to-value mapping of a datatype is a set of pairs whose first
79
;;   element belongs to the lexical space, and the second element belongs to the
80
;;   value space of the datatype.  Each member of the lexical space is paired
81
;;   with exactly one value, and is a lexical representation of that value.  The
82
;;   mapping can be seen as a function from the lexical space to the value space.
83
;;
84
;; In addition to the specification, we introduce value->lexical, a canonical
85
;; function to map values to the lexical space.  An important property is that
86
;;   for any val, (value? val) implies:
87
;;           (equal? (lexical->value (value->lexical val)) val)
88
;;
89
;; We also introduce a list of IRIs that denote this type, as more than one
90
;; IRI can denote a type.  This is set to a list of IRIs, but may be changed
91
;; to a function to denote a set in the future.
92
;;
93
;; We also introduce a description, a text that helps humans understand the
94
;; purpose of the datatype.
95
96
(define-record-type rdf-datatype
97
  (make-rdf-datatype iris description lexical? value? lexical->value value->lexical)
98
  rdf-datatype?
99
  (iris           rdf-datatype-iris)
100
  (description    rdf-datatype-description)
101
  (lexical?       rdf-datatype-lexical?)
102
  (value?         rdf-datatype-value?)
103
  (lexical->value rdf-datatype-lexical->value)
104
  (value->lexical rdf-datatype-value->lexical))
105
106
(define rdf:langString
107
  (make-rdf-datatype
108
    '("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString")
109
    "A literal is a language-tagged string if the third element is present.
110
Lexical representations of language tags MAY be converted to lower case.  The
111
value space of language tags is always in lower case."
112
    string?
113
    string?
114
    string-downcase
115
    identity))
116
117
(define rdf:XMLLiteral
118
  (make-rdf-datatype
119
    '("http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral")
120
    "RDF provides for XML content as a possible literal value.  Such content
121
is indicated in an RDF graph using a literal whose datatype is set to
122
rdf:XMLLiteral.  This datatype is defined as non-normative because it depends
123
on [DOM4], a specification that has not yet reached W3C Recommendation status."
124
    (lambda (l)
125
      (false-if-exception (xml->sxml l)))
126
    (lambda (v)
127
      (false-if-exception (sxml->xml v)))
128
    xml->sxml
129
    sxml->xml))
130
131
;; In addition to the specification, we define a vocabulary, which will be
132
;; passed to entailments that need one.
133
;;
134
;; datatypes: a list of <rdf-datatype> records.
135
;; order: a procedure that takes two arguments and returns whether the value
136
;;        space of the firts is included in the value space of the second
137
;; compatible?: a procedure that takes two arguments and returns whether the
138
;;              intersection of their value space is not empty
139
140
(define-record-type rdf-vocabulary
141
  (make-rdf-vocabulary datatypes order compatible?)
142
  rdf-vocabulary?
143
  (datatypes   rdf-vocabulary-datatypes)
144
  (order       rdf-vocabulary-order)
145
  (compatible? rdf-vocabulary-compatible?))
146
147
;; From the specification:
148
;;   An RDF dataset is a collection of RDF graphs, and comprises:
149
;;
150
;;   * Exactly one default graph, being an RDF graph.  The default graph does
151
;;     not have a name and MAY be empty.
152
;;   * Zero or more named graphs.  Each named graph is a pair consisting of an
153
;;     IRI or a blank node (the graph name), and an RDF graph.  Graph names are
154
;;     unique within an RDF dataset.
155
;;
156
;; We represent named graphs with a association list whose keys are IRIs or
157
;; blank nodes, and values are RDF graphs.
158
159
(define-record-type rdf-dataset
160
  (make-rdf-dataset default-graph named-graphs)
161
  rdf-dataset?
162
  (default-graph rdf-dataset-default-graph)
163
  (named-graphs  rdf-dataset-named-graphs))
164
165
;; From the specification:
166
;;   An RDF triple consists of three components:
167
;;
168
;;   * the subject, which is an IRI or a blank node
169
;;   * the predicate, which is an IRI
170
;;   * the object, which is an IRI, a literal or a blank node
171
172
(define-record-type rdf-triple
173
  (make-rdf-triple subject predicate object)
174
  rdf-triple?
175
  (subject   rdf-triple-subject)
176
  (predicate rdf-triple-predicate)
177
  (object    rdf-triple-object))
178
179
;; From the specification:
180
;;   A literal in an RDF graph consists of two or three elements:
181
;;
182
;;   * a lexical form, being a Unicode [UNICODE] string, which SHOULD be in
183
;;     Normal Form C [NFC],
184
;;   * a datatype IRI, being an IRI identifying a datatype that determines how
185
;;     the lexical form maps to a literal value, and
186
;;   * if and only if the datatype IRI is `http://www.w3.org/1999/02/22-rdf-syntax-ns#langString`,
187
;;     a non-empty language tag as defined by [BCP47].  The language tag MUST
188
;;     be well-formed according to section 2.2.9 of [BCP47].
189
190
(define-record-type rdf-literal
191
  (make-rdf-literal lexical-form type langtag)
192
  rdf-literal?
193
  (lexical-form rdf-literal-lexical-form)
194
  (type         rdf-literal-type)
195
  (langtag      rdf-literal-langtag))
196
197
;; From the specification:
198
;;   Blank nodes are disjoint from IRIs and literals.  Otherwise, the set of
199
;;   possible blank nodes is arbitrary.  RDF makes no reference to any internal
200
;;   structure of blank nodes.
201
;;
202
;; Here, we will use integers as blank nodes
203
204
(define blank-node? integer?)
205
206
;; From the specification:
207
;;   An RDF graph is a set of RDF triples.
208
;;
209
;; We represent a graph as a list of RDF triples
210
211
(define (rdf-graph? graph)
212
  (and (list? graph) (null? (filter (lambda (t) (not (rdf-triple? t))) graph))))
213
214
;; The following is for a merging procedure, where we rename blank nodes to ensure
215
;; we are not merging blank nodes that have the same name
216
217
(define (last-blank g)
218
  "Retun the biggest blank node identifier in g"
219
  (let loop ((g g) (m 0))
220
    (match g
221
      ('() m)
222
      ((triple g ...)
223
       (loop g (max m
224
                   (if (blank-node? (rdf-triple-subject triple))
225
                       (rdf-triple-subject triple)
226
                       0)
227
                   (if (blank-node? (rdf-triple-object triple))
228
                       (rdf-triple-object triple)
229
                       0)))))))
230
231
(define (rename-blanks g num)
232
  "Return the same graph, but blank nodes are renamed from num"
233
  (let loop ((g g) (renamings '()) (num num) (result '()))
234
    (match g
235
      ('() result)
236
      ((triple g ...)
237
       (let* ((subject (rdf-triple-subject triple))
238
              (num (if (and (blank-node? subject)
239
                            (assoc-ref renamings subject))
240
                       num
241
                       (+ num 1)))
242
              (renamings
243
                (if (and (blank-node? subject)
244
                         (assoc-ref renamings subject))
245
                    renamings
246
                    (cons (cons subject num) renamings)))
247
              (subject
248
                (if (blank-node? subject)
249
                    (assoc-ref renamings subject)
250
                    subject))
251
              (predicate (rdf-triple-predicate triple))
252
              (object (rdf-triple-object triple))
253
              (num (if (and (blank-node? object)
254
                            (assoc-ref renamings object))
255
                       num
256
                       (+ num 1)))
257
              (renamings
258
                (if (and (blank-node? object)
259
                         (assoc-ref renamings object))
260
                    renamings
261
                    (cons (cons object num) renamings)))
262
              (object
263
                (if (blank-node? object)
264
                    (assoc-ref renamings object)
265
                    object)))
266
           (loop g renamings num (cons (make-rdf-triple subject predicate object)
267
                                       result)))))))
268
269
(define (merge-graphs g1 g2)
270
  "Merge two graphs g1 and g2.  This is the same as append, but we need to make
271
sure we rename blank nodes, or some nodes will be merged when they shouldn't."
272
  (append g1 (rename-blanks g2 (last-blank g1))))
273
274
;; Next, a predicate on isomorphisms between two graphs.  Two graphs are isomorphic
275
;; when each triple has a corresponding triple in the other graph.
276
;;
277
;; To take blank nodes into account, there needs to be a mapping from blank nodes
278
;; of the first graph to blank nodes of the other graph in order to prove
279
;; isomorphism.
280
;;
281
;; First, we compare the two graphs and find possible constraints on that mapping.
282
;; for instance, if one graph has (_:1, p, o) and the other (_:2, p, o), then
283
;; a possible constraint is that _:1 maps to _:2. If the other graph also has
284
;; (_:3, p, o) then maybe _:1 actually maps to _:3.
285
;;
286
;; Constraints are either "none" (no constraint), "equiv" (a mapping between two
287
;; blank node identifiers), "or" (a disjunction) or "and" (a conjunction).
288
;; By comparing the triples of the first graph, we create an conjunction between
289
;; the constraints collected from each triple. The constraints of a triple is
290
;; a disjunction between every case where it matches a triple from the other graph.
291
;; That creates zero, one or two constraints (depending on the number of blank
292
;; nodes).
293
;;
294
;; These constraints are transformed in a normal form, as a list of lists of
295
;; conjunctions. Each list is a candidate mapping. sat? is used to evaluate the
296
;; candidate mapping and ensure it is an isomorphism between the two sets of
297
;; blank nodes. For every sat? equivalences, we check that the mapping actually
298
;; maps triples of g1 to triples of g2, and its reverse mapping maps triples of
299
;; g2 to triples of g1. Whenever one mapping works, the two graphs are equivalent.
300
;; If no mapping works, the two graphs are not equivalent.
301
302
(define (sat? equivalences)
303
  "Return whether the set of equivalences satisfies the condition that it represents
304
an isomorphism between two blank node sets: for every equality, check that the
305
first component is always associated to the same second component, and that the
306
second component is always associated with the first."
307
  (match equivalences
308
    ('() #t)
309
    (((first . second) equivalences ...)
310
     (if (and (null? (filter
311
                       (lambda (eq)
312
                         (and (equal? (car eq) first)
313
                              (not (equal? (cdr eq) second))))
314
                       equivalences))
315
              (null? (filter
316
                       (lambda (eq)
317
                         (and (not (equal? (car eq) first))
318
                              (equal? (cdr eq) second)))
319
                       equivalences)))
320
         (sat? equivalences)
321
         #f))))
322
323
(define (filter-sat equivalences)
324
  (if (list? equivalences)
325
      (filter
326
        (lambda (eq)
327
          (if (member eq '(bot none))
328
              eq
329
              (if (sat? eq)
330
                  eq
331
                  'bot)))
332
        equivalences)
333
      equivalences))
334
335
(define (merge-joins l1 l2)
336
  (cond
337
    ((null? l1) l2)
338
    ((null? l2) l1)
339
    (else
340
      (fold
341
        (lambda (e1 res)
342
          (append
343
            (map (lambda (e2)
344
                   (append e1 e2))
345
                 l2)
346
            res))
347
        '()
348
        l1))))
349
350
(define (to-disjunctions constraints)
351
  (match constraints
352
    (('equiv b1 b2) (list (list (cons b1 b2))))
353
    ('none (list (list)))
354
    ('bot 'bot)
355
    (('or e1 e2)
356
     (let ((e1 (filter-sat (to-disjunctions e1)))
357
           (e2 (filter-sat (to-disjunctions e2))))
358
       (cond
359
         ((equal? e2 'bot)
360
          e1)
361
         ((equal? e1 'bot)
362
          e2)
363
         ((equal? e2 'none)
364
          'none)
365
         ((equal? e1 'none)
366
          'none)
367
         ((equal? e1 e2)
368
          e1)
369
         (else
370
           (append e1 e2)))))
371
    (('and e1 e2)
372
     (let ((e1 (filter-sat (to-disjunctions e1)))
373
           (e2 (filter-sat (to-disjunctions e2))))
374
       (cond
375
         ((equal? e1 'bot)
376
          'bot)
377
         ((equal? e2 'bot)
378
          'bot)
379
         ((equal? e2 'none)
380
          e1)
381
         ((equal? e1 'none)
382
          e2)
383
         ((equal? e1 e2)
384
          e1)
385
         (else
386
           (merge-joins e1 e2)))))))
387
388
(define (generate-triple-constraints t1 t2)
389
  (match t1
390
    (($ rdf-triple s1 p1 o1)
391
     (match t2
392
       (($ rdf-triple s2 p2 o2)
393
        (if (and (or (equal? s1 s2) (and (blank-node? s1) (blank-node? s2)))
394
                 (equal? p1 p2)
395
                 (or (equal? o1 o2) (and (blank-node? o1) (blank-node? o2))))
396
            (list 'and
397
                  (if (blank-node? s1)
398
                      (list 'equiv s1 s2)
399
                      'none)
400
                  (if (blank-node? o1)
401
                      (list 'equiv o1 o2)
402
                      'none))
403
            #f))))))
404
405
(define (generate-constraints t1 g2)
406
  (match g2
407
    ('() 'bot)
408
    ((t2 g2 ...)
409
     (let ((c (generate-triple-constraints t1 t2)))
410
       (if c
411
         (list 'or c (generate-constraints t1 g2))
412
         (generate-constraints t1 g2))))))
413
414
(define (generate-graph-constraints g1 g2)
415
  (fold (lambda (t constraints)
416
          (list 'and (generate-constraints t g2) constraints))
417
        'none g1))
418
419
(define (reverse-mapping mapping)
420
  (let loop ((mapping mapping) (result '()))
421
  (match mapping
422
    ('() result)
423
    (((first . second) mapping ...)
424
     (loop mapping (cons (cons second first) result))))))
425
426
(define (validate-mapping mapping g1 g2)
427
  (match g1
428
    ('() #t)
429
    ((t1 g1 ...)
430
     (and (not (null? (filter
431
                        (lambda (t2)
432
                          (let ((s1 (rdf-triple-subject t1))
433
                                (s2 (rdf-triple-subject t2))
434
                                (p1 (rdf-triple-predicate t1))
435
                                (p2 (rdf-triple-predicate t2))
436
                                (o1 (rdf-triple-object t1))
437
                                (o2 (rdf-triple-object t2)))
438
                            (and
439
                              (if (blank-node? s1)
440
                                  (equal? (assoc-ref mapping s1) s2)
441
                                  (equal? s1 s2))
442
                              (equal? p1 p2)
443
                              (if (blank-node? o1)
444
                                  (equal? (assoc-ref mapping o1) o2)
445
                                  (equal? o1 o2)))))
446
                        g2)))
447
          (validate-mapping mapping g1 g2)))))
448
449
(define (rdf-isomorphic? g1 g2)
450
  "Compare two graphs and return whether they are isomorph."
451
  (let* ((constraints (generate-graph-constraints g1 g2))
452
         (disjunctions (to-disjunctions (simplify-constraints constraints))))
453
    (if (list? disjunctions)
454
        (let loop ((disjunctions (filter sat? disjunctions)))
455
          (match disjunctions
456
            ('() (and (null? g1) (null? g2)))
457
            ((mapping disjunctions ...)
458
             (if (and (validate-mapping mapping g1 g2)
459
                      (validate-mapping (reverse-mapping mapping) g2 g1))
460
               #t
461
               (loop disjunctions)))))
462
        #f)))
463
464
(define* (simplify-constraints c #:optional (equivalences '()))
465
  (match c
466
    ('bot 'bot)
467
    ('none 'none)
468
    (('equiv a b)
469
     (if (assoc-ref equivalences a)
470
         (if (equal? (assoc-ref equivalences a) b)
471
             'none
472
             'bot)
473
         c))
474
    (('and e1 e2)
475
     (match (simplify-constraints e1 equivalences)
476
       ('bot 'bot)
477
       ('none (simplify-constraints e2 equivalences))
478
       (('equiv a b)
479
        (let ((e2 (simplify-constraints e2 (cons (cons a b) equivalences))))
480
          (match e2
481
            ('bot 'bot)
482
            ('none (list 'equiv a b))
483
            (e2 (list 'and (list 'equiv a b) (simplify-constraints e2 (cons (cons a b) equivalences)))))))
484
       (e1
485
         (match (simplify-constraints e2 equivalences)
486
           ('bot 'bot)
487
           ('none e1)
488
           (('equiv a b)
489
            (list 'and (list 'equiv a b) (simplify-constraints e1 (cons (cons a b) equivalences))))
490
           (e2
491
             (list 'and e1 e2))))))
492
    (('or e1 e2)
493
     (let ((e1 (simplify-constraints e1 equivalences))
494
           (e2 (simplify-constraints e2 equivalences)))
495
       (cond
496
         ((equal? e1 'bot)
497
          e2)
498
         ((equal? e2 'bot)
499
          e1)
500
         ((equal? e1 'none)
501
          'none)
502
         ((equal? e2 'none)
503
          'none)
504
         ((equal? e1 e2)
505
          e1)
506
         (else
507
           (list 'or e1 e2)))))))
508
509
(define (generate-dataset-constraints d1 d2)
510
  (let ((g1 (rdf-dataset-default-graph d1))
511
        (g2 (rdf-dataset-default-graph d2))
512
        (ng1 (rdf-dataset-named-graphs d1))
513
        (ng2 (rdf-dataset-named-graphs d2)))
514
    (list 'and (simplify-constraints (generate-graph-constraints g1 g2))
515
          (if (null? ng1)
516
              'none
517
              (fold (lambda (ng1 constraints)
518
                      (match ng1
519
                        ((n1 . g1)
520
                         (list
521
                           'and
522
                           (if (blank-node? n1)
523
                             (fold (lambda (ng2 constraints)
524
                                     (list 'or (list 'and (list 'equiv n1 (car ng2))
525
                                                     (simplify-constraints
526
                                                       (generate-graph-constraints
527
                                                         g1 (cdr ng2))))
528
                                           constraints))
529
                                   'bot
530
                                   (filter (lambda (g2) (blank-node? (car g2))) ng2))
531
                             (let ((g2 (assoc-ref ng2 n1)))
532
                               (if g2
533
                                   (list 'and (simplify-constraints
534
                                                (generate-graph-constraints g1 g2))
535
                                         constraints)
536
                                   'bot)))
537
                           constraints))))
538
                    'none ng1)))))
539
540
(define (validate-dataset-mapping mapping d1 d2)
541
  (define (validate-named-graph name graph)
542
    (let ((graph2 (if (blank-node? name)
543
                      (assoc-ref (rdf-dataset-named-graphs d2)
544
                                 (assoc-ref mapping name))
545
                      (assoc-ref (rdf-dataset-named-graphs d2) name))))
546
      (validate-mapping mapping graph graph2)))
547
548
  (and (validate-mapping mapping (rdf-dataset-default-graph d1)
549
                         (rdf-dataset-default-graph d2))
550
       (null? (filter
551
                (lambda (ng1)
552
                  (match ng1
553
                    ((name . graph)
554
                     (not (validate-named-graph name graph)))))
555
                (rdf-dataset-named-graphs d1)))))
556
557
(define (rdf-dataset-isomorphic? d1 d2)
558
  "Compare two datasets and return whether they are isomorphic."
559
  (let* ((constraints (generate-dataset-constraints d1 d2))
560
         (disjunctions (to-disjunctions constraints)))
561
    (if (list? disjunctions)
562
        (let loop ((disjunctions (filter sat? disjunctions)))
563
          (match disjunctions
564
            ('() (and (null? (rdf-dataset-default-graph d1))
565
                      (null? (rdf-dataset-default-graph d2))
566
                      (null? (rdf-dataset-named-graphs d1))
567
                      (null? (rdf-dataset-named-graphs d2))))
568
            ((mapping disjunctions ...)
569
             (or (and (validate-dataset-mapping mapping d1 d2)
570
                      (validate-dataset-mapping (reverse-mapping mapping) d2 d1))
571
                 (loop disjunctions)))))
572
        #f)))
573
574
;; Recognizing datatypes is a transformation on the graph to add the proper
575
;; datatype to literals, and replace IRIs that represent a datatype with the
576
;; datatype it represents.  This is useful for some entailment regimes, such
577
;; as the RDF or RDFS entailment regimes.
578
579
(define (recognize-data d datatypes)
580
  (match d
581
    ((? string? iri) 
582
     (let loop ((datatypes datatypes))
583
       (if (null? datatypes)
584
           iri
585
           (if (member iri (rdf-datatype-iris (car datatypes)))
586
               (car datatypes)
587
               (loop (cdr datatypes))))))
588
    (($ rdf-literal literal-form type langtag)
589
     (let loop ((datatypes datatypes))
590
       (if (null? datatypes)
591
           (make-rdf-literal literal-form type langtag)
592
           (if (member type (rdf-datatype-iris (car datatypes)))
593
               (make-rdf-literal literal-form (car datatypes) langtag)
594
               (loop (cdr datatypes))))))
595
    (_ d)))
596
597
(define (recognize-triple t datatypes)
598
  (match t
599
    (($ rdf-triple subject predicate object)
600
     (make-rdf-triple
601
       (recognize-data subject datatypes)
602
       predicate
603
       (recognize-data object datatypes)))))
604
605
(define (recognize graph vocabulary)
606
  (match graph
607
    (() '())
608
    ((t graph ...)
609
     (cons
610
       (recognize-triple t (rdf-vocabulary-datatypes vocabulary))
611
       (recognize graph vocabulary)))))
612
613