guile-rdf/rdf/rdf.scm

rdf.scm

1
;;;; Copyright (C) 2020 Julien Lepiller <julien@lepiller.eu>
2
;;;; 
3
;;;; This library is free software; you can redistribute it and/or
4
;;;; modify it under the terms of the GNU Lesser General Public
5
;;;; License as published by the Free Software Foundation; either
6
;;;; version 3 of the License, or (at your option) any later version.
7
;;;; 
8
;;;; This library is distributed in the hope that it will be useful,
9
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
10
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
;;;; Lesser General Public License for more details.
12
;;;; 
13
;;;; You should have received a copy of the GNU Lesser General Public
14
;;;; License along with this library; if not, write to the Free Software
15
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
;;;; 
17
18
(define-module (rdf rdf)
19
  #:use-module (srfi srfi-1)
20
  #:use-module (srfi srfi-9)
21
  #:use-module (ice-9 match)
22
  #:export (rdf-datatype
23
            make-rdf-datatype
24
            rdf-datatype?
25
            rdf-datatype-iris
26
            rdf-datatype-description
27
            rdf-datatype-lexical?
28
            rdf-datatype-value?
29
            rdf-datatype-lexical->value
30
            rdf-datatype-value->lexical
31
32
            rdf:langString
33
34
            rdf-dataset
35
            make-rdf-dataset
36
            rdf-dataset?
37
            rdf-dataset-default-graph
38
            rdf-dataset-named-graphs
39
40
            rdf-triple
41
            make-rdf-triple
42
            rdf-triple?
43
            rdf-triple-subject
44
            rdf-triple-predicate
45
            rdf-triple-object
46
47
            rdf-literal
48
            make-rdf-literal
49
            rdf-literal?
50
            rdf-literal-lexical-form
51
            rdf-literal-type
52
            rdf-literal-langtag
53
54
            blank-node?
55
            rdf-graph?
56
57
            merge-graphs
58
            rdf-isomorphic?
59
            recognize))
60
61
;; From the specification:
62
;;   Datatypes are used with RDF literals to represent values such as strings,
63
;;   numbers and dates.  A datatype consists of a lexical space, a value space
64
;;   and a lexical-to-value mapping, and is denoted by one or more IRIs.
65
;;
66
;;   The lexical space of a datatype is a set of Unicode [UNICODE] strings.
67
;;
68
;;   The lexical-to-value mapping of a datatype is a set of pairs whose first
69
;;   element belongs to the lexical space, and the second element belongs to the
70
;;   value space of the datatype.  Each member of the lexical space is paired
71
;;   with exactly one value, and is a lexical representation of that value.  The
72
;;   mapping can be seen as a function from the lexical space to the value space.
73
;;
74
;; In addition to the specification, we introduce value->lexical, a canonical
75
;; function to map values to the lexical space.  An important property is that
76
;;   for any val, (value? val) implies:
77
;;           (equal? (lexical->value (value->lexical val)) val)
78
;;
79
;; We also introduce a list of IRIs that denote this type, as more than one
80
;; IRI can denote a type.  This is set to a list of IRIs, but may be changed
81
;; to a function to denote a set in the future.
82
;;
83
;; We also introduce a description, a text that helps humans understand the
84
;; purpose of the datatype.
85
86
(define-record-type rdf-datatype
87
  (make-rdf-datatype iris description lexical? value? lexical->value value->lexical)
88
  rdf-datatype?
89
  (iris           rdf-datatype-iris)
90
  (description    rdf-datatype-description)
91
  (lexical?       rdf-datatype-lexical?)
92
  (value?         rdf-datatype-value?)
93
  (lexical->value rdf-datatype-lexical->value)
94
  (value->lexical rdf-datatype-value->lexical))
95
96
(define rdf:langString
97
  (make-rdf-datatype
98
    '("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString")
99
    "A literal is a language-tagged string if the third element is present.
100
Lexical representations of language tags MAY be converted to lower case.  The
101
value space of language tags is always in lower case."
102
    string?
103
    string?
104
    string-downcase
105
    identity))
106
107
;; From the specification:
108
;;   An RDF dataset is a collection of RDF graphs, and comprises:
109
;;
110
;;   * Exactly one default graph, being an RDF graph.  The default graph does
111
;;     not have a name and MAY be empty.
112
;;   * Zero or more named graphs.  Each named graph is a pair consisting of an
113
;;     IRI or a blank node (the graph name), and an RDF graph.  Graph names are
114
;;     unique within an RDF dataset.
115
;;
116
;; We represent named graphs with a association list whose keys are IRIs or
117
;; blank nodes, and values are RDF graphs.
118
119
(define-record-type rdf-dataset
120
  (make-rdf-dataset default-graph named-graphs)
121
  rdf-dataset?
122
  (default-graph rdf-dataset-default-graph)
123
  (named-graphs  rdf-dataset-named-graphs))
124
125
;; From the specification:
126
;;   An RDF triple consists of three components:
127
;;
128
;;   * the subject, which is an IRI or a blank node
129
;;   * the predicate, which is an IRI
130
;;   * the object, which is an IRI, a literal or a blank node
131
132
(define-record-type rdf-triple
133
  (make-rdf-triple subject predicate object)
134
  rdf-triple?
135
  (subject   rdf-triple-subject)
136
  (predicate rdf-triple-predicate)
137
  (object    rdf-triple-object))
138
139
;; From the specification:
140
;;   A literal in an RDF graph consists of two or three elements:
141
;;
142
;;   * a lexical form, being a Unicode [UNICODE] string, which SHOULD be in
143
;;     Normal Form C [NFC],
144
;;   * a datatype IRI, being an IRI identifying a datatype that determines how
145
;;     the lexical form maps to a literal value, and
146
;;   * if and only if the datatype IRI is `http://www.w3.org/1999/02/22-rdf-syntax-ns#langString`,
147
;;     a non-empty language tag as defined by [BCP47].  The language tag MUST
148
;;     be well-formed according to section 2.2.9 of [BCP47].
149
150
(define-record-type rdf-literal
151
  (make-rdf-literal lexical-form type language-tag)
152
  rdf-literal?
153
  (lexical-form rdf-literal-lexical-form)
154
  (type         rdf-literal-type)
155
  (language-tag rdf-literal-language-tag))
156
157
;; From the specification:
158
;;   Blank nodes are disjoint from IRIs and literals.  Otherwise, the set of
159
;;   possible blank nodes is arbitrary.  RDF makes no reference to any internal
160
;;   structure of blank nodes.
161
;;
162
;; Here, we will use integers as blank nodes
163
164
(define blank-node? integer?)
165
166
;; From the specification:
167
;;   An RDF graph is a set of RDF triples.
168
;;
169
;; We represent a graph as a list of RDF triples
170
171
(define (rdf-graph? graph)
172
  (and (list? graph) (null? (filter (lambda (t) (not (rdf-triple? t))) graph))))
173
174
;; The following is for a merging procedure, where we rename blank nodes to ensure
175
;; we are not merging blank nodes that have the same name
176
177
(define (last-blank g)
178
  "Retun the biggest blank node identifier in g"
179
  (let loop ((g g) (m 0))
180
    (match g
181
      ('() m)
182
      ((triple g ...)
183
       (loop g (max m
184
                   (if (blank-node? (rdf-triple-subject triple))
185
                       (rdf-triple-subject triple)
186
                       0)
187
                   (if (blank-node? (rdf-triple-object triple))
188
                       (rdf-triple-object triple)
189
                       0)))))))
190
191
(define (rename-blanks g num)
192
  "Return the same graph, but blank nodes are renamed from num"
193
  (let loop ((g g) (renamings '()) (num num) (result '()))
194
    (match g
195
      ('() result)
196
      ((triple g ...)
197
       (let* ((subject (rdf-triple-subject triple))
198
              (num (if (and (blank-node? subject)
199
                            (assoc-ref renamings subject))
200
                       num
201
                       (+ num 1)))
202
              (renamings
203
                (if (and (blank-node? subject)
204
                         (assoc-ref renamings subject))
205
                    renamings
206
                    (cons (cons subject num) renamings)))
207
              (subject
208
                (if (blank-node? subject)
209
                    (assoc-ref renamings subject)
210
                    subject))
211
              (predicate (rdf-triple-predicate triple))
212
              (object (rdf-triple-object triple))
213
              (num (if (and (blank-node? object)
214
                            (assoc-ref renamings object))
215
                       num
216
                       (+ num 1)))
217
              (renamings
218
                (if (and (blank-node? object)
219
                         (assoc-ref renamings object))
220
                    renamings
221
                    (cons (cons object num) renamings)))
222
              (object
223
                (if (blank-node? object)
224
                    (assoc-ref renamings object)
225
                    object)))
226
           (loop g renamings num (cons (make-rdf-triple subject predicate object)
227
                                       result)))))))
228
229
(define (merge-graphs g1 g2)
230
  "Merge two graphs g1 and g2.  This is the same as append, but we need to make
231
sure we rename blank nodes, or some nodes will be merged when they shouldn't."
232
  (append g1 (rename-blanks g2 (last-blank g1))))
233
234
;; Next, a predicate on isomorphisms between two graphs.  Two graphs are isomorphic
235
;; when each triple has a corresponding triple in the other graph.
236
;;
237
;; To take blank nodes into account, there needs to be a mapping from blank nodes
238
;; of the first graph to blank nodes of the other graph in order to prove
239
;; isomorphism.
240
;;
241
;; First, we compare the two graphs and find possible constraints on that mapping.
242
;; for instance, if one graph has (_:1, p, o) and the other (_:2, p, o), then
243
;; a possible constraint is that _:1 maps to _:2. If the other graph also has
244
;; (_:3, p, o) then maybe _:1 actually maps to _:3.
245
;;
246
;; Constraints are either "none" (no constraint), "equiv" (a mapping between two
247
;; blank node identifiers), "or" (a disjunction) or "and" (a conjunction).
248
;; By comparing the triples of the first graph, we create an conjunction between
249
;; the constraints collected from each triple. The constraints of a triple is
250
;; a disjunction between every case where it matches a triple from the other graph.
251
;; That creates zero, one or two constraints (depending on the number of blank
252
;; nodes).
253
;;
254
;; These constraints are transformed in a normal form, as a list of lists of
255
;; conjunctions. Each list is a candidate mapping. sat? is used to evaluate the
256
;; candidate mapping and ensure it is an isomorphism between the two sets of
257
;; blank nodes. For every sat? equivalences, we check that the mapping actually
258
;; maps triples of g1 to triples of g2, and its reverse mapping maps triples of
259
;; g2 to triples of g1. Whenever one mapping works, the two graphs are equivalent.
260
;; If no mapping works, the two graphs are not equivalent.
261
262
(define (sat? equivalences)
263
  "Return whether the set of equivalences satisfies the condition that it represents
264
an isomorphism between two blank node sets: for every equality, check that the
265
first component is always associated to the same second component, and that the
266
second component is always associated with the first."
267
  (match equivalences
268
    ('() #t)
269
    (((first . second) equivalences ...)
270
     (if (and (null? (filter
271
                       (lambda (eq)
272
                         (and (equal? (car eq) first)
273
                              (not (equal? (cdr eq) second))))
274
                       equivalences))
275
              (null? (filter
276
                       (lambda (eq)
277
                         (and (not (equal? (car eq) first))
278
                              (equal? (cdr eq) second)))
279
                       equivalences)))
280
         (sat? equivalences)
281
         #f))))
282
283
(define (merge-joins l1 l2)
284
  (cond
285
    ((null? l1) l2)
286
    ((null? l2) l1)
287
    (else
288
      (fold
289
        (lambda (e1 res)
290
          (append
291
            (map (lambda (e2)
292
                   (append e1 e2))
293
                 l2)
294
            res))
295
        '()
296
        l1))))
297
298
(define (to-disjunctions constraints)
299
  (match constraints
300
    (('equiv b1 b2) (list (list (cons b1 b2))))
301
    ('none (list (list)))
302
    (('or e1 e2)
303
     (cond
304
       ((equal? e2 'bot)
305
        (to-disjunctions e1))
306
       ((equal? e1 'bot)
307
        (to-disjunctions e2))
308
       (else
309
         (let ((e1 (to-disjunctions e1))
310
               (e2 (to-disjunctions e2)))
311
           (append e1 e2)))))
312
    (('and e1 e2)
313
     (cond
314
       ((equal? e1 'bot)
315
        'bot)
316
       ((equal? e2 'bot)
317
        'bot)
318
       (else
319
         (let ((e1 (to-disjunctions e1))
320
               (e2 (to-disjunctions e2)))
321
           (merge-joins e1 e2)))))))
322
323
(define (generate-triple-constraints t1 t2)
324
  (match t1
325
    (($ rdf-triple s1 p1 o1)
326
     (match t2
327
       (($ rdf-triple s2 p2 o2)
328
        (if (and (or (equal? s1 s2) (and (blank-node? s1) (blank-node? s2)))
329
                 (equal? p1 p2)
330
                 (or (equal? o1 o2) (and (blank-node? o1) (blank-node? o2))))
331
            (list 'and
332
                  (if (blank-node? s1)
333
                      (list 'equiv s1 s2)
334
                      'none)
335
                  (if (blank-node? o1)
336
                      (list 'equiv o1 o2)
337
                      'none))
338
            #f))))))
339
340
(define (generate-constraints t1 g2)
341
  (match g2
342
    ('() 'bot)
343
    ((t2 g2 ...)
344
     (let ((c (generate-triple-constraints t1 t2)))
345
       (if c
346
         (list 'or c (generate-constraints t1 g2))
347
         (generate-constraints t1 g2))))))
348
349
(define (reverse-mapping mapping)
350
  (let loop ((mapping mapping) (result '()))
351
  (match mapping
352
    ('() result)
353
    (((first . second) mapping ...)
354
     (loop mapping (cons (cons second first) result))))))
355
356
(define (validate-mapping mapping g1 g2)
357
  (match g1
358
    ('() #t)
359
    ((t1 g1 ...)
360
     (and (not (null? (filter
361
                        (lambda (t2)
362
                          (let ((s1 (rdf-triple-subject t1))
363
                                (s2 (rdf-triple-subject t2))
364
                                (p1 (rdf-triple-predicate t1))
365
                                (p2 (rdf-triple-predicate t2))
366
                                (o1 (rdf-triple-object t1))
367
                                (o2 (rdf-triple-object t2)))
368
                            (and
369
                              (if (blank-node? s1)
370
                                  (equal? (assoc-ref mapping s1) s2)
371
                                  (equal? s1 s2))
372
                              (equal? p1 p2)
373
                              (if (blank-node? o1)
374
                                  (equal? (assoc-ref mapping o1) o2)
375
                                  (equal? o1 o2)))))
376
                        g2)))
377
          (validate-mapping mapping g1 g2)))))
378
379
(define (rdf-isomorphic? g1 g2)
380
  "Compare two graphs and return whether they are isomorph."
381
  (let* ((constraints (fold (lambda (t constraints)
382
                              (list 'and (generate-constraints t g2) constraints))
383
                            'none g1))
384
         (disjunctions (to-disjunctions constraints)))
385
    (let loop ((disjunctions disjunctions))
386
      (match (filter sat? disjunctions)
387
        ('() (and (null? g1) (null? g2)))
388
        ((mapping disjunctions ...)
389
         (if (and (validate-mapping mapping g1 g2)
390
                  (validate-mapping (reverse-mapping mapping) g2 g1))
391
           #t
392
           (loop disjunctions)))))))
393
394
;; Recognizing datatypes is a transformation on the graph to add the proper
395
;; datatype to literals, and replace IRIs that represent a datatype with the
396
;; datatype it represents.  This is useful for some entailment regimes, such
397
;; as the RDF or RDFS entailment regimes.
398
399
(define (recognize-data d datatypes)
400
  (match d
401
    ((? string? iri) 
402
     (let loop ((datatypes datatypes))
403
       (if (null? datatypes)
404
           iri
405
           (if (member iri (rdf-datatype-iris (car datatypes)))
406
               (car datatypes)
407
               (loop (cdr datatypes))))))
408
    (($ rdf-literal literal-form type langtag)
409
     (let loop ((datatypes datatypes))
410
       (if (null? datatypes)
411
           (make-rdf-literal literal-form type langtag)
412
           (if (member type (rdf-datatype-iris (car datatypes)))
413
               (make-rdf-literal literal-form (car datatypes) langtag)
414
               (loop (cdr datatypes))))))
415
    (_ d)))
416
417
(define (recognize-triple t datatypes)
418
  (match t
419
    (($ rdf-triple subject predicate object)
420
     (make-rdf-triple
421
       (recognize-data subject datatypes)
422
       predicate
423
       (recognize-data object datatypes)))))
424
425
(define (recognize graph datatypes)
426
  (match graph
427
    (() '())
428
    ((t graph ...)
429
     (cons
430
       (recognize-triple t datatypes)
431
       (recognize graph datatypes)))))
432
433