guile-rdf/turtle/tordf.scm

tordf.scm

1
;;;; Copyright (C) 2020 Julien Lepiller <julien@lepiller.eu>
2
;;;; 
3
;;;; This library is free software; you can redistribute it and/or
4
;;;; modify it under the terms of the GNU Lesser General Public
5
;;;; License as published by the Free Software Foundation; either
6
;;;; version 3 of the License, or (at your option) any later version.
7
;;;; 
8
;;;; This library is distributed in the hope that it will be useful,
9
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
10
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
;;;; Lesser General Public License for more details.
12
;;;; 
13
;;;; You should have received a copy of the GNU Lesser General Public
14
;;;; License along with this library; if not, write to the Free Software
15
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
;;;; 
17
18
(define-module (turtle tordf)
19
  #:use-module (ice-9 match)
20
  #:use-module (ice-9 textual-ports)
21
  #:use-module (iri iri)
22
  #:use-module (turtle parser)
23
  #:use-module (srfi srfi-9)
24
  #:use-module (rdf rdf)
25
  #:export (turtle->rdf))
26
27
(define-record-type parser-state
28
  (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate
29
                     cur-object blank-node-gen result)
30
  parser-state?
31
  (base-uri       parser-state-base-uri)
32
  (namespaces     parser-state-namespaces)
33
  (bnode-labels   parser-state-bnode-labels)
34
  (cur-subject    parser-state-cur-subject)
35
  (cur-predicate  parser-state-cur-predicate)
36
  (cur-object     parser-state-cur-object)
37
  (blank-node-gen parser-state-blank-node-gen)
38
  (result         parser-state-result))
39
40
(define* (update-parser-state
41
           state #:key (base-uri (parser-state-base-uri state))
42
                       (namespaces (parser-state-namespaces state))
43
                       (bnode-labels (parser-state-bnode-labels state))
44
                       (cur-subject (parser-state-cur-subject state))
45
                       (cur-predicate (parser-state-cur-predicate state))
46
                       (cur-object (parser-state-cur-object state))
47
                       (blank-node-gen (parser-state-blank-node-gen state))
48
                       (result (parser-state-result state)))
49
  (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate
50
                     cur-object blank-node-gen result))
51
52
(define (create-generate-blank-node)
53
  (define num 0)
54
  (lambda ()
55
    (set! num (+ num 1))
56
    num))
57
58
(define (add-ns-to-state state ns iri)
59
  (update-parser-state state
60
    #:namespaces (cons (cons ns iri) (parser-state-namespaces state))))
61
62
63
(define (parse-iri iri state)
64
  (match iri
65
    (('iri ('prefixed-name ('pname-ln ('pname-ns ns) suffix)))
66
     `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) ns)
67
                               suffix))
68
       ("state" . ,state)))
69
    (('iri ('prefixed-name ('pname-ln ('pname-ns suffix))))
70
     `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) "")
71
                               suffix))
72
       ("state" . ,state)))
73
    (('iri ('prefixed-name 'pname-ns))
74
     `(("iri" . ,(assoc-ref (parser-state-namespaces state) ""))
75
       ("state" . ,state)))
76
    (('iri 'iriref)
77
     `(("iri" . ,(resolve-iri (parser-state-base-uri state) ""))
78
       ("state" . ,state)))
79
    (('iri ('iriref iri))
80
     `(("iri" . ,(resolve-iri (parser-state-base-uri state) iri))
81
       ("state" . ,state)))))
82
83
(define (parse-verb verb state)
84
  (match verb
85
    ("a" `(("verb" . "http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
86
           ("state" . ,state)))
87
    (('predicate iri)
88
     (let ((res (parse-iri iri state)))
89
       `(("verb" . ,(assoc-ref res "iri"))
90
         ("state" . ,(assoc-ref res "state")))))))
91
92
(define (parse-object object state)
93
  (match object
94
    (('rdf-literal ('string-pat (_ str)))
95
     (let ((object
96
             (make-rdf-literal str "http://www.w3.org/2001/XMLSchema#string" #f)))
97
       (update-parser-state state
98
         #:cur-object object
99
         #:result
100
         (cons
101
           (make-rdf-triple
102
             (parser-state-cur-subject state)
103
             (parser-state-cur-predicate state)
104
             (make-rdf-literal str "http://www.w3.org/2001/XMLSchema#string" #f))
105
           (parser-state-result state)))))
106
    (('rdf-literal ('string-pat (_ str)) ("^^" iri))
107
     (let* ((res (parse-iri iri state))
108
            (iri (assoc-ref res "iri"))
109
            (state (assoc-ref res "state"))
110
            (object (make-rdf-literal str iri #f)))
111
       (update-parser-state state
112
         #:cur-object object
113
         #:result
114
         (cons
115
           (make-rdf-triple
116
             (parser-state-cur-subject state)
117
             (parser-state-cur-predicate state)
118
             (make-rdf-literal str "http://www.w3.org/2001/XMLSchema#string" #f))
119
           (parser-state-result state)))))
120
    (('rdf-literal ('string-pat (_ str)) ('langtag lang))
121
     (let ((object
122
           (make-rdf-literal
123
             str "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" lang)))
124
       (update-parser-state state
125
         #:cur-object object
126
         #:result
127
         (cons
128
           (make-rdf-triple
129
             (parser-state-cur-subject state)
130
             (parser-state-cur-predicate state)
131
             object)
132
           (parser-state-result state)))))
133
    (('numeric-literal ('decimal num))
134
     (let ((object
135
            (make-rdf-literal num "http://www.w3.org/2001/XMLSchema#decimal" #f)))
136
       (update-parser-state state
137
         #:cur-object object
138
         #:result
139
         (cons
140
           (make-rdf-triple
141
             (parser-state-cur-subject state)
142
             (parser-state-cur-predicate state)
143
             object)
144
           (parser-state-result state)))))
145
    (('numeric-literal ('integer int))
146
     (let ((object
147
           (make-rdf-literal int "http://www.w3.org/2001/XMLSchema#integer" #f)))
148
       (update-parser-state state
149
         #:cur-object object
150
         #:result
151
         (cons
152
           (make-rdf-triple
153
             (parser-state-cur-subject state)
154
             (parser-state-cur-predicate state)
155
             object)
156
           (parser-state-result state)))))
157
    (('numeric-literal ('double num))
158
     (let ((object
159
            (make-rdf-literal num "http://www.w3.org/2001/XMLSchema#double" #f)))
160
       (update-parser-state state
161
         #:cur-object object
162
         #:result
163
         (cons
164
           (make-rdf-triple
165
             (parser-state-cur-subject state)
166
             (parser-state-cur-predicate state)
167
             object)
168
           (parser-state-result state)))))
169
    (('boolean-literal bool)
170
     (let ((object
171
             (make-rdf-literal bool "http://www.w3.org/2001/XMLSchema#boolean" #f)))
172
       (update-parser-state state
173
         #:cur-object object
174
         #:result
175
         (cons
176
           (make-rdf-triple
177
             (parser-state-cur-subject state)
178
             (parser-state-cur-predicate state)
179
             object)
180
           (parser-state-result state)))))
181
    (('blank-node ('anon _))
182
     (let ((node ((parser-state-blank-node-gen state))))
183
       (update-parser-state state
184
         #:cur-object node
185
         #:result
186
         (cons
187
           (make-rdf-triple
188
             (parser-state-cur-subject state)
189
             (parser-state-cur-predicate state)
190
             node)
191
           (parser-state-result state)))))
192
    (('blank-node ('blank-node-label label))
193
     (let* ((node
194
              (or (assoc-ref (parser-state-bnode-labels state) label)
195
                  ((parser-state-blank-node-gen state)))))
196
       (update-parser-state state
197
         #:cur-object node
198
         #:result
199
         (cons
200
           (make-rdf-triple
201
             (parser-state-cur-subject state)
202
             (parser-state-cur-predicate state)
203
             node)
204
           (parser-state-result state)))))
205
    (('blank-node-property-list ('predicate-object-list po ...))
206
     (let* ((node ((parser-state-blank-node-gen state)))
207
            (new-state (parse-predicate-object
208
                         po (update-parser-state state #:cur-subject node))))
209
       (update-parser-state new-state
210
         #:cur-object node
211
         #:cur-subject (parser-state-cur-subject state)
212
         #:cur-predicate (parser-state-cur-predicate state)
213
         #:result
214
         (cons
215
           (make-rdf-triple
216
             (parser-state-cur-subject state)
217
             (parser-state-cur-predicate state)
218
             node)
219
           (parser-state-result new-state)))))
220
    ('collection
221
     (let ((object "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil"))
222
       (update-parser-state state
223
         #:cur-object object
224
         #:result
225
         (cons
226
           (make-rdf-triple
227
             (parser-state-cur-subject state)
228
             (parser-state-cur-predicate state)
229
             object)
230
           (parser-state-result state)))))
231
    (('collection objects ...)
232
     (let ((state (parse-collection objects state)))
233
       (update-parser-state state
234
         #:result
235
         (cons
236
           (make-rdf-triple
237
             (parser-state-cur-subject state)
238
             (parser-state-cur-predicate state)
239
             (parser-state-cur-object state))
240
           (parser-state-result state)))))
241
    (('iri _)
242
     (let* ((res (parse-iri object state))
243
            (iri (assoc-ref res "iri"))
244
            (state (assoc-ref res "state")))
245
       (update-parser-state state
246
         #:cur-object iri
247
         #:result
248
         (cons
249
           (make-rdf-triple
250
             (parser-state-cur-subject state)
251
             (parser-state-cur-predicate state)
252
             iri)
253
           (parser-state-result state)))))))
254
255
(define (parse-collection collection state)
256
  (let ((node ((parser-state-blank-node-gen state))))
257
    (let loop ((objects collection)
258
               (new-state
259
                 (update-parser-state state
260
                   #:cur-subject node
261
                   #:cur-predicate
262
                   "http://www.w3.org/1999/02/22-rdf-syntax-ns#first"))
263
               (previous-object #nil))
264
      (match objects
265
        ('()
266
         (update-parser-state new-state
267
           #:cur-object node
268
           #:cur-subject (parser-state-cur-subject state)
269
           #:cur-predicate (parser-state-cur-predicate state)
270
           #:result
271
           (cons
272
             (make-rdf-triple
273
               previous-object
274
               "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"
275
               "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil")
276
             (parser-state-result new-state))))
277
        ((('object object) objects ...)
278
         (if (equal? previous-object #nil)
279
             (let ((new-state (parse-object object new-state)))
280
               (loop objects new-state node))
281
             (let* ((node ((parser-state-blank-node-gen new-state)))
282
                    (new-state
283
                      (update-parser-state new-state
284
                        #:cur-subject node))
285
                    (new-state (parse-object object new-state)))
286
               (loop
287
                 objects
288
                 (update-parser-state new-state
289
                   #:result
290
                   (cons
291
                     (make-rdf-triple
292
                       previous-object
293
                       "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"
294
                       node)
295
                     (parser-state-result new-state)))
296
                 node))))))))
297
298
(define (parse-object-list ol state)
299
  (let loop ((ol ol) (state state))
300
    (match ol
301
      ('() state)
302
      ((('object object) ol ...)
303
       (loop ol (parse-object object state)))
304
      ((ol)
305
       (loop ol state)))))
306
307
(define (parse-predicate-object po state)
308
  (let loop ((po po) (state state))
309
    (match po
310
      ((('verb verb) ('object-list ol ...) po)
311
       (let* ((verb (parse-verb verb state))
312
              (state (assoc-ref verb "state"))
313
              (verb (assoc-ref verb "verb"))
314
              (new-state (update-parser-state state #:cur-predicate verb))
315
              (res (parse-object-list ol new-state)))
316
         (loop po res)))
317
      ((('verb verb) ('object-list ol ...))
318
       (let* ((verb (parse-verb verb state))
319
              (state (assoc-ref verb "state"))
320
              (verb (assoc-ref verb "verb"))
321
              (new-state (update-parser-state state #:cur-predicate verb))
322
              (res (parse-object-list ol new-state)))
323
         res))
324
      (((('verb verb) ('object-list ol ...)) po ...)
325
       (let* ((verb (parse-verb verb state))
326
              (state (assoc-ref verb "state"))
327
              (verb (assoc-ref verb "verb"))
328
              (new-state (update-parser-state state #:cur-predicate verb))
329
              (res (parse-object-list ol new-state)))
330
         (loop po res)))
331
      ('() state)
332
      ((po)
333
       (loop po state)))))
334
335
(define (parse-subject s state)
336
  (match s
337
    (('iri _ ...)
338
     (let ((res (parse-iri s state)))
339
       `(("subject" . ,(assoc-ref res "iri"))
340
         ("state" . ,(assoc-ref res "state")))))
341
    (('collection objects ...)
342
     (let ((res (parse-collection objects state)))
343
       `(("subject" . ,(parser-state-cur-object res))
344
         ("state" . ,res))))
345
    ('collection
346
     `(("subject" . "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil")
347
       ("state" . ,state)))
348
    (('blank-node ('anon _))
349
     (let ((node ((parser-state-blank-node-gen state))))
350
       `(("subject" . ,node)
351
         ("state" . ,state))))
352
    (('blank-node ('blank-node-label label))
353
     (if (assoc-ref (parser-state-bnode-labels state) label)
354
         `(("subject" . ,(assoc-ref (parser-state-bnode-labels state) label))
355
           ("state" . ,state))
356
         (let ((node ((parser-state-blank-node-gen state))))
357
           `(("subject" . ,node)
358
             ("state" . ,(update-parser-state state
359
                           #:bnode-labels
360
                           (cons
361
                             (cons label node)
362
                             (parser-state-bnode-labels state))))))))))
363
364
(define (parse-triples t state)
365
  (match t
366
    ((('subject subject) ('predicate-object-list predicate-object ...))
367
     (let* ((res (parse-subject subject state))
368
            (subject (assoc-ref res "subject"))
369
            (state (assoc-ref res "state"))
370
            (state (update-parser-state state
371
                    #:cur-subject subject)))
372
       (parse-predicate-object predicate-object state)))
373
    ((('blank-node-property-list ('predicate-object-list po ...))
374
      ('predicate-object-list predicate-object ...))
375
     (let* ((subject ((parser-state-blank-node-gen state)))
376
            (new-state (parse-predicate-object
377
                         po (update-parser-state state #:cur-subject subject))))
378
       (parse-predicate-object predicate-object new-state)))
379
    ((('blank-node-property-list ('predicate-object-list po ...)))
380
     (let* ((subject ((parser-state-blank-node-gen state))))
381
       (parse-predicate-object po (update-parser-state state
382
                                    #:cur-subject subject))))))
383
384
(define (parse-turtle-doc parse-tree state)
385
  (let loop ((parse-tree parse-tree) (state state))
386
    (match parse-tree
387
      ('() (parser-state-result state))
388
      ((('prefix-id ('pname-ns ns) ('iriref iri)) parse-tree ...)
389
       (loop parse-tree
390
             (add-ns-to-state
391
               state ns (resolve-iri (parser-state-base-uri state) iri))))
392
      ((('prefix-id ('pname-ns ('iriref iri))) parse-tree ...)
393
       (loop parse-tree
394
             (add-ns-to-state
395
               state "" (resolve-iri (parser-state-base-uri state) iri))))
396
      ((('sparql-prefix ('pname-ns ns) ('iriref iri)) parse-tree ...)
397
       (loop parse-tree
398
             (add-ns-to-state
399
               state ns (resolve-iri (parser-state-base-uri state) iri))))
400
      ((('sparql-prefix ('pname-ns ('iriref iri))) parse-tree ...)
401
       (loop parse-tree
402
             (add-ns-to-state
403
               state "" (resolve-iri (parser-state-base-uri state) iri))))
404
      ((('base ('iriref iri)) parse-tree ...)
405
       (loop parse-tree
406
             (update-parser-state
407
               state #:base-uri (resolve-iri (parser-state-base-uri state) iri))))
408
      ((('sparql-base ('iriref iri)) parse-tree ...)
409
       (loop parse-tree
410
             (update-parser-state
411
               state #:base-uri (resolve-iri iri (parser-state-base-uri state)))))
412
      ((('triples t ...) parse-tree ...)
413
       (let ((res (parse-triples t state)))
414
         (loop parse-tree (parse-triples t state))))
415
      ;; otherwise, it's a single element, not a list of statements
416
      (((? symbol? _) _ ...) (loop (list parse-tree) state)))))
417
418
(define (tordf parse-tree base)
419
  (define state
420
    (make-parser-state base '() '() #f #f #f (create-generate-blank-node) '()))
421
  (parse-turtle-doc parse-tree state))
422
423
(define (turtle->rdf str-or-file base)
424
  (define str
425
    (cond
426
      ((file-exists? str-or-file) (call-with-input-file str-or-file get-string-all))
427
      ((string? str-or-file) str-or-file)))
428
      
429
  (let ((parse-tree (parse-turtle str)))
430
    (tordf parse-tree base)))
431