guile-rdf/turtle/tordf.scm

tordf.scm

1
;;;; Copyright (C) 2020 Julien Lepiller <julien@lepiller.eu>
2
;;;; 
3
;;;; This library is free software; you can redistribute it and/or
4
;;;; modify it under the terms of the GNU Lesser General Public
5
;;;; License as published by the Free Software Foundation; either
6
;;;; version 3 of the License, or (at your option) any later version.
7
;;;; 
8
;;;; This library is distributed in the hope that it will be useful,
9
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
10
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
;;;; Lesser General Public License for more details.
12
;;;; 
13
;;;; You should have received a copy of the GNU Lesser General Public
14
;;;; License along with this library; if not, write to the Free Software
15
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
;;;; 
17
18
(define-module (turtle tordf)
19
  #:use-module (ice-9 match)
20
  #:use-module (ice-9 textual-ports)
21
  #:use-module (iri iri)
22
  #:use-module (turtle parser)
23
  #:use-module (srfi srfi-9)
24
  #:use-module (rdf rdf)
25
  #:export (turtle->rdf))
26
27
(define-record-type parser-state
28
  (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate
29
                     cur-object blank-node-gen result)
30
  parser-state?
31
  (base-uri       parser-state-base-uri)
32
  (namespaces     parser-state-namespaces)
33
  (bnode-labels   parser-state-bnode-labels)
34
  (cur-subject    parser-state-cur-subject)
35
  (cur-predicate  parser-state-cur-predicate)
36
  (cur-object     parser-state-cur-object)
37
  (blank-node-gen parser-state-blank-node-gen)
38
  (result         parser-state-result))
39
40
(define* (update-parser-state
41
           state #:key (base-uri (parser-state-base-uri state))
42
                       (namespaces (parser-state-namespaces state))
43
                       (bnode-labels (parser-state-bnode-labels state))
44
                       (cur-subject (parser-state-cur-subject state))
45
                       (cur-predicate (parser-state-cur-predicate state))
46
                       (cur-object (parser-state-cur-object state))
47
                       (blank-node-gen (parser-state-blank-node-gen state))
48
                       (result (parser-state-result state)))
49
  (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate
50
                     cur-object blank-node-gen result))
51
52
(define (create-generate-blank-node)
53
  (define num 0)
54
  (lambda ()
55
    (set! num (+ num 1))
56
    num))
57
58
(define (add-ns-to-state state ns iri)
59
  (update-parser-state state
60
    #:namespaces (cons (cons ns iri) (parser-state-namespaces state))))
61
62
(define (parse-string str)
63
  (match str
64
    ((? string? str) str)
65
    ((component str ...)
66
     (match component
67
       ((? string? str1)
68
        (string-append str1 (parse-string str)))
69
       (('uchar n)
70
        (string-append (string (integer->char (string->number n 16)))
71
                       (parse-string str)))
72
       (('echar e)
73
        (string-append
74
          (match e
75
            ("\\t" "\t")
76
            ("\\b" "\b")
77
            ("\\n" "\n")
78
            ("\\r" "\r")
79
            ("\\f" "\f")
80
            ("\\\\" "\\")
81
            ("\\\"" "\"")
82
            ("\\'" "'"))
83
          (parse-string str)))))
84
    (() "")))
85
86
(define (parse-iri iri state)
87
  (match iri
88
    (('iri ('prefixed-name ('pname-ln ('pname-ns ns) ('pn-local suffix))))
89
     `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) ns)
90
                                suffix))
91
       ("state" . ,state)))
92
    (('iri ('prefixed-name ('pname-ln ('pname-ns ('pn-local suffix)))))
93
     `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) "")
94
                                suffix))
95
       ("state" . ,state)))
96
    (('iri ('prefixed-name ('pname-ns ns)))
97
     `(("iri" . ,(assoc-ref (parser-state-namespaces state) ns))
98
       ("state" . ,state)))
99
    (('iri ('prefixed-name 'pname-ns))
100
     `(("iri" . ,(assoc-ref (parser-state-namespaces state) ""))
101
       ("state" . ,state)))
102
    (('iri 'iriref)
103
     `(("iri" . ,(resolve-iri (parser-state-base-uri state) ""))
104
       ("state" . ,state)))
105
    (('iri ('iriref iri ...))
106
     `(("iri" . ,(resolve-iri (parser-state-base-uri state) (parse-string iri)))
107
       ("state" . ,state)))))
108
109
(define (parse-verb verb state)
110
  (match verb
111
    ("a" `(("verb" . "http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
112
           ("state" . ,state)))
113
    (('predicate iri)
114
     (let ((res (parse-iri iri state)))
115
       `(("verb" . ,(assoc-ref res "iri"))
116
         ("state" . ,(assoc-ref res "state")))))))
117
118
(define (parse-object object state)
119
  (match object
120
    (('rdf-literal ('string-pat (_ str ...)))
121
     (let ((object
122
             (make-rdf-literal (parse-string str)
123
                               "http://www.w3.org/2001/XMLSchema#string" #f)))
124
       (update-parser-state state
125
         #:cur-object object
126
         #:result
127
         (cons
128
           (make-rdf-triple
129
             (parser-state-cur-subject state)
130
             (parser-state-cur-predicate state)
131
             object)
132
           (parser-state-result state)))))
133
    (('rdf-literal ('string-pat (_ str ...)) ("^^" iri))
134
     (let* ((res (parse-iri iri state))
135
            (iri (assoc-ref res "iri"))
136
            (state (assoc-ref res "state"))
137
            (object (make-rdf-literal (parse-string str) iri #f)))
138
       (update-parser-state state
139
         #:cur-object object
140
         #:result
141
         (cons
142
           (make-rdf-triple
143
             (parser-state-cur-subject state)
144
             (parser-state-cur-predicate state)
145
             object)
146
           (parser-state-result state)))))
147
    (('rdf-literal ('string-pat (_ str ...)) ('langtag lang))
148
     (let ((object
149
           (make-rdf-literal
150
             (parse-string str)
151
             "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"
152
             lang)))
153
       (update-parser-state state
154
         #:cur-object object
155
         #:result
156
         (cons
157
           (make-rdf-triple
158
             (parser-state-cur-subject state)
159
             (parser-state-cur-predicate state)
160
             object)
161
           (parser-state-result state)))))
162
    (('rdf-literal ('string-pat _) ("^^" iri))
163
     (let* ((res (parse-iri iri state))
164
            (iri (assoc-ref res "iri"))
165
            (state (assoc-ref res "state"))
166
            (object (make-rdf-literal "" iri #f)))
167
       (update-parser-state state
168
         #:cur-object object
169
         #:result
170
         (cons
171
           (make-rdf-triple
172
             (parser-state-cur-subject state)
173
             (parser-state-cur-predicate state)
174
             object)
175
           (parser-state-result state)))))
176
    (('rdf-literal ('string-pat _) ('langtag lang))
177
     (let ((object
178
           (make-rdf-literal
179
             ""
180
             "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"
181
             lang)))
182
       (update-parser-state state
183
         #:cur-object object
184
         #:result
185
         (cons
186
           (make-rdf-triple
187
             (parser-state-cur-subject state)
188
             (parser-state-cur-predicate state)
189
             object)
190
           (parser-state-result state)))))
191
    (('rdf-literal ('string-pat _))
192
     (let ((object
193
           (make-rdf-literal
194
             ""
195
             "http://www.w3.org/1999/02/22-rdf-syntax-ns#string"
196
             #f)))
197
       (update-parser-state state
198
         #:cur-object object
199
         #:result
200
         (cons
201
           (make-rdf-triple
202
             (parser-state-cur-subject state)
203
             (parser-state-cur-predicate state)
204
             object)
205
           (parser-state-result state)))))
206
    (('numeric-literal ('decimal num))
207
     (let ((object
208
            (make-rdf-literal num "http://www.w3.org/2001/XMLSchema#decimal" #f)))
209
       (update-parser-state state
210
         #:cur-object object
211
         #:result
212
         (cons
213
           (make-rdf-triple
214
             (parser-state-cur-subject state)
215
             (parser-state-cur-predicate state)
216
             object)
217
           (parser-state-result state)))))
218
    (('numeric-literal ('integer int))
219
     (let ((object
220
           (make-rdf-literal int "http://www.w3.org/2001/XMLSchema#integer" #f)))
221
       (update-parser-state state
222
         #:cur-object object
223
         #:result
224
         (cons
225
           (make-rdf-triple
226
             (parser-state-cur-subject state)
227
             (parser-state-cur-predicate state)
228
             object)
229
           (parser-state-result state)))))
230
    (('numeric-literal ('double num))
231
     (let ((object
232
            (make-rdf-literal num "http://www.w3.org/2001/XMLSchema#double" #f)))
233
       (update-parser-state state
234
         #:cur-object object
235
         #:result
236
         (cons
237
           (make-rdf-triple
238
             (parser-state-cur-subject state)
239
             (parser-state-cur-predicate state)
240
             object)
241
           (parser-state-result state)))))
242
    (('boolean-literal bool)
243
     (let ((object
244
             (make-rdf-literal bool "http://www.w3.org/2001/XMLSchema#boolean" #f)))
245
       (update-parser-state state
246
         #:cur-object object
247
         #:result
248
         (cons
249
           (make-rdf-triple
250
             (parser-state-cur-subject state)
251
             (parser-state-cur-predicate state)
252
             object)
253
           (parser-state-result state)))))
254
    (('blank-node ('anon _))
255
     (let ((node ((parser-state-blank-node-gen state))))
256
       (update-parser-state state
257
         #:cur-object node
258
         #:result
259
         (cons
260
           (make-rdf-triple
261
             (parser-state-cur-subject state)
262
             (parser-state-cur-predicate state)
263
             node)
264
           (parser-state-result state)))))
265
    (('blank-node ('blank-node-label label))
266
     (let* ((node
267
              (or (assoc-ref (parser-state-bnode-labels state) label)
268
                  ((parser-state-blank-node-gen state))))
269
            (state
270
              (if (assoc-ref (parser-state-bnode-labels state) label)
271
                  state
272
                  (update-parser-state state
273
                    #:bnode-labels
274
                    (cons
275
                      (cons label node)
276
                      (parser-state-bnode-labels state))))))
277
       (update-parser-state state
278
         #:cur-object node
279
         #:result
280
         (cons
281
           (make-rdf-triple
282
             (parser-state-cur-subject state)
283
             (parser-state-cur-predicate state)
284
             node)
285
           (parser-state-result state)))))
286
    (('blank-node-property-list ('predicate-object-list po ...))
287
     (let* ((node ((parser-state-blank-node-gen state)))
288
            (new-state (parse-predicate-object
289
                         po (update-parser-state state #:cur-subject node))))
290
       (update-parser-state new-state
291
         #:cur-object node
292
         #:cur-subject (parser-state-cur-subject state)
293
         #:cur-predicate (parser-state-cur-predicate state)
294
         #:result
295
         (cons
296
           (make-rdf-triple
297
             (parser-state-cur-subject state)
298
             (parser-state-cur-predicate state)
299
             node)
300
           (parser-state-result new-state)))))
301
    ('collection
302
     (let ((object "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil"))
303
       (update-parser-state state
304
         #:cur-object object
305
         #:result
306
         (cons
307
           (make-rdf-triple
308
             (parser-state-cur-subject state)
309
             (parser-state-cur-predicate state)
310
             object)
311
           (parser-state-result state)))))
312
    (('collection objects ...)
313
     (let ((state (parse-collection objects state)))
314
       (update-parser-state state
315
         #:result
316
         (cons
317
           (make-rdf-triple
318
             (parser-state-cur-subject state)
319
             (parser-state-cur-predicate state)
320
             (parser-state-cur-object state))
321
           (parser-state-result state)))))
322
    (('iri _)
323
     (let* ((res (parse-iri object state))
324
            (iri (assoc-ref res "iri"))
325
            (state (assoc-ref res "state")))
326
       (update-parser-state state
327
         #:cur-object iri
328
         #:result
329
         (cons
330
           (make-rdf-triple
331
             (parser-state-cur-subject state)
332
             (parser-state-cur-predicate state)
333
             iri)
334
           (parser-state-result state)))))))
335
336
(define (parse-collection collection state)
337
  (let ((node ((parser-state-blank-node-gen state))))
338
    (let loop ((objects collection)
339
               (new-state
340
                 (update-parser-state state
341
                   #:cur-subject node
342
                   #:cur-predicate
343
                   "http://www.w3.org/1999/02/22-rdf-syntax-ns#first"))
344
               (previous-object #nil))
345
      (match objects
346
        ('()
347
         (update-parser-state new-state
348
           #:cur-object node
349
           #:cur-subject (parser-state-cur-subject state)
350
           #:cur-predicate (parser-state-cur-predicate state)
351
           #:result
352
           (cons
353
             (make-rdf-triple
354
               previous-object
355
               "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"
356
               "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil")
357
             (parser-state-result new-state))))
358
        ((('object object) objects ...)
359
         (if (equal? previous-object #nil)
360
             (let ((new-state (parse-object object new-state)))
361
               (loop objects new-state node))
362
             (let* ((node ((parser-state-blank-node-gen new-state)))
363
                    (new-state
364
                      (update-parser-state new-state
365
                        #:cur-subject node))
366
                    (new-state (parse-object object new-state)))
367
               (loop
368
                 objects
369
                 (update-parser-state new-state
370
                   #:result
371
                   (cons
372
                     (make-rdf-triple
373
                       previous-object
374
                       "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"
375
                       node)
376
                     (parser-state-result new-state)))
377
                 node))))))))
378
379
(define (parse-object-list ol state)
380
  (let loop ((ol ol) (state state))
381
    (match ol
382
      ('() state)
383
      ((('object object) ol ...)
384
       (loop ol (parse-object object state)))
385
      ((ol)
386
       (loop ol state)))))
387
388
(define (parse-predicate-object po state)
389
  (let loop ((po po) (state state))
390
    (match po
391
      ((('verb verb) ('object-list ol ...) po)
392
       (let* ((verb (parse-verb verb state))
393
              (state (assoc-ref verb "state"))
394
              (verb (assoc-ref verb "verb"))
395
              (new-state (update-parser-state state #:cur-predicate verb))
396
              (res (parse-object-list ol new-state)))
397
         (loop po res)))
398
      ((('verb verb) ('object-list ol ...))
399
       (let* ((verb (parse-verb verb state))
400
              (state (assoc-ref verb "state"))
401
              (verb (assoc-ref verb "verb"))
402
              (new-state (update-parser-state state #:cur-predicate verb))
403
              (res (parse-object-list ol new-state)))
404
         res))
405
      (((('verb verb) ('object-list ol ...)) po ...)
406
       (let* ((verb (parse-verb verb state))
407
              (state (assoc-ref verb "state"))
408
              (verb (assoc-ref verb "verb"))
409
              (new-state (update-parser-state state #:cur-predicate verb))
410
              (res (parse-object-list ol new-state)))
411
         (loop po res)))
412
      ('() state)
413
      ((po)
414
       (loop po state)))))
415
416
(define (parse-subject s state)
417
  (match s
418
    (('iri _ ...)
419
     (let ((res (parse-iri s state)))
420
       `(("subject" . ,(assoc-ref res "iri"))
421
         ("state" . ,(assoc-ref res "state")))))
422
    (('collection objects ...)
423
     (let ((res (parse-collection objects state)))
424
       `(("subject" . ,(parser-state-cur-object res))
425
         ("state" . ,res))))
426
    ('collection
427
     `(("subject" . "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil")
428
       ("state" . ,state)))
429
    (('blank-node ('anon _))
430
     (let ((node ((parser-state-blank-node-gen state))))
431
       `(("subject" . ,node)
432
         ("state" . ,state))))
433
    (('blank-node ('blank-node-label label))
434
     (if (assoc-ref (parser-state-bnode-labels state) label)
435
         `(("subject" . ,(assoc-ref (parser-state-bnode-labels state) label))
436
           ("state" . ,state))
437
         (let ((node ((parser-state-blank-node-gen state))))
438
           `(("subject" . ,node)
439
             ("state" . ,(update-parser-state state
440
                           #:bnode-labels
441
                           (cons
442
                             (cons label node)
443
                             (parser-state-bnode-labels state))))))))))
444
445
(define (parse-triples t state)
446
  (match t
447
    ((('subject subject) ('predicate-object-list predicate-object ...))
448
     (let* ((res (parse-subject subject state))
449
            (subject (assoc-ref res "subject"))
450
            (state (assoc-ref res "state"))
451
            (state (update-parser-state state
452
                    #:cur-subject subject)))
453
       (parse-predicate-object predicate-object state)))
454
    ((('blank-node-property-list ('predicate-object-list po ...))
455
      ('predicate-object-list predicate-object ...))
456
     (let* ((subject ((parser-state-blank-node-gen state)))
457
            (new-state (parse-predicate-object
458
                         po (update-parser-state state #:cur-subject subject))))
459
       (parse-predicate-object predicate-object new-state)))
460
    ((('blank-node-property-list ('predicate-object-list po ...)))
461
     (let* ((subject ((parser-state-blank-node-gen state))))
462
       (parse-predicate-object po (update-parser-state state
463
                                    #:cur-subject subject))))))
464
465
(define (parse-turtle-doc parse-tree state)
466
  (let loop ((parse-tree parse-tree) (state state))
467
    (match parse-tree
468
      ('() (parser-state-result state))
469
      ((('prefix-id ('pname-ns ns) ('iriref iri ...)) parse-tree ...)
470
       (loop parse-tree
471
             (add-ns-to-state
472
               state ns (resolve-iri (parser-state-base-uri state)
473
                                     (parse-string iri)))))
474
      ((('prefix-id ('pname-ns ('iriref iri ...))) parse-tree ...)
475
       (loop parse-tree
476
             (add-ns-to-state
477
               state "" (resolve-iri (parser-state-base-uri state)
478
                                     (parse-string iri)))))
479
      ((('sparql-prefix ('pname-ns ns) ('iriref iri ...)) parse-tree ...)
480
       (loop parse-tree
481
             (add-ns-to-state
482
               state ns (resolve-iri (parser-state-base-uri state)
483
                                     (parse-string iri)))))
484
      ((('sparql-prefix ('pname-ns ('iriref iri ...))) parse-tree ...)
485
       (loop parse-tree
486
             (add-ns-to-state
487
               state "" (resolve-iri (parser-state-base-uri state)
488
                                     (parse-string iri)))))
489
      ((('base ('iriref iri ...)) parse-tree ...)
490
       (loop parse-tree
491
             (update-parser-state
492
               state #:base-uri (resolve-iri (parser-state-base-uri state)
493
                                             (parse-string iri)))))
494
      ((('sparql-base ('iriref iri ...)) parse-tree ...)
495
       (loop parse-tree
496
             (update-parser-state
497
               state #:base-uri (resolve-iri (parser-state-base-uri state)
498
                                             (parse-string iri)))))
499
      ((('triples t ...) parse-tree ...)
500
       (let ((res (parse-triples t state)))
501
         (loop parse-tree (parse-triples t state))))
502
      ;; otherwise, it's a single element, not a list of statements
503
      (((? symbol? _) _ ...) (loop (list parse-tree) state)))))
504
505
(define (tordf parse-tree base)
506
  (define state
507
    (make-parser-state base '() '() #f #f #f (create-generate-blank-node) '()))
508
  (parse-turtle-doc parse-tree state))
509
510
(define (turtle->rdf str-or-file base)
511
  (define str
512
    (cond
513
      ((file-exists? str-or-file) (call-with-input-file str-or-file get-string-all))
514
      ((string? str-or-file) str-or-file)))
515
      
516
  (let ((parse-tree (parse-turtle str)))
517
    (tordf parse-tree base)))
518