guile-rdf/turtle/tordf.scm

tordf.scm

1
;;;; Copyright (C) 2020 Julien Lepiller <julien@lepiller.eu>
2
;;;; 
3
;;;; This library is free software; you can redistribute it and/or
4
;;;; modify it under the terms of the GNU Lesser General Public
5
;;;; License as published by the Free Software Foundation; either
6
;;;; version 3 of the License, or (at your option) any later version.
7
;;;; 
8
;;;; This library is distributed in the hope that it will be useful,
9
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
10
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
;;;; Lesser General Public License for more details.
12
;;;; 
13
;;;; You should have received a copy of the GNU Lesser General Public
14
;;;; License along with this library; if not, write to the Free Software
15
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
;;;; 
17
18
(define-module (turtle tordf)
19
  #:use-module (ice-9 match)
20
  #:use-module (ice-9 textual-ports)
21
  #:use-module (iri iri)
22
  #:use-module (turtle parser)
23
  #:use-module (srfi srfi-9)
24
  #:use-module (rdf rdf)
25
  #:export (turtle->rdf))
26
27
(define-record-type parser-state
28
  (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate
29
                     cur-object blank-node-gen result)
30
  parser-state?
31
  (base-uri       parser-state-base-uri)
32
  (namespaces     parser-state-namespaces)
33
  (bnode-labels   parser-state-bnode-labels)
34
  (cur-subject    parser-state-cur-subject)
35
  (cur-predicate  parser-state-cur-predicate)
36
  (cur-object     parser-state-cur-object)
37
  (blank-node-gen parser-state-blank-node-gen)
38
  (result         parser-state-result))
39
40
(define* (update-parser-state
41
           state #:key (base-uri (parser-state-base-uri state))
42
                       (namespaces (parser-state-namespaces state))
43
                       (bnode-labels (parser-state-bnode-labels state))
44
                       (cur-subject (parser-state-cur-subject state))
45
                       (cur-predicate (parser-state-cur-predicate state))
46
                       (cur-object (parser-state-cur-object state))
47
                       (blank-node-gen (parser-state-blank-node-gen state))
48
                       (result (parser-state-result state)))
49
  (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate
50
                     cur-object blank-node-gen result))
51
52
(define (create-generate-blank-node)
53
  (define num 0)
54
  (lambda ()
55
    (set! num (+ num 1))
56
    num))
57
58
(define (add-ns-to-state state ns iri)
59
  (update-parser-state state
60
    #:namespaces (cons (cons ns iri) (parser-state-namespaces state))))
61
62
(define* (parse-string str #:optional for-iri?)
63
  (match str
64
    ((? string? str) str)
65
    ((component str ...)
66
     (match component
67
       ((? string? str1)
68
        (string-append str1 (parse-string str)))
69
       (('uchar n)
70
        (string-append (string (integer->char (string->number n 16)))
71
                       (parse-string str)))
72
       (('echar e)
73
        (string-append
74
          (match e
75
            ("\\t" "\t")
76
            ("\\b" "\b")
77
            ("\\n" "\n")
78
            ("\\r" "\r")
79
            ("\\f" "\f")
80
            ("\\\\" "\\")
81
            ("\\\"" "\"")
82
            ("\\'" "'"))
83
          (parse-string str)))))
84
    (() "")))
85
86
(define (valid-iri? iri)
87
  (and (not (string-any (ucs-range->char-set 0 33) iri))
88
       (not (string-any #\< iri))
89
       (not (string-any #\> iri))))
90
91
(define (parse-iri iri state)
92
  (match iri
93
    (('iri ('prefixed-name ('pname-ln ('pname-ns ns) ('pn-local suffix))))
94
     `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) ns)
95
                                suffix))
96
       ("state" . ,state)))
97
    (('iri ('prefixed-name ('pname-ln ('pname-ns ('pn-local suffix)))))
98
     `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) "")
99
                                suffix))
100
       ("state" . ,state)))
101
    (('iri ('prefixed-name ('pname-ns ns)))
102
     `(("iri" . ,(assoc-ref (parser-state-namespaces state) ns))
103
       ("state" . ,state)))
104
    (('iri ('prefixed-name 'pname-ns))
105
     `(("iri" . ,(assoc-ref (parser-state-namespaces state) ""))
106
       ("state" . ,state)))
107
    (('iri 'iriref)
108
     `(("iri" . ,(resolve-iri (parser-state-base-uri state) ""))
109
       ("state" . ,state)))
110
    (('iri ('iriref iri ...))
111
     (let ((iri (resolve-iri (parser-state-base-uri state) (parse-string iri))))
112
       (if (valid-iri? iri)
113
           `(("iri" . ,iri)
114
             ("state" . ,state))
115
           (throw 'invalid-iri iri))))))
116
117
(define (parse-verb verb state)
118
  (match verb
119
    ("a" `(("verb" . "http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
120
           ("state" . ,state)))
121
    (('predicate iri)
122
     (let ((res (parse-iri iri state)))
123
       `(("verb" . ,(assoc-ref res "iri"))
124
         ("state" . ,(assoc-ref res "state")))))))
125
126
(define (parse-object object state)
127
  (match object
128
    (('rdf-literal ('string-pat (_ str ...)))
129
     (let ((object
130
             (make-rdf-literal (parse-string str)
131
                               "http://www.w3.org/2001/XMLSchema#string" #f)))
132
       (update-parser-state state
133
         #:cur-object object
134
         #:result
135
         (cons
136
           (make-rdf-triple
137
             (parser-state-cur-subject state)
138
             (parser-state-cur-predicate state)
139
             object)
140
           (parser-state-result state)))))
141
    (('rdf-literal ('string-pat (_ str ...)) ("^^" iri))
142
     (let* ((res (parse-iri iri state))
143
            (iri (assoc-ref res "iri"))
144
            (state (assoc-ref res "state"))
145
            (object (make-rdf-literal (parse-string str) iri #f)))
146
       (update-parser-state state
147
         #:cur-object object
148
         #:result
149
         (cons
150
           (make-rdf-triple
151
             (parser-state-cur-subject state)
152
             (parser-state-cur-predicate state)
153
             object)
154
           (parser-state-result state)))))
155
    (('rdf-literal ('string-pat (_ str ...)) ('langtag lang))
156
     (let ((object
157
           (make-rdf-literal
158
             (parse-string str)
159
             "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"
160
             lang)))
161
       (update-parser-state state
162
         #:cur-object object
163
         #:result
164
         (cons
165
           (make-rdf-triple
166
             (parser-state-cur-subject state)
167
             (parser-state-cur-predicate state)
168
             object)
169
           (parser-state-result state)))))
170
    (('rdf-literal ('string-pat _) ("^^" iri))
171
     (let* ((res (parse-iri iri state))
172
            (iri (assoc-ref res "iri"))
173
            (state (assoc-ref res "state"))
174
            (object (make-rdf-literal "" iri #f)))
175
       (update-parser-state state
176
         #:cur-object object
177
         #:result
178
         (cons
179
           (make-rdf-triple
180
             (parser-state-cur-subject state)
181
             (parser-state-cur-predicate state)
182
             object)
183
           (parser-state-result state)))))
184
    (('rdf-literal ('string-pat _) ('langtag lang))
185
     (let ((object
186
           (make-rdf-literal
187
             ""
188
             "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"
189
             lang)))
190
       (update-parser-state state
191
         #:cur-object object
192
         #:result
193
         (cons
194
           (make-rdf-triple
195
             (parser-state-cur-subject state)
196
             (parser-state-cur-predicate state)
197
             object)
198
           (parser-state-result state)))))
199
    (('rdf-literal ('string-pat _))
200
     (let ((object
201
           (make-rdf-literal
202
             ""
203
             "http://www.w3.org/1999/02/22-rdf-syntax-ns#string"
204
             #f)))
205
       (update-parser-state state
206
         #:cur-object object
207
         #:result
208
         (cons
209
           (make-rdf-triple
210
             (parser-state-cur-subject state)
211
             (parser-state-cur-predicate state)
212
             object)
213
           (parser-state-result state)))))
214
    (('numeric-literal ('decimal num))
215
     (let ((object
216
            (make-rdf-literal num "http://www.w3.org/2001/XMLSchema#decimal" #f)))
217
       (update-parser-state state
218
         #:cur-object object
219
         #:result
220
         (cons
221
           (make-rdf-triple
222
             (parser-state-cur-subject state)
223
             (parser-state-cur-predicate state)
224
             object)
225
           (parser-state-result state)))))
226
    (('numeric-literal ('integer int))
227
     (let ((object
228
           (make-rdf-literal int "http://www.w3.org/2001/XMLSchema#integer" #f)))
229
       (update-parser-state state
230
         #:cur-object object
231
         #:result
232
         (cons
233
           (make-rdf-triple
234
             (parser-state-cur-subject state)
235
             (parser-state-cur-predicate state)
236
             object)
237
           (parser-state-result state)))))
238
    (('numeric-literal ('double num))
239
     (let ((object
240
            (make-rdf-literal num "http://www.w3.org/2001/XMLSchema#double" #f)))
241
       (update-parser-state state
242
         #:cur-object object
243
         #:result
244
         (cons
245
           (make-rdf-triple
246
             (parser-state-cur-subject state)
247
             (parser-state-cur-predicate state)
248
             object)
249
           (parser-state-result state)))))
250
    (('boolean-literal bool)
251
     (let ((object
252
             (make-rdf-literal bool "http://www.w3.org/2001/XMLSchema#boolean" #f)))
253
       (update-parser-state state
254
         #:cur-object object
255
         #:result
256
         (cons
257
           (make-rdf-triple
258
             (parser-state-cur-subject state)
259
             (parser-state-cur-predicate state)
260
             object)
261
           (parser-state-result state)))))
262
    (('blank-node ('anon _))
263
     (let ((node ((parser-state-blank-node-gen state))))
264
       (update-parser-state state
265
         #:cur-object node
266
         #:result
267
         (cons
268
           (make-rdf-triple
269
             (parser-state-cur-subject state)
270
             (parser-state-cur-predicate state)
271
             node)
272
           (parser-state-result state)))))
273
    (('blank-node ('blank-node-label label))
274
     (let* ((node
275
              (or (assoc-ref (parser-state-bnode-labels state) label)
276
                  ((parser-state-blank-node-gen state))))
277
            (state
278
              (if (assoc-ref (parser-state-bnode-labels state) label)
279
                  state
280
                  (update-parser-state state
281
                    #:bnode-labels
282
                    (cons
283
                      (cons label node)
284
                      (parser-state-bnode-labels state))))))
285
       (update-parser-state state
286
         #:cur-object node
287
         #:result
288
         (cons
289
           (make-rdf-triple
290
             (parser-state-cur-subject state)
291
             (parser-state-cur-predicate state)
292
             node)
293
           (parser-state-result state)))))
294
    (('blank-node-property-list ('predicate-object-list po ...))
295
     (let* ((node ((parser-state-blank-node-gen state)))
296
            (new-state (parse-predicate-object
297
                         po (update-parser-state state #:cur-subject node))))
298
       (update-parser-state new-state
299
         #:cur-object node
300
         #:cur-subject (parser-state-cur-subject state)
301
         #:cur-predicate (parser-state-cur-predicate state)
302
         #:result
303
         (cons
304
           (make-rdf-triple
305
             (parser-state-cur-subject state)
306
             (parser-state-cur-predicate state)
307
             node)
308
           (parser-state-result new-state)))))
309
    ('collection
310
     (let ((object "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil"))
311
       (update-parser-state state
312
         #:cur-object object
313
         #:result
314
         (cons
315
           (make-rdf-triple
316
             (parser-state-cur-subject state)
317
             (parser-state-cur-predicate state)
318
             object)
319
           (parser-state-result state)))))
320
    (('collection objects ...)
321
     (let ((state (parse-collection objects state)))
322
       (update-parser-state state
323
         #:result
324
         (cons
325
           (make-rdf-triple
326
             (parser-state-cur-subject state)
327
             (parser-state-cur-predicate state)
328
             (parser-state-cur-object state))
329
           (parser-state-result state)))))
330
    (('iri _)
331
     (let* ((res (parse-iri object state))
332
            (iri (assoc-ref res "iri"))
333
            (state (assoc-ref res "state")))
334
       (update-parser-state state
335
         #:cur-object iri
336
         #:result
337
         (cons
338
           (make-rdf-triple
339
             (parser-state-cur-subject state)
340
             (parser-state-cur-predicate state)
341
             iri)
342
           (parser-state-result state)))))))
343
344
(define (parse-collection collection state)
345
  (let ((node ((parser-state-blank-node-gen state))))
346
    (let loop ((objects collection)
347
               (new-state
348
                 (update-parser-state state
349
                   #:cur-subject node
350
                   #:cur-predicate
351
                   "http://www.w3.org/1999/02/22-rdf-syntax-ns#first"))
352
               (previous-object #nil))
353
      (match objects
354
        ('()
355
         (update-parser-state new-state
356
           #:cur-object node
357
           #:cur-subject (parser-state-cur-subject state)
358
           #:cur-predicate (parser-state-cur-predicate state)
359
           #:result
360
           (cons
361
             (make-rdf-triple
362
               previous-object
363
               "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"
364
               "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil")
365
             (parser-state-result new-state))))
366
        ((('object object) objects ...)
367
         (if (equal? previous-object #nil)
368
             (let ((new-state (parse-object object new-state)))
369
               (loop objects new-state node))
370
             (let* ((node ((parser-state-blank-node-gen new-state)))
371
                    (new-state
372
                      (update-parser-state new-state
373
                        #:cur-subject node))
374
                    (new-state (parse-object object new-state)))
375
               (loop
376
                 objects
377
                 (update-parser-state new-state
378
                   #:result
379
                   (cons
380
                     (make-rdf-triple
381
                       previous-object
382
                       "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"
383
                       node)
384
                     (parser-state-result new-state)))
385
                 node))))))))
386
387
(define (parse-object-list ol state)
388
  (let loop ((ol ol) (state state))
389
    (match ol
390
      ('() state)
391
      ((('object object) ol ...)
392
       (loop ol (parse-object object state)))
393
      ((ol)
394
       (loop ol state)))))
395
396
(define (parse-predicate-object po state)
397
  (let loop ((po po) (state state))
398
    (match po
399
      ((('verb verb) ('object-list ol ...) po)
400
       (let* ((verb (parse-verb verb state))
401
              (state (assoc-ref verb "state"))
402
              (verb (assoc-ref verb "verb"))
403
              (new-state (update-parser-state state #:cur-predicate verb))
404
              (res (parse-object-list ol new-state)))
405
         (loop po res)))
406
      ((('verb verb) ('object-list ol ...))
407
       (let* ((verb (parse-verb verb state))
408
              (state (assoc-ref verb "state"))
409
              (verb (assoc-ref verb "verb"))
410
              (new-state (update-parser-state state #:cur-predicate verb))
411
              (res (parse-object-list ol new-state)))
412
         res))
413
      (((('verb verb) ('object-list ol ...)) po ...)
414
       (let* ((verb (parse-verb verb state))
415
              (state (assoc-ref verb "state"))
416
              (verb (assoc-ref verb "verb"))
417
              (new-state (update-parser-state state #:cur-predicate verb))
418
              (res (parse-object-list ol new-state)))
419
         (loop po res)))
420
      ('() state)
421
      ((po)
422
       (loop po state)))))
423
424
(define (parse-subject s state)
425
  (match s
426
    (('iri _ ...)
427
     (let ((res (parse-iri s state)))
428
       `(("subject" . ,(assoc-ref res "iri"))
429
         ("state" . ,(assoc-ref res "state")))))
430
    (('collection objects ...)
431
     (let ((res (parse-collection objects state)))
432
       `(("subject" . ,(parser-state-cur-object res))
433
         ("state" . ,res))))
434
    ('collection
435
     `(("subject" . "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil")
436
       ("state" . ,state)))
437
    (('blank-node ('anon _))
438
     (let ((node ((parser-state-blank-node-gen state))))
439
       `(("subject" . ,node)
440
         ("state" . ,state))))
441
    (('blank-node ('blank-node-label label))
442
     (if (assoc-ref (parser-state-bnode-labels state) label)
443
         `(("subject" . ,(assoc-ref (parser-state-bnode-labels state) label))
444
           ("state" . ,state))
445
         (let ((node ((parser-state-blank-node-gen state))))
446
           `(("subject" . ,node)
447
             ("state" . ,(update-parser-state state
448
                           #:bnode-labels
449
                           (cons
450
                             (cons label node)
451
                             (parser-state-bnode-labels state))))))))))
452
453
(define (parse-triples t state)
454
  (match t
455
    ((('subject subject) ('predicate-object-list predicate-object ...))
456
     (let* ((res (parse-subject subject state))
457
            (subject (assoc-ref res "subject"))
458
            (state (assoc-ref res "state"))
459
            (state (update-parser-state state
460
                    #:cur-subject subject)))
461
       (parse-predicate-object predicate-object state)))
462
    ((('blank-node-property-list ('predicate-object-list po ...))
463
      ('predicate-object-list predicate-object ...))
464
     (let* ((subject ((parser-state-blank-node-gen state)))
465
            (new-state (parse-predicate-object
466
                         po (update-parser-state state #:cur-subject subject))))
467
       (parse-predicate-object predicate-object new-state)))
468
    ((('blank-node-property-list ('predicate-object-list po ...)))
469
     (let* ((subject ((parser-state-blank-node-gen state))))
470
       (parse-predicate-object po (update-parser-state state
471
                                    #:cur-subject subject))))))
472
473
(define (parse-turtle-doc parse-tree state)
474
  (let loop ((parse-tree parse-tree) (state state))
475
    (match parse-tree
476
      ('() (parser-state-result state))
477
      ((('prefix-id ('pname-ns ns) ('iriref iri ...)) parse-tree ...)
478
       (loop parse-tree
479
             (add-ns-to-state
480
               state ns (resolve-iri (parser-state-base-uri state)
481
                                     (parse-string iri)))))
482
      ((('prefix-id ('pname-ns ('iriref iri ...))) parse-tree ...)
483
       (loop parse-tree
484
             (add-ns-to-state
485
               state "" (resolve-iri (parser-state-base-uri state)
486
                                     (parse-string iri)))))
487
      ((('sparql-prefix ('pname-ns ns) ('iriref iri ...)) parse-tree ...)
488
       (loop parse-tree
489
             (add-ns-to-state
490
               state ns (resolve-iri (parser-state-base-uri state)
491
                                     (parse-string iri)))))
492
      ((('sparql-prefix ('pname-ns ('iriref iri ...))) parse-tree ...)
493
       (loop parse-tree
494
             (add-ns-to-state
495
               state "" (resolve-iri (parser-state-base-uri state)
496
                                     (parse-string iri)))))
497
      ((('base ('iriref iri ...)) parse-tree ...)
498
       (loop parse-tree
499
             (update-parser-state
500
               state #:base-uri (resolve-iri (parser-state-base-uri state)
501
                                             (parse-string iri)))))
502
      ((('sparql-base ('iriref iri ...)) parse-tree ...)
503
       (loop parse-tree
504
             (update-parser-state
505
               state #:base-uri (resolve-iri (parser-state-base-uri state)
506
                                             (parse-string iri)))))
507
      ((('triples t ...) parse-tree ...)
508
       (let ((res (parse-triples t state)))
509
         (loop parse-tree (parse-triples t state))))
510
      ;; otherwise, it's a single element, not a list of statements
511
      (((? symbol? _) _ ...) (loop (list parse-tree) state)))))
512
513
(define (tordf parse-tree base)
514
  (define state
515
    (make-parser-state base '() '() #f #f #f (create-generate-blank-node) '()))
516
  (parse-turtle-doc parse-tree state))
517
518
(define (turtle->rdf str-or-file base)
519
  (define str
520
    (cond
521
      ((file-exists? str-or-file) (call-with-input-file str-or-file get-string-all))
522
      ((string? str-or-file) str-or-file)))
523
      
524
  (let ((parse-tree (parse-turtle str)))
525
    (tordf parse-tree base)))
526