guile-rdf/turtle/tordf.scm

tordf.scm

1
;;;; Copyright (C) 2020 Julien Lepiller <julien@lepiller.eu>
2
;;;; 
3
;;;; This library is free software; you can redistribute it and/or
4
;;;; modify it under the terms of the GNU Lesser General Public
5
;;;; License as published by the Free Software Foundation; either
6
;;;; version 3 of the License, or (at your option) any later version.
7
;;;; 
8
;;;; This library is distributed in the hope that it will be useful,
9
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
10
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
;;;; Lesser General Public License for more details.
12
;;;; 
13
;;;; You should have received a copy of the GNU Lesser General Public
14
;;;; License along with this library; if not, write to the Free Software
15
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
;;;; 
17
18
(define-module (turtle tordf)
19
  #:use-module (ice-9 match)
20
  #:use-module (ice-9 textual-ports)
21
  #:use-module (iri iri)
22
  #:use-module (turtle parser)
23
  #:use-module (srfi srfi-9)
24
  #:use-module (rdf rdf)
25
  #:export (turtle->rdf))
26
27
(define-record-type parser-state
28
  (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate
29
                     cur-object blank-node-gen result)
30
  parser-state?
31
  (base-uri       parser-state-base-uri)
32
  (namespaces     parser-state-namespaces)
33
  (bnode-labels   parser-state-bnode-labels)
34
  (cur-subject    parser-state-cur-subject)
35
  (cur-predicate  parser-state-cur-predicate)
36
  (cur-object     parser-state-cur-object)
37
  (blank-node-gen parser-state-blank-node-gen)
38
  (result         parser-state-result))
39
40
(define* (update-parser-state
41
           state #:key (base-uri (parser-state-base-uri state))
42
                       (namespaces (parser-state-namespaces state))
43
                       (bnode-labels (parser-state-bnode-labels state))
44
                       (cur-subject (parser-state-cur-subject state))
45
                       (cur-predicate (parser-state-cur-predicate state))
46
                       (cur-object (parser-state-cur-object state))
47
                       (blank-node-gen (parser-state-blank-node-gen state))
48
                       (result (parser-state-result state)))
49
  (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate
50
                     cur-object blank-node-gen result))
51
52
(define (create-generate-blank-node)
53
  (define num 0)
54
  (lambda ()
55
    (set! num (+ num 1))
56
    num))
57
58
(define (add-ns-to-state state ns iri)
59
  (update-parser-state state
60
    #:namespaces (cons (cons ns iri) (parser-state-namespaces state))))
61
62
(define* (parse-string str #:optional for-iri?)
63
  (match str
64
    ((? string? str) str)
65
    ((component str ...)
66
     (match component
67
       ((? string? str1)
68
        (string-append str1 (parse-string str)))
69
       (('uchar n)
70
        (string-append (string (integer->char (string->number n 16)))
71
                       (parse-string str)))
72
       (('echar e)
73
        (string-append
74
          (match e
75
            ("\\t" "\t")
76
            ("\\b" "\b")
77
            ("\\n" "\n")
78
            ("\\r" "\r")
79
            ("\\f" "\f")
80
            ("\\\\" "\\")
81
            ("\\\"" "\"")
82
            ("\\'" "'"))
83
          (parse-string str)))))
84
    (() "")))
85
86
(define (valid-iri? iri)
87
  (and (not (string-any (ucs-range->char-set 0 33) iri))
88
       (not (string-any #\< iri))
89
       (not (string-any #\> iri))))
90
91
(define (parse-iri iri state)
92
  (pk 'iri iri)
93
  (match iri
94
    (('iri ('prefixed-name ('pname-ln ('pname-ns ns) ('pn-local suffix))))
95
     `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) ns)
96
                                suffix))
97
       ("state" . ,state)))
98
    (('iri ('prefixed-name ('pname-ln ('pname-ns ('pn-local suffix)))))
99
     `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) "")
100
                                suffix))
101
       ("state" . ,state)))
102
    (('iri ('prefixed-name ('pname-ns ns)))
103
     `(("iri" . ,(assoc-ref (parser-state-namespaces state) ns))
104
       ("state" . ,state)))
105
    (('iri ('prefixed-name 'pname-ns))
106
     `(("iri" . ,(assoc-ref (parser-state-namespaces state) ""))
107
       ("state" . ,state)))
108
    (('iri 'iriref)
109
     `(("iri" . ,(resolve-iri (parser-state-base-uri state) ""))
110
       ("state" . ,state)))
111
    (('iri ('iriref iri ...))
112
     (let ((iri (resolve-iri (parser-state-base-uri state) (parse-string iri))))
113
       (if (valid-iri? iri)
114
           `(("iri" . ,iri)
115
             ("state" . ,state))
116
           (throw 'invalid-iri iri))))))
117
118
(define (parse-verb verb state)
119
  (match verb
120
    ("a" `(("verb" . "http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
121
           ("state" . ,state)))
122
    (('predicate iri)
123
     (let ((res (parse-iri iri state)))
124
       `(("verb" . ,(assoc-ref res "iri"))
125
         ("state" . ,(assoc-ref res "state")))))))
126
127
(define (parse-object object state)
128
  (match object
129
    (('rdf-literal ('string-pat (_ str ...)))
130
     (let ((object
131
             (make-rdf-literal (parse-string str)
132
                               "http://www.w3.org/2001/XMLSchema#string" #f)))
133
       (update-parser-state state
134
         #:cur-object object
135
         #:result
136
         (cons
137
           (make-rdf-triple
138
             (parser-state-cur-subject state)
139
             (parser-state-cur-predicate state)
140
             object)
141
           (parser-state-result state)))))
142
    (('rdf-literal ('string-pat (_ str ...)) ("^^" iri))
143
     (let* ((res (parse-iri iri state))
144
            (iri (assoc-ref res "iri"))
145
            (state (assoc-ref res "state"))
146
            (object (make-rdf-literal (parse-string str) iri #f)))
147
       (update-parser-state state
148
         #:cur-object object
149
         #:result
150
         (cons
151
           (make-rdf-triple
152
             (parser-state-cur-subject state)
153
             (parser-state-cur-predicate state)
154
             object)
155
           (parser-state-result state)))))
156
    (('rdf-literal ('string-pat (_ str ...)) ('langtag lang))
157
     (let ((object
158
           (make-rdf-literal
159
             (parse-string str)
160
             "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"
161
             lang)))
162
       (update-parser-state state
163
         #:cur-object object
164
         #:result
165
         (cons
166
           (make-rdf-triple
167
             (parser-state-cur-subject state)
168
             (parser-state-cur-predicate state)
169
             object)
170
           (parser-state-result state)))))
171
    (('rdf-literal ('string-pat _) ("^^" iri))
172
     (let* ((res (parse-iri iri state))
173
            (iri (assoc-ref res "iri"))
174
            (state (assoc-ref res "state"))
175
            (object (make-rdf-literal "" iri #f)))
176
       (update-parser-state state
177
         #:cur-object object
178
         #:result
179
         (cons
180
           (make-rdf-triple
181
             (parser-state-cur-subject state)
182
             (parser-state-cur-predicate state)
183
             object)
184
           (parser-state-result state)))))
185
    (('rdf-literal ('string-pat _) ('langtag lang))
186
     (let ((object
187
           (make-rdf-literal
188
             ""
189
             "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"
190
             lang)))
191
       (update-parser-state state
192
         #:cur-object object
193
         #:result
194
         (cons
195
           (make-rdf-triple
196
             (parser-state-cur-subject state)
197
             (parser-state-cur-predicate state)
198
             object)
199
           (parser-state-result state)))))
200
    (('rdf-literal ('string-pat _))
201
     (let ((object
202
           (make-rdf-literal
203
             ""
204
             "http://www.w3.org/1999/02/22-rdf-syntax-ns#string"
205
             #f)))
206
       (update-parser-state state
207
         #:cur-object object
208
         #:result
209
         (cons
210
           (make-rdf-triple
211
             (parser-state-cur-subject state)
212
             (parser-state-cur-predicate state)
213
             object)
214
           (parser-state-result state)))))
215
    (('numeric-literal ('decimal num))
216
     (let ((object
217
            (make-rdf-literal num "http://www.w3.org/2001/XMLSchema#decimal" #f)))
218
       (update-parser-state state
219
         #:cur-object object
220
         #:result
221
         (cons
222
           (make-rdf-triple
223
             (parser-state-cur-subject state)
224
             (parser-state-cur-predicate state)
225
             object)
226
           (parser-state-result state)))))
227
    (('numeric-literal ('integer int))
228
     (let ((object
229
           (make-rdf-literal int "http://www.w3.org/2001/XMLSchema#integer" #f)))
230
       (update-parser-state state
231
         #:cur-object object
232
         #:result
233
         (cons
234
           (make-rdf-triple
235
             (parser-state-cur-subject state)
236
             (parser-state-cur-predicate state)
237
             object)
238
           (parser-state-result state)))))
239
    (('numeric-literal ('double num))
240
     (let ((object
241
            (make-rdf-literal num "http://www.w3.org/2001/XMLSchema#double" #f)))
242
       (update-parser-state state
243
         #:cur-object object
244
         #:result
245
         (cons
246
           (make-rdf-triple
247
             (parser-state-cur-subject state)
248
             (parser-state-cur-predicate state)
249
             object)
250
           (parser-state-result state)))))
251
    (('boolean-literal bool)
252
     (let ((object
253
             (make-rdf-literal bool "http://www.w3.org/2001/XMLSchema#boolean" #f)))
254
       (update-parser-state state
255
         #:cur-object object
256
         #:result
257
         (cons
258
           (make-rdf-triple
259
             (parser-state-cur-subject state)
260
             (parser-state-cur-predicate state)
261
             object)
262
           (parser-state-result state)))))
263
    (('blank-node ('anon _))
264
     (let ((node ((parser-state-blank-node-gen state))))
265
       (update-parser-state state
266
         #:cur-object node
267
         #:result
268
         (cons
269
           (make-rdf-triple
270
             (parser-state-cur-subject state)
271
             (parser-state-cur-predicate state)
272
             node)
273
           (parser-state-result state)))))
274
    (('blank-node ('blank-node-label label))
275
     (let* ((node
276
              (or (assoc-ref (parser-state-bnode-labels state) label)
277
                  ((parser-state-blank-node-gen state))))
278
            (state
279
              (if (assoc-ref (parser-state-bnode-labels state) label)
280
                  state
281
                  (update-parser-state state
282
                    #:bnode-labels
283
                    (cons
284
                      (cons label node)
285
                      (parser-state-bnode-labels state))))))
286
       (update-parser-state state
287
         #:cur-object node
288
         #:result
289
         (cons
290
           (make-rdf-triple
291
             (parser-state-cur-subject state)
292
             (parser-state-cur-predicate state)
293
             node)
294
           (parser-state-result state)))))
295
    (('blank-node-property-list ('predicate-object-list po ...))
296
     (let* ((node ((parser-state-blank-node-gen state)))
297
            (new-state (parse-predicate-object
298
                         po (update-parser-state state #:cur-subject node))))
299
       (update-parser-state new-state
300
         #:cur-object node
301
         #:cur-subject (parser-state-cur-subject state)
302
         #:cur-predicate (parser-state-cur-predicate state)
303
         #:result
304
         (cons
305
           (make-rdf-triple
306
             (parser-state-cur-subject state)
307
             (parser-state-cur-predicate state)
308
             node)
309
           (parser-state-result new-state)))))
310
    ('collection
311
     (let ((object "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil"))
312
       (update-parser-state state
313
         #:cur-object object
314
         #:result
315
         (cons
316
           (make-rdf-triple
317
             (parser-state-cur-subject state)
318
             (parser-state-cur-predicate state)
319
             object)
320
           (parser-state-result state)))))
321
    (('collection objects ...)
322
     (let ((state (parse-collection objects state)))
323
       (update-parser-state state
324
         #:result
325
         (cons
326
           (make-rdf-triple
327
             (parser-state-cur-subject state)
328
             (parser-state-cur-predicate state)
329
             (parser-state-cur-object state))
330
           (parser-state-result state)))))
331
    (('iri _)
332
     (let* ((res (parse-iri object state))
333
            (iri (assoc-ref res "iri"))
334
            (state (assoc-ref res "state")))
335
       (update-parser-state state
336
         #:cur-object iri
337
         #:result
338
         (cons
339
           (make-rdf-triple
340
             (parser-state-cur-subject state)
341
             (parser-state-cur-predicate state)
342
             iri)
343
           (parser-state-result state)))))))
344
345
(define (parse-collection collection state)
346
  (let ((node ((parser-state-blank-node-gen state))))
347
    (let loop ((objects collection)
348
               (new-state
349
                 (update-parser-state state
350
                   #:cur-subject node
351
                   #:cur-predicate
352
                   "http://www.w3.org/1999/02/22-rdf-syntax-ns#first"))
353
               (previous-object #nil))
354
      (match objects
355
        ('()
356
         (update-parser-state new-state
357
           #:cur-object node
358
           #:cur-subject (parser-state-cur-subject state)
359
           #:cur-predicate (parser-state-cur-predicate state)
360
           #:result
361
           (cons
362
             (make-rdf-triple
363
               previous-object
364
               "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"
365
               "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil")
366
             (parser-state-result new-state))))
367
        ((('object object) objects ...)
368
         (if (equal? previous-object #nil)
369
             (let ((new-state (parse-object object new-state)))
370
               (loop objects new-state node))
371
             (let* ((node ((parser-state-blank-node-gen new-state)))
372
                    (new-state
373
                      (update-parser-state new-state
374
                        #:cur-subject node))
375
                    (new-state (parse-object object new-state)))
376
               (loop
377
                 objects
378
                 (update-parser-state new-state
379
                   #:result
380
                   (cons
381
                     (make-rdf-triple
382
                       previous-object
383
                       "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"
384
                       node)
385
                     (parser-state-result new-state)))
386
                 node))))))))
387
388
(define (parse-object-list ol state)
389
  (let loop ((ol ol) (state state))
390
    (match ol
391
      ('() state)
392
      ((('object object) ol ...)
393
       (loop ol (parse-object object state)))
394
      ((ol)
395
       (loop ol state)))))
396
397
(define (parse-predicate-object po state)
398
  (let loop ((po po) (state state))
399
    (match po
400
      ((('verb verb) ('object-list ol ...) po)
401
       (let* ((verb (parse-verb verb state))
402
              (state (assoc-ref verb "state"))
403
              (verb (assoc-ref verb "verb"))
404
              (new-state (update-parser-state state #:cur-predicate verb))
405
              (res (parse-object-list ol new-state)))
406
         (loop po res)))
407
      ((('verb verb) ('object-list ol ...))
408
       (let* ((verb (parse-verb verb state))
409
              (state (assoc-ref verb "state"))
410
              (verb (assoc-ref verb "verb"))
411
              (new-state (update-parser-state state #:cur-predicate verb))
412
              (res (parse-object-list ol new-state)))
413
         res))
414
      (((('verb verb) ('object-list ol ...)) po ...)
415
       (let* ((verb (parse-verb verb state))
416
              (state (assoc-ref verb "state"))
417
              (verb (assoc-ref verb "verb"))
418
              (new-state (update-parser-state state #:cur-predicate verb))
419
              (res (parse-object-list ol new-state)))
420
         (loop po res)))
421
      ('() state)
422
      ((po)
423
       (loop po state)))))
424
425
(define (parse-subject s state)
426
  (match s
427
    (('iri _ ...)
428
     (let ((res (parse-iri s state)))
429
       `(("subject" . ,(assoc-ref res "iri"))
430
         ("state" . ,(assoc-ref res "state")))))
431
    (('collection objects ...)
432
     (let ((res (parse-collection objects state)))
433
       `(("subject" . ,(parser-state-cur-object res))
434
         ("state" . ,res))))
435
    ('collection
436
     `(("subject" . "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil")
437
       ("state" . ,state)))
438
    (('blank-node ('anon _))
439
     (let ((node ((parser-state-blank-node-gen state))))
440
       `(("subject" . ,node)
441
         ("state" . ,state))))
442
    (('blank-node ('blank-node-label label))
443
     (if (assoc-ref (parser-state-bnode-labels state) label)
444
         `(("subject" . ,(assoc-ref (parser-state-bnode-labels state) label))
445
           ("state" . ,state))
446
         (let ((node ((parser-state-blank-node-gen state))))
447
           `(("subject" . ,node)
448
             ("state" . ,(update-parser-state state
449
                           #:bnode-labels
450
                           (cons
451
                             (cons label node)
452
                             (parser-state-bnode-labels state))))))))))
453
454
(define (parse-triples t state)
455
  (match t
456
    ((('subject subject) ('predicate-object-list predicate-object ...))
457
     (let* ((res (parse-subject subject state))
458
            (subject (assoc-ref res "subject"))
459
            (state (assoc-ref res "state"))
460
            (state (update-parser-state state
461
                    #:cur-subject subject)))
462
       (parse-predicate-object predicate-object state)))
463
    ((('blank-node-property-list ('predicate-object-list po ...))
464
      ('predicate-object-list predicate-object ...))
465
     (let* ((subject ((parser-state-blank-node-gen state)))
466
            (new-state (parse-predicate-object
467
                         po (update-parser-state state #:cur-subject subject))))
468
       (parse-predicate-object predicate-object new-state)))
469
    ((('blank-node-property-list ('predicate-object-list po ...)))
470
     (let* ((subject ((parser-state-blank-node-gen state))))
471
       (parse-predicate-object po (update-parser-state state
472
                                    #:cur-subject subject))))))
473
474
(define (parse-turtle-doc parse-tree state)
475
  (let loop ((parse-tree parse-tree) (state state))
476
    (match parse-tree
477
      ('() (parser-state-result state))
478
      ((('prefix-id ('pname-ns ns) ('iriref iri ...)) parse-tree ...)
479
       (loop parse-tree
480
             (add-ns-to-state
481
               state ns (resolve-iri (parser-state-base-uri state)
482
                                     (parse-string iri)))))
483
      ((('prefix-id ('pname-ns ('iriref iri ...))) parse-tree ...)
484
       (loop parse-tree
485
             (add-ns-to-state
486
               state "" (resolve-iri (parser-state-base-uri state)
487
                                     (parse-string iri)))))
488
      ((('sparql-prefix ('pname-ns ns) ('iriref iri ...)) parse-tree ...)
489
       (loop parse-tree
490
             (add-ns-to-state
491
               state ns (resolve-iri (parser-state-base-uri state)
492
                                     (parse-string iri)))))
493
      ((('sparql-prefix ('pname-ns ('iriref iri ...))) parse-tree ...)
494
       (loop parse-tree
495
             (add-ns-to-state
496
               state "" (resolve-iri (parser-state-base-uri state)
497
                                     (parse-string iri)))))
498
      ((('base ('iriref iri ...)) parse-tree ...)
499
       (loop parse-tree
500
             (update-parser-state
501
               state #:base-uri (resolve-iri (parser-state-base-uri state)
502
                                             (parse-string iri)))))
503
      ((('sparql-base ('iriref iri ...)) parse-tree ...)
504
       (loop parse-tree
505
             (update-parser-state
506
               state #:base-uri (resolve-iri (parser-state-base-uri state)
507
                                             (parse-string iri)))))
508
      ((('triples t ...) parse-tree ...)
509
       (let ((res (parse-triples t state)))
510
         (loop parse-tree (parse-triples t state))))
511
      ;; otherwise, it's a single element, not a list of statements
512
      (((? symbol? _) _ ...) (loop (list parse-tree) state)))))
513
514
(define (tordf parse-tree base)
515
  (define state
516
    (make-parser-state base '() '() #f #f #f (create-generate-blank-node) '()))
517
  (parse-turtle-doc parse-tree state))
518
519
(define (turtle->rdf str-or-file base)
520
  (define str
521
    (cond
522
      ((file-exists? str-or-file) (call-with-input-file str-or-file get-string-all))
523
      ((string? str-or-file) str-or-file)))
524
      
525
  (let ((parse-tree (parse-turtle str)))
526
    (tordf parse-tree base)))
527