guile-rdf/turtle/tordf.scm

tordf.scm

1
;;;; Copyright (C) 2020 Julien Lepiller <julien@lepiller.eu>
2
;;;; 
3
;;;; This library is free software; you can redistribute it and/or
4
;;;; modify it under the terms of the GNU Lesser General Public
5
;;;; License as published by the Free Software Foundation; either
6
;;;; version 3 of the License, or (at your option) any later version.
7
;;;; 
8
;;;; This library is distributed in the hope that it will be useful,
9
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
10
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
;;;; Lesser General Public License for more details.
12
;;;; 
13
;;;; You should have received a copy of the GNU Lesser General Public
14
;;;; License along with this library; if not, write to the Free Software
15
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
;;;; 
17
18
(define-module (turtle tordf)
19
  #:use-module (ice-9 match)
20
  #:use-module (ice-9 textual-ports)
21
  #:use-module (iri iri)
22
  #:use-module (turtle parser)
23
  #:use-module (srfi srfi-9)
24
  #:use-module (rdf rdf)
25
  #:export (turtle->rdf))
26
27
(define-record-type parser-state
28
  (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate
29
                     cur-object blank-node-gen result)
30
  parser-state?
31
  (base-uri       parser-state-base-uri)
32
  (namespaces     parser-state-namespaces)
33
  (bnode-labels   parser-state-bnode-labels)
34
  (cur-subject    parser-state-cur-subject)
35
  (cur-predicate  parser-state-cur-predicate)
36
  (cur-object     parser-state-cur-object)
37
  (blank-node-gen parser-state-blank-node-gen)
38
  (result         parser-state-result))
39
40
(define* (update-parser-state
41
           state #:key (base-uri (parser-state-base-uri state))
42
                       (namespaces (parser-state-namespaces state))
43
                       (bnode-labels (parser-state-bnode-labels state))
44
                       (cur-subject (parser-state-cur-subject state))
45
                       (cur-predicate (parser-state-cur-predicate state))
46
                       (cur-object (parser-state-cur-object state))
47
                       (blank-node-gen (parser-state-blank-node-gen state))
48
                       (result (parser-state-result state)))
49
  (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate
50
                     cur-object blank-node-gen result))
51
52
(define (create-generate-blank-node)
53
  (define num 0)
54
  (lambda ()
55
    (set! num (+ num 1))
56
    num))
57
58
(define (add-ns-to-state state ns iri)
59
  (update-parser-state state
60
    #:namespaces (cons (cons ns iri) (parser-state-namespaces state))))
61
62
(define (parse-string str)
63
  (match str
64
    ((? string? str) str)
65
    ((component str ...)
66
     (match component
67
       ((? string? str1)
68
        (string-append str1 (parse-string str)))
69
       (('uchar n)
70
        (string-append (string (integer->char (string->number n 16)))
71
                       (parse-string str)))
72
       (('echar e)
73
        (string-append
74
          (match e
75
            ("\\t" "\t")
76
            ("\\b" "\b")
77
            ("\\n" "\n")
78
            ("\\r" "\r")
79
            ("\\f" "\f")
80
            ("\\\\" "\\")
81
            ("\\\"" "\"")
82
            ("\\'" "'"))
83
          (parse-string str)))))
84
    (() "")))
85
86
(define (parse-iri iri state)
87
  (match iri
88
    (('iri ('prefixed-name ('pname-ln ('pname-ns ns) ('pn-local suffix))))
89
     `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) ns)
90
                                suffix))
91
       ("state" . ,state)))
92
    (('iri ('prefixed-name ('pname-ln ('pname-ns ('pn-local suffix)))))
93
     `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) "")
94
                                suffix))
95
       ("state" . ,state)))
96
    (('iri ('prefixed-name ('pname-ns ns)))
97
     `(("iri" . ,(assoc-ref (parser-state-namespaces state) ns))
98
       ("state" . ,state)))
99
    (('iri ('prefixed-name 'pname-ns))
100
     `(("iri" . ,(assoc-ref (parser-state-namespaces state) ""))
101
       ("state" . ,state)))
102
    (('iri 'iriref)
103
     `(("iri" . ,(resolve-iri (parser-state-base-uri state) ""))
104
       ("state" . ,state)))
105
    (('iri ('iriref iri ...))
106
     `(("iri" . ,(resolve-iri (parser-state-base-uri state) (parse-string iri)))
107
       ("state" . ,state)))))
108
109
(define (parse-verb verb state)
110
  (match verb
111
    ("a" `(("verb" . "http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
112
           ("state" . ,state)))
113
    (('predicate iri)
114
     (let ((res (parse-iri iri state)))
115
       `(("verb" . ,(assoc-ref res "iri"))
116
         ("state" . ,(assoc-ref res "state")))))))
117
118
(define (parse-object object state)
119
  (match object
120
    (('rdf-literal ('string-pat (_ str ...)))
121
     (let ((object
122
             (make-rdf-literal (parse-string str)
123
                               "http://www.w3.org/2001/XMLSchema#string" #f)))
124
       (update-parser-state state
125
         #:cur-object object
126
         #:result
127
         (cons
128
           (make-rdf-triple
129
             (parser-state-cur-subject state)
130
             (parser-state-cur-predicate state)
131
             object)
132
           (parser-state-result state)))))
133
    (('rdf-literal ('string-pat (_ str ...)) ("^^" iri))
134
     (let* ((res (parse-iri iri state))
135
            (iri (assoc-ref res "iri"))
136
            (state (assoc-ref res "state"))
137
            (object (make-rdf-literal (parse-string str) iri #f)))
138
       (update-parser-state state
139
         #:cur-object object
140
         #:result
141
         (cons
142
           (make-rdf-triple
143
             (parser-state-cur-subject state)
144
             (parser-state-cur-predicate state)
145
             object)
146
           (parser-state-result state)))))
147
    (('rdf-literal ('string-pat (_ str ...)) ('langtag lang))
148
     (let ((object
149
           (make-rdf-literal
150
             (parse-string str)
151
             "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"
152
             lang)))
153
       (update-parser-state state
154
         #:cur-object object
155
         #:result
156
         (cons
157
           (make-rdf-triple
158
             (parser-state-cur-subject state)
159
             (parser-state-cur-predicate state)
160
             object)
161
           (parser-state-result state)))))
162
    (('numeric-literal ('decimal num))
163
     (let ((object
164
            (make-rdf-literal num "http://www.w3.org/2001/XMLSchema#decimal" #f)))
165
       (update-parser-state state
166
         #:cur-object object
167
         #:result
168
         (cons
169
           (make-rdf-triple
170
             (parser-state-cur-subject state)
171
             (parser-state-cur-predicate state)
172
             object)
173
           (parser-state-result state)))))
174
    (('numeric-literal ('integer int))
175
     (let ((object
176
           (make-rdf-literal int "http://www.w3.org/2001/XMLSchema#integer" #f)))
177
       (update-parser-state state
178
         #:cur-object object
179
         #:result
180
         (cons
181
           (make-rdf-triple
182
             (parser-state-cur-subject state)
183
             (parser-state-cur-predicate state)
184
             object)
185
           (parser-state-result state)))))
186
    (('numeric-literal ('double num))
187
     (let ((object
188
            (make-rdf-literal num "http://www.w3.org/2001/XMLSchema#double" #f)))
189
       (update-parser-state state
190
         #:cur-object object
191
         #:result
192
         (cons
193
           (make-rdf-triple
194
             (parser-state-cur-subject state)
195
             (parser-state-cur-predicate state)
196
             object)
197
           (parser-state-result state)))))
198
    (('boolean-literal bool)
199
     (let ((object
200
             (make-rdf-literal bool "http://www.w3.org/2001/XMLSchema#boolean" #f)))
201
       (update-parser-state state
202
         #:cur-object object
203
         #:result
204
         (cons
205
           (make-rdf-triple
206
             (parser-state-cur-subject state)
207
             (parser-state-cur-predicate state)
208
             object)
209
           (parser-state-result state)))))
210
    (('blank-node ('anon _))
211
     (let ((node ((parser-state-blank-node-gen state))))
212
       (update-parser-state state
213
         #:cur-object node
214
         #:result
215
         (cons
216
           (make-rdf-triple
217
             (parser-state-cur-subject state)
218
             (parser-state-cur-predicate state)
219
             node)
220
           (parser-state-result state)))))
221
    (('blank-node ('blank-node-label label))
222
     (let* ((node
223
              (or (assoc-ref (parser-state-bnode-labels state) label)
224
                  ((parser-state-blank-node-gen state))))
225
            (state
226
              (if (assoc-ref (parser-state-bnode-labels state) label)
227
                  state
228
                  (update-parser-state state
229
                    #:bnode-labels
230
                    (cons
231
                      (cons label node)
232
                      (parser-state-bnode-labels state))))))
233
       (update-parser-state state
234
         #:cur-object node
235
         #:result
236
         (cons
237
           (make-rdf-triple
238
             (parser-state-cur-subject state)
239
             (parser-state-cur-predicate state)
240
             node)
241
           (parser-state-result state)))))
242
    (('blank-node-property-list ('predicate-object-list po ...))
243
     (let* ((node ((parser-state-blank-node-gen state)))
244
            (new-state (parse-predicate-object
245
                         po (update-parser-state state #:cur-subject node))))
246
       (update-parser-state new-state
247
         #:cur-object node
248
         #:cur-subject (parser-state-cur-subject state)
249
         #:cur-predicate (parser-state-cur-predicate state)
250
         #:result
251
         (cons
252
           (make-rdf-triple
253
             (parser-state-cur-subject state)
254
             (parser-state-cur-predicate state)
255
             node)
256
           (parser-state-result new-state)))))
257
    ('collection
258
     (let ((object "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil"))
259
       (update-parser-state state
260
         #:cur-object object
261
         #:result
262
         (cons
263
           (make-rdf-triple
264
             (parser-state-cur-subject state)
265
             (parser-state-cur-predicate state)
266
             object)
267
           (parser-state-result state)))))
268
    (('collection objects ...)
269
     (let ((state (parse-collection objects state)))
270
       (update-parser-state state
271
         #:result
272
         (cons
273
           (make-rdf-triple
274
             (parser-state-cur-subject state)
275
             (parser-state-cur-predicate state)
276
             (parser-state-cur-object state))
277
           (parser-state-result state)))))
278
    (('iri _)
279
     (let* ((res (parse-iri object state))
280
            (iri (assoc-ref res "iri"))
281
            (state (assoc-ref res "state")))
282
       (update-parser-state state
283
         #:cur-object iri
284
         #:result
285
         (cons
286
           (make-rdf-triple
287
             (parser-state-cur-subject state)
288
             (parser-state-cur-predicate state)
289
             iri)
290
           (parser-state-result state)))))))
291
292
(define (parse-collection collection state)
293
  (let ((node ((parser-state-blank-node-gen state))))
294
    (let loop ((objects collection)
295
               (new-state
296
                 (update-parser-state state
297
                   #:cur-subject node
298
                   #:cur-predicate
299
                   "http://www.w3.org/1999/02/22-rdf-syntax-ns#first"))
300
               (previous-object #nil))
301
      (match objects
302
        ('()
303
         (update-parser-state new-state
304
           #:cur-object node
305
           #:cur-subject (parser-state-cur-subject state)
306
           #:cur-predicate (parser-state-cur-predicate state)
307
           #:result
308
           (cons
309
             (make-rdf-triple
310
               previous-object
311
               "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"
312
               "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil")
313
             (parser-state-result new-state))))
314
        ((('object object) objects ...)
315
         (if (equal? previous-object #nil)
316
             (let ((new-state (parse-object object new-state)))
317
               (loop objects new-state node))
318
             (let* ((node ((parser-state-blank-node-gen new-state)))
319
                    (new-state
320
                      (update-parser-state new-state
321
                        #:cur-subject node))
322
                    (new-state (parse-object object new-state)))
323
               (loop
324
                 objects
325
                 (update-parser-state new-state
326
                   #:result
327
                   (cons
328
                     (make-rdf-triple
329
                       previous-object
330
                       "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"
331
                       node)
332
                     (parser-state-result new-state)))
333
                 node))))))))
334
335
(define (parse-object-list ol state)
336
  (let loop ((ol ol) (state state))
337
    (match ol
338
      ('() state)
339
      ((('object object) ol ...)
340
       (loop ol (parse-object object state)))
341
      ((ol)
342
       (loop ol state)))))
343
344
(define (parse-predicate-object po state)
345
  (let loop ((po po) (state state))
346
    (match po
347
      ((('verb verb) ('object-list ol ...) po)
348
       (let* ((verb (parse-verb verb state))
349
              (state (assoc-ref verb "state"))
350
              (verb (assoc-ref verb "verb"))
351
              (new-state (update-parser-state state #:cur-predicate verb))
352
              (res (parse-object-list ol new-state)))
353
         (loop po res)))
354
      ((('verb verb) ('object-list ol ...))
355
       (let* ((verb (parse-verb verb state))
356
              (state (assoc-ref verb "state"))
357
              (verb (assoc-ref verb "verb"))
358
              (new-state (update-parser-state state #:cur-predicate verb))
359
              (res (parse-object-list ol new-state)))
360
         res))
361
      (((('verb verb) ('object-list ol ...)) po ...)
362
       (let* ((verb (parse-verb verb state))
363
              (state (assoc-ref verb "state"))
364
              (verb (assoc-ref verb "verb"))
365
              (new-state (update-parser-state state #:cur-predicate verb))
366
              (res (parse-object-list ol new-state)))
367
         (loop po res)))
368
      ('() state)
369
      ((po)
370
       (loop po state)))))
371
372
(define (parse-subject s state)
373
  (match s
374
    (('iri _ ...)
375
     (let ((res (parse-iri s state)))
376
       `(("subject" . ,(assoc-ref res "iri"))
377
         ("state" . ,(assoc-ref res "state")))))
378
    (('collection objects ...)
379
     (let ((res (parse-collection objects state)))
380
       `(("subject" . ,(parser-state-cur-object res))
381
         ("state" . ,res))))
382
    ('collection
383
     `(("subject" . "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil")
384
       ("state" . ,state)))
385
    (('blank-node ('anon _))
386
     (let ((node ((parser-state-blank-node-gen state))))
387
       `(("subject" . ,node)
388
         ("state" . ,state))))
389
    (('blank-node ('blank-node-label label))
390
     (if (assoc-ref (parser-state-bnode-labels state) label)
391
         `(("subject" . ,(assoc-ref (parser-state-bnode-labels state) label))
392
           ("state" . ,state))
393
         (let ((node ((parser-state-blank-node-gen state))))
394
           `(("subject" . ,node)
395
             ("state" . ,(update-parser-state state
396
                           #:bnode-labels
397
                           (cons
398
                             (cons label node)
399
                             (parser-state-bnode-labels state))))))))))
400
401
(define (parse-triples t state)
402
  (match t
403
    ((('subject subject) ('predicate-object-list predicate-object ...))
404
     (let* ((res (parse-subject subject state))
405
            (subject (assoc-ref res "subject"))
406
            (state (assoc-ref res "state"))
407
            (state (update-parser-state state
408
                    #:cur-subject subject)))
409
       (parse-predicate-object predicate-object state)))
410
    ((('blank-node-property-list ('predicate-object-list po ...))
411
      ('predicate-object-list predicate-object ...))
412
     (let* ((subject ((parser-state-blank-node-gen state)))
413
            (new-state (parse-predicate-object
414
                         po (update-parser-state state #:cur-subject subject))))
415
       (parse-predicate-object predicate-object new-state)))
416
    ((('blank-node-property-list ('predicate-object-list po ...)))
417
     (let* ((subject ((parser-state-blank-node-gen state))))
418
       (parse-predicate-object po (update-parser-state state
419
                                    #:cur-subject subject))))))
420
421
(define (parse-turtle-doc parse-tree state)
422
  (let loop ((parse-tree parse-tree) (state state))
423
    (match parse-tree
424
      ('() (parser-state-result state))
425
      ((('prefix-id ('pname-ns ns) ('iriref iri ...)) parse-tree ...)
426
       (loop parse-tree
427
             (add-ns-to-state
428
               state ns (resolve-iri (parser-state-base-uri state)
429
                                     (parse-string iri)))))
430
      ((('prefix-id ('pname-ns ('iriref iri ...))) parse-tree ...)
431
       (loop parse-tree
432
             (add-ns-to-state
433
               state "" (resolve-iri (parser-state-base-uri state)
434
                                     (parse-string iri)))))
435
      ((('sparql-prefix ('pname-ns ns) ('iriref iri ...)) parse-tree ...)
436
       (loop parse-tree
437
             (add-ns-to-state
438
               state ns (resolve-iri (parser-state-base-uri state)
439
                                     (parse-string iri)))))
440
      ((('sparql-prefix ('pname-ns ('iriref iri ...))) parse-tree ...)
441
       (loop parse-tree
442
             (add-ns-to-state
443
               state "" (resolve-iri (parser-state-base-uri state)
444
                                     (parse-string iri)))))
445
      ((('base ('iriref iri ...)) parse-tree ...)
446
       (loop parse-tree
447
             (update-parser-state
448
               state #:base-uri (resolve-iri (parser-state-base-uri state)
449
                                             (parse-string iri)))))
450
      ((('sparql-base ('iriref iri ...)) parse-tree ...)
451
       (loop parse-tree
452
             (update-parser-state
453
               state #:base-uri (resolve-iri (parser-state-base-uri state)
454
                                             (parse-string iri)))))
455
      ((('triples t ...) parse-tree ...)
456
       (let ((res (parse-triples t state)))
457
         (loop parse-tree (parse-triples t state))))
458
      ;; otherwise, it's a single element, not a list of statements
459
      (((? symbol? _) _ ...) (loop (list parse-tree) state)))))
460
461
(define (tordf parse-tree base)
462
  (define state
463
    (make-parser-state base '() '() #f #f #f (create-generate-blank-node) '()))
464
  (parse-turtle-doc parse-tree state))
465
466
(define (turtle->rdf str-or-file base)
467
  (define str
468
    (cond
469
      ((file-exists? str-or-file) (call-with-input-file str-or-file get-string-all))
470
      ((string? str-or-file) str-or-file)))
471
      
472
  (let ((parse-tree (parse-turtle str)))
473
    (tordf parse-tree base)))
474