guile-rdf/turtle/tordf.scm

tordf.scm

1
;;;; Copyright (C) 2020 Julien Lepiller <julien@lepiller.eu>
2
;;;; 
3
;;;; This library is free software; you can redistribute it and/or
4
;;;; modify it under the terms of the GNU Lesser General Public
5
;;;; License as published by the Free Software Foundation; either
6
;;;; version 3 of the License, or (at your option) any later version.
7
;;;; 
8
;;;; This library is distributed in the hope that it will be useful,
9
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
10
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
;;;; Lesser General Public License for more details.
12
;;;; 
13
;;;; You should have received a copy of the GNU Lesser General Public
14
;;;; License along with this library; if not, write to the Free Software
15
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
;;;; 
17
18
(define-module (turtle tordf)
19
  #:use-module (ice-9 match)
20
  #:use-module (ice-9 textual-ports)
21
  #:use-module (iri iri)
22
  #:use-module (turtle parser)
23
  #:use-module (srfi srfi-9)
24
  #:use-module (rdf rdf)
25
  #:export (turtle->rdf))
26
27
(define-record-type parser-state
28
  (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate
29
                     cur-object blank-node-gen result)
30
  parser-state?
31
  (base-uri       parser-state-base-uri)
32
  (namespaces     parser-state-namespaces)
33
  (bnode-labels   parser-state-bnode-labels)
34
  (cur-subject    parser-state-cur-subject)
35
  (cur-predicate  parser-state-cur-predicate)
36
  (cur-object     parser-state-cur-object)
37
  (blank-node-gen parser-state-blank-node-gen)
38
  (result         parser-state-result))
39
40
(define* (update-parser-state
41
           state #:key (base-uri (parser-state-base-uri state))
42
                       (namespaces (parser-state-namespaces state))
43
                       (bnode-labels (parser-state-bnode-labels state))
44
                       (cur-subject (parser-state-cur-subject state))
45
                       (cur-predicate (parser-state-cur-predicate state))
46
                       (cur-object (parser-state-cur-object state))
47
                       (blank-node-gen (parser-state-blank-node-gen state))
48
                       (result (parser-state-result state)))
49
  (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate
50
                     cur-object blank-node-gen result))
51
52
(define (create-generate-blank-node)
53
  (define num 0)
54
  (lambda ()
55
    (set! num (+ num 1))
56
    num))
57
58
(define (add-ns-to-state state ns iri)
59
  (update-parser-state state
60
    #:namespaces (cons (cons ns iri) (parser-state-namespaces state))))
61
62
(define (parse-string str)
63
  (match str
64
    ((? string? str) str)
65
    ((component str ...)
66
     (match component
67
       ((? string? str1)
68
        (string-append str1 (parse-string str)))
69
       (('uchar n)
70
        (string-append (string (integer->char (string->number n 16)))
71
                       (parse-string str)))
72
       (('echar e)
73
        (string-append
74
          (match e
75
            ("\\t" "\t")
76
            ("\\b" "\b")
77
            ("\\n" "\n")
78
            ("\\r" "\r")
79
            ("\\f" "\f")
80
            ("\\\\" "\\")
81
            ("\\\"" "\"")
82
            ("\\'" "'"))
83
          (parse-string str)))))
84
    (() "")))
85
86
(define (parse-iri iri state)
87
  (pk 'iri iri)
88
  (match iri
89
    (('iri ('prefixed-name ('pname-ln ('pname-ns ns) ('pn-local suffix))))
90
     `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) ns)
91
                                suffix))
92
       ("state" . ,state)))
93
    (('iri ('prefixed-name ('pname-ln ('pname-ns ('pn-local suffix)))))
94
     `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) "")
95
                                suffix))
96
       ("state" . ,state)))
97
    (('iri ('prefixed-name ('pname-ns ns)))
98
     `(("iri" . ,(assoc-ref (parser-state-namespaces state) ns))
99
       ("state" . ,state)))
100
    (('iri ('prefixed-name 'pname-ns))
101
     `(("iri" . ,(assoc-ref (parser-state-namespaces state) ""))
102
       ("state" . ,state)))
103
    (('iri 'iriref)
104
     `(("iri" . ,(resolve-iri (parser-state-base-uri state) ""))
105
       ("state" . ,state)))
106
    (('iri ('iriref iri ...))
107
     `(("iri" . ,(resolve-iri (parser-state-base-uri state) (parse-string iri)))
108
       ("state" . ,state)))))
109
110
(define (parse-verb verb state)
111
  (match verb
112
    ("a" `(("verb" . "http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
113
           ("state" . ,state)))
114
    (('predicate iri)
115
     (let ((res (parse-iri iri state)))
116
       `(("verb" . ,(assoc-ref res "iri"))
117
         ("state" . ,(assoc-ref res "state")))))))
118
119
(define (parse-object object state)
120
  (match object
121
    (('rdf-literal ('string-pat (_ str ...)))
122
     (let ((object
123
             (make-rdf-literal (parse-string str)
124
                               "http://www.w3.org/2001/XMLSchema#string" #f)))
125
       (update-parser-state state
126
         #:cur-object object
127
         #:result
128
         (cons
129
           (make-rdf-triple
130
             (parser-state-cur-subject state)
131
             (parser-state-cur-predicate state)
132
             (make-rdf-literal str "http://www.w3.org/2001/XMLSchema#string" #f))
133
           (parser-state-result state)))))
134
    (('rdf-literal ('string-pat (_ str ...)) ("^^" iri))
135
     (let* ((res (parse-iri iri state))
136
            (iri (assoc-ref res "iri"))
137
            (state (assoc-ref res "state"))
138
            (object (make-rdf-literal (parse-string str) iri #f)))
139
       (update-parser-state state
140
         #:cur-object object
141
         #:result
142
         (cons
143
           (make-rdf-triple
144
             (parser-state-cur-subject state)
145
             (parser-state-cur-predicate state)
146
             (make-rdf-literal str "http://www.w3.org/2001/XMLSchema#string" #f))
147
           (parser-state-result state)))))
148
    (('rdf-literal ('string-pat (_ str ...)) ('langtag lang))
149
     (let ((object
150
           (make-rdf-literal
151
             (parse-string str)
152
             "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"
153
             lang)))
154
       (update-parser-state state
155
         #:cur-object object
156
         #:result
157
         (cons
158
           (make-rdf-triple
159
             (parser-state-cur-subject state)
160
             (parser-state-cur-predicate state)
161
             object)
162
           (parser-state-result state)))))
163
    (('numeric-literal ('decimal num))
164
     (let ((object
165
            (make-rdf-literal num "http://www.w3.org/2001/XMLSchema#decimal" #f)))
166
       (update-parser-state state
167
         #:cur-object object
168
         #:result
169
         (cons
170
           (make-rdf-triple
171
             (parser-state-cur-subject state)
172
             (parser-state-cur-predicate state)
173
             object)
174
           (parser-state-result state)))))
175
    (('numeric-literal ('integer int))
176
     (let ((object
177
           (make-rdf-literal int "http://www.w3.org/2001/XMLSchema#integer" #f)))
178
       (update-parser-state state
179
         #:cur-object object
180
         #:result
181
         (cons
182
           (make-rdf-triple
183
             (parser-state-cur-subject state)
184
             (parser-state-cur-predicate state)
185
             object)
186
           (parser-state-result state)))))
187
    (('numeric-literal ('double num))
188
     (let ((object
189
            (make-rdf-literal num "http://www.w3.org/2001/XMLSchema#double" #f)))
190
       (update-parser-state state
191
         #:cur-object object
192
         #:result
193
         (cons
194
           (make-rdf-triple
195
             (parser-state-cur-subject state)
196
             (parser-state-cur-predicate state)
197
             object)
198
           (parser-state-result state)))))
199
    (('boolean-literal bool)
200
     (let ((object
201
             (make-rdf-literal bool "http://www.w3.org/2001/XMLSchema#boolean" #f)))
202
       (update-parser-state state
203
         #:cur-object object
204
         #:result
205
         (cons
206
           (make-rdf-triple
207
             (parser-state-cur-subject state)
208
             (parser-state-cur-predicate state)
209
             object)
210
           (parser-state-result state)))))
211
    (('blank-node ('anon _))
212
     (let ((node ((parser-state-blank-node-gen state))))
213
       (update-parser-state state
214
         #:cur-object node
215
         #:result
216
         (cons
217
           (make-rdf-triple
218
             (parser-state-cur-subject state)
219
             (parser-state-cur-predicate state)
220
             node)
221
           (parser-state-result state)))))
222
    (('blank-node ('blank-node-label label))
223
     (let* ((node
224
              (or (assoc-ref (parser-state-bnode-labels state) label)
225
                  ((parser-state-blank-node-gen state)))))
226
       (update-parser-state state
227
         #:cur-object node
228
         #:result
229
         (cons
230
           (make-rdf-triple
231
             (parser-state-cur-subject state)
232
             (parser-state-cur-predicate state)
233
             node)
234
           (parser-state-result state)))))
235
    (('blank-node-property-list ('predicate-object-list po ...))
236
     (let* ((node ((parser-state-blank-node-gen state)))
237
            (new-state (parse-predicate-object
238
                         po (update-parser-state state #:cur-subject node))))
239
       (update-parser-state new-state
240
         #:cur-object node
241
         #:cur-subject (parser-state-cur-subject state)
242
         #:cur-predicate (parser-state-cur-predicate state)
243
         #:result
244
         (cons
245
           (make-rdf-triple
246
             (parser-state-cur-subject state)
247
             (parser-state-cur-predicate state)
248
             node)
249
           (parser-state-result new-state)))))
250
    ('collection
251
     (let ((object "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil"))
252
       (update-parser-state state
253
         #:cur-object object
254
         #:result
255
         (cons
256
           (make-rdf-triple
257
             (parser-state-cur-subject state)
258
             (parser-state-cur-predicate state)
259
             object)
260
           (parser-state-result state)))))
261
    (('collection objects ...)
262
     (let ((state (parse-collection objects state)))
263
       (update-parser-state state
264
         #:result
265
         (cons
266
           (make-rdf-triple
267
             (parser-state-cur-subject state)
268
             (parser-state-cur-predicate state)
269
             (parser-state-cur-object state))
270
           (parser-state-result state)))))
271
    (('iri _)
272
     (let* ((res (parse-iri object state))
273
            (iri (assoc-ref res "iri"))
274
            (state (assoc-ref res "state")))
275
       (update-parser-state state
276
         #:cur-object iri
277
         #:result
278
         (cons
279
           (make-rdf-triple
280
             (parser-state-cur-subject state)
281
             (parser-state-cur-predicate state)
282
             iri)
283
           (parser-state-result state)))))))
284
285
(define (parse-collection collection state)
286
  (let ((node ((parser-state-blank-node-gen state))))
287
    (let loop ((objects collection)
288
               (new-state
289
                 (update-parser-state state
290
                   #:cur-subject node
291
                   #:cur-predicate
292
                   "http://www.w3.org/1999/02/22-rdf-syntax-ns#first"))
293
               (previous-object #nil))
294
      (match objects
295
        ('()
296
         (update-parser-state new-state
297
           #:cur-object node
298
           #:cur-subject (parser-state-cur-subject state)
299
           #:cur-predicate (parser-state-cur-predicate state)
300
           #:result
301
           (cons
302
             (make-rdf-triple
303
               previous-object
304
               "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"
305
               "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil")
306
             (parser-state-result new-state))))
307
        ((('object object) objects ...)
308
         (if (equal? previous-object #nil)
309
             (let ((new-state (parse-object object new-state)))
310
               (loop objects new-state node))
311
             (let* ((node ((parser-state-blank-node-gen new-state)))
312
                    (new-state
313
                      (update-parser-state new-state
314
                        #:cur-subject node))
315
                    (new-state (parse-object object new-state)))
316
               (loop
317
                 objects
318
                 (update-parser-state new-state
319
                   #:result
320
                   (cons
321
                     (make-rdf-triple
322
                       previous-object
323
                       "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"
324
                       node)
325
                     (parser-state-result new-state)))
326
                 node))))))))
327
328
(define (parse-object-list ol state)
329
  (let loop ((ol ol) (state state))
330
    (match ol
331
      ('() state)
332
      ((('object object) ol ...)
333
       (loop ol (parse-object object state)))
334
      ((ol)
335
       (loop ol state)))))
336
337
(define (parse-predicate-object po state)
338
  (let loop ((po po) (state state))
339
    (match po
340
      ((('verb verb) ('object-list ol ...) po)
341
       (let* ((verb (parse-verb verb state))
342
              (state (assoc-ref verb "state"))
343
              (verb (assoc-ref verb "verb"))
344
              (new-state (update-parser-state state #:cur-predicate verb))
345
              (res (parse-object-list ol new-state)))
346
         (loop po res)))
347
      ((('verb verb) ('object-list ol ...))
348
       (let* ((verb (parse-verb verb state))
349
              (state (assoc-ref verb "state"))
350
              (verb (assoc-ref verb "verb"))
351
              (new-state (update-parser-state state #:cur-predicate verb))
352
              (res (parse-object-list ol new-state)))
353
         res))
354
      (((('verb verb) ('object-list ol ...)) po ...)
355
       (let* ((verb (parse-verb verb state))
356
              (state (assoc-ref verb "state"))
357
              (verb (assoc-ref verb "verb"))
358
              (new-state (update-parser-state state #:cur-predicate verb))
359
              (res (parse-object-list ol new-state)))
360
         (loop po res)))
361
      ('() state)
362
      ((po)
363
       (loop po state)))))
364
365
(define (parse-subject s state)
366
  (match s
367
    (('iri _ ...)
368
     (let ((res (parse-iri s state)))
369
       `(("subject" . ,(assoc-ref res "iri"))
370
         ("state" . ,(assoc-ref res "state")))))
371
    (('collection objects ...)
372
     (let ((res (parse-collection objects state)))
373
       `(("subject" . ,(parser-state-cur-object res))
374
         ("state" . ,res))))
375
    ('collection
376
     `(("subject" . "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil")
377
       ("state" . ,state)))
378
    (('blank-node ('anon _))
379
     (let ((node ((parser-state-blank-node-gen state))))
380
       `(("subject" . ,node)
381
         ("state" . ,state))))
382
    (('blank-node ('blank-node-label label))
383
     (if (assoc-ref (parser-state-bnode-labels state) label)
384
         `(("subject" . ,(assoc-ref (parser-state-bnode-labels state) label))
385
           ("state" . ,state))
386
         (let ((node ((parser-state-blank-node-gen state))))
387
           `(("subject" . ,node)
388
             ("state" . ,(update-parser-state state
389
                           #:bnode-labels
390
                           (cons
391
                             (cons label node)
392
                             (parser-state-bnode-labels state))))))))))
393
394
(define (parse-triples t state)
395
  (match t
396
    ((('subject subject) ('predicate-object-list predicate-object ...))
397
     (let* ((res (parse-subject subject state))
398
            (subject (assoc-ref res "subject"))
399
            (state (assoc-ref res "state"))
400
            (state (update-parser-state state
401
                    #:cur-subject subject)))
402
       (parse-predicate-object predicate-object state)))
403
    ((('blank-node-property-list ('predicate-object-list po ...))
404
      ('predicate-object-list predicate-object ...))
405
     (let* ((subject ((parser-state-blank-node-gen state)))
406
            (new-state (parse-predicate-object
407
                         po (update-parser-state state #:cur-subject subject))))
408
       (parse-predicate-object predicate-object new-state)))
409
    ((('blank-node-property-list ('predicate-object-list po ...)))
410
     (let* ((subject ((parser-state-blank-node-gen state))))
411
       (parse-predicate-object po (update-parser-state state
412
                                    #:cur-subject subject))))))
413
414
(define (parse-turtle-doc parse-tree state)
415
  (let loop ((parse-tree parse-tree) (state state))
416
    (match parse-tree
417
      ('() (parser-state-result state))
418
      ((('prefix-id ('pname-ns ns) ('iriref iri ...)) parse-tree ...)
419
       (loop parse-tree
420
             (add-ns-to-state
421
               state ns (resolve-iri (parser-state-base-uri state)
422
                                     (parse-string iri)))))
423
      ((('prefix-id ('pname-ns ('iriref iri ...))) parse-tree ...)
424
       (loop parse-tree
425
             (add-ns-to-state
426
               state "" (resolve-iri (parser-state-base-uri state)
427
                                     (parse-string iri)))))
428
      ((('sparql-prefix ('pname-ns ns) ('iriref iri ...)) parse-tree ...)
429
       (loop parse-tree
430
             (add-ns-to-state
431
               state ns (resolve-iri (parser-state-base-uri state)
432
                                     (parse-string iri)))))
433
      ((('sparql-prefix ('pname-ns ('iriref iri ...))) parse-tree ...)
434
       (loop parse-tree
435
             (add-ns-to-state
436
               state "" (resolve-iri (parser-state-base-uri state)
437
                                     (parse-string iri)))))
438
      ((('base ('iriref iri ...)) parse-tree ...)
439
       (loop parse-tree
440
             (update-parser-state
441
               state #:base-uri (resolve-iri (parser-state-base-uri state)
442
                                             (parse-string iri)))))
443
      ((('sparql-base ('iriref iri ...)) parse-tree ...)
444
       (loop parse-tree
445
             (update-parser-state
446
               state #:base-uri (resolve-iri (parser-state-base-uri state)
447
                                             (parse-string iri)))))
448
      ((('triples t ...) parse-tree ...)
449
       (let ((res (parse-triples t state)))
450
         (loop parse-tree (parse-triples t state))))
451
      ;; otherwise, it's a single element, not a list of statements
452
      (((? symbol? _) _ ...) (loop (list parse-tree) state)))))
453
454
(define (tordf parse-tree base)
455
  (define state
456
    (make-parser-state base '() '() #f #f #f (create-generate-blank-node) '()))
457
  (parse-turtle-doc parse-tree state))
458
459
(define (turtle->rdf str-or-file base)
460
  (define str
461
    (cond
462
      ((file-exists? str-or-file) (call-with-input-file str-or-file get-string-all))
463
      ((string? str-or-file) str-or-file)))
464
      
465
  (let ((parse-tree (parse-turtle str)))
466
    (tordf parse-tree base)))
467