guile-rdf/turtle/tordf.scm

tordf.scm

1
;;;; Copyright (C) 2020 Julien Lepiller <julien@lepiller.eu>
2
;;;; 
3
;;;; This library is free software; you can redistribute it and/or
4
;;;; modify it under the terms of the GNU Lesser General Public
5
;;;; License as published by the Free Software Foundation; either
6
;;;; version 3 of the License, or (at your option) any later version.
7
;;;; 
8
;;;; This library is distributed in the hope that it will be useful,
9
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
10
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
;;;; Lesser General Public License for more details.
12
;;;; 
13
;;;; You should have received a copy of the GNU Lesser General Public
14
;;;; License along with this library; if not, write to the Free Software
15
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
;;;; 
17
18
(define-module (turtle tordf)
19
  #:use-module (ice-9 match)
20
  #:use-module (ice-9 textual-ports)
21
  #:use-module (iri iri)
22
  #:use-module (turtle parser)
23
  #:use-module (srfi srfi-9)
24
  #:use-module (rdf rdf)
25
  #:export (turtle->rdf))
26
27
(define-record-type parser-state
28
  (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate
29
                     cur-object blank-node-gen result)
30
  parser-state?
31
  (base-uri       parser-state-base-uri)
32
  (namespaces     parser-state-namespaces)
33
  (bnode-labels   parser-state-bnode-labels)
34
  (cur-subject    parser-state-cur-subject)
35
  (cur-predicate  parser-state-cur-predicate)
36
  (cur-object     parser-state-cur-object)
37
  (blank-node-gen parser-state-blank-node-gen)
38
  (result         parser-state-result))
39
40
(define* (update-parser-state
41
           state #:key (base-uri (parser-state-base-uri state))
42
                       (namespaces (parser-state-namespaces state))
43
                       (bnode-labels (parser-state-bnode-labels state))
44
                       (cur-subject (parser-state-cur-subject state))
45
                       (cur-predicate (parser-state-cur-predicate state))
46
                       (cur-object (parser-state-cur-object state))
47
                       (blank-node-gen (parser-state-blank-node-gen state))
48
                       (result (parser-state-result state)))
49
  (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate
50
                     cur-object blank-node-gen result))
51
52
(define (create-generate-blank-node)
53
  (define num 0)
54
  (lambda ()
55
    (set! num (+ num 1))
56
    num))
57
58
(define (add-ns-to-state state ns iri)
59
  (pk 'iri iri)
60
  (update-parser-state state
61
    #:namespaces (cons (cons ns iri) (parser-state-namespaces state))))
62
63
64
(define (parse-iri iri state)
65
  (format #t "iri: ~a~%" iri)
66
  (match iri
67
    (('iri ('prefixed-name ('pname-ln ('pname-ns ns) suffix)))
68
     `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) ns)
69
                               suffix))
70
       ("state" . ,state)))
71
    (('iri ('prefixed-name ('pname-ln ('pname-ns suffix))))
72
     `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) "")
73
                               suffix))
74
       ("state" . ,state)))
75
    (('iri ('prefixed-name 'pname-ns))
76
     `(("iri" . ,(assoc-ref (parser-state-namespaces state) ""))
77
       ("state" . ,state)))
78
    (('iri 'iriref)
79
     `(("iri" . ,(resolve-iri (parser-state-base-uri state) ""))
80
       ("state" . ,state)))
81
    (('iri ('iriref iri))
82
     `(("iri" . ,(resolve-iri (parser-state-base-uri state) iri))
83
       ("state" . ,state)))))
84
85
(define (parse-verb verb state)
86
  (match verb
87
    ("a" `(("verb" . "http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
88
           ("state" . ,state)))
89
    (('predicate iri)
90
     (let ((res (parse-iri iri state)))
91
       `(("verb" . ,(assoc-ref res "iri"))
92
         ("state" . ,(assoc-ref res "state")))))))
93
94
(define (parse-object object state)
95
  (pk 'object object)
96
  (match object
97
    (('rdf-literal ('string-pat (_ str)))
98
     (let ((object
99
             (make-rdf-literal str "http://www.w3.org/2001/XMLSchema#string" #f)))
100
       (update-parser-state state
101
         #:cur-object object
102
         #:result
103
         (cons
104
           (make-rdf-triple
105
             (parser-state-cur-subject state)
106
             (parser-state-cur-predicate state)
107
             (make-rdf-literal str "http://www.w3.org/2001/XMLSchema#string" #f))
108
           (parser-state-result state)))))
109
    (('rdf-literal ('string-pat (_ str)) ("^^" iri))
110
     (let* ((res (parse-iri iri state))
111
            (iri (assoc-ref res "iri"))
112
            (state (assoc-ref res "state"))
113
            (object (make-rdf-literal str iri #f)))
114
       (update-parser-state state
115
         #:cur-object object
116
         #:result
117
         (cons
118
           (make-rdf-triple
119
             (parser-state-cur-subject state)
120
             (parser-state-cur-predicate state)
121
             (make-rdf-literal str "http://www.w3.org/2001/XMLSchema#string" #f))
122
           (parser-state-result state)))))
123
    (('rdf-literal ('string-pat (_ str)) ('langtag lang))
124
     (let ((object
125
           (make-rdf-literal
126
             str "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" lang)))
127
       (update-parser-state state
128
         #:cur-object object
129
         #:result
130
         (cons
131
           (make-rdf-triple
132
             (parser-state-cur-subject state)
133
             (parser-state-cur-predicate state)
134
             object)
135
           (parser-state-result state)))))
136
    (('numeric-literal ('decimal num))
137
     (let ((object
138
            (make-rdf-literal num "http://www.w3.org/2001/XMLSchema#decimal" #f)))
139
       (update-parser-state state
140
         #:cur-object object
141
         #:result
142
         (cons
143
           (make-rdf-triple
144
             (parser-state-cur-subject state)
145
             (parser-state-cur-predicate state)
146
             object)
147
           (parser-state-result state)))))
148
    (('numeric-literal ('integer int))
149
     (let ((object
150
           (make-rdf-literal int "http://www.w3.org/2001/XMLSchema#integer" #f)))
151
       (update-parser-state state
152
         #:cur-object object
153
         #:result
154
         (cons
155
           (make-rdf-triple
156
             (parser-state-cur-subject state)
157
             (parser-state-cur-predicate state)
158
             object)
159
           (parser-state-result state)))))
160
    (('numeric-literal ('double num))
161
     (let ((object
162
            (make-rdf-literal num "http://www.w3.org/2001/XMLSchema#double" #f)))
163
       (update-parser-state state
164
         #:cur-object object
165
         #:result
166
         (cons
167
           (make-rdf-triple
168
             (parser-state-cur-subject state)
169
             (parser-state-cur-predicate state)
170
             object)
171
           (parser-state-result state)))))
172
    (('boolean-literal bool)
173
     (let ((object
174
             (make-rdf-literal bool "http://www.w3.org/2001/XMLSchema#boolean" #f)))
175
       (update-parser-state state
176
         #:cur-object object
177
         #:result
178
         (cons
179
           (make-rdf-triple
180
             (parser-state-cur-subject state)
181
             (parser-state-cur-predicate state)
182
             object)
183
           (parser-state-result state)))))
184
    (('blank-node ('anon _))
185
     (let ((node ((parser-state-blank-node-gen state))))
186
       (update-parser-state state
187
         #:cur-object node
188
         #:result
189
         (cons
190
           (make-rdf-triple
191
             (parser-state-cur-subject state)
192
             (parser-state-cur-predicate state)
193
             node)
194
           (parser-state-result state)))))
195
    (('blank-node ('blank-node-label label))
196
     (let* ((node
197
              (or (assoc-ref (parser-state-bnode-labels state) label)
198
                  ((parser-state-blank-node-gen state)))))
199
       (update-parser-state state
200
         #:cur-object node
201
         #:result
202
         (cons
203
           (make-rdf-triple
204
             (parser-state-cur-subject state)
205
             (parser-state-cur-predicate state)
206
             node)
207
           (parser-state-result state)))))
208
    (('blank-node-property-list ('predicate-object-list po ...))
209
     (let* ((node ((parser-state-blank-node-gen state)))
210
            (new-state (parse-predicate-object
211
                         po (update-parser-state state #:cur-subject node))))
212
       (update-parser-state new-state
213
         #:cur-object node
214
         #:cur-subject (parser-state-cur-subject state)
215
         #:cur-predicate (parser-state-cur-predicate state)
216
         #:result
217
         (cons
218
           (make-rdf-triple
219
             (parser-state-cur-subject state)
220
             (parser-state-cur-predicate state)
221
             node)
222
           (parser-state-result new-state)))))
223
    ('collection
224
     (let ((object "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil"))
225
       (update-parser-state state
226
         #:cur-object object
227
         #:result
228
         (cons
229
           (make-rdf-triple
230
             (parser-state-cur-subject state)
231
             (parser-state-cur-predicate state)
232
             object)
233
           (parser-state-result state)))))
234
    (('collection objects ...)
235
     (let ((state (parse-collection objects state)))
236
       (update-parser-state state
237
         #:result
238
         (cons
239
           (make-rdf-triple
240
             (parser-state-cur-subject state)
241
             (parser-state-cur-predicate state)
242
             (parser-state-cur-object state))
243
           (parser-state-result state)))))
244
    (('iri _)
245
     (let* ((res (parse-iri object state))
246
            (iri (assoc-ref res "iri"))
247
            (state (assoc-ref res "state")))
248
       (update-parser-state state
249
         #:cur-object iri
250
         #:result
251
         (cons
252
           (make-rdf-triple
253
             (parser-state-cur-subject state)
254
             (parser-state-cur-predicate state)
255
             iri)
256
           (parser-state-result state)))))))
257
258
(define (parse-collection collection state)
259
  (let ((node ((parser-state-blank-node-gen state))))
260
    (let loop ((objects collection)
261
               (new-state
262
                 (update-parser-state state
263
                   #:cur-subject node
264
                   #:cur-predicate
265
                   "http://www.w3.org/1999/02/22-rdf-syntax-ns#first"))
266
               (previous-object #nil))
267
      (match objects
268
        ('()
269
         (update-parser-state new-state
270
           #:cur-object node
271
           #:cur-subject (parser-state-cur-subject state)
272
           #:cur-predicate (parser-state-cur-predicate state)
273
           #:result
274
           (cons
275
             (make-rdf-triple
276
               previous-object
277
               "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"
278
               "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil")
279
             (parser-state-result new-state))))
280
        ((('object object) objects ...)
281
         (if (equal? previous-object #nil)
282
             (let ((new-state (parse-object object new-state)))
283
               (loop objects new-state node))
284
             (let* ((node ((parser-state-blank-node-gen new-state)))
285
                    (new-state
286
                      (update-parser-state new-state
287
                        #:cur-subject node))
288
                    (new-state (parse-object object new-state)))
289
               (loop
290
                 objects
291
                 (update-parser-state new-state
292
                   #:result
293
                   (cons
294
                     (make-rdf-triple
295
                       previous-object
296
                       "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"
297
                       node)
298
                     (parser-state-result new-state)))
299
                 node))))))))
300
301
(define (parse-object-list ol state)
302
  (let loop ((ol ol) (state state))
303
    (pk 'ol ol)
304
    (match ol
305
      ('() state)
306
      ((('object object) ol ...)
307
       (loop ol (parse-object object state)))
308
      ((ol)
309
       (loop ol state)))))
310
311
(define (parse-predicate-object po state)
312
  (let loop ((po po) (state state))
313
    (pk 'po po)
314
    (match po
315
      ((('verb verb) ('object-list ol ...) po)
316
       (let* ((verb (parse-verb verb state))
317
              (state (assoc-ref verb "state"))
318
              (verb (assoc-ref verb "verb"))
319
              (new-state (update-parser-state state #:cur-predicate verb))
320
              (res (parse-object-list ol new-state)))
321
         (loop po res)))
322
      ((('verb verb) ('object-list ol ...))
323
       (let* ((verb (parse-verb verb state))
324
              (state (assoc-ref verb "state"))
325
              (verb (assoc-ref verb "verb"))
326
              (new-state (update-parser-state state #:cur-predicate verb))
327
              (res (parse-object-list ol new-state)))
328
         res))
329
      (((('verb verb) ('object-list ol ...)) po ...)
330
       (let* ((verb (parse-verb verb state))
331
              (state (assoc-ref verb "state"))
332
              (verb (assoc-ref verb "verb"))
333
              (new-state (update-parser-state state #:cur-predicate verb))
334
              (res (parse-object-list ol new-state)))
335
         (loop po res)))
336
      ('() state)
337
      ((po)
338
       (loop po state)))))
339
340
(define (parse-subject s state)
341
  (match s
342
    (('iri _ ...)
343
     (let ((res (parse-iri s state)))
344
       `(("subject" . ,(assoc-ref res "iri"))
345
         ("state" . ,(assoc-ref res "state")))))
346
    (('collection objects ...)
347
     (let ((res (parse-collection objects state)))
348
       `(("subject" . ,(parser-state-cur-object res))
349
         ("state" . ,res))))
350
    ('collection
351
     `(("subject" . "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil")
352
       ("state" . ,state)))
353
    (('blank-node ('anon _))
354
     (let ((node ((parser-state-blank-node-gen state))))
355
       `(("subject" . ,node)
356
         ("state" . ,state))))
357
    (('blank-node ('blank-node-label label))
358
     (if (assoc-ref (parser-state-bnode-labels state) label)
359
         `(("subject" . ,(assoc-ref (parser-state-bnode-labels state) label))
360
           ("state" . ,state))
361
         (let ((node ((parser-state-blank-node-gen state))))
362
           `(("subject" . ,node)
363
             ("state" . ,(update-parser-state state
364
                           #:bnode-labels
365
                           (cons
366
                             (cons label node)
367
                             (parser-state-bnode-labels state))))))))))
368
369
(define (parse-triples t state)
370
  (match t
371
    ((('subject subject) ('predicate-object-list predicate-object ...))
372
     (let* ((res (parse-subject subject state))
373
            (subject (assoc-ref res "subject"))
374
            (state (assoc-ref res "state"))
375
            (state (update-parser-state state
376
                    #:cur-subject subject)))
377
       (parse-predicate-object predicate-object state)))
378
    ((('blank-node-property-list ('predicate-object-list po ...))
379
      ('predicate-object-list predicate-object ...))
380
     (let* ((subject ((parser-state-blank-node-gen state)))
381
            (new-state (parse-predicate-object
382
                         po (update-parser-state state #:cur-subject subject))))
383
       (parse-predicate-object predicate-object new-state)))
384
    ((('blank-node-property-list ('predicate-object-list po ...)))
385
     (let* ((subject ((parser-state-blank-node-gen state))))
386
       (parse-predicate-object po (update-parser-state state
387
                                    #:cur-subject subject))))))
388
389
(define (parse-turtle-doc parse-tree state)
390
  (let loop ((parse-tree parse-tree) (state state))
391
    (match parse-tree
392
      ('() (parser-state-result state))
393
      ((('prefix-id ('pname-ns ns) ('iriref iri)) parse-tree ...)
394
       (loop parse-tree
395
             (add-ns-to-state
396
               state ns (resolve-iri (parser-state-base-uri state) iri))))
397
      ((('prefix-id ('pname-ns ('iriref iri))) parse-tree ...)
398
       (loop parse-tree
399
             (add-ns-to-state
400
               state "" (resolve-iri (parser-state-base-uri state) iri))))
401
      ((('sparql-prefix ('pname-ns ns) ('iriref iri)) parse-tree ...)
402
       (loop parse-tree
403
             (add-ns-to-state
404
               state ns (resolve-iri (parser-state-base-uri state) iri))))
405
      ((('sparql-prefix ('pname-ns ('iriref iri))) parse-tree ...)
406
       (loop parse-tree
407
             (add-ns-to-state
408
               state "" (resolve-iri (parser-state-base-uri state) iri))))
409
      ((('base ('iriref iri)) parse-tree ...)
410
       (loop parse-tree
411
             (update-parser-state
412
               state #:base-uri (resolve-iri (parser-state-base-uri state) iri))))
413
      ((('sparql-base ('iriref iri)) parse-tree ...)
414
       (loop parse-tree
415
             (update-parser-state
416
               state #:base-uri (resolve-iri iri (parser-state-base-uri state)))))
417
      ((('triples t ...) parse-tree ...)
418
       (format #t "triples: ~a~%" t)
419
       (let ((res (parse-triples t state)))
420
         (loop parse-tree (parse-triples t state))))
421
      ;; otherwise, it's a single element, not a list of statements
422
      (((? symbol? _) _ ...) (loop (list parse-tree) state)))))
423
424
(define (tordf parse-tree base)
425
  (define state
426
    (make-parser-state base '() '() #f #f #f (create-generate-blank-node) '()))
427
  (parse-turtle-doc parse-tree state))
428
429
(define (turtle->rdf str-or-file base)
430
  (define str
431
    (cond
432
      ((file-exists? str-or-file) (call-with-input-file str-or-file get-string-all))
433
      ((string? str-or-file) str-or-file)))
434
      
435
  (let ((parse-tree (parse-turtle str)))
436
    (tordf parse-tree base)))
437