guile-rdf/turtle/tordf.scm

tordf.scm

1
;;;; Copyright (C) 2020 Julien Lepiller <julien@lepiller.eu>
2
;;;; 
3
;;;; This library is free software; you can redistribute it and/or
4
;;;; modify it under the terms of the GNU Lesser General Public
5
;;;; License as published by the Free Software Foundation; either
6
;;;; version 3 of the License, or (at your option) any later version.
7
;;;; 
8
;;;; This library is distributed in the hope that it will be useful,
9
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
10
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
;;;; Lesser General Public License for more details.
12
;;;; 
13
;;;; You should have received a copy of the GNU Lesser General Public
14
;;;; License along with this library; if not, write to the Free Software
15
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
;;;; 
17
18
(define-module (turtle tordf)
19
  #:use-module (ice-9 match)
20
  #:use-module (ice-9 textual-ports)
21
  #:use-module (iri iri)
22
  #:use-module (turtle parser)
23
  #:use-module (srfi srfi-9)
24
  #:use-module (rdf rdf)
25
  #:export (turtle->rdf))
26
27
(define-record-type parser-state
28
  (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate
29
                     blank-node-gen result)
30
  parser-state?
31
  (base-uri       parser-state-base-uri)
32
  (namespaces     parser-state-namespaces)
33
  (bnode-labels   parser-state-bnode-labels)
34
  (cur-subject    parser-state-cur-subject)
35
  (cur-predicate  parser-state-cur-predicate)
36
  (blank-node-gen parser-state-blank-node-gen)
37
  (result         parser-state-result))
38
39
(define* (update-parser-state
40
           state #:key (base-uri (parser-state-base-uri state))
41
                       (namespaces (parser-state-namespaces state))
42
                       (bnode-labels (parser-state-bnode-labels state))
43
                       (cur-subject (parser-state-cur-subject state))
44
                       (cur-predicate (parser-state-cur-predicate state))
45
                       (blank-node-gen (parser-state-blank-node-gen state))
46
                       (result (parser-state-result state)))
47
  (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate
48
                     blank-node-gen result))
49
50
(define (create-generate-blank-node)
51
  (define num 0)
52
  (lambda ()
53
    (set! num (+ num 1))
54
    num))
55
56
(define (add-ns-to-state state ns iri)
57
  (pk 'iri iri)
58
  (update-parser-state state
59
    #:namespaces (cons (cons ns iri) (parser-state-namespaces state))))
60
61
62
(define (parse-iri iri state)
63
  (format #t "iri: ~a~%" iri)
64
  (match iri
65
    (('iri ('prefixed-name ('pname-ln ('pname-ns ns) suffix)))
66
     `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) ns)
67
                               suffix))
68
       ("state" . ,state)))
69
    (('iri ('prefixed-name ('pname-ln ('pname-ns suffix))))
70
     `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) "")
71
                               suffix))
72
       ("state" . ,state)))
73
    (('iri ('prefixed-name 'pname-ns))
74
     `(("iri" . ,(assoc-ref (parser-state-namespaces state) ""))
75
       ("state" . ,state)))
76
    (('iri 'iriref)
77
     `(("iri" . ,(resolve-iri (parser-state-base-uri state) ""))
78
       ("state" . ,state)))
79
    (('iri ('iriref iri))
80
     `(("iri" . ,(resolve-iri (parser-state-base-uri state) iri))
81
       ("state" . ,state)))
82
    (('blank-node ('blank-node-label label))
83
     (if (assoc-ref (parser-state-bnode-labels state) label)
84
         `(("iri" . ,(assoc-ref (parser-state-bnode-labels state) label))
85
           ("state" . ,state))
86
         (let ((node ((parser-state-blank-node-gen state))))
87
           `(("iri" . ,node)
88
             ("state" . ,(update-parser-state state
89
                           #:bnode-labels
90
                           (cons
91
                             (cons label node)
92
                             (parser-state-bnode-labels state))))))))))
93
94
(define (parse-verb verb state)
95
  (match verb
96
    ("a" `(("verb" . "http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
97
           ("state" . ,state)))
98
    (('predicate iri)
99
     (let ((res (parse-iri iri state)))
100
       `(("verb" . ,(assoc-ref res "iri"))
101
         ("state" . ,(assoc-ref res "state")))))))
102
103
(define (parse-object object state)
104
  (pk 'object object)
105
  (match object
106
    (('rdf-literal ('string-pat (_ str)))
107
     (update-parser-state state
108
       #:result
109
       (cons
110
         (make-rdf-triple
111
           (parser-state-cur-subject state)
112
           (parser-state-cur-predicate state)
113
           (make-rdf-literal str "http://www.w3.org/2001/XMLSchema#string" #f))
114
         (parser-state-result state))))
115
    (('rdf-literal ('string-pat (_ str)) ('langtag lang))
116
     (update-parser-state state
117
       #:result
118
       (cons
119
         (make-rdf-triple
120
           (parser-state-cur-subject state)
121
           (parser-state-cur-predicate state)
122
           (make-rdf-literal
123
             str "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" lang))
124
         (parser-state-result state))))
125
    (('numeric-literal ('integer int))
126
     (update-parser-state state
127
       #:result
128
       (cons
129
         (make-rdf-triple
130
           (parser-state-cur-subject state)
131
           (parser-state-cur-predicate state)
132
           (make-rdf-literal int "http://www.w3.org/2001/XMLSchema#integer" #f))
133
         (parser-state-result state))))
134
    (('boolean-literal bool)
135
     (update-parser-state state
136
       #:result
137
       (cons
138
         (make-rdf-triple
139
           (parser-state-cur-subject state)
140
           (parser-state-cur-predicate state)
141
           (make-rdf-literal bool "http://www.w3.org/2001/XMLSchema#boolean" #f))
142
         (parser-state-result state))))
143
    (('blank-node-property-list ('predicate-object-list po ...))
144
     (let* ((node ((parser-state-blank-node-gen state)))
145
            (new-state (parse-predicate-object
146
                         po (update-parser-state state #:cur-subject node))))
147
       (update-parser-state new-state
148
         #:cur-subject (parser-state-cur-subject state)
149
         #:cur-predicate (parser-state-cur-predicate state)
150
         #:result
151
         (cons
152
           (make-rdf-triple
153
             (parser-state-cur-subject state)
154
             (parser-state-cur-predicate state)
155
             node)
156
           (parser-state-result new-state)))))
157
    (('collection objects ...)
158
     (let loop ((objects objects) (state state))
159
       (match objects
160
         ('() state)
161
         ((('object object) objects ...)
162
          (loop objects (parse-object object state))))))
163
    (('iri _)
164
     (let* ((res (parse-iri object state))
165
            (iri (assoc-ref res "iri"))
166
            (state (assoc-ref res "state")))
167
       (update-parser-state state
168
         #:result
169
         (cons
170
           (make-rdf-triple
171
             (parser-state-cur-subject state)
172
             (parser-state-cur-predicate state)
173
             iri)
174
           (parser-state-result state)))))))
175
176
(define (parse-object-list ol state)
177
  (let loop ((ol ol) (state state))
178
    (pk 'ol ol)
179
    (match ol
180
      ('() state)
181
      ((('object object) ol ...)
182
       (loop ol (parse-object object state)))
183
      ((ol)
184
       (loop ol state)))))
185
186
(define (parse-predicate-object po state)
187
  (let loop ((po po) (state state))
188
    (pk 'po po)
189
    (match po
190
      ((('verb verb) ('object-list ol ...) po)
191
       (let* ((verb (parse-verb verb state))
192
              (state (assoc-ref verb "state"))
193
              (verb (assoc-ref verb "verb"))
194
              (new-state (update-parser-state state #:cur-predicate verb))
195
              (res (parse-object-list ol new-state)))
196
         (loop po res)))
197
      ((('verb verb) ('object-list ol ...))
198
       (let* ((verb (parse-verb verb state))
199
              (state (assoc-ref verb "state"))
200
              (verb (assoc-ref verb "verb"))
201
              (new-state (update-parser-state state #:cur-predicate verb))
202
              (res (parse-object-list ol new-state)))
203
         res))
204
      (((('verb verb) ('object-list ol ...)) po ...)
205
       (let* ((verb (parse-verb verb state))
206
              (state (assoc-ref verb "state"))
207
              (verb (assoc-ref verb "verb"))
208
              (new-state (update-parser-state state #:cur-predicate verb))
209
              (res (parse-object-list ol new-state)))
210
         (loop po res)))
211
      ('() state)
212
      ((po)
213
       (loop po state)))))
214
215
(define (parse-triples t state)
216
  (match t
217
    ((('subject iri) ('predicate-object-list predicate-object ...))
218
     (let* ((res (parse-iri iri state))
219
            (iri (assoc-ref res "iri"))
220
            (state (assoc-ref res "state"))
221
            (state (update-parser-state state
222
                    #:cur-subject iri)))
223
       (parse-predicate-object predicate-object state)))))
224
225
(define (parse-turtle-doc parse-tree state)
226
  (let loop ((parse-tree parse-tree) (state state))
227
    (match parse-tree
228
      ('() (parser-state-result state))
229
      ((('prefix-id ('pname-ns ns) ('iriref iri)) parse-tree ...)
230
       (loop parse-tree
231
             (add-ns-to-state
232
               state ns (resolve-iri (parser-state-base-uri state) iri))))
233
      ((('prefix-id ('pname-ns ('iriref iri))) parse-tree ...)
234
       (loop parse-tree
235
             (add-ns-to-state
236
               state "" (resolve-iri (parser-state-base-uri state) iri))))
237
      ((('sparql-prefix ('pname-ns ns) ('iriref iri)) parse-tree ...)
238
       (loop parse-tree
239
             (add-ns-to-state
240
               state ns (resolve-iri (parser-state-base-uri state) iri))))
241
      ((('sparql-prefix ('pname-ns ('iriref iri))) parse-tree ...)
242
       (loop parse-tree
243
             (add-ns-to-state
244
               state "" (resolve-iri (parser-state-base-uri state) iri))))
245
      ((('base ('iriref iri)) parse-tree ...)
246
       (loop parse-tree
247
             (update-parser-state
248
               state #:base-uri (resolve-iri (parser-state-base-uri state) iri))))
249
      ((('sparql-base ('iriref iri)) parse-tree ...)
250
       (loop parse-tree
251
             (update-parser-state
252
               state #:base-uri (resolve-iri iri (parser-state-base-uri state)))))
253
      ((('triples t ...) parse-tree ...)
254
       (format #t "triples: ~a~%" t)
255
       (let ((res (parse-triples t state)))
256
         (loop parse-tree (parse-triples t state))))
257
      ;; otherwise, it's a single element, not a list of statements
258
      (((? symbol? _) _ ...) (loop (list parse-tree) state)))))
259
260
(define (tordf parse-tree base)
261
  (define state
262
    (make-parser-state base '() '() #f #f (create-generate-blank-node) '()))
263
  (parse-turtle-doc parse-tree state))
264
265
(define (turtle->rdf str-or-file base)
266
  (define str
267
    (cond
268
      ((file-exists? str-or-file) (call-with-input-file str-or-file get-string-all))
269
      ((string? str-or-file) str-or-file)))
270
      
271
  (let ((parse-tree (parse-turtle str)))
272
    (tordf parse-tree base)))
273