tordf.scm
1 | ;;;; Copyright (C) 2020 Julien Lepiller <julien@lepiller.eu> |
2 | ;;;; |
3 | ;;;; This library is free software; you can redistribute it and/or |
4 | ;;;; modify it under the terms of the GNU Lesser General Public |
5 | ;;;; License as published by the Free Software Foundation; either |
6 | ;;;; version 3 of the License, or (at your option) any later version. |
7 | ;;;; |
8 | ;;;; This library is distributed in the hope that it will be useful, |
9 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
11 | ;;;; Lesser General Public License for more details. |
12 | ;;;; |
13 | ;;;; You should have received a copy of the GNU Lesser General Public |
14 | ;;;; License along with this library; if not, write to the Free Software |
15 | ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
16 | ;;;; |
17 | |
18 | (define-module (turtle tordf) |
19 | #:use-module (ice-9 match) |
20 | #:use-module (ice-9 textual-ports) |
21 | #:use-module (iri iri) |
22 | #:use-module (turtle parser) |
23 | #:use-module (srfi srfi-9) |
24 | #:use-module (rdf rdf) |
25 | #:export (turtle->rdf)) |
26 | |
27 | (define-record-type parser-state |
28 | (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate |
29 | cur-object blank-node-gen result) |
30 | parser-state? |
31 | (base-uri parser-state-base-uri) |
32 | (namespaces parser-state-namespaces) |
33 | (bnode-labels parser-state-bnode-labels) |
34 | (cur-subject parser-state-cur-subject) |
35 | (cur-predicate parser-state-cur-predicate) |
36 | (cur-object parser-state-cur-object) |
37 | (blank-node-gen parser-state-blank-node-gen) |
38 | (result parser-state-result)) |
39 | |
40 | (define* (update-parser-state |
41 | state #:key (base-uri (parser-state-base-uri state)) |
42 | (namespaces (parser-state-namespaces state)) |
43 | (bnode-labels (parser-state-bnode-labels state)) |
44 | (cur-subject (parser-state-cur-subject state)) |
45 | (cur-predicate (parser-state-cur-predicate state)) |
46 | (cur-object (parser-state-cur-object state)) |
47 | (blank-node-gen (parser-state-blank-node-gen state)) |
48 | (result (parser-state-result state))) |
49 | (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate |
50 | cur-object blank-node-gen result)) |
51 | |
52 | (define (create-generate-blank-node) |
53 | (define num 0) |
54 | (lambda () |
55 | (set! num (+ num 1)) |
56 | num)) |
57 | |
58 | (define (add-ns-to-state state ns iri) |
59 | (update-parser-state state |
60 | #:namespaces (cons (cons ns iri) (parser-state-namespaces state)))) |
61 | |
62 | (define* (parse-string str #:optional for-iri?) |
63 | (match str |
64 | ((? string? str) str) |
65 | ((component str ...) |
66 | (match component |
67 | ((? string? str1) |
68 | (string-append str1 (parse-string str))) |
69 | (('uchar n) |
70 | (string-append (string (integer->char (string->number n 16))) |
71 | (parse-string str))) |
72 | (('echar e) |
73 | (string-append |
74 | (match e |
75 | ("\\t" "\t") |
76 | ("\\b" "\b") |
77 | ("\\n" "\n") |
78 | ("\\r" "\r") |
79 | ("\\f" "\f") |
80 | ("\\\\" "\\") |
81 | ("\\\"" "\"") |
82 | ("\\'" "'")) |
83 | (parse-string str))))) |
84 | (() ""))) |
85 | |
86 | (define (valid-iri? iri) |
87 | (and (not (string-any (ucs-range->char-set 0 33) iri)) |
88 | (not (string-any #\< iri)) |
89 | (not (string-any #\> iri)))) |
90 | |
91 | (define (parse-iri iri state) |
92 | (pk 'iri iri) |
93 | (match iri |
94 | (('iri ('prefixed-name ('pname-ln ('pname-ns ns) ('pn-local suffix)))) |
95 | `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) ns) |
96 | suffix)) |
97 | ("state" . ,state))) |
98 | (('iri ('prefixed-name ('pname-ln ('pname-ns ('pn-local suffix))))) |
99 | `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) "") |
100 | suffix)) |
101 | ("state" . ,state))) |
102 | (('iri ('prefixed-name ('pname-ns ns))) |
103 | `(("iri" . ,(assoc-ref (parser-state-namespaces state) ns)) |
104 | ("state" . ,state))) |
105 | (('iri ('prefixed-name 'pname-ns)) |
106 | `(("iri" . ,(assoc-ref (parser-state-namespaces state) "")) |
107 | ("state" . ,state))) |
108 | (('iri 'iriref) |
109 | `(("iri" . ,(resolve-iri (parser-state-base-uri state) "")) |
110 | ("state" . ,state))) |
111 | (('iri ('iriref iri ...)) |
112 | (let ((iri (resolve-iri (parser-state-base-uri state) (parse-string iri)))) |
113 | (if (valid-iri? iri) |
114 | `(("iri" . ,iri) |
115 | ("state" . ,state)) |
116 | (throw 'invalid-iri iri)))))) |
117 | |
118 | (define (parse-verb verb state) |
119 | (match verb |
120 | ("a" `(("verb" . "http://www.w3.org/1999/02/22-rdf-syntax-ns#type") |
121 | ("state" . ,state))) |
122 | (('predicate iri) |
123 | (let ((res (parse-iri iri state))) |
124 | `(("verb" . ,(assoc-ref res "iri")) |
125 | ("state" . ,(assoc-ref res "state"))))))) |
126 | |
127 | (define (parse-object object state) |
128 | (match object |
129 | (('rdf-literal ('string-pat (_ str ...))) |
130 | (let ((object |
131 | (make-rdf-literal (parse-string str) |
132 | "http://www.w3.org/2001/XMLSchema#string" #f))) |
133 | (update-parser-state state |
134 | #:cur-object object |
135 | #:result |
136 | (cons |
137 | (make-rdf-triple |
138 | (parser-state-cur-subject state) |
139 | (parser-state-cur-predicate state) |
140 | object) |
141 | (parser-state-result state))))) |
142 | (('rdf-literal ('string-pat (_ str ...)) ("^^" iri)) |
143 | (let* ((res (parse-iri iri state)) |
144 | (iri (assoc-ref res "iri")) |
145 | (state (assoc-ref res "state")) |
146 | (object (make-rdf-literal (parse-string str) iri #f))) |
147 | (update-parser-state state |
148 | #:cur-object object |
149 | #:result |
150 | (cons |
151 | (make-rdf-triple |
152 | (parser-state-cur-subject state) |
153 | (parser-state-cur-predicate state) |
154 | object) |
155 | (parser-state-result state))))) |
156 | (('rdf-literal ('string-pat (_ str ...)) ('langtag lang)) |
157 | (let ((object |
158 | (make-rdf-literal |
159 | (parse-string str) |
160 | "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" |
161 | lang))) |
162 | (update-parser-state state |
163 | #:cur-object object |
164 | #:result |
165 | (cons |
166 | (make-rdf-triple |
167 | (parser-state-cur-subject state) |
168 | (parser-state-cur-predicate state) |
169 | object) |
170 | (parser-state-result state))))) |
171 | (('rdf-literal ('string-pat _) ("^^" iri)) |
172 | (let* ((res (parse-iri iri state)) |
173 | (iri (assoc-ref res "iri")) |
174 | (state (assoc-ref res "state")) |
175 | (object (make-rdf-literal "" iri #f))) |
176 | (update-parser-state state |
177 | #:cur-object object |
178 | #:result |
179 | (cons |
180 | (make-rdf-triple |
181 | (parser-state-cur-subject state) |
182 | (parser-state-cur-predicate state) |
183 | object) |
184 | (parser-state-result state))))) |
185 | (('rdf-literal ('string-pat _) ('langtag lang)) |
186 | (let ((object |
187 | (make-rdf-literal |
188 | "" |
189 | "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" |
190 | lang))) |
191 | (update-parser-state state |
192 | #:cur-object object |
193 | #:result |
194 | (cons |
195 | (make-rdf-triple |
196 | (parser-state-cur-subject state) |
197 | (parser-state-cur-predicate state) |
198 | object) |
199 | (parser-state-result state))))) |
200 | (('rdf-literal ('string-pat _)) |
201 | (let ((object |
202 | (make-rdf-literal |
203 | "" |
204 | "http://www.w3.org/1999/02/22-rdf-syntax-ns#string" |
205 | #f))) |
206 | (update-parser-state state |
207 | #:cur-object object |
208 | #:result |
209 | (cons |
210 | (make-rdf-triple |
211 | (parser-state-cur-subject state) |
212 | (parser-state-cur-predicate state) |
213 | object) |
214 | (parser-state-result state))))) |
215 | (('numeric-literal ('decimal num)) |
216 | (let ((object |
217 | (make-rdf-literal num "http://www.w3.org/2001/XMLSchema#decimal" #f))) |
218 | (update-parser-state state |
219 | #:cur-object object |
220 | #:result |
221 | (cons |
222 | (make-rdf-triple |
223 | (parser-state-cur-subject state) |
224 | (parser-state-cur-predicate state) |
225 | object) |
226 | (parser-state-result state))))) |
227 | (('numeric-literal ('integer int)) |
228 | (let ((object |
229 | (make-rdf-literal int "http://www.w3.org/2001/XMLSchema#integer" #f))) |
230 | (update-parser-state state |
231 | #:cur-object object |
232 | #:result |
233 | (cons |
234 | (make-rdf-triple |
235 | (parser-state-cur-subject state) |
236 | (parser-state-cur-predicate state) |
237 | object) |
238 | (parser-state-result state))))) |
239 | (('numeric-literal ('double num)) |
240 | (let ((object |
241 | (make-rdf-literal num "http://www.w3.org/2001/XMLSchema#double" #f))) |
242 | (update-parser-state state |
243 | #:cur-object object |
244 | #:result |
245 | (cons |
246 | (make-rdf-triple |
247 | (parser-state-cur-subject state) |
248 | (parser-state-cur-predicate state) |
249 | object) |
250 | (parser-state-result state))))) |
251 | (('boolean-literal bool) |
252 | (let ((object |
253 | (make-rdf-literal bool "http://www.w3.org/2001/XMLSchema#boolean" #f))) |
254 | (update-parser-state state |
255 | #:cur-object object |
256 | #:result |
257 | (cons |
258 | (make-rdf-triple |
259 | (parser-state-cur-subject state) |
260 | (parser-state-cur-predicate state) |
261 | object) |
262 | (parser-state-result state))))) |
263 | (('blank-node ('anon _)) |
264 | (let ((node ((parser-state-blank-node-gen state)))) |
265 | (update-parser-state state |
266 | #:cur-object node |
267 | #:result |
268 | (cons |
269 | (make-rdf-triple |
270 | (parser-state-cur-subject state) |
271 | (parser-state-cur-predicate state) |
272 | node) |
273 | (parser-state-result state))))) |
274 | (('blank-node ('blank-node-label label)) |
275 | (let* ((node |
276 | (or (assoc-ref (parser-state-bnode-labels state) label) |
277 | ((parser-state-blank-node-gen state)))) |
278 | (state |
279 | (if (assoc-ref (parser-state-bnode-labels state) label) |
280 | state |
281 | (update-parser-state state |
282 | #:bnode-labels |
283 | (cons |
284 | (cons label node) |
285 | (parser-state-bnode-labels state)))))) |
286 | (update-parser-state state |
287 | #:cur-object node |
288 | #:result |
289 | (cons |
290 | (make-rdf-triple |
291 | (parser-state-cur-subject state) |
292 | (parser-state-cur-predicate state) |
293 | node) |
294 | (parser-state-result state))))) |
295 | (('blank-node-property-list ('predicate-object-list po ...)) |
296 | (let* ((node ((parser-state-blank-node-gen state))) |
297 | (new-state (parse-predicate-object |
298 | po (update-parser-state state #:cur-subject node)))) |
299 | (update-parser-state new-state |
300 | #:cur-object node |
301 | #:cur-subject (parser-state-cur-subject state) |
302 | #:cur-predicate (parser-state-cur-predicate state) |
303 | #:result |
304 | (cons |
305 | (make-rdf-triple |
306 | (parser-state-cur-subject state) |
307 | (parser-state-cur-predicate state) |
308 | node) |
309 | (parser-state-result new-state))))) |
310 | ('collection |
311 | (let ((object "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil")) |
312 | (update-parser-state state |
313 | #:cur-object object |
314 | #:result |
315 | (cons |
316 | (make-rdf-triple |
317 | (parser-state-cur-subject state) |
318 | (parser-state-cur-predicate state) |
319 | object) |
320 | (parser-state-result state))))) |
321 | (('collection objects ...) |
322 | (let ((state (parse-collection objects state))) |
323 | (update-parser-state state |
324 | #:result |
325 | (cons |
326 | (make-rdf-triple |
327 | (parser-state-cur-subject state) |
328 | (parser-state-cur-predicate state) |
329 | (parser-state-cur-object state)) |
330 | (parser-state-result state))))) |
331 | (('iri _) |
332 | (let* ((res (parse-iri object state)) |
333 | (iri (assoc-ref res "iri")) |
334 | (state (assoc-ref res "state"))) |
335 | (update-parser-state state |
336 | #:cur-object iri |
337 | #:result |
338 | (cons |
339 | (make-rdf-triple |
340 | (parser-state-cur-subject state) |
341 | (parser-state-cur-predicate state) |
342 | iri) |
343 | (parser-state-result state))))))) |
344 | |
345 | (define (parse-collection collection state) |
346 | (let ((node ((parser-state-blank-node-gen state)))) |
347 | (let loop ((objects collection) |
348 | (new-state |
349 | (update-parser-state state |
350 | #:cur-subject node |
351 | #:cur-predicate |
352 | "http://www.w3.org/1999/02/22-rdf-syntax-ns#first")) |
353 | (previous-object #nil)) |
354 | (match objects |
355 | ('() |
356 | (update-parser-state new-state |
357 | #:cur-object node |
358 | #:cur-subject (parser-state-cur-subject state) |
359 | #:cur-predicate (parser-state-cur-predicate state) |
360 | #:result |
361 | (cons |
362 | (make-rdf-triple |
363 | previous-object |
364 | "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" |
365 | "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil") |
366 | (parser-state-result new-state)))) |
367 | ((('object object) objects ...) |
368 | (if (equal? previous-object #nil) |
369 | (let ((new-state (parse-object object new-state))) |
370 | (loop objects new-state node)) |
371 | (let* ((node ((parser-state-blank-node-gen new-state))) |
372 | (new-state |
373 | (update-parser-state new-state |
374 | #:cur-subject node)) |
375 | (new-state (parse-object object new-state))) |
376 | (loop |
377 | objects |
378 | (update-parser-state new-state |
379 | #:result |
380 | (cons |
381 | (make-rdf-triple |
382 | previous-object |
383 | "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" |
384 | node) |
385 | (parser-state-result new-state))) |
386 | node)))))))) |
387 | |
388 | (define (parse-object-list ol state) |
389 | (let loop ((ol ol) (state state)) |
390 | (match ol |
391 | ('() state) |
392 | ((('object object) ol ...) |
393 | (loop ol (parse-object object state))) |
394 | ((ol) |
395 | (loop ol state))))) |
396 | |
397 | (define (parse-predicate-object po state) |
398 | (let loop ((po po) (state state)) |
399 | (match po |
400 | ((('verb verb) ('object-list ol ...) po) |
401 | (let* ((verb (parse-verb verb state)) |
402 | (state (assoc-ref verb "state")) |
403 | (verb (assoc-ref verb "verb")) |
404 | (new-state (update-parser-state state #:cur-predicate verb)) |
405 | (res (parse-object-list ol new-state))) |
406 | (loop po res))) |
407 | ((('verb verb) ('object-list ol ...)) |
408 | (let* ((verb (parse-verb verb state)) |
409 | (state (assoc-ref verb "state")) |
410 | (verb (assoc-ref verb "verb")) |
411 | (new-state (update-parser-state state #:cur-predicate verb)) |
412 | (res (parse-object-list ol new-state))) |
413 | res)) |
414 | (((('verb verb) ('object-list ol ...)) po ...) |
415 | (let* ((verb (parse-verb verb state)) |
416 | (state (assoc-ref verb "state")) |
417 | (verb (assoc-ref verb "verb")) |
418 | (new-state (update-parser-state state #:cur-predicate verb)) |
419 | (res (parse-object-list ol new-state))) |
420 | (loop po res))) |
421 | ('() state) |
422 | ((po) |
423 | (loop po state))))) |
424 | |
425 | (define (parse-subject s state) |
426 | (match s |
427 | (('iri _ ...) |
428 | (let ((res (parse-iri s state))) |
429 | `(("subject" . ,(assoc-ref res "iri")) |
430 | ("state" . ,(assoc-ref res "state"))))) |
431 | (('collection objects ...) |
432 | (let ((res (parse-collection objects state))) |
433 | `(("subject" . ,(parser-state-cur-object res)) |
434 | ("state" . ,res)))) |
435 | ('collection |
436 | `(("subject" . "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil") |
437 | ("state" . ,state))) |
438 | (('blank-node ('anon _)) |
439 | (let ((node ((parser-state-blank-node-gen state)))) |
440 | `(("subject" . ,node) |
441 | ("state" . ,state)))) |
442 | (('blank-node ('blank-node-label label)) |
443 | (if (assoc-ref (parser-state-bnode-labels state) label) |
444 | `(("subject" . ,(assoc-ref (parser-state-bnode-labels state) label)) |
445 | ("state" . ,state)) |
446 | (let ((node ((parser-state-blank-node-gen state)))) |
447 | `(("subject" . ,node) |
448 | ("state" . ,(update-parser-state state |
449 | #:bnode-labels |
450 | (cons |
451 | (cons label node) |
452 | (parser-state-bnode-labels state)))))))))) |
453 | |
454 | (define (parse-triples t state) |
455 | (match t |
456 | ((('subject subject) ('predicate-object-list predicate-object ...)) |
457 | (let* ((res (parse-subject subject state)) |
458 | (subject (assoc-ref res "subject")) |
459 | (state (assoc-ref res "state")) |
460 | (state (update-parser-state state |
461 | #:cur-subject subject))) |
462 | (parse-predicate-object predicate-object state))) |
463 | ((('blank-node-property-list ('predicate-object-list po ...)) |
464 | ('predicate-object-list predicate-object ...)) |
465 | (let* ((subject ((parser-state-blank-node-gen state))) |
466 | (new-state (parse-predicate-object |
467 | po (update-parser-state state #:cur-subject subject)))) |
468 | (parse-predicate-object predicate-object new-state))) |
469 | ((('blank-node-property-list ('predicate-object-list po ...))) |
470 | (let* ((subject ((parser-state-blank-node-gen state)))) |
471 | (parse-predicate-object po (update-parser-state state |
472 | #:cur-subject subject)))))) |
473 | |
474 | (define (parse-turtle-doc parse-tree state) |
475 | (let loop ((parse-tree parse-tree) (state state)) |
476 | (match parse-tree |
477 | ('() (parser-state-result state)) |
478 | ((('prefix-id ('pname-ns ns) ('iriref iri ...)) parse-tree ...) |
479 | (loop parse-tree |
480 | (add-ns-to-state |
481 | state ns (resolve-iri (parser-state-base-uri state) |
482 | (parse-string iri))))) |
483 | ((('prefix-id ('pname-ns ('iriref iri ...))) parse-tree ...) |
484 | (loop parse-tree |
485 | (add-ns-to-state |
486 | state "" (resolve-iri (parser-state-base-uri state) |
487 | (parse-string iri))))) |
488 | ((('sparql-prefix ('pname-ns ns) ('iriref iri ...)) parse-tree ...) |
489 | (loop parse-tree |
490 | (add-ns-to-state |
491 | state ns (resolve-iri (parser-state-base-uri state) |
492 | (parse-string iri))))) |
493 | ((('sparql-prefix ('pname-ns ('iriref iri ...))) parse-tree ...) |
494 | (loop parse-tree |
495 | (add-ns-to-state |
496 | state "" (resolve-iri (parser-state-base-uri state) |
497 | (parse-string iri))))) |
498 | ((('base ('iriref iri ...)) parse-tree ...) |
499 | (loop parse-tree |
500 | (update-parser-state |
501 | state #:base-uri (resolve-iri (parser-state-base-uri state) |
502 | (parse-string iri))))) |
503 | ((('sparql-base ('iriref iri ...)) parse-tree ...) |
504 | (loop parse-tree |
505 | (update-parser-state |
506 | state #:base-uri (resolve-iri (parser-state-base-uri state) |
507 | (parse-string iri))))) |
508 | ((('triples t ...) parse-tree ...) |
509 | (let ((res (parse-triples t state))) |
510 | (loop parse-tree (parse-triples t state)))) |
511 | ;; otherwise, it's a single element, not a list of statements |
512 | (((? symbol? _) _ ...) (loop (list parse-tree) state))))) |
513 | |
514 | (define (tordf parse-tree base) |
515 | (define state |
516 | (make-parser-state base '() '() #f #f #f (create-generate-blank-node) '())) |
517 | (parse-turtle-doc parse-tree state)) |
518 | |
519 | (define (turtle->rdf str-or-file base) |
520 | (define str |
521 | (cond |
522 | ((file-exists? str-or-file) (call-with-input-file str-or-file get-string-all)) |
523 | ((string? str-or-file) str-or-file))) |
524 | |
525 | (let ((parse-tree (parse-turtle str))) |
526 | (tordf parse-tree base))) |
527 |