tordf.scm
1 | ;;;; Copyright (C) 2020 Julien Lepiller <julien@lepiller.eu> |
2 | ;;;; |
3 | ;;;; This library is free software; you can redistribute it and/or |
4 | ;;;; modify it under the terms of the GNU Lesser General Public |
5 | ;;;; License as published by the Free Software Foundation; either |
6 | ;;;; version 3 of the License, or (at your option) any later version. |
7 | ;;;; |
8 | ;;;; This library is distributed in the hope that it will be useful, |
9 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
11 | ;;;; Lesser General Public License for more details. |
12 | ;;;; |
13 | ;;;; You should have received a copy of the GNU Lesser General Public |
14 | ;;;; License along with this library; if not, write to the Free Software |
15 | ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
16 | ;;;; |
17 | |
18 | (define-module (turtle tordf) |
19 | #:use-module (ice-9 match) |
20 | #:use-module (ice-9 textual-ports) |
21 | #:use-module (iri iri) |
22 | #:use-module (turtle parser) |
23 | #:use-module (srfi srfi-9) |
24 | #:use-module (rdf rdf) |
25 | #:export (turtle->rdf)) |
26 | |
27 | (define-record-type parser-state |
28 | (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate |
29 | blank-node-gen result) |
30 | parser-state? |
31 | (base-uri parser-state-base-uri) |
32 | (namespaces parser-state-namespaces) |
33 | (bnode-labels parser-state-bnode-labels) |
34 | (cur-subject parser-state-cur-subject) |
35 | (cur-predicate parser-state-cur-predicate) |
36 | (blank-node-gen parser-state-blank-node-gen) |
37 | (result parser-state-result)) |
38 | |
39 | (define* (update-parser-state |
40 | state #:key (base-uri (parser-state-base-uri state)) |
41 | (namespaces (parser-state-namespaces state)) |
42 | (bnode-labels (parser-state-bnode-labels state)) |
43 | (cur-subject (parser-state-cur-subject state)) |
44 | (cur-predicate (parser-state-cur-predicate state)) |
45 | (blank-node-gen (parser-state-blank-node-gen state)) |
46 | (result (parser-state-result state))) |
47 | (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate |
48 | blank-node-gen result)) |
49 | |
50 | (define (create-generate-blank-node) |
51 | (define num 0) |
52 | (lambda () |
53 | (set! num (+ num 1)) |
54 | num)) |
55 | |
56 | (define (add-ns-to-state state ns iri) |
57 | (pk 'iri iri) |
58 | (update-parser-state state |
59 | #:namespaces (cons (cons ns iri) (parser-state-namespaces state)))) |
60 | |
61 | |
62 | (define (parse-iri iri state) |
63 | (format #t "iri: ~a~%" iri) |
64 | (match iri |
65 | (('iri ('prefixed-name ('pname-ln ('pname-ns ns) suffix))) |
66 | `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) ns) |
67 | suffix)) |
68 | ("state" . ,state))) |
69 | (('iri ('prefixed-name ('pname-ln ('pname-ns suffix)))) |
70 | `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) "") |
71 | suffix)) |
72 | ("state" . ,state))) |
73 | (('iri ('prefixed-name 'pname-ns)) |
74 | `(("iri" . ,(assoc-ref (parser-state-namespaces state) "")) |
75 | ("state" . ,state))) |
76 | (('iri 'iriref) |
77 | `(("iri" . ,(resolve-iri (parser-state-base-uri state) "")) |
78 | ("state" . ,state))) |
79 | (('iri ('iriref iri)) |
80 | `(("iri" . ,(resolve-iri (parser-state-base-uri state) iri)) |
81 | ("state" . ,state))) |
82 | (('blank-node ('blank-node-label label)) |
83 | (if (assoc-ref (parser-state-bnode-labels state) label) |
84 | `(("iri" . ,(assoc-ref (parser-state-bnode-labels state) label)) |
85 | ("state" . ,state)) |
86 | (let ((node ((parser-state-blank-node-gen state)))) |
87 | `(("iri" . ,node) |
88 | ("state" . ,(update-parser-state state |
89 | #:bnode-labels |
90 | (cons |
91 | (cons label node) |
92 | (parser-state-bnode-labels state)))))))))) |
93 | |
94 | (define (parse-verb verb state) |
95 | (match verb |
96 | ("a" `(("verb" . "http://www.w3.org/1999/02/22-rdf-syntax-ns#type") |
97 | ("state" . ,state))) |
98 | (('predicate iri) |
99 | (let ((res (parse-iri iri state))) |
100 | `(("verb" . ,(assoc-ref res "iri")) |
101 | ("state" . ,(assoc-ref res "state"))))))) |
102 | |
103 | (define (parse-object object state) |
104 | (pk 'object object) |
105 | (match object |
106 | (('rdf-literal ('string-pat (_ str))) |
107 | (update-parser-state state |
108 | #:result |
109 | (cons |
110 | (make-rdf-triple |
111 | (parser-state-cur-subject state) |
112 | (parser-state-cur-predicate state) |
113 | (make-rdf-literal str "http://www.w3.org/2001/XMLSchema#string" #f)) |
114 | (parser-state-result state)))) |
115 | (('rdf-literal ('string-pat (_ str)) ('langtag lang)) |
116 | (update-parser-state state |
117 | #:result |
118 | (cons |
119 | (make-rdf-triple |
120 | (parser-state-cur-subject state) |
121 | (parser-state-cur-predicate state) |
122 | (make-rdf-literal |
123 | str "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" lang)) |
124 | (parser-state-result state)))) |
125 | (('numeric-literal ('integer int)) |
126 | (update-parser-state state |
127 | #:result |
128 | (cons |
129 | (make-rdf-triple |
130 | (parser-state-cur-subject state) |
131 | (parser-state-cur-predicate state) |
132 | (make-rdf-literal int "http://www.w3.org/2001/XMLSchema#integer" #f)) |
133 | (parser-state-result state)))) |
134 | (('boolean-literal bool) |
135 | (update-parser-state state |
136 | #:result |
137 | (cons |
138 | (make-rdf-triple |
139 | (parser-state-cur-subject state) |
140 | (parser-state-cur-predicate state) |
141 | (make-rdf-literal bool "http://www.w3.org/2001/XMLSchema#boolean" #f)) |
142 | (parser-state-result state)))) |
143 | (('blank-node-property-list ('predicate-object-list po ...)) |
144 | (let* ((node ((parser-state-blank-node-gen state))) |
145 | (new-state (parse-predicate-object |
146 | po (update-parser-state state #:cur-subject node)))) |
147 | (update-parser-state new-state |
148 | #:cur-subject (parser-state-cur-subject state) |
149 | #:cur-predicate (parser-state-cur-predicate state) |
150 | #:result |
151 | (cons |
152 | (make-rdf-triple |
153 | (parser-state-cur-subject state) |
154 | (parser-state-cur-predicate state) |
155 | node) |
156 | (parser-state-result new-state))))) |
157 | (('collection objects ...) |
158 | (let loop ((objects objects) (state state)) |
159 | (match objects |
160 | ('() state) |
161 | ((('object object) objects ...) |
162 | (loop objects (parse-object object state)))))) |
163 | (('iri _) |
164 | (let* ((res (parse-iri object state)) |
165 | (iri (assoc-ref res "iri")) |
166 | (state (assoc-ref res "state"))) |
167 | (update-parser-state state |
168 | #:result |
169 | (cons |
170 | (make-rdf-triple |
171 | (parser-state-cur-subject state) |
172 | (parser-state-cur-predicate state) |
173 | iri) |
174 | (parser-state-result state))))))) |
175 | |
176 | (define (parse-object-list ol state) |
177 | (let loop ((ol ol) (state state)) |
178 | (pk 'ol ol) |
179 | (match ol |
180 | ('() state) |
181 | ((('object object) ol ...) |
182 | (loop ol (parse-object object state))) |
183 | ((ol) |
184 | (loop ol state))))) |
185 | |
186 | (define (parse-predicate-object po state) |
187 | (let loop ((po po) (state state)) |
188 | (pk 'po po) |
189 | (match po |
190 | ((('verb verb) ('object-list ol ...) po) |
191 | (let* ((verb (parse-verb verb state)) |
192 | (state (assoc-ref verb "state")) |
193 | (verb (assoc-ref verb "verb")) |
194 | (new-state (update-parser-state state #:cur-predicate verb)) |
195 | (res (parse-object-list ol new-state))) |
196 | (loop po res))) |
197 | ((('verb verb) ('object-list ol ...)) |
198 | (let* ((verb (parse-verb verb state)) |
199 | (state (assoc-ref verb "state")) |
200 | (verb (assoc-ref verb "verb")) |
201 | (new-state (update-parser-state state #:cur-predicate verb)) |
202 | (res (parse-object-list ol new-state))) |
203 | res)) |
204 | (((('verb verb) ('object-list ol ...)) po ...) |
205 | (let* ((verb (parse-verb verb state)) |
206 | (state (assoc-ref verb "state")) |
207 | (verb (assoc-ref verb "verb")) |
208 | (new-state (update-parser-state state #:cur-predicate verb)) |
209 | (res (parse-object-list ol new-state))) |
210 | (loop po res))) |
211 | ('() state) |
212 | ((po) |
213 | (loop po state))))) |
214 | |
215 | (define (parse-triples t state) |
216 | (match t |
217 | ((('subject iri) ('predicate-object-list predicate-object ...)) |
218 | (let* ((res (parse-iri iri state)) |
219 | (iri (assoc-ref res "iri")) |
220 | (state (assoc-ref res "state")) |
221 | (state (update-parser-state state |
222 | #:cur-subject iri))) |
223 | (parse-predicate-object predicate-object state))))) |
224 | |
225 | (define (parse-turtle-doc parse-tree state) |
226 | (let loop ((parse-tree parse-tree) (state state)) |
227 | (match parse-tree |
228 | ('() (parser-state-result state)) |
229 | ((('prefix-id ('pname-ns ns) ('iriref iri)) parse-tree ...) |
230 | (loop parse-tree |
231 | (add-ns-to-state |
232 | state ns (resolve-iri (parser-state-base-uri state) iri)))) |
233 | ((('prefix-id ('pname-ns ('iriref iri))) parse-tree ...) |
234 | (loop parse-tree |
235 | (add-ns-to-state |
236 | state "" (resolve-iri (parser-state-base-uri state) iri)))) |
237 | ((('sparql-prefix ('pname-ns ns) ('iriref iri)) parse-tree ...) |
238 | (loop parse-tree |
239 | (add-ns-to-state |
240 | state ns (resolve-iri (parser-state-base-uri state) iri)))) |
241 | ((('sparql-prefix ('pname-ns ('iriref iri))) parse-tree ...) |
242 | (loop parse-tree |
243 | (add-ns-to-state |
244 | state "" (resolve-iri (parser-state-base-uri state) iri)))) |
245 | ((('base ('iriref iri)) parse-tree ...) |
246 | (loop parse-tree |
247 | (update-parser-state |
248 | state #:base-uri (resolve-iri (parser-state-base-uri state) iri)))) |
249 | ((('sparql-base ('iriref iri)) parse-tree ...) |
250 | (loop parse-tree |
251 | (update-parser-state |
252 | state #:base-uri (resolve-iri iri (parser-state-base-uri state))))) |
253 | ((('triples t ...) parse-tree ...) |
254 | (format #t "triples: ~a~%" t) |
255 | (let ((res (parse-triples t state))) |
256 | (loop parse-tree (parse-triples t state)))) |
257 | ;; otherwise, it's a single element, not a list of statements |
258 | (((? symbol? _) _ ...) (loop (list parse-tree) state))))) |
259 | |
260 | (define (tordf parse-tree base) |
261 | (define state |
262 | (make-parser-state base '() '() #f #f (create-generate-blank-node) '())) |
263 | (parse-turtle-doc parse-tree state)) |
264 | |
265 | (define (turtle->rdf str-or-file base) |
266 | (define str |
267 | (cond |
268 | ((file-exists? str-or-file) (call-with-input-file str-or-file get-string-all)) |
269 | ((string? str-or-file) str-or-file))) |
270 | |
271 | (let ((parse-tree (parse-turtle str))) |
272 | (tordf parse-tree base))) |
273 |