tordf.scm
1 | ;;;; Copyright (C) 2020 Julien Lepiller <julien@lepiller.eu> |
2 | ;;;; |
3 | ;;;; This library is free software; you can redistribute it and/or |
4 | ;;;; modify it under the terms of the GNU Lesser General Public |
5 | ;;;; License as published by the Free Software Foundation; either |
6 | ;;;; version 3 of the License, or (at your option) any later version. |
7 | ;;;; |
8 | ;;;; This library is distributed in the hope that it will be useful, |
9 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
11 | ;;;; Lesser General Public License for more details. |
12 | ;;;; |
13 | ;;;; You should have received a copy of the GNU Lesser General Public |
14 | ;;;; License along with this library; if not, write to the Free Software |
15 | ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
16 | ;;;; |
17 | |
18 | (define-module (turtle tordf) |
19 | #:use-module (ice-9 match) |
20 | #:use-module (ice-9 textual-ports) |
21 | #:use-module (iri iri) |
22 | #:use-module (turtle parser) |
23 | #:use-module (srfi srfi-9) |
24 | #:use-module (rdf rdf) |
25 | #:use-module ((rdf xsd) #:prefix xsd:) |
26 | #:export (turtle->rdf)) |
27 | |
28 | (define-record-type parser-state |
29 | (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate |
30 | blank-node-gen result) |
31 | parser-state? |
32 | (base-uri parser-state-base-uri) |
33 | (namespaces parser-state-namespaces) |
34 | (bnode-labels parser-state-bnode-labels) |
35 | (cur-subject parser-state-cur-subject) |
36 | (cur-predicate parser-state-cur-predicate) |
37 | (blank-node-gen parser-state-blank-node-gen) |
38 | (result parser-state-result)) |
39 | |
40 | (define* (update-parser-state |
41 | state #:key (base-uri (parser-state-base-uri state)) |
42 | (namespaces (parser-state-namespaces state)) |
43 | (bnode-labels (parser-state-bnode-labels state)) |
44 | (cur-subject (parser-state-cur-subject state)) |
45 | (cur-predicate (parser-state-cur-predicate state)) |
46 | (blank-node-gen (parser-state-blank-node-gen state)) |
47 | (result (parser-state-result state))) |
48 | (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate |
49 | blank-node-gen result)) |
50 | |
51 | (define (create-generate-blank-node) |
52 | (define num 0) |
53 | (lambda () |
54 | (set! num (+ num 1)) |
55 | num)) |
56 | |
57 | (define (add-ns-to-state state ns iri) |
58 | (pk 'iri iri) |
59 | (update-parser-state state |
60 | #:namespaces (cons (cons ns iri) (parser-state-namespaces state)))) |
61 | |
62 | |
63 | (define (parse-iri iri state) |
64 | (format #t "iri: ~a~%" iri) |
65 | (match iri |
66 | (('iri ('prefixed-name ('pname-ln ('pname-ns ns) suffix))) |
67 | `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) ns) |
68 | suffix)) |
69 | ("state" . ,state))) |
70 | (('iri ('prefixed-name ('pname-ln ('pname-ns suffix)))) |
71 | `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) "") |
72 | suffix)) |
73 | ("state" . ,state))) |
74 | (('iri 'iriref) |
75 | `(("iri" . ,(resolve-iri (parser-state-base-uri state) "")) |
76 | ("state" . ,state))) |
77 | (('iri ('iriref iri)) |
78 | `(("iri" . ,(resolve-iri (parser-state-base-uri state) iri)) |
79 | ("state" . ,state))) |
80 | (('blank-node ('blank-node-label label)) |
81 | (if (assoc-ref (parser-state-bnode-labels state) label) |
82 | `(("iri" . ,(assoc-ref (parser-state-bnode-labels state) label)) |
83 | ("state" . ,state)) |
84 | (let ((node ((parser-state-blank-node-gen state)))) |
85 | `(("iri" . ,node) |
86 | ("state" . ,(update-parser-state state |
87 | #:bnode-labels |
88 | (cons |
89 | (cons label node) |
90 | (parser-state-bnode-labels state)))))))))) |
91 | |
92 | (define (parse-verb verb state) |
93 | (match verb |
94 | ("a" `(("verb" . "http://www.w3.org/1999/02/22-rdf-syntax-ns#type") |
95 | ("state" . ,state))) |
96 | (('predicate iri) |
97 | (let ((res (parse-iri iri state))) |
98 | `(("verb" . ,(assoc-ref res "iri")) |
99 | ("state" . ,(assoc-ref res "state"))))))) |
100 | |
101 | (define (parse-object object state) |
102 | (pk 'object object) |
103 | (match object |
104 | (('rdf-literal ('string-pat ('string-literal-quote str))) |
105 | (update-parser-state state |
106 | #:result |
107 | (cons |
108 | (make-rdf-triple |
109 | (parser-state-cur-subject state) |
110 | (parser-state-cur-predicate state) |
111 | (make-rdf-literal str xsd:string #f)) |
112 | (parser-state-result state)))) |
113 | (('blank-node-property-list ('predicate-object-list po ...)) |
114 | (let* ((node ((parser-state-blank-node-gen state))) |
115 | (new-state (parse-predicate-object |
116 | po (update-parser-state state #:cur-subject node)))) |
117 | (update-parser-state new-state |
118 | #:cur-subject (parser-state-cur-subject state) |
119 | #:cur-predicate (parser-state-cur-predicate state) |
120 | #:result |
121 | (cons |
122 | (make-rdf-triple |
123 | (parser-state-cur-subject state) |
124 | (parser-state-cur-predicate state) |
125 | node) |
126 | (parser-state-result new-state))))) |
127 | (('collection objects ...) |
128 | (let loop ((objects objects) (state state)) |
129 | (match objects |
130 | ('() state) |
131 | ((('object object) objects ...) |
132 | (loop objects (parse-object object state)))))) |
133 | (('iri _) |
134 | (let* ((res (parse-iri object state)) |
135 | (iri (assoc-ref res "iri")) |
136 | (state (assoc-ref res "state"))) |
137 | (update-parser-state state |
138 | #:result |
139 | (cons |
140 | (make-rdf-triple |
141 | (parser-state-cur-subject state) |
142 | (parser-state-cur-predicate state) |
143 | iri) |
144 | (parser-state-result state))))))) |
145 | |
146 | (define (parse-object-list ol state) |
147 | (let loop ((ol ol) (state state)) |
148 | (pk 'ol ol) |
149 | (match ol |
150 | ('() state) |
151 | ((('object object) ol ...) |
152 | (loop ol (parse-object object state))) |
153 | ((ol) |
154 | (loop ol state))))) |
155 | |
156 | (define (parse-predicate-object po state) |
157 | (let loop ((po po) (state state)) |
158 | (pk 'po po) |
159 | (match po |
160 | ((('verb verb) ('object-list ol ...) po) |
161 | (let* ((verb (parse-verb verb state)) |
162 | (state (assoc-ref verb "state")) |
163 | (verb (assoc-ref verb "verb")) |
164 | (new-state (update-parser-state state #:cur-predicate verb)) |
165 | (res (parse-object-list ol new-state))) |
166 | (loop po res))) |
167 | ((('verb verb) ('object-list ol ...)) |
168 | (let* ((verb (parse-verb verb state)) |
169 | (state (assoc-ref verb "state")) |
170 | (verb (assoc-ref verb "verb")) |
171 | (new-state (update-parser-state state #:cur-predicate verb)) |
172 | (res (parse-object-list ol new-state))) |
173 | res)) |
174 | (((('verb verb) ('object-list ol ...)) po ...) |
175 | (let* ((verb (parse-verb verb state)) |
176 | (state (assoc-ref verb "state")) |
177 | (verb (assoc-ref verb "verb")) |
178 | (new-state (update-parser-state state #:cur-predicate verb)) |
179 | (res (parse-object-list ol new-state))) |
180 | (loop po res))) |
181 | ('() state) |
182 | ((po) |
183 | (loop po state))))) |
184 | |
185 | (define (parse-triples t state) |
186 | (match t |
187 | ((('subject iri) ('predicate-object-list predicate-object ...)) |
188 | (let* ((res (parse-iri iri state)) |
189 | (iri (assoc-ref res "iri")) |
190 | (state (assoc-ref res "state")) |
191 | (state (update-parser-state state |
192 | #:cur-subject iri))) |
193 | (parse-predicate-object predicate-object state))))) |
194 | |
195 | (define (parse-turtle-doc parse-tree state) |
196 | (let loop ((parse-tree parse-tree) (state state)) |
197 | (match parse-tree |
198 | ('() (parser-state-result state)) |
199 | ((('prefix-id ('pname-ns ns) ('iriref iri)) parse-tree ...) |
200 | (loop parse-tree |
201 | (add-ns-to-state |
202 | state ns (resolve-iri (parser-state-base-uri state) iri)))) |
203 | ((('prefix-id ('pname-ns ('iriref iri))) parse-tree ...) |
204 | (loop parse-tree |
205 | (add-ns-to-state |
206 | state "" (resolve-iri (parser-state-base-uri state) iri)))) |
207 | ((('sparql-prefix ('pname-ns ns) ('iriref iri)) parse-tree ...) |
208 | (loop parse-tree |
209 | (add-ns-to-state |
210 | state ns (resolve-iri (parser-state-base-uri state) iri)))) |
211 | ((('sparql-prefix ('pname-ns ('iriref iri))) parse-tree ...) |
212 | (loop parse-tree |
213 | (add-ns-to-state |
214 | state "" (resolve-iri (parser-state-base-uri state) iri)))) |
215 | ((('base ('iriref iri)) parse-tree ...) |
216 | (loop parse-tree |
217 | (update-parser-state |
218 | state #:base-uri (resolve-iri (parser-state-base-uri state) iri)))) |
219 | ((('sparql-base ('iriref iri)) parse-tree ...) |
220 | (loop parse-tree |
221 | (update-parser-state |
222 | state #:base-uri (resolve-iri iri (parser-state-base-uri state))))) |
223 | ((('triples t ...) parse-tree ...) |
224 | (format #t "triples: ~a~%" t) |
225 | (let ((res (parse-triples t state))) |
226 | (loop parse-tree (parse-triples t state)))) |
227 | ;; otherwise, it's a single element, not a list of statements |
228 | (((? symbol? _) _ ...) (loop (list parse-tree) state))))) |
229 | |
230 | (define (tordf parse-tree base) |
231 | (define state |
232 | (make-parser-state base '() '() #f #f (create-generate-blank-node) '())) |
233 | (parse-turtle-doc parse-tree state)) |
234 | |
235 | (define (turtle->rdf str-or-file base) |
236 | (define str |
237 | (cond |
238 | ((file-exists? str-or-file) (call-with-input-file str-or-file get-string-all)) |
239 | ((string? str-or-file) str-or-file))) |
240 | |
241 | (let ((parse-tree (parse-turtle str))) |
242 | (tordf parse-tree base))) |
243 |