tordf.scm
| 1 | ;;;; Copyright (C) 2020 Julien Lepiller <julien@lepiller.eu> |
| 2 | ;;;; |
| 3 | ;;;; This library is free software; you can redistribute it and/or |
| 4 | ;;;; modify it under the terms of the GNU Lesser General Public |
| 5 | ;;;; License as published by the Free Software Foundation; either |
| 6 | ;;;; version 3 of the License, or (at your option) any later version. |
| 7 | ;;;; |
| 8 | ;;;; This library is distributed in the hope that it will be useful, |
| 9 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 11 | ;;;; Lesser General Public License for more details. |
| 12 | ;;;; |
| 13 | ;;;; You should have received a copy of the GNU Lesser General Public |
| 14 | ;;;; License along with this library; if not, write to the Free Software |
| 15 | ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 16 | ;;;; |
| 17 | |
| 18 | (define-module (turtle tordf) |
| 19 | #:use-module (ice-9 match) |
| 20 | #:use-module (ice-9 textual-ports) |
| 21 | #:use-module (iri iri) |
| 22 | #:use-module (turtle parser) |
| 23 | #:use-module (srfi srfi-9) |
| 24 | #:use-module (rdf rdf) |
| 25 | #:export (turtle->rdf)) |
| 26 | |
| 27 | (define-record-type parser-state |
| 28 | (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate |
| 29 | cur-object blank-node-gen result) |
| 30 | parser-state? |
| 31 | (base-uri parser-state-base-uri) |
| 32 | (namespaces parser-state-namespaces) |
| 33 | (bnode-labels parser-state-bnode-labels) |
| 34 | (cur-subject parser-state-cur-subject) |
| 35 | (cur-predicate parser-state-cur-predicate) |
| 36 | (cur-object parser-state-cur-object) |
| 37 | (blank-node-gen parser-state-blank-node-gen) |
| 38 | (result parser-state-result)) |
| 39 | |
| 40 | (define* (update-parser-state |
| 41 | state #:key (base-uri (parser-state-base-uri state)) |
| 42 | (namespaces (parser-state-namespaces state)) |
| 43 | (bnode-labels (parser-state-bnode-labels state)) |
| 44 | (cur-subject (parser-state-cur-subject state)) |
| 45 | (cur-predicate (parser-state-cur-predicate state)) |
| 46 | (cur-object (parser-state-cur-object state)) |
| 47 | (blank-node-gen (parser-state-blank-node-gen state)) |
| 48 | (result (parser-state-result state))) |
| 49 | (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate |
| 50 | cur-object blank-node-gen result)) |
| 51 | |
| 52 | (define (create-generate-blank-node) |
| 53 | (define num 0) |
| 54 | (lambda () |
| 55 | (set! num (+ num 1)) |
| 56 | num)) |
| 57 | |
| 58 | (define (add-ns-to-state state ns iri) |
| 59 | (update-parser-state state |
| 60 | #:namespaces (cons (cons ns iri) (parser-state-namespaces state)))) |
| 61 | |
| 62 | (define (parse-string str) |
| 63 | (match str |
| 64 | ((? string? str) str) |
| 65 | ((component str ...) |
| 66 | (match component |
| 67 | ((? string? str1) |
| 68 | (string-append str1 (parse-string str))) |
| 69 | (('uchar n) |
| 70 | (string-append (string (integer->char (string->number n 16))) |
| 71 | (parse-string str))) |
| 72 | (('echar e) |
| 73 | (string-append |
| 74 | (match e |
| 75 | ("\\t" "\t") |
| 76 | ("\\b" "\b") |
| 77 | ("\\n" "\n") |
| 78 | ("\\r" "\r") |
| 79 | ("\\f" "\f") |
| 80 | ("\\\\" "\\") |
| 81 | ("\\\"" "\"") |
| 82 | ("\\'" "'")) |
| 83 | (parse-string str))))) |
| 84 | (() ""))) |
| 85 | |
| 86 | (define (parse-iri iri state) |
| 87 | (pk 'iri iri) |
| 88 | (match iri |
| 89 | (('iri ('prefixed-name ('pname-ln ('pname-ns ns) ('pn-local suffix)))) |
| 90 | `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) ns) |
| 91 | suffix)) |
| 92 | ("state" . ,state))) |
| 93 | (('iri ('prefixed-name ('pname-ln ('pname-ns ('pn-local suffix))))) |
| 94 | `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) "") |
| 95 | suffix)) |
| 96 | ("state" . ,state))) |
| 97 | (('iri ('prefixed-name ('pname-ns ns))) |
| 98 | `(("iri" . ,(assoc-ref (parser-state-namespaces state) ns)) |
| 99 | ("state" . ,state))) |
| 100 | (('iri ('prefixed-name 'pname-ns)) |
| 101 | `(("iri" . ,(assoc-ref (parser-state-namespaces state) "")) |
| 102 | ("state" . ,state))) |
| 103 | (('iri 'iriref) |
| 104 | `(("iri" . ,(resolve-iri (parser-state-base-uri state) "")) |
| 105 | ("state" . ,state))) |
| 106 | (('iri ('iriref iri ...)) |
| 107 | `(("iri" . ,(resolve-iri (parser-state-base-uri state) (parse-string iri))) |
| 108 | ("state" . ,state))))) |
| 109 | |
| 110 | (define (parse-verb verb state) |
| 111 | (match verb |
| 112 | ("a" `(("verb" . "http://www.w3.org/1999/02/22-rdf-syntax-ns#type") |
| 113 | ("state" . ,state))) |
| 114 | (('predicate iri) |
| 115 | (let ((res (parse-iri iri state))) |
| 116 | `(("verb" . ,(assoc-ref res "iri")) |
| 117 | ("state" . ,(assoc-ref res "state"))))))) |
| 118 | |
| 119 | (define (parse-object object state) |
| 120 | (match object |
| 121 | (('rdf-literal ('string-pat (_ str ...))) |
| 122 | (let ((object |
| 123 | (make-rdf-literal (parse-string str) |
| 124 | "http://www.w3.org/2001/XMLSchema#string" #f))) |
| 125 | (update-parser-state state |
| 126 | #:cur-object object |
| 127 | #:result |
| 128 | (cons |
| 129 | (make-rdf-triple |
| 130 | (parser-state-cur-subject state) |
| 131 | (parser-state-cur-predicate state) |
| 132 | (make-rdf-literal str "http://www.w3.org/2001/XMLSchema#string" #f)) |
| 133 | (parser-state-result state))))) |
| 134 | (('rdf-literal ('string-pat (_ str ...)) ("^^" iri)) |
| 135 | (let* ((res (parse-iri iri state)) |
| 136 | (iri (assoc-ref res "iri")) |
| 137 | (state (assoc-ref res "state")) |
| 138 | (object (make-rdf-literal (parse-string str) iri #f))) |
| 139 | (update-parser-state state |
| 140 | #:cur-object object |
| 141 | #:result |
| 142 | (cons |
| 143 | (make-rdf-triple |
| 144 | (parser-state-cur-subject state) |
| 145 | (parser-state-cur-predicate state) |
| 146 | (make-rdf-literal str "http://www.w3.org/2001/XMLSchema#string" #f)) |
| 147 | (parser-state-result state))))) |
| 148 | (('rdf-literal ('string-pat (_ str ...)) ('langtag lang)) |
| 149 | (let ((object |
| 150 | (make-rdf-literal |
| 151 | (parse-string str) |
| 152 | "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" |
| 153 | lang))) |
| 154 | (update-parser-state state |
| 155 | #:cur-object object |
| 156 | #:result |
| 157 | (cons |
| 158 | (make-rdf-triple |
| 159 | (parser-state-cur-subject state) |
| 160 | (parser-state-cur-predicate state) |
| 161 | object) |
| 162 | (parser-state-result state))))) |
| 163 | (('numeric-literal ('decimal num)) |
| 164 | (let ((object |
| 165 | (make-rdf-literal num "http://www.w3.org/2001/XMLSchema#decimal" #f))) |
| 166 | (update-parser-state state |
| 167 | #:cur-object object |
| 168 | #:result |
| 169 | (cons |
| 170 | (make-rdf-triple |
| 171 | (parser-state-cur-subject state) |
| 172 | (parser-state-cur-predicate state) |
| 173 | object) |
| 174 | (parser-state-result state))))) |
| 175 | (('numeric-literal ('integer int)) |
| 176 | (let ((object |
| 177 | (make-rdf-literal int "http://www.w3.org/2001/XMLSchema#integer" #f))) |
| 178 | (update-parser-state state |
| 179 | #:cur-object object |
| 180 | #:result |
| 181 | (cons |
| 182 | (make-rdf-triple |
| 183 | (parser-state-cur-subject state) |
| 184 | (parser-state-cur-predicate state) |
| 185 | object) |
| 186 | (parser-state-result state))))) |
| 187 | (('numeric-literal ('double num)) |
| 188 | (let ((object |
| 189 | (make-rdf-literal num "http://www.w3.org/2001/XMLSchema#double" #f))) |
| 190 | (update-parser-state state |
| 191 | #:cur-object object |
| 192 | #:result |
| 193 | (cons |
| 194 | (make-rdf-triple |
| 195 | (parser-state-cur-subject state) |
| 196 | (parser-state-cur-predicate state) |
| 197 | object) |
| 198 | (parser-state-result state))))) |
| 199 | (('boolean-literal bool) |
| 200 | (let ((object |
| 201 | (make-rdf-literal bool "http://www.w3.org/2001/XMLSchema#boolean" #f))) |
| 202 | (update-parser-state state |
| 203 | #:cur-object object |
| 204 | #:result |
| 205 | (cons |
| 206 | (make-rdf-triple |
| 207 | (parser-state-cur-subject state) |
| 208 | (parser-state-cur-predicate state) |
| 209 | object) |
| 210 | (parser-state-result state))))) |
| 211 | (('blank-node ('anon _)) |
| 212 | (let ((node ((parser-state-blank-node-gen state)))) |
| 213 | (update-parser-state state |
| 214 | #:cur-object node |
| 215 | #:result |
| 216 | (cons |
| 217 | (make-rdf-triple |
| 218 | (parser-state-cur-subject state) |
| 219 | (parser-state-cur-predicate state) |
| 220 | node) |
| 221 | (parser-state-result state))))) |
| 222 | (('blank-node ('blank-node-label label)) |
| 223 | (let* ((node |
| 224 | (or (assoc-ref (parser-state-bnode-labels state) label) |
| 225 | ((parser-state-blank-node-gen state))))) |
| 226 | (update-parser-state state |
| 227 | #:cur-object node |
| 228 | #:result |
| 229 | (cons |
| 230 | (make-rdf-triple |
| 231 | (parser-state-cur-subject state) |
| 232 | (parser-state-cur-predicate state) |
| 233 | node) |
| 234 | (parser-state-result state))))) |
| 235 | (('blank-node-property-list ('predicate-object-list po ...)) |
| 236 | (let* ((node ((parser-state-blank-node-gen state))) |
| 237 | (new-state (parse-predicate-object |
| 238 | po (update-parser-state state #:cur-subject node)))) |
| 239 | (update-parser-state new-state |
| 240 | #:cur-object node |
| 241 | #:cur-subject (parser-state-cur-subject state) |
| 242 | #:cur-predicate (parser-state-cur-predicate state) |
| 243 | #:result |
| 244 | (cons |
| 245 | (make-rdf-triple |
| 246 | (parser-state-cur-subject state) |
| 247 | (parser-state-cur-predicate state) |
| 248 | node) |
| 249 | (parser-state-result new-state))))) |
| 250 | ('collection |
| 251 | (let ((object "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil")) |
| 252 | (update-parser-state state |
| 253 | #:cur-object object |
| 254 | #:result |
| 255 | (cons |
| 256 | (make-rdf-triple |
| 257 | (parser-state-cur-subject state) |
| 258 | (parser-state-cur-predicate state) |
| 259 | object) |
| 260 | (parser-state-result state))))) |
| 261 | (('collection objects ...) |
| 262 | (let ((state (parse-collection objects state))) |
| 263 | (update-parser-state state |
| 264 | #:result |
| 265 | (cons |
| 266 | (make-rdf-triple |
| 267 | (parser-state-cur-subject state) |
| 268 | (parser-state-cur-predicate state) |
| 269 | (parser-state-cur-object state)) |
| 270 | (parser-state-result state))))) |
| 271 | (('iri _) |
| 272 | (let* ((res (parse-iri object state)) |
| 273 | (iri (assoc-ref res "iri")) |
| 274 | (state (assoc-ref res "state"))) |
| 275 | (update-parser-state state |
| 276 | #:cur-object iri |
| 277 | #:result |
| 278 | (cons |
| 279 | (make-rdf-triple |
| 280 | (parser-state-cur-subject state) |
| 281 | (parser-state-cur-predicate state) |
| 282 | iri) |
| 283 | (parser-state-result state))))))) |
| 284 | |
| 285 | (define (parse-collection collection state) |
| 286 | (let ((node ((parser-state-blank-node-gen state)))) |
| 287 | (let loop ((objects collection) |
| 288 | (new-state |
| 289 | (update-parser-state state |
| 290 | #:cur-subject node |
| 291 | #:cur-predicate |
| 292 | "http://www.w3.org/1999/02/22-rdf-syntax-ns#first")) |
| 293 | (previous-object #nil)) |
| 294 | (match objects |
| 295 | ('() |
| 296 | (update-parser-state new-state |
| 297 | #:cur-object node |
| 298 | #:cur-subject (parser-state-cur-subject state) |
| 299 | #:cur-predicate (parser-state-cur-predicate state) |
| 300 | #:result |
| 301 | (cons |
| 302 | (make-rdf-triple |
| 303 | previous-object |
| 304 | "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" |
| 305 | "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil") |
| 306 | (parser-state-result new-state)))) |
| 307 | ((('object object) objects ...) |
| 308 | (if (equal? previous-object #nil) |
| 309 | (let ((new-state (parse-object object new-state))) |
| 310 | (loop objects new-state node)) |
| 311 | (let* ((node ((parser-state-blank-node-gen new-state))) |
| 312 | (new-state |
| 313 | (update-parser-state new-state |
| 314 | #:cur-subject node)) |
| 315 | (new-state (parse-object object new-state))) |
| 316 | (loop |
| 317 | objects |
| 318 | (update-parser-state new-state |
| 319 | #:result |
| 320 | (cons |
| 321 | (make-rdf-triple |
| 322 | previous-object |
| 323 | "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" |
| 324 | node) |
| 325 | (parser-state-result new-state))) |
| 326 | node)))))))) |
| 327 | |
| 328 | (define (parse-object-list ol state) |
| 329 | (let loop ((ol ol) (state state)) |
| 330 | (match ol |
| 331 | ('() state) |
| 332 | ((('object object) ol ...) |
| 333 | (loop ol (parse-object object state))) |
| 334 | ((ol) |
| 335 | (loop ol state))))) |
| 336 | |
| 337 | (define (parse-predicate-object po state) |
| 338 | (let loop ((po po) (state state)) |
| 339 | (match po |
| 340 | ((('verb verb) ('object-list ol ...) po) |
| 341 | (let* ((verb (parse-verb verb state)) |
| 342 | (state (assoc-ref verb "state")) |
| 343 | (verb (assoc-ref verb "verb")) |
| 344 | (new-state (update-parser-state state #:cur-predicate verb)) |
| 345 | (res (parse-object-list ol new-state))) |
| 346 | (loop po res))) |
| 347 | ((('verb verb) ('object-list ol ...)) |
| 348 | (let* ((verb (parse-verb verb state)) |
| 349 | (state (assoc-ref verb "state")) |
| 350 | (verb (assoc-ref verb "verb")) |
| 351 | (new-state (update-parser-state state #:cur-predicate verb)) |
| 352 | (res (parse-object-list ol new-state))) |
| 353 | res)) |
| 354 | (((('verb verb) ('object-list ol ...)) po ...) |
| 355 | (let* ((verb (parse-verb verb state)) |
| 356 | (state (assoc-ref verb "state")) |
| 357 | (verb (assoc-ref verb "verb")) |
| 358 | (new-state (update-parser-state state #:cur-predicate verb)) |
| 359 | (res (parse-object-list ol new-state))) |
| 360 | (loop po res))) |
| 361 | ('() state) |
| 362 | ((po) |
| 363 | (loop po state))))) |
| 364 | |
| 365 | (define (parse-subject s state) |
| 366 | (match s |
| 367 | (('iri _ ...) |
| 368 | (let ((res (parse-iri s state))) |
| 369 | `(("subject" . ,(assoc-ref res "iri")) |
| 370 | ("state" . ,(assoc-ref res "state"))))) |
| 371 | (('collection objects ...) |
| 372 | (let ((res (parse-collection objects state))) |
| 373 | `(("subject" . ,(parser-state-cur-object res)) |
| 374 | ("state" . ,res)))) |
| 375 | ('collection |
| 376 | `(("subject" . "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil") |
| 377 | ("state" . ,state))) |
| 378 | (('blank-node ('anon _)) |
| 379 | (let ((node ((parser-state-blank-node-gen state)))) |
| 380 | `(("subject" . ,node) |
| 381 | ("state" . ,state)))) |
| 382 | (('blank-node ('blank-node-label label)) |
| 383 | (if (assoc-ref (parser-state-bnode-labels state) label) |
| 384 | `(("subject" . ,(assoc-ref (parser-state-bnode-labels state) label)) |
| 385 | ("state" . ,state)) |
| 386 | (let ((node ((parser-state-blank-node-gen state)))) |
| 387 | `(("subject" . ,node) |
| 388 | ("state" . ,(update-parser-state state |
| 389 | #:bnode-labels |
| 390 | (cons |
| 391 | (cons label node) |
| 392 | (parser-state-bnode-labels state)))))))))) |
| 393 | |
| 394 | (define (parse-triples t state) |
| 395 | (match t |
| 396 | ((('subject subject) ('predicate-object-list predicate-object ...)) |
| 397 | (let* ((res (parse-subject subject state)) |
| 398 | (subject (assoc-ref res "subject")) |
| 399 | (state (assoc-ref res "state")) |
| 400 | (state (update-parser-state state |
| 401 | #:cur-subject subject))) |
| 402 | (parse-predicate-object predicate-object state))) |
| 403 | ((('blank-node-property-list ('predicate-object-list po ...)) |
| 404 | ('predicate-object-list predicate-object ...)) |
| 405 | (let* ((subject ((parser-state-blank-node-gen state))) |
| 406 | (new-state (parse-predicate-object |
| 407 | po (update-parser-state state #:cur-subject subject)))) |
| 408 | (parse-predicate-object predicate-object new-state))) |
| 409 | ((('blank-node-property-list ('predicate-object-list po ...))) |
| 410 | (let* ((subject ((parser-state-blank-node-gen state)))) |
| 411 | (parse-predicate-object po (update-parser-state state |
| 412 | #:cur-subject subject)))))) |
| 413 | |
| 414 | (define (parse-turtle-doc parse-tree state) |
| 415 | (let loop ((parse-tree parse-tree) (state state)) |
| 416 | (match parse-tree |
| 417 | ('() (parser-state-result state)) |
| 418 | ((('prefix-id ('pname-ns ns) ('iriref iri ...)) parse-tree ...) |
| 419 | (loop parse-tree |
| 420 | (add-ns-to-state |
| 421 | state ns (resolve-iri (parser-state-base-uri state) |
| 422 | (parse-string iri))))) |
| 423 | ((('prefix-id ('pname-ns ('iriref iri ...))) parse-tree ...) |
| 424 | (loop parse-tree |
| 425 | (add-ns-to-state |
| 426 | state "" (resolve-iri (parser-state-base-uri state) |
| 427 | (parse-string iri))))) |
| 428 | ((('sparql-prefix ('pname-ns ns) ('iriref iri ...)) parse-tree ...) |
| 429 | (loop parse-tree |
| 430 | (add-ns-to-state |
| 431 | state ns (resolve-iri (parser-state-base-uri state) |
| 432 | (parse-string iri))))) |
| 433 | ((('sparql-prefix ('pname-ns ('iriref iri ...))) parse-tree ...) |
| 434 | (loop parse-tree |
| 435 | (add-ns-to-state |
| 436 | state "" (resolve-iri (parser-state-base-uri state) |
| 437 | (parse-string iri))))) |
| 438 | ((('base ('iriref iri ...)) parse-tree ...) |
| 439 | (loop parse-tree |
| 440 | (update-parser-state |
| 441 | state #:base-uri (resolve-iri (parser-state-base-uri state) |
| 442 | (parse-string iri))))) |
| 443 | ((('sparql-base ('iriref iri ...)) parse-tree ...) |
| 444 | (loop parse-tree |
| 445 | (update-parser-state |
| 446 | state #:base-uri (resolve-iri (parser-state-base-uri state) |
| 447 | (parse-string iri))))) |
| 448 | ((('triples t ...) parse-tree ...) |
| 449 | (let ((res (parse-triples t state))) |
| 450 | (loop parse-tree (parse-triples t state)))) |
| 451 | ;; otherwise, it's a single element, not a list of statements |
| 452 | (((? symbol? _) _ ...) (loop (list parse-tree) state))))) |
| 453 | |
| 454 | (define (tordf parse-tree base) |
| 455 | (define state |
| 456 | (make-parser-state base '() '() #f #f #f (create-generate-blank-node) '())) |
| 457 | (parse-turtle-doc parse-tree state)) |
| 458 | |
| 459 | (define (turtle->rdf str-or-file base) |
| 460 | (define str |
| 461 | (cond |
| 462 | ((file-exists? str-or-file) (call-with-input-file str-or-file get-string-all)) |
| 463 | ((string? str-or-file) str-or-file))) |
| 464 | |
| 465 | (let ((parse-tree (parse-turtle str))) |
| 466 | (tordf parse-tree base))) |
| 467 |