;;;; Copyright (C) 2020 Julien Lepiller ;;;; ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public ;;;; License as published by the Free Software Foundation; either ;;;; version 3 of the License, or (at your option) any later version. ;;;; ;;;; This library is distributed in the hope that it will be useful, ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;;; Lesser General Public License for more details. ;;;; ;;;; You should have received a copy of the GNU Lesser General Public ;;;; License along with this library; if not, write to the Free Software ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ;;;; (define-module (turtle tordf) #:use-module (ice-9 match) #:use-module (ice-9 textual-ports) #:use-module (iri iri) #:use-module (turtle parser) #:use-module (srfi srfi-9) #:use-module (rdf rdf) #:export (turtle->rdf)) (define-record-type parser-state (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate cur-object blank-node-gen result) parser-state? (base-uri parser-state-base-uri) (namespaces parser-state-namespaces) (bnode-labels parser-state-bnode-labels) (cur-subject parser-state-cur-subject) (cur-predicate parser-state-cur-predicate) (cur-object parser-state-cur-object) (blank-node-gen parser-state-blank-node-gen) (result parser-state-result)) (define* (update-parser-state state #:key (base-uri (parser-state-base-uri state)) (namespaces (parser-state-namespaces state)) (bnode-labels (parser-state-bnode-labels state)) (cur-subject (parser-state-cur-subject state)) (cur-predicate (parser-state-cur-predicate state)) (cur-object (parser-state-cur-object state)) (blank-node-gen (parser-state-blank-node-gen state)) (result (parser-state-result state))) (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate cur-object blank-node-gen result)) (define (create-generate-blank-node) (define num 0) (lambda () (set! num (+ num 1)) num)) (define (add-ns-to-state state ns iri) (update-parser-state state #:namespaces (cons (cons ns iri) (parser-state-namespaces state)))) (define (parse-string str) (match str ((? string? str) str) ((component str ...) (match component ((? string? str1) (string-append str1 (parse-string str))) (('uchar n) (string-append (string (integer->char (string->number n 16))) (parse-string str))) (('echar e) (string-append (match e ("\\t" "\t") ("\\b" "\b") ("\\n" "\n") ("\\r" "\r") ("\\f" "\f") ("\\\\" "\\") ("\\\"" "\"") ("\\'" "'")) (parse-string str))))) (() ""))) (define (parse-iri iri state) (match iri (('iri ('prefixed-name ('pname-ln ('pname-ns ns) ('pn-local suffix)))) `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) ns) suffix)) ("state" . ,state))) (('iri ('prefixed-name ('pname-ln ('pname-ns ('pn-local suffix))))) `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) "") suffix)) ("state" . ,state))) (('iri ('prefixed-name ('pname-ns ns))) `(("iri" . ,(assoc-ref (parser-state-namespaces state) ns)) ("state" . ,state))) (('iri ('prefixed-name 'pname-ns)) `(("iri" . ,(assoc-ref (parser-state-namespaces state) "")) ("state" . ,state))) (('iri 'iriref) `(("iri" . ,(resolve-iri (parser-state-base-uri state) "")) ("state" . ,state))) (('iri ('iriref iri ...)) `(("iri" . ,(resolve-iri (parser-state-base-uri state) (parse-string iri))) ("state" . ,state))))) (define (parse-verb verb state) (match verb ("a" `(("verb" . "http://www.w3.org/1999/02/22-rdf-syntax-ns#type") ("state" . ,state))) (('predicate iri) (let ((res (parse-iri iri state))) `(("verb" . ,(assoc-ref res "iri")) ("state" . ,(assoc-ref res "state"))))))) (define (parse-object object state) (match object (('rdf-literal ('string-pat (_ str ...))) (let ((object (make-rdf-literal (parse-string str) "http://www.w3.org/2001/XMLSchema#string" #f))) (update-parser-state state #:cur-object object #:result (cons (make-rdf-triple (parser-state-cur-subject state) (parser-state-cur-predicate state) (make-rdf-literal str "http://www.w3.org/2001/XMLSchema#string" #f)) (parser-state-result state))))) (('rdf-literal ('string-pat (_ str ...)) ("^^" iri)) (let* ((res (parse-iri iri state)) (iri (assoc-ref res "iri")) (state (assoc-ref res "state")) (object (make-rdf-literal (parse-string str) iri #f))) (update-parser-state state #:cur-object object #:result (cons (make-rdf-triple (parser-state-cur-subject state) (parser-state-cur-predicate state) object) (parser-state-result state))))) (('rdf-literal ('string-pat (_ str ...)) ('langtag lang)) (let ((object (make-rdf-literal (parse-string str) "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" lang))) (update-parser-state state #:cur-object object #:result (cons (make-rdf-triple (parser-state-cur-subject state) (parser-state-cur-predicate state) object) (parser-state-result state))))) (('numeric-literal ('decimal num)) (let ((object (make-rdf-literal num "http://www.w3.org/2001/XMLSchema#decimal" #f))) (update-parser-state state #:cur-object object #:result (cons (make-rdf-triple (parser-state-cur-subject state) (parser-state-cur-predicate state) object) (parser-state-result state))))) (('numeric-literal ('integer int)) (let ((object (make-rdf-literal int "http://www.w3.org/2001/XMLSchema#integer" #f))) (update-parser-state state #:cur-object object #:result (cons (make-rdf-triple (parser-state-cur-subject state) (parser-state-cur-predicate state) object) (parser-state-result state))))) (('numeric-literal ('double num)) (let ((object (make-rdf-literal num "http://www.w3.org/2001/XMLSchema#double" #f))) (update-parser-state state #:cur-object object #:result (cons (make-rdf-triple (parser-state-cur-subject state) (parser-state-cur-predicate state) object) (parser-state-result state))))) (('boolean-literal bool) (let ((object (make-rdf-literal bool "http://www.w3.org/2001/XMLSchema#boolean" #f))) (update-parser-state state #:cur-object object #:result (cons (make-rdf-triple (parser-state-cur-subject state) (parser-state-cur-predicate state) object) (parser-state-result state))))) (('blank-node ('anon _)) (let ((node ((parser-state-blank-node-gen state)))) (update-parser-state state #:cur-object node #:result (cons (make-rdf-triple (parser-state-cur-subject state) (parser-state-cur-predicate state) node) (parser-state-result state))))) (('blank-node ('blank-node-label label)) (let* ((node (or (assoc-ref (parser-state-bnode-labels state) label) ((parser-state-blank-node-gen state)))) (state (if (assoc-ref (parser-state-bnode-labels state) label) state (update-parser-state state #:bnode-labels (cons (cons label node) (parser-state-bnode-labels state)))))) (update-parser-state state #:cur-object node #:result (cons (make-rdf-triple (parser-state-cur-subject state) (parser-state-cur-predicate state) node) (parser-state-result state))))) (('blank-node-property-list ('predicate-object-list po ...)) (let* ((node ((parser-state-blank-node-gen state))) (new-state (parse-predicate-object po (update-parser-state state #:cur-subject node)))) (update-parser-state new-state #:cur-object node #:cur-subject (parser-state-cur-subject state) #:cur-predicate (parser-state-cur-predicate state) #:result (cons (make-rdf-triple (parser-state-cur-subject state) (parser-state-cur-predicate state) node) (parser-state-result new-state))))) ('collection (let ((object "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil")) (update-parser-state state #:cur-object object #:result (cons (make-rdf-triple (parser-state-cur-subject state) (parser-state-cur-predicate state) object) (parser-state-result state))))) (('collection objects ...) (let ((state (parse-collection objects state))) (update-parser-state state #:result (cons (make-rdf-triple (parser-state-cur-subject state) (parser-state-cur-predicate state) (parser-state-cur-object state)) (parser-state-result state))))) (('iri _) (let* ((res (parse-iri object state)) (iri (assoc-ref res "iri")) (state (assoc-ref res "state"))) (update-parser-state state #:cur-object iri #:result (cons (make-rdf-triple (parser-state-cur-subject state) (parser-state-cur-predicate state) iri) (parser-state-result state))))))) (define (parse-collection collection state) (let ((node ((parser-state-blank-node-gen state)))) (let loop ((objects collection) (new-state (update-parser-state state #:cur-subject node #:cur-predicate "http://www.w3.org/1999/02/22-rdf-syntax-ns#first")) (previous-object #nil)) (match objects ('() (update-parser-state new-state #:cur-object node #:cur-subject (parser-state-cur-subject state) #:cur-predicate (parser-state-cur-predicate state) #:result (cons (make-rdf-triple previous-object "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil") (parser-state-result new-state)))) ((('object object) objects ...) (if (equal? previous-object #nil) (let ((new-state (parse-object object new-state))) (loop objects new-state node)) (let* ((node ((parser-state-blank-node-gen new-state))) (new-state (update-parser-state new-state #:cur-subject node)) (new-state (parse-object object new-state))) (loop objects (update-parser-state new-state #:result (cons (make-rdf-triple previous-object "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest" node) (parser-state-result new-state))) node)))))))) (define (parse-object-list ol state) (let loop ((ol ol) (state state)) (match ol ('() state) ((('object object) ol ...) (loop ol (parse-object object state))) ((ol) (loop ol state))))) (define (parse-predicate-object po state) (let loop ((po po) (state state)) (match po ((('verb verb) ('object-list ol ...) po) (let* ((verb (parse-verb verb state)) (state (assoc-ref verb "state")) (verb (assoc-ref verb "verb")) (new-state (update-parser-state state #:cur-predicate verb)) (res (parse-object-list ol new-state))) (loop po res))) ((('verb verb) ('object-list ol ...)) (let* ((verb (parse-verb verb state)) (state (assoc-ref verb "state")) (verb (assoc-ref verb "verb")) (new-state (update-parser-state state #:cur-predicate verb)) (res (parse-object-list ol new-state))) res)) (((('verb verb) ('object-list ol ...)) po ...) (let* ((verb (parse-verb verb state)) (state (assoc-ref verb "state")) (verb (assoc-ref verb "verb")) (new-state (update-parser-state state #:cur-predicate verb)) (res (parse-object-list ol new-state))) (loop po res))) ('() state) ((po) (loop po state))))) (define (parse-subject s state) (match s (('iri _ ...) (let ((res (parse-iri s state))) `(("subject" . ,(assoc-ref res "iri")) ("state" . ,(assoc-ref res "state"))))) (('collection objects ...) (let ((res (parse-collection objects state))) `(("subject" . ,(parser-state-cur-object res)) ("state" . ,res)))) ('collection `(("subject" . "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil") ("state" . ,state))) (('blank-node ('anon _)) (let ((node ((parser-state-blank-node-gen state)))) `(("subject" . ,node) ("state" . ,state)))) (('blank-node ('blank-node-label label)) (if (assoc-ref (parser-state-bnode-labels state) label) `(("subject" . ,(assoc-ref (parser-state-bnode-labels state) label)) ("state" . ,state)) (let ((node ((parser-state-blank-node-gen state)))) `(("subject" . ,node) ("state" . ,(update-parser-state state #:bnode-labels (cons (cons label node) (parser-state-bnode-labels state)))))))))) (define (parse-triples t state) (match t ((('subject subject) ('predicate-object-list predicate-object ...)) (let* ((res (parse-subject subject state)) (subject (assoc-ref res "subject")) (state (assoc-ref res "state")) (state (update-parser-state state #:cur-subject subject))) (parse-predicate-object predicate-object state))) ((('blank-node-property-list ('predicate-object-list po ...)) ('predicate-object-list predicate-object ...)) (let* ((subject ((parser-state-blank-node-gen state))) (new-state (parse-predicate-object po (update-parser-state state #:cur-subject subject)))) (parse-predicate-object predicate-object new-state))) ((('blank-node-property-list ('predicate-object-list po ...))) (let* ((subject ((parser-state-blank-node-gen state)))) (parse-predicate-object po (update-parser-state state #:cur-subject subject)))))) (define (parse-turtle-doc parse-tree state) (let loop ((parse-tree parse-tree) (state state)) (match parse-tree ('() (parser-state-result state)) ((('prefix-id ('pname-ns ns) ('iriref iri ...)) parse-tree ...) (loop parse-tree (add-ns-to-state state ns (resolve-iri (parser-state-base-uri state) (parse-string iri))))) ((('prefix-id ('pname-ns ('iriref iri ...))) parse-tree ...) (loop parse-tree (add-ns-to-state state "" (resolve-iri (parser-state-base-uri state) (parse-string iri))))) ((('sparql-prefix ('pname-ns ns) ('iriref iri ...)) parse-tree ...) (loop parse-tree (add-ns-to-state state ns (resolve-iri (parser-state-base-uri state) (parse-string iri))))) ((('sparql-prefix ('pname-ns ('iriref iri ...))) parse-tree ...) (loop parse-tree (add-ns-to-state state "" (resolve-iri (parser-state-base-uri state) (parse-string iri))))) ((('base ('iriref iri ...)) parse-tree ...) (loop parse-tree (update-parser-state state #:base-uri (resolve-iri (parser-state-base-uri state) (parse-string iri))))) ((('sparql-base ('iriref iri ...)) parse-tree ...) (loop parse-tree (update-parser-state state #:base-uri (resolve-iri (parser-state-base-uri state) (parse-string iri))))) ((('triples t ...) parse-tree ...) (let ((res (parse-triples t state))) (loop parse-tree (parse-triples t state)))) ;; otherwise, it's a single element, not a list of statements (((? symbol? _) _ ...) (loop (list parse-tree) state))))) (define (tordf parse-tree base) (define state (make-parser-state base '() '() #f #f #f (create-generate-blank-node) '())) (parse-turtle-doc parse-tree state)) (define (turtle->rdf str-or-file base) (define str (cond ((file-exists? str-or-file) (call-with-input-file str-or-file get-string-all)) ((string? str-or-file) str-or-file))) (let ((parse-tree (parse-turtle str))) (tordf parse-tree base)))