tordf.scm
| 1 | ;;;; Copyright (C) 2020 Julien Lepiller <julien@lepiller.eu> |
| 2 | ;;;; |
| 3 | ;;;; This library is free software; you can redistribute it and/or |
| 4 | ;;;; modify it under the terms of the GNU Lesser General Public |
| 5 | ;;;; License as published by the Free Software Foundation; either |
| 6 | ;;;; version 3 of the License, or (at your option) any later version. |
| 7 | ;;;; |
| 8 | ;;;; This library is distributed in the hope that it will be useful, |
| 9 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 11 | ;;;; Lesser General Public License for more details. |
| 12 | ;;;; |
| 13 | ;;;; You should have received a copy of the GNU Lesser General Public |
| 14 | ;;;; License along with this library; if not, write to the Free Software |
| 15 | ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 16 | ;;;; |
| 17 | |
| 18 | (define-module (turtle tordf) |
| 19 | #:use-module (ice-9 match) |
| 20 | #:use-module (ice-9 textual-ports) |
| 21 | #:use-module (iri iri) |
| 22 | #:use-module (turtle parser) |
| 23 | #:use-module (srfi srfi-9) |
| 24 | #:use-module (rdf rdf) |
| 25 | #:use-module ((rdf xsd) #:prefix xsd:) |
| 26 | #:export (turtle->rdf)) |
| 27 | |
| 28 | (define-record-type parser-state |
| 29 | (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate |
| 30 | blank-node-gen result) |
| 31 | parser-state? |
| 32 | (base-uri parser-state-base-uri) |
| 33 | (namespaces parser-state-namespaces) |
| 34 | (bnode-labels parser-state-bnode-labels) |
| 35 | (cur-subject parser-state-cur-subject) |
| 36 | (cur-predicate parser-state-cur-predicate) |
| 37 | (blank-node-gen parser-state-blank-node-gen) |
| 38 | (result parser-state-result)) |
| 39 | |
| 40 | (define* (update-parser-state |
| 41 | state #:key (base-uri (parser-state-base-uri state)) |
| 42 | (namespaces (parser-state-namespaces state)) |
| 43 | (bnode-labels (parser-state-bnode-labels state)) |
| 44 | (cur-subject (parser-state-cur-subject state)) |
| 45 | (cur-predicate (parser-state-cur-predicate state)) |
| 46 | (blank-node-gen (parser-state-blank-node-gen state)) |
| 47 | (result (parser-state-result state))) |
| 48 | (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate |
| 49 | blank-node-gen result)) |
| 50 | |
| 51 | (define (create-generate-blank-node) |
| 52 | (define num 0) |
| 53 | (lambda () |
| 54 | (set! num (+ num 1)) |
| 55 | num)) |
| 56 | |
| 57 | (define (add-ns-to-state state ns iri) |
| 58 | (pk 'iri iri) |
| 59 | (update-parser-state state |
| 60 | #:namespaces (cons (cons ns iri) (parser-state-namespaces state)))) |
| 61 | |
| 62 | |
| 63 | (define (parse-iri iri state) |
| 64 | (format #t "iri: ~a~%" iri) |
| 65 | (match iri |
| 66 | (('iri ('prefixed-name ('pname-ln ('pname-ns ns) suffix))) |
| 67 | `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) ns) |
| 68 | suffix)) |
| 69 | ("state" . ,state))) |
| 70 | (('iri ('prefixed-name ('pname-ln ('pname-ns suffix)))) |
| 71 | `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) "") |
| 72 | suffix)) |
| 73 | ("state" . ,state))) |
| 74 | (('iri 'iriref) |
| 75 | `(("iri" . ,(resolve-iri (parser-state-base-uri state) "")) |
| 76 | ("state" . ,state))) |
| 77 | (('iri ('iriref iri)) |
| 78 | `(("iri" . ,(resolve-iri (parser-state-base-uri state) iri)) |
| 79 | ("state" . ,state))) |
| 80 | (('blank-node ('blank-node-label label)) |
| 81 | (if (assoc-ref (parser-state-bnode-labels state) label) |
| 82 | `(("iri" . ,(assoc-ref (parser-state-bnode-labels state) label)) |
| 83 | ("state" . ,state)) |
| 84 | (let ((node ((parser-state-blank-node-gen state)))) |
| 85 | `(("iri" . ,node) |
| 86 | ("state" . ,(update-parser-state state |
| 87 | #:bnode-labels |
| 88 | (cons |
| 89 | (cons label node) |
| 90 | (parser-state-bnode-labels state)))))))))) |
| 91 | |
| 92 | (define (parse-verb verb state) |
| 93 | (match verb |
| 94 | ("a" `(("verb" . "http://www.w3.org/1999/02/22-rdf-syntax-ns#type") |
| 95 | ("state" . ,state))) |
| 96 | (('predicate iri) |
| 97 | (let ((res (parse-iri iri state))) |
| 98 | `(("verb" . ,(assoc-ref res "iri")) |
| 99 | ("state" . ,(assoc-ref res "state"))))))) |
| 100 | |
| 101 | (define (parse-object object state) |
| 102 | (pk 'object object) |
| 103 | (match object |
| 104 | (('rdf-literal ('string-pat ('string-literal-quote str))) |
| 105 | (update-parser-state state |
| 106 | #:result |
| 107 | (cons |
| 108 | (make-rdf-triple |
| 109 | (parser-state-cur-subject state) |
| 110 | (parser-state-cur-predicate state) |
| 111 | (make-rdf-literal str xsd:string #f)) |
| 112 | (parser-state-result state)))) |
| 113 | (('blank-node-property-list ('predicate-object-list po ...)) |
| 114 | (let* ((node ((parser-state-blank-node-gen state))) |
| 115 | (new-state (parse-predicate-object |
| 116 | po (update-parser-state state #:cur-subject node)))) |
| 117 | (update-parser-state new-state |
| 118 | #:cur-subject (parser-state-cur-subject state) |
| 119 | #:cur-predicate (parser-state-cur-predicate state) |
| 120 | #:result |
| 121 | (cons |
| 122 | (make-rdf-triple |
| 123 | (parser-state-cur-subject state) |
| 124 | (parser-state-cur-predicate state) |
| 125 | node) |
| 126 | (parser-state-result new-state))))) |
| 127 | (('collection objects ...) |
| 128 | (let loop ((objects objects) (state state)) |
| 129 | (match objects |
| 130 | ('() state) |
| 131 | ((('object object) objects ...) |
| 132 | (loop objects (parse-object object state)))))) |
| 133 | (('iri _) |
| 134 | (let* ((res (parse-iri object state)) |
| 135 | (iri (assoc-ref res "iri")) |
| 136 | (state (assoc-ref res "state"))) |
| 137 | (update-parser-state state |
| 138 | #:result |
| 139 | (cons |
| 140 | (make-rdf-triple |
| 141 | (parser-state-cur-subject state) |
| 142 | (parser-state-cur-predicate state) |
| 143 | iri) |
| 144 | (parser-state-result state))))))) |
| 145 | |
| 146 | (define (parse-object-list ol state) |
| 147 | (let loop ((ol ol) (state state)) |
| 148 | (pk 'ol ol) |
| 149 | (match ol |
| 150 | ('() state) |
| 151 | ((('object object) ol ...) |
| 152 | (loop ol (parse-object object state))) |
| 153 | ((ol) |
| 154 | (loop ol state))))) |
| 155 | |
| 156 | (define (parse-predicate-object po state) |
| 157 | (let loop ((po po) (state state)) |
| 158 | (pk 'po po) |
| 159 | (match po |
| 160 | ((('verb verb) ('object-list ol ...) po) |
| 161 | (let* ((verb (parse-verb verb state)) |
| 162 | (state (assoc-ref verb "state")) |
| 163 | (verb (assoc-ref verb "verb")) |
| 164 | (new-state (update-parser-state state #:cur-predicate verb)) |
| 165 | (res (parse-object-list ol new-state))) |
| 166 | (loop po res))) |
| 167 | ((('verb verb) ('object-list ol ...)) |
| 168 | (let* ((verb (parse-verb verb state)) |
| 169 | (state (assoc-ref verb "state")) |
| 170 | (verb (assoc-ref verb "verb")) |
| 171 | (new-state (update-parser-state state #:cur-predicate verb)) |
| 172 | (res (parse-object-list ol new-state))) |
| 173 | res)) |
| 174 | (((('verb verb) ('object-list ol ...)) po ...) |
| 175 | (let* ((verb (parse-verb verb state)) |
| 176 | (state (assoc-ref verb "state")) |
| 177 | (verb (assoc-ref verb "verb")) |
| 178 | (new-state (update-parser-state state #:cur-predicate verb)) |
| 179 | (res (parse-object-list ol new-state))) |
| 180 | (loop po res))) |
| 181 | ('() state) |
| 182 | ((po) |
| 183 | (loop po state))))) |
| 184 | |
| 185 | (define (parse-triples t state) |
| 186 | (match t |
| 187 | ((('subject iri) ('predicate-object-list predicate-object ...)) |
| 188 | (let* ((res (parse-iri iri state)) |
| 189 | (iri (assoc-ref res "iri")) |
| 190 | (state (assoc-ref res "state")) |
| 191 | (state (update-parser-state state |
| 192 | #:cur-subject iri))) |
| 193 | (parse-predicate-object predicate-object state))))) |
| 194 | |
| 195 | (define (parse-turtle-doc parse-tree state) |
| 196 | (let loop ((parse-tree parse-tree) (state state)) |
| 197 | (match parse-tree |
| 198 | ('() (parser-state-result state)) |
| 199 | ((('prefix-id ('pname-ns ns) ('iriref iri)) parse-tree ...) |
| 200 | (loop parse-tree |
| 201 | (add-ns-to-state |
| 202 | state ns (resolve-iri (parser-state-base-uri state) iri)))) |
| 203 | ((('prefix-id ('pname-ns ('iriref iri))) parse-tree ...) |
| 204 | (loop parse-tree |
| 205 | (add-ns-to-state |
| 206 | state "" (resolve-iri (parser-state-base-uri state) iri)))) |
| 207 | ((('sparql-prefix ('pname-ns ns) ('iriref iri)) parse-tree ...) |
| 208 | (loop parse-tree |
| 209 | (add-ns-to-state |
| 210 | state ns (resolve-iri (parser-state-base-uri state) iri)))) |
| 211 | ((('sparql-prefix ('pname-ns ('iriref iri))) parse-tree ...) |
| 212 | (loop parse-tree |
| 213 | (add-ns-to-state |
| 214 | state "" (resolve-iri (parser-state-base-uri state) iri)))) |
| 215 | ((('base ('iriref iri)) parse-tree ...) |
| 216 | (loop parse-tree |
| 217 | (update-parser-state |
| 218 | state #:base-uri (resolve-iri (parser-state-base-uri state) iri)))) |
| 219 | ((('sparql-base ('iriref iri)) parse-tree ...) |
| 220 | (loop parse-tree |
| 221 | (update-parser-state |
| 222 | state #:base-uri (resolve-iri iri (parser-state-base-uri state))))) |
| 223 | ((('triples t ...) parse-tree ...) |
| 224 | (format #t "triples: ~a~%" t) |
| 225 | (let ((res (parse-triples t state))) |
| 226 | (loop parse-tree (parse-triples t state)))) |
| 227 | ;; otherwise, it's a single element, not a list of statements |
| 228 | (((? symbol? _) _ ...) (loop (list parse-tree) state))))) |
| 229 | |
| 230 | (define (tordf parse-tree base) |
| 231 | (define state |
| 232 | (make-parser-state base '() '() #f #f (create-generate-blank-node) '())) |
| 233 | (parse-turtle-doc parse-tree state)) |
| 234 | |
| 235 | (define (turtle->rdf str-or-file base) |
| 236 | (define str |
| 237 | (cond |
| 238 | ((file-exists? str-or-file) (call-with-input-file str-or-file get-string-all)) |
| 239 | ((string? str-or-file) str-or-file))) |
| 240 | |
| 241 | (let ((parse-tree (parse-turtle str))) |
| 242 | (tordf parse-tree base))) |
| 243 |