tordf.scm
1 | ;;;; Copyright (C) 2020 Julien Lepiller <julien@lepiller.eu> |
2 | ;;;; |
3 | ;;;; This library is free software; you can redistribute it and/or |
4 | ;;;; modify it under the terms of the GNU Lesser General Public |
5 | ;;;; License as published by the Free Software Foundation; either |
6 | ;;;; version 3 of the License, or (at your option) any later version. |
7 | ;;;; |
8 | ;;;; This library is distributed in the hope that it will be useful, |
9 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
11 | ;;;; Lesser General Public License for more details. |
12 | ;;;; |
13 | ;;;; You should have received a copy of the GNU Lesser General Public |
14 | ;;;; License along with this library; if not, write to the Free Software |
15 | ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
16 | ;;;; |
17 | |
18 | (define-module (nquads tordf) |
19 | #:use-module (ice-9 match) |
20 | #:use-module (ice-9 textual-ports) |
21 | #:use-module (iri iri) |
22 | #:use-module (nquads parser) |
23 | #:use-module (srfi srfi-9) |
24 | #:use-module (rdf rdf) |
25 | #:use-module (rdf utils) |
26 | #:use-module (web uri) |
27 | #:export (nquads->rdf)) |
28 | |
29 | (define-record-type parser-state |
30 | (make-parser-state bnode-labels blank-node-gen result) |
31 | parser-state? |
32 | (bnode-labels parser-state-bnode-labels) |
33 | (blank-node-gen parser-state-blank-node-gen) |
34 | (result parser-state-result)) |
35 | |
36 | (define* (update-parser-state |
37 | state #:key (bnode-labels (parser-state-bnode-labels state)) |
38 | (blank-node-gen (parser-state-blank-node-gen state)) |
39 | (result (parser-state-result state))) |
40 | (make-parser-state bnode-labels blank-node-gen result)) |
41 | |
42 | (define (create-generate-blank-node) |
43 | (define num 0) |
44 | (lambda () |
45 | (set! num (+ num 1)) |
46 | num)) |
47 | |
48 | (define* (parse-string str #:optional for-iri?) |
49 | (match str |
50 | ((? string? str) str) |
51 | ((component str ...) |
52 | (match component |
53 | ((? string? str1) |
54 | (string-append str1 (parse-string str))) |
55 | (('uchar n) |
56 | (string-append (string (integer->char (string->number n 16))) |
57 | (parse-string str))) |
58 | (('echar e) |
59 | (string-append |
60 | (match e |
61 | ("\\t" "\t") |
62 | ("\\b" "\b") |
63 | ("\\n" "\n") |
64 | ("\\r" "\r") |
65 | ("\\f" "\f") |
66 | ("\\\\" "\\") |
67 | ("\\\"" "\"") |
68 | ("\\'" "'")) |
69 | (parse-string str))))) |
70 | (() ""))) |
71 | |
72 | (define (valid-iri? iri) |
73 | (and (not (string-any (ucs-range->char-set 0 33) iri)) |
74 | (not (string-any #\< iri)) |
75 | (not (string-any #\> iri)) |
76 | (string->uri iri))) |
77 | |
78 | (define (parse-iri iri state) |
79 | (match iri |
80 | ('iriref "" |
81 | (throw 'invalid-iri "")) |
82 | (('iriref iri) |
83 | (if (valid-iri? iri) iri (throw 'invalid-iri iri))) |
84 | (('iriref iri ...) |
85 | (let ((iri (parse-string iri))) |
86 | (if (valid-iri? iri) iri (throw 'invalid-iri iri)))))) |
87 | |
88 | (define (parse-object object state) |
89 | (match object |
90 | (('literal ('string-literal-quote ("^^" iri))) |
91 | (let* ((iri (parse-iri iri state)) |
92 | (object (make-rdf-literal "" iri #f))) |
93 | (update-parser-state state |
94 | #:result object))) |
95 | (('literal ('string-literal-quote ('langtag lang))) |
96 | (let ((object |
97 | (make-rdf-literal |
98 | "" |
99 | "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" |
100 | lang))) |
101 | (update-parser-state state |
102 | #:result object))) |
103 | (('literal ('string-literal-quote str ...)) |
104 | (let ((object |
105 | (make-rdf-literal (parse-string str) |
106 | "http://www.w3.org/2001/XMLSchema#string" #f))) |
107 | (update-parser-state state |
108 | #:result object))) |
109 | (('literal ('string-literal-quote str ...) ("^^" iri)) |
110 | (let* ((iri (parse-iri iri state)) |
111 | (object (make-rdf-literal (parse-string str) iri #f))) |
112 | (update-parser-state state |
113 | #:result object))) |
114 | (('literal ('string-literal-quote str ...) ('langtag lang)) |
115 | (let ((object |
116 | (make-rdf-literal |
117 | (parse-string str) |
118 | "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" |
119 | lang))) |
120 | (update-parser-state state |
121 | #:result object))) |
122 | (('literal) |
123 | (let ((object |
124 | (make-rdf-literal |
125 | "" |
126 | "http://www.w3.org/1999/02/22-rdf-syntax-ns#string" |
127 | #f))) |
128 | (update-parser-state state |
129 | #:result object))) |
130 | (('blank-node-label label) |
131 | (let* ((node |
132 | (or (assoc-ref (parser-state-bnode-labels state) label) |
133 | ((parser-state-blank-node-gen state)))) |
134 | (state |
135 | (if (assoc-ref (parser-state-bnode-labels state) label) |
136 | state |
137 | (update-parser-state state |
138 | #:bnode-labels |
139 | (cons |
140 | (cons label node) |
141 | (parser-state-bnode-labels state)))))) |
142 | (update-parser-state state |
143 | #:bnode-labels |
144 | (if (assoc-ref (parser-state-bnode-labels state) label) |
145 | (parser-state-bnode-labels state) |
146 | (cons (cons label node) |
147 | (parser-state-bnode-labels state))) |
148 | #:result node))) |
149 | (('iriref _ ...) |
150 | (update-parser-state state |
151 | #:result (parse-iri object state))))) |
152 | |
153 | (define (parse-statement statement state) |
154 | (match statement |
155 | ((subject predicate object) |
156 | (let* ((state (parse-object subject state)) |
157 | (subject (parser-state-result state)) |
158 | (state (parse-object predicate state)) |
159 | (predicate (parser-state-result state)) |
160 | (state (parse-object object state)) |
161 | (object (parser-state-result state))) |
162 | (update-parser-state state |
163 | #:result (cons "@default" (make-rdf-triple subject predicate object))))) |
164 | ((subject predicate object graph-name) |
165 | (let* ((state (parse-object subject state)) |
166 | (subject (parser-state-result state)) |
167 | (state (parse-object predicate state)) |
168 | (predicate (parser-state-result state)) |
169 | (state (parse-object object state)) |
170 | (object (parser-state-result state)) |
171 | (state (parse-object graph-name state)) |
172 | (graph-name (parser-state-result state))) |
173 | (update-parser-state state |
174 | #:result (cons graph-name (make-rdf-triple subject predicate object))))))) |
175 | |
176 | (define (parse-nquads-doc parse-tree state) |
177 | (let loop ((parse-tree parse-tree) (state state) (default-graph '()) |
178 | (named-graphs '())) |
179 | (match parse-tree |
180 | ('() (make-rdf-dataset default-graph named-graphs)) |
181 | ((('statement statement ...) parse-tree ...) |
182 | (let* ((state (parse-statement statement state)) |
183 | (res (parser-state-result state)) |
184 | (graph-name (car res)) |
185 | (triple (cdr res))) |
186 | (if (equal? graph-name "@default") |
187 | (loop |
188 | parse-tree |
189 | state |
190 | (cons triple default-graph) |
191 | named-graphs) |
192 | (loop |
193 | parse-tree |
194 | state |
195 | default-graph |
196 | (alist-set |
197 | named-graphs graph-name |
198 | (cons triple (assoc-ref named-graphs graph-name))))))) |
199 | (('statement _ ...) |
200 | (loop (list parse-tree) state default-graph named-graphs)) |
201 | (((parse-tree ...)) |
202 | (loop parse-tree state default-graph named-graphs))))) |
203 | |
204 | (define (nquads->rdf str-or-file) |
205 | (define str |
206 | (cond |
207 | ((file-exists? str-or-file) (call-with-input-file str-or-file get-string-all)) |
208 | ((string? str-or-file) str-or-file))) |
209 | |
210 | (let ((parse-tree (parse-nquads str))) |
211 | (parse-nquads-doc |
212 | parse-tree (make-parser-state '() (create-generate-blank-node) #f)))) |
213 |