tordf.scm
1 | ;;;; Copyright (C) 2020 Julien Lepiller <julien@lepiller.eu> |
2 | ;;;; |
3 | ;;;; This library is free software; you can redistribute it and/or |
4 | ;;;; modify it under the terms of the GNU Lesser General Public |
5 | ;;;; License as published by the Free Software Foundation; either |
6 | ;;;; version 3 of the License, or (at your option) any later version. |
7 | ;;;; |
8 | ;;;; This library is distributed in the hope that it will be useful, |
9 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
11 | ;;;; Lesser General Public License for more details. |
12 | ;;;; |
13 | ;;;; You should have received a copy of the GNU Lesser General Public |
14 | ;;;; License along with this library; if not, write to the Free Software |
15 | ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
16 | ;;;; |
17 | |
18 | (define-module (nquads tordf) |
19 | #:use-module (ice-9 match) |
20 | #:use-module (ice-9 textual-ports) |
21 | #:use-module (iri iri) |
22 | #:use-module (nquads parser) |
23 | #:use-module (srfi srfi-9) |
24 | #:use-module (rdf rdf) |
25 | #:use-module (rdf utils) |
26 | #:use-module (web uri) |
27 | #:export (nquads->rdf)) |
28 | |
29 | (define-record-type parser-state |
30 | (make-parser-state bnode-labels blank-node-gen result) |
31 | parser-state? |
32 | (bnode-labels parser-state-bnode-labels) |
33 | (blank-node-gen parser-state-blank-node-gen) |
34 | (result parser-state-result)) |
35 | |
36 | (define* (update-parser-state |
37 | state #:key (bnode-labels (parser-state-bnode-labels state)) |
38 | (blank-node-gen (parser-state-blank-node-gen state)) |
39 | (result (parser-state-result state))) |
40 | (make-parser-state bnode-labels blank-node-gen result)) |
41 | |
42 | (define (create-generate-blank-node) |
43 | (define num 0) |
44 | (lambda () |
45 | (set! num (+ num 1)) |
46 | num)) |
47 | |
48 | (define* (parse-string str #:optional for-iri?) |
49 | (match str |
50 | ((? string? str) str) |
51 | ((component str ...) |
52 | (match component |
53 | ((? string? str1) |
54 | (string-append str1 (parse-string str))) |
55 | (('uchar n) |
56 | (string-append (string (integer->char (string->number n 16))) |
57 | (parse-string str))) |
58 | (('echar e) |
59 | (string-append |
60 | (match e |
61 | ("\\t" "\t") |
62 | ("\\b" "\b") |
63 | ("\\n" "\n") |
64 | ("\\r" "\r") |
65 | ("\\f" "\f") |
66 | ("\\\\" "\\") |
67 | ("\\\"" "\"") |
68 | ("\\'" "'")) |
69 | (parse-string str))))) |
70 | (() ""))) |
71 | |
72 | (define (valid-iri? iri) |
73 | (and (not (string-any (ucs-range->char-set 0 33) iri)) |
74 | (not (string-any #\< iri)) |
75 | (not (string-any #\> iri)) |
76 | (string->uri iri))) |
77 | |
78 | (define (parse-iri iri state) |
79 | (match iri |
80 | ('iriref "" |
81 | (throw 'invalid-iri "")) |
82 | (('iriref iri) |
83 | (if (valid-iri? iri) iri (throw 'invalid-iri iri))) |
84 | (('iriref iri ...) |
85 | (let ((iri (parse-string iri))) |
86 | (if (valid-iri? iri) iri (throw 'invalid-iri iri)))))) |
87 | |
88 | (define (parse-object object state) |
89 | (pk 'object object) |
90 | (match object |
91 | (('literal ('string-literal-quote ("^^" iri))) |
92 | (let* ((iri (parse-iri iri state)) |
93 | (object (make-rdf-literal "" iri #f))) |
94 | (update-parser-state state |
95 | #:result object))) |
96 | (('literal ('string-literal-quote ('langtag lang))) |
97 | (let ((object |
98 | (make-rdf-literal |
99 | "" |
100 | "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" |
101 | lang))) |
102 | (update-parser-state state |
103 | #:result object))) |
104 | (('literal ('string-literal-quote str ...)) |
105 | (let ((object |
106 | (make-rdf-literal (parse-string str) |
107 | "http://www.w3.org/2001/XMLSchema#string" #f))) |
108 | (update-parser-state state |
109 | #:result object))) |
110 | (('literal ('string-literal-quote str ...) ("^^" iri)) |
111 | (let* ((iri (parse-iri iri state)) |
112 | (object (make-rdf-literal (parse-string str) iri #f))) |
113 | (update-parser-state state |
114 | #:result object))) |
115 | (('literal ('string-literal-quote str ...) ('langtag lang)) |
116 | (let ((object |
117 | (make-rdf-literal |
118 | (parse-string str) |
119 | "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" |
120 | lang))) |
121 | (update-parser-state state |
122 | #:result object))) |
123 | (('literal) |
124 | (let ((object |
125 | (make-rdf-literal |
126 | "" |
127 | "http://www.w3.org/1999/02/22-rdf-syntax-ns#string" |
128 | #f))) |
129 | (update-parser-state state |
130 | #:result object))) |
131 | (('blank-node-label label) |
132 | (let* ((node |
133 | (or (assoc-ref (parser-state-bnode-labels state) label) |
134 | ((parser-state-blank-node-gen state)))) |
135 | (state |
136 | (if (assoc-ref (parser-state-bnode-labels state) label) |
137 | state |
138 | (update-parser-state state |
139 | #:bnode-labels |
140 | (cons |
141 | (cons label node) |
142 | (parser-state-bnode-labels state)))))) |
143 | (update-parser-state state |
144 | #:bnode-labels |
145 | (if (assoc-ref (parser-state-bnode-labels state) label) |
146 | (parser-state-bnode-labels state) |
147 | (cons (cons label node) |
148 | (parser-state-bnode-labels state))) |
149 | #:result node))) |
150 | (('iriref _ ...) |
151 | (update-parser-state state |
152 | #:result (parse-iri object state))))) |
153 | |
154 | (define (parse-statement statement state) |
155 | (match statement |
156 | ((subject predicate object) |
157 | (let* ((state (parse-object subject state)) |
158 | (subject (parser-state-result state)) |
159 | (state (parse-object predicate state)) |
160 | (predicate (parser-state-result state)) |
161 | (state (parse-object object state)) |
162 | (object (parser-state-result state))) |
163 | (update-parser-state state |
164 | #:result (cons "@default" (make-rdf-triple subject predicate object))))) |
165 | ((subject predicate object graph-name) |
166 | (let* ((state (parse-object subject state)) |
167 | (subject (parser-state-result state)) |
168 | (state (parse-object predicate state)) |
169 | (predicate (parser-state-result state)) |
170 | (state (parse-object object state)) |
171 | (object (parser-state-result state)) |
172 | (state (parse-object graph-name state)) |
173 | (graph-name (parser-state-result state))) |
174 | (update-parser-state state |
175 | #:result (cons graph-name (make-rdf-triple subject predicate object))))))) |
176 | |
177 | (define (parse-nquads-doc parse-tree state) |
178 | (let loop ((parse-tree parse-tree) (state state) (default-graph '()) |
179 | (named-graphs '())) |
180 | (pk parse-tree) |
181 | (match parse-tree |
182 | ('() (make-rdf-dataset default-graph named-graphs)) |
183 | ((('statement statement ...) parse-tree ...) |
184 | (let* ((state (parse-statement statement state)) |
185 | (res (parser-state-result state)) |
186 | (graph-name (car res)) |
187 | (triple (cdr res))) |
188 | (if (equal? graph-name "@default") |
189 | (loop |
190 | parse-tree |
191 | state |
192 | (cons triple default-graph) |
193 | named-graphs) |
194 | (loop |
195 | parse-tree |
196 | state |
197 | default-graph |
198 | (alist-set |
199 | named-graphs graph-name |
200 | (cons triple (assoc-ref named-graphs graph-name))))))) |
201 | (('statement _ ...) |
202 | (loop (list parse-tree) state default-graph named-graphs)) |
203 | (((parse-tree ...)) |
204 | (loop parse-tree state default-graph named-graphs))))) |
205 | |
206 | (define (nquads->rdf str-or-file) |
207 | (define str |
208 | (cond |
209 | ((file-exists? str-or-file) (call-with-input-file str-or-file get-string-all)) |
210 | ((string? str-or-file) str-or-file))) |
211 | |
212 | (let ((parse-tree (parse-nquads str))) |
213 | (parse-nquads-doc |
214 | parse-tree (make-parser-state '() (create-generate-blank-node) #f)))) |
215 |