;;;; Copyright (C) 2020 Julien Lepiller ;;;; ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public ;;;; License as published by the Free Software Foundation; either ;;;; version 3 of the License, or (at your option) any later version. ;;;; ;;;; This library is distributed in the hope that it will be useful, ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;;; Lesser General Public License for more details. ;;;; ;;;; You should have received a copy of the GNU Lesser General Public ;;;; License along with this library; if not, write to the Free Software ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ;;;; (define-module (nquads tordf) #:use-module (ice-9 match) #:use-module (ice-9 textual-ports) #:use-module (iri iri) #:use-module (nquads parser) #:use-module (srfi srfi-9) #:use-module (rdf rdf) #:use-module (rdf utils) #:use-module (web uri) #:export (nquads->rdf)) (define-record-type parser-state (make-parser-state bnode-labels blank-node-gen result) parser-state? (bnode-labels parser-state-bnode-labels) (blank-node-gen parser-state-blank-node-gen) (result parser-state-result)) (define* (update-parser-state state #:key (bnode-labels (parser-state-bnode-labels state)) (blank-node-gen (parser-state-blank-node-gen state)) (result (parser-state-result state))) (make-parser-state bnode-labels blank-node-gen result)) (define (create-generate-blank-node) (define num 0) (lambda () (set! num (+ num 1)) num)) (define* (parse-string str #:optional for-iri?) (match str ((? string? str) str) ((component str ...) (match component ((? string? str1) (string-append str1 (parse-string str))) (('uchar n) (string-append (string (integer->char (string->number n 16))) (parse-string str))) (('echar e) (string-append (match e ("\\t" "\t") ("\\b" "\b") ("\\n" "\n") ("\\r" "\r") ("\\f" "\f") ("\\\\" "\\") ("\\\"" "\"") ("\\'" "'")) (parse-string str))))) (() ""))) (define (valid-iri? iri) (and (not (string-any (ucs-range->char-set 0 33) iri)) (not (string-any #\< iri)) (not (string-any #\> iri)) (string->uri iri))) (define (parse-iri iri state) (match iri ('iriref "" (throw 'invalid-iri "")) (('iriref iri) (if (valid-iri? iri) iri (throw 'invalid-iri iri))) (('iriref iri ...) (let ((iri (parse-string iri))) (if (valid-iri? iri) iri (throw 'invalid-iri iri)))))) (define (parse-object object state) (match object (('literal ('string-literal-quote ("^^" iri))) (let* ((iri (parse-iri iri state)) (object (make-rdf-literal "" iri #f))) (update-parser-state state #:result object))) (('literal ('string-literal-quote ('langtag lang))) (let ((object (make-rdf-literal "" "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" lang))) (update-parser-state state #:result object))) (('literal ('string-literal-quote str ...)) (let ((object (make-rdf-literal (parse-string str) "http://www.w3.org/2001/XMLSchema#string" #f))) (update-parser-state state #:result object))) (('literal ('string-literal-quote str ...) ("^^" iri)) (let* ((iri (parse-iri iri state)) (object (make-rdf-literal (parse-string str) iri #f))) (update-parser-state state #:result object))) (('literal ('string-literal-quote str ...) ('langtag lang)) (let ((object (make-rdf-literal (parse-string str) "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" lang))) (update-parser-state state #:result object))) (('literal) (let ((object (make-rdf-literal "" "http://www.w3.org/1999/02/22-rdf-syntax-ns#string" #f))) (update-parser-state state #:result object))) (('blank-node-label label) (let* ((node (or (assoc-ref (parser-state-bnode-labels state) label) ((parser-state-blank-node-gen state)))) (state (if (assoc-ref (parser-state-bnode-labels state) label) state (update-parser-state state #:bnode-labels (cons (cons label node) (parser-state-bnode-labels state)))))) (update-parser-state state #:bnode-labels (if (assoc-ref (parser-state-bnode-labels state) label) (parser-state-bnode-labels state) (cons (cons label node) (parser-state-bnode-labels state))) #:result node))) (('iriref _ ...) (update-parser-state state #:result (parse-iri object state))))) (define (parse-statement statement state) (match statement ((subject predicate object) (let* ((state (parse-object subject state)) (subject (parser-state-result state)) (state (parse-object predicate state)) (predicate (parser-state-result state)) (state (parse-object object state)) (object (parser-state-result state))) (update-parser-state state #:result (cons "@default" (make-rdf-triple subject predicate object))))) ((subject predicate object graph-name) (let* ((state (parse-object subject state)) (subject (parser-state-result state)) (state (parse-object predicate state)) (predicate (parser-state-result state)) (state (parse-object object state)) (object (parser-state-result state)) (state (parse-object graph-name state)) (graph-name (parser-state-result state))) (update-parser-state state #:result (cons graph-name (make-rdf-triple subject predicate object))))))) (define (parse-nquads-doc parse-tree state) (let loop ((parse-tree parse-tree) (state state) (default-graph '()) (named-graphs '())) (match parse-tree ('() (make-rdf-dataset default-graph named-graphs)) ((('statement statement ...) parse-tree ...) (let* ((state (parse-statement statement state)) (res (parser-state-result state)) (graph-name (car res)) (triple (cdr res))) (if (equal? graph-name "@default") (loop parse-tree state (cons triple default-graph) named-graphs) (loop parse-tree state default-graph (alist-set named-graphs graph-name (cons triple (or (assoc-ref named-graphs graph-name)'()))))))) (('statement _ ...) (loop (list parse-tree) state default-graph named-graphs)) (((parse-tree ...)) (loop parse-tree state default-graph named-graphs))))) (define (nquads->rdf str-or-file) (define str (cond ((port? str-or-file) (get-string-all str-or-file)) ((string? str-or-file) str-or-file))) (let ((parse-tree (parse-nquads str))) (parse-nquads-doc parse-tree (make-parser-state '() (create-generate-blank-node) #f))))