Add initial turtle parser
Makefile.am
| 3 | 3 | SOURCES= \ | |
| 4 | 4 | rdf/rdf.scm \ | |
| 5 | 5 | rdf/xsd.scm \ | |
| 6 | + | turtle/parser.scm \ | |
| 7 | + | turtle/tordf.scm \ | |
| 6 | 8 | iri/iri.scm \ | |
| 7 | 9 | ||
| 8 | 10 | TEST_EXTENSIONS = .scm |
turtle/parser.scm unknown status 1
| 1 | + | ;;;; Copyright (C) 2020 Julien Lepiller <julien@lepiller.eu> | |
| 2 | + | ;;;; | |
| 3 | + | ;;;; This library is free software; you can redistribute it and/or | |
| 4 | + | ;;;; modify it under the terms of the GNU Lesser General Public | |
| 5 | + | ;;;; License as published by the Free Software Foundation; either | |
| 6 | + | ;;;; version 3 of the License, or (at your option) any later version. | |
| 7 | + | ;;;; | |
| 8 | + | ;;;; This library is distributed in the hope that it will be useful, | |
| 9 | + | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 10 | + | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 11 | + | ;;;; Lesser General Public License for more details. | |
| 12 | + | ;;;; | |
| 13 | + | ;;;; You should have received a copy of the GNU Lesser General Public | |
| 14 | + | ;;;; License along with this library; if not, write to the Free Software | |
| 15 | + | ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
| 16 | + | ;;;; | |
| 17 | + | ||
| 18 | + | (define-module (turtle parser) | |
| 19 | + | #:use-module (ice-9 peg) | |
| 20 | + | #:export (parse-turtle)) | |
| 21 | + | ||
| 22 | + | ;; Productions for terminals | |
| 23 | + | ;; [18] IRIREF ::= '<' ([^#x00-#x20<>"{}|^`\] | UCHAR)* '>' /* #x00=NULL #01-#x1F=control codes #x20=space */ | |
| 24 | + | (define-peg-pattern iriref all | |
| 25 | + | (and (ignore "<") | |
| 26 | + | (* (or "!" (range #\x23 #\x3b) "=" (range #\x3f #\x5b) "]" "_" | |
| 27 | + | (range #\x61 #\x7a) (range #\x7e #\x10ffff) uchar)) | |
| 28 | + | (ignore ">"))) | |
| 29 | + | ;; [139s] PNAME_NS ::= PN_PREFIX? ':' | |
| 30 | + | (define-peg-pattern pname-ns all (and (? pn-prefix) (ignore ":"))) | |
| 31 | + | ;; [140s] PNAME_LN ::= PNAME_NS PN_LOCAL | |
| 32 | + | (define-peg-pattern pname-ln all (and pname-ns pn-local)) | |
| 33 | + | ;; [141s] BLANK_NODE_LABEL ::= '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)? | |
| 34 | + | (define-peg-pattern blank-node-label all | |
| 35 | + | (and "_:" (or pn-chars-u (range #\0 #\9)) (* (or pn-chars (and (+ ".") pn-chars))))) ; a run of dots is fine as long as a PN_CHARS follows it | |
| 36 | + | ;; [144s] LANGTAG ::= '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)* | |
| 37 | + | (define-peg-pattern langtag all | |
| 38 | + | (and "@" (+ (or (range #\a #\z) (range #\A #\Z))) | |
| 39 | + | (* (and "-" (+ (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9))))))) ; each subtag is one or more characters long | |
| 40 | + | ;; [19] INTEGER ::= [+-]? [0-9]+ | |
| 41 | + | (define-peg-pattern integer all (and (? (or "+" "-")) (+ (range #\0 #\9)))) | |
| 42 | + | ;; [20] DECIMAL ::= [+-]? [0-9]* '.' [0-9]+ | |
| 43 | + | (define-peg-pattern decimal all | |
| 44 | + | (and (? (or "+" "-")) (* (range #\0 #\9)) "." (+ (range #\0 #\9)))) | |
| 45 | + | ;; [21] DOUBLE ::= [+-]? ([0-9]+ '.' [0-9]* EXPONENT | '.' [0-9]+ EXPONENT | [0-9]+ EXPONENT) | |
| 46 | + | (define-peg-pattern double all | |
| 47 | + | (and (? (or "+" "-")) | |
| 48 | + | (or (and (+ (range #\0 #\9)) "." (* (range #\0 #\9)) exponent) | |
| 49 | + | (and "." (+ (range #\0 #\9)) exponent) | |
| 50 | + | (and (+ (range #\0 #\9)) exponent)))) | |
| 51 | + | ;; [154s] EXPONENT ::= [eE] [+-]? [0-9]+ | |
| 52 | + | (define-peg-pattern exponent body | |
| 53 | + | (and (or "e" "E") (? (or "+" "-")) (+ (range #\0 #\9)))) | |
| 54 | + | ;; [22] STRING_LITERAL_QUOTE ::= '"' ([^#x22#x5C#xA#xD] | ECHAR | UCHAR)* '"' /* #x22=" #x5C=\ #xA=new line #xD=carriage return */ | |
| 55 | + | (define-peg-pattern string-literal-quote all | |
| 56 | + | (and (ignore "\"") | |
| 57 | + | (* (or (range #\x00 #\x09) (range #\x0b #\x0c) (range #\x0e #\x21) ; the gaps leave out #xA and #xD | |
| 58 | + | (range #\x23 #\x5b) (range #\x5d #\x10ffff) echar uchar)) | |
| 59 | + | (ignore "\""))) | |
| 60 | + | ;; [23] STRING_LITERAL_SINGLE_QUOTE ::= "'" ([^#x27#x5C#xA#xD] | ECHAR | UCHAR)* "'" /* #x27=' #x5C=\ #xA=new line #xD=carriage return */ | |
| 61 | + | (define-peg-pattern string-literal-single-quote all | |
| 62 | + | (and (ignore "'") | |
| 63 | + | (* (or (range #\x00 #\x09) (range #\x0b #\x0c) (range #\x0e #\x26) ; the gaps leave out #xA and #xD | |
| 64 | + | (range #\x28 #\x5b) (range #\x5d #\x10ffff) echar uchar)) | |
| 65 | + | (ignore "'"))) | |
| 66 | + | ;; [24] STRING_LITERAL_LONG_SINGLE_QUOTE ::= "'''" (("'" | "''")? ([^'\] | ECHAR | UCHAR))* "'''" | |
| 67 | + | (define-peg-pattern string-literal-long-single-quote all | |
| 68 | + | (and (ignore "'''") | |
| 69 | + | (* (and (? (or "''" "'")) | |
| 70 | + | (or (range #\x00 #\x26) (range #\x28 #\x5b) | |
| 71 | + | (range #\x5d #\x10ffff) echar uchar))) | |
| 72 | + | (ignore "'''"))) | |
| 73 | + | ;; [25] STRING_LITERAL_LONG_QUOTE ::= '"""' (('"' | '""')? ([^"\] | ECHAR | UCHAR))* '"""' | |
| 74 | + | (define-peg-pattern string-literal-long-quote all | |
| 75 | + | (and (ignore "\"\"\"") | |
| 76 | + | (* (and (? (or "\"\"" "\"")) | |
| 77 | + | (or (range #\x00 #\x21) (range #\x23 #\x5b) | |
| 78 | + | (range #\x5d #\x10ffff) echar uchar))) | |
| 79 | + | (ignore "\"\"\""))) | |
| 80 | + | ;; [26] UCHAR ::= '\u' HEX HEX HEX HEX | '\U' HEX HEX HEX HEX HEX HEX HEX HEX | |
| 81 | + | (define-peg-pattern uchar body | |
| 82 | + | (or (and "\\u" hex hex hex hex) | |
| 83 | + | (and "\\U" hex hex hex hex hex hex hex hex))) | |
| 84 | + | ;; [159s] ECHAR ::= '\' [tbnrf"'\] | |
| 85 | + | (define-peg-pattern echar body | |
| 86 | + | (or "\\t" "\\b" "\\n" "\\r" "\\f" "\\\"" "\\'" "\\\\")) | |
| 87 | + | ;; [161s] WS ::= #x20 | #x9 | #xD | #xA /* #x20=space #x9=character tabulation #xD=carriage return #xA=new line */ | |
| 88 | + | (define-peg-pattern ws body (or " " "\t" "\r" "\n")) | |
| 89 | + | ;; [162s] ANON ::= '[' WS* ']' | |
| 90 | + | (define-peg-pattern anon all (and "[" (* ws) "]")) | |
| 91 | + | ;; [163s] PN_CHARS_BASE ::= [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] | |
| 92 | + | (define-peg-pattern pn-chars-base body | |
| 93 | + | (or (range #\A #\Z) (range #\a #\z) (range #\x00c0 #\x00d6) | |
| 94 | + | (range #\x00d8 #\x00f6) (range #\x00f8 #\x02ff) (range #\x0370 #\x037d) | |
| 95 | + | (range #\x037f #\x1fff) (range #\x200c #\x200d) (range #\x2070 #\x218f) | |
| 96 | + | (range #\x2c00 #\x2fef) (range #\x3001 #\xd7ff) (range #\xf900 #\xfdcf) | |
| 97 | + | (range #\xfdf0 #\xfffd) (range #\x10000 #\xeffff))) | |
| 98 | + | ;; [164s] PN_CHARS_U ::= PN_CHARS_BASE | '_' | |
| 99 | + | (define-peg-pattern pn-chars-u body (or pn-chars-base "_")) | |
| 100 | + | ;; [166s] PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040] | |
| 101 | + | (define-peg-pattern pn-chars body | |
| 102 | + | (or pn-chars-u "-" (range #\0 #\9) (range #\x00b7 #\x00b7) (range #\x0300 #\x036f) ; #x00B7 written as a one-character range to stay encoding-safe | |
| 103 | + | (range #\x203f #\x2040))) | |
| 104 | + | ;; [167s] PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)? | |
| 105 | + | (define-peg-pattern pn-prefix body | |
| 106 | + | (and pn-chars-base (* (or pn-chars (and (+ ".") pn-chars))))) ; dots may repeat inside a prefix but never end it | |
| 107 | + | ;; [168s] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))? | |
| 108 | + | (define-peg-pattern pn-local body | |
| 109 | + | (and (or pn-chars-u ":" (range #\0 #\9) plx) | |
| 110 | + | (* (or pn-chars ":" plx (and (+ ".") (or pn-chars ":" plx)))))) ; plx is the PLX rule, not the literal text; dots may repeat but never end the name | |
| 111 | + | ;; [169s] PLX ::= PERCENT | PN_LOCAL_ESC | |
| 112 | + | (define-peg-pattern plx body (or percent pn-local-esc)) | |
| 113 | + | ;; [170s] PERCENT ::= '%' HEX HEX | |
| 114 | + | (define-peg-pattern percent body (and "%" hex hex)) | |
| 115 | + | ;; [171s] HEX ::= [0-9] | [A-F] | [a-f] | |
| 116 | + | (define-peg-pattern hex body (or (range #\0 #\9) (range #\a #\f) (range #\A #\F))) | |
| 117 | + | ;; [172s] PN_LOCAL_ESC ::= '\' ('_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%') | |
| 118 | + | (define-peg-pattern pn-local-esc body | |
| 119 | + | (and "\\" (or "~" "." "-" "!" "$" "&" "'" "(" ")" "*" "+" "," ";" "=" "/" | |
| 120 | + | "?" "#" "@" "%" "_"))) | |
| 121 | + | ||
| 122 | + | (define-peg-pattern comment body (and "#" (* (or (range #\x00 #\x09) | |
| 123 | + | (range #\x0B #\x0C) | |
| 124 | + | (range #\x0E #\x10FFFF))))) ; "#" plus every following character other than #xA and #xD | |
| 125 | + | (define-peg-pattern WS none (* (or comment ws))) ; zero or more comments / whitespace characters, declared with capture type none | |
| 126 | + | (define-peg-pattern unrecognized body (range #\x00 #\x10ffff)) ; any single character | |
| 127 | + | ||
| 128 | + | ||
| 129 | + | ;; [1] turtleDoc ::= statement* | |
| 130 | + | (define-peg-pattern turtle-doc body (and WS (* (and statement WS)))) | |
| 131 | + | ;; [2] statement ::= directive | triples '.' | |
| 132 | + | (define-peg-pattern statement body | |
| 133 | + | (or directive (and triples WS (ignore ".")) (* unrecognized))) ; fallback: when neither form matches, (* unrecognized) takes all remaining input | |
| 134 | + | ;; [3] directive ::= prefixID | base | sparqlPrefix | sparqlBase | |
| 135 | + | (define-peg-pattern directive body (or prefix-id base sparql-prefix sparql-base)) | |
| 136 | + | ;; [4] prefixID ::= '@prefix' PNAME_NS IRIREF '.' | |
| 137 | + | (define-peg-pattern prefix-id all | |
| 138 | + | (and (ignore "@prefix") WS pname-ns WS iriref WS (ignore "."))) | |
| 139 | + | ;; [5] base ::= '@base' IRIREF '.' | |
| 140 | + | (define-peg-pattern base all | |
| 141 | + | (and (ignore "@base") WS iriref WS (ignore "."))) | |
| 142 | + | ;; [5s] sparqlBase ::= "BASE" IRIREF | |
| 143 | + | (define-peg-pattern sparql-base all | |
| 144 | + | (and (ignore (and (or "b" "B") (or "a" "A") (or "s" "S") (or "e" "E"))) | |
| 145 | + | WS iriref)) | |
| 146 | + | ;; [6s] sparqlPrefix ::= "PREFIX" PNAME_NS IRIREF | |
| 147 | + | (define-peg-pattern sparql-prefix all | |
| 148 | + | (and (ignore (and (or "p" "P") (or "r" "R") (or "e" "E") (or "f" "F") | |
| 149 | + | (or "i" "I") (or "x" "X"))) | |
| 150 | + | WS pname-ns WS iriref)) | |
| 151 | + | ;; [6] triples ::= subject predicateObjectList | blankNodePropertyList predicateObjectList? | |
| 152 | + | (define-peg-pattern triples all | |
| 153 | + | (or (and subject WS predicate-object-list) | |
| 154 | + | (and blank-node-property-list WS (? predicate-object-list)))) | |
| 155 | + | ;; [7] predicateObjectList ::= verb objectList (';' (verb objectList)?)* | |
| 156 | + | (define-peg-pattern predicate-object-list all | |
| 157 | + | (and verb WS object-list | |
| 158 | + | (* (and WS (ignore ";") WS (? (and verb WS object-list)))))) | |
| 159 | + | ;; [8] objectList ::= object (',' object)* | |
| 160 | + | (define-peg-pattern object-list all | |
| 161 | + | (and object (* (and WS (ignore ",") WS object)))) | |
| 162 | + | ;; [9] verb ::= predicate | 'a' | |
| 163 | + | (define-peg-pattern verb all (or predicate "a")) | |
| 164 | + | ;; [10] subject ::= iri | BlankNode | collection | |
| 165 | + | (define-peg-pattern subject all (or iri blank-node collection)) | |
| 166 | + | ;; [11] predicate ::= iri | |
| 167 | + | (define-peg-pattern predicate all iri) | |
| 168 | + | ;; [12] object ::= iri | BlankNode | collection | blankNodePropertyList | literal | |
| 169 | + | (define-peg-pattern object all | |
| 170 | + | (or iri blank-node collection blank-node-property-list literal)) | |
| 171 | + | ;; [13] literal ::= RDFLiteral | NumericLiteral | BooleanLiteral | |
| 172 | + | (define-peg-pattern literal body | |
| 173 | + | (or rdf-literal numeric-literal boolean-literal)) | |
| 174 | + | ;; [14] blankNodePropertyList ::= '[' predicateObjectList ']' | |
| 175 | + | (define-peg-pattern blank-node-property-list all | |
| 176 | + | (and (ignore "[") WS predicate-object-list WS (ignore "]"))) | |
| 177 | + | ;; [15] collection ::= '(' object* ')' | |
| 178 | + | (define-peg-pattern collection all | |
| 179 | + | (and (ignore "(") WS (* (and object WS)) (ignore ")"))) | |
| 180 | + | ;; [16] NumericLiteral ::= INTEGER | DECIMAL | DOUBLE | |
| 181 | + | (define-peg-pattern numeric-literal all (or double decimal integer)) ; PEG choice is ordered: with integer first, 1.5 and 1e3 would stop after the 1 | |
| 182 | + | ;; [128s] RDFLiteral ::= String (LANGTAG | '^^' iri)? | |
| 183 | + | (define-peg-pattern rdf-literal all | |
| 184 | + | (and string-pat WS (? (or langtag (and "^^" WS iri))))) | |
| 185 | + | ;; [133s] BooleanLiteral ::= 'true' | 'false' | |
| 186 | + | (define-peg-pattern boolean-literal all (or "true" "false")) | |
| 187 | + | ;; [17] String ::= STRING_LITERAL_QUOTE | STRING_LITERAL_SINGLE_QUOTE | STRING_LITERAL_LONG_SINGLE_QUOTE | STRING_LITERAL_LONG_QUOTE | |
| 188 | + | (define-peg-pattern string-pat all | |
| 189 | + | (or string-literal-long-single-quote string-literal-long-quote | |
| 190 | + | string-literal-quote string-literal-single-quote)) | |
| 191 | + | ;; [135s] iri ::= IRIREF | PrefixedName | |
| 192 | + | (define-peg-pattern iri all (or iriref prefixed-name)) | |
| 193 | + | ;; [136s] PrefixedName ::= PNAME_LN | PNAME_NS | |
| 194 | + | (define-peg-pattern prefixed-name all (or pname-ln pname-ns)) | |
| 195 | + | ;; [137s] BlankNode ::= BLANK_NODE_LABEL | ANON | |
| 196 | + | (define-peg-pattern blank-node all (or blank-node-label anon)) | |
| 197 | + | ||
| 198 | + | ||
| 199 | + | ||
| 200 | + | (define (parse-turtle str) ; parse tree obtained by matching turtle-doc against STR | |
| 201 | + | (let ((matched (match-pattern turtle-doc str))) (peg:tree matched))) |
turtle/tordf.scm unknown status 1
| 1 | + | ;;;; Copyright (C) 2020 Julien Lepiller <julien@lepiller.eu> | |
| 2 | + | ;;;; | |
| 3 | + | ;;;; This library is free software; you can redistribute it and/or | |
| 4 | + | ;;;; modify it under the terms of the GNU Lesser General Public | |
| 5 | + | ;;;; License as published by the Free Software Foundation; either | |
| 6 | + | ;;;; version 3 of the License, or (at your option) any later version. | |
| 7 | + | ;;;; | |
| 8 | + | ;;;; This library is distributed in the hope that it will be useful, | |
| 9 | + | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 10 | + | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 11 | + | ;;;; Lesser General Public License for more details. | |
| 12 | + | ;;;; | |
| 13 | + | ;;;; You should have received a copy of the GNU Lesser General Public | |
| 14 | + | ;;;; License along with this library; if not, write to the Free Software | |
| 15 | + | ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
| 16 | + | ;;;; | |
| 17 | + | ||
| 18 | + | (define-module (turtle tordf) | |
| 19 | + | #:use-module (ice-9 match) | |
| 20 | + | #:use-module (ice-9 textual-ports) | |
| 21 | + | #:use-module (iri iri) | |
| 22 | + | #:use-module (turtle parser) | |
| 23 | + | #:use-module (srfi srfi-9) | |
| 24 | + | #:use-module (rdf rdf) | |
| 25 | + | #:use-module ((rdf xsd) #:prefix xsd:) | |
| 26 | + | #:export (turtle->rdf)) | |
| 27 | + | ||
| 28 | + | (define-record-type parser-state | |
| 29 | + | (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate | |
| 30 | + | blank-node-gen result) | |
| 31 | + | parser-state? | |
| 32 | + | (base-uri parser-state-base-uri) ; base handed to resolve-iri | |
| 33 | + | (namespaces parser-state-namespaces) ; alist: prefix -> namespace IRI | |
| 34 | + | (bnode-labels parser-state-bnode-labels) ; alist: blank node label -> generated node | |
| 35 | + | (cur-subject parser-state-cur-subject) ; subject of the triples currently being built | |
| 36 | + | (cur-predicate parser-state-cur-predicate) ; predicate of the triples currently being built | |
| 37 | + | (blank-node-gen parser-state-blank-node-gen) ; thunk, called to obtain a new blank node | |
| 38 | + | (result parser-state-result)) ; triples built so far, newest first | |
| 39 | + | ||
| 40 | + | (define* (update-parser-state | |
| 41 | + | state #:key (base-uri (parser-state-base-uri state)) ; every key defaults to the value already held by STATE | |
| 42 | + | (namespaces (parser-state-namespaces state)) | |
| 43 | + | (bnode-labels (parser-state-bnode-labels state)) | |
| 44 | + | (cur-subject (parser-state-cur-subject state)) | |
| 45 | + | (cur-predicate (parser-state-cur-predicate state)) | |
| 46 | + | (blank-node-gen (parser-state-blank-node-gen state)) | |
| 47 | + | (result (parser-state-result state))) | |
| 48 | + | (make-parser-state base-uri namespaces bnode-labels cur-subject cur-predicate | |
| 49 | + | blank-node-gen result)) ; builds a new record; STATE itself is left untouched | |
| 50 | + | ||
| 51 | + | (define (create-generate-blank-node) | |
| 52 | + | ;; Each generator owns its counter: successive calls yield 1, 2, 3, ... | |
| 53 | + | (let ((counter 0)) | |
| 54 | + | (lambda () | |
| 55 | + | (set! counter (1+ counter)) counter))) | |
| 56 | + | ||
| 57 | + | (define (add-ns-to-state state ns iri) | |
| 58 | + | ;; Return a copy of STATE whose namespace alist starts with the pair (NS . IRI). | |
| 59 | + | (update-parser-state state | |
| 60 | + | #:namespaces (cons (cons ns iri) (parser-state-namespaces state)))) | |
| 61 | + | ||
| 62 | + | ||
| 63 | + | (define (parse-iri iri state) | |
| 64 | + | ;; Returns an alist: "iri" -> the resolved IRI or blank node, "state" -> the possibly updated state. | |
| 65 | + | (match iri | |
| 66 | + | (('iri ('prefixed-name ('pname-ln ('pname-ns ns) suffix))) | |
| 67 | + | `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) ns) | |
| 68 | + | suffix)) | |
| 69 | + | ("state" . ,state))) | |
| 70 | + | (('iri ('prefixed-name ('pname-ln ('pname-ns suffix)))) | |
| 71 | + | `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) "") | |
| 72 | + | suffix)) | |
| 73 | + | ("state" . ,state))) | |
| 74 | + | (('iri 'iriref) | |
| 75 | + | `(("iri" . ,(resolve-iri (parser-state-base-uri state) "")) | |
| 76 | + | ("state" . ,state))) | |
| 77 | + | (('iri ('iriref iri)) | |
| 78 | + | `(("iri" . ,(resolve-iri (parser-state-base-uri state) iri)) | |
| 79 | + | ("state" . ,state))) | |
| 80 | + | (('blank-node ('blank-node-label label)) | |
| 81 | + | (if (assoc-ref (parser-state-bnode-labels state) label) | |
| 82 | + | `(("iri" . ,(assoc-ref (parser-state-bnode-labels state) label)) | |
| 83 | + | ("state" . ,state)) | |
| 84 | + | (let ((node ((parser-state-blank-node-gen state)))) | |
| 85 | + | `(("iri" . ,node) | |
| 86 | + | ("state" . ,(update-parser-state state | |
| 87 | + | #:bnode-labels | |
| 88 | + | (cons | |
| 89 | + | (cons label node) | |
| 90 | + | (parser-state-bnode-labels state)))))))))) | |
| 91 | + | ||
| 92 | + | (define (parse-verb verb state) | |
| 93 | + | (match verb | |
| 94 | + | (('predicate iri) | |
| 95 | + | (let ((parsed (parse-iri iri state))) | |
| 96 | + | (list (cons "verb" (assoc-ref parsed "iri")) | |
| 97 | + | (cons "state" (assoc-ref parsed "state"))))) | |
| 98 | + | ("a" (list (cons "verb" "http://www.w3.org/1999/02/22-rdf-syntax-ns#type") ; the "a" keyword stands for rdf:type | |
| 99 | + | (cons "state" state))))) | |
| 100 | + | ||
| 101 | + | (define (parse-object object state) | |
| 102 | + | ;; Returns STATE extended with the triple(s) that link the current subject and predicate to OBJECT. | |
| 103 | + | (match object | |
| 104 | + | (('rdf-literal ('string-pat ('string-literal-quote str))) | |
| 105 | + | (update-parser-state state | |
| 106 | + | #:result | |
| 107 | + | (cons | |
| 108 | + | (make-rdf-triple | |
| 109 | + | (parser-state-cur-subject state) | |
| 110 | + | (parser-state-cur-predicate state) | |
| 111 | + | (make-rdf-literal str xsd:string #f)) | |
| 112 | + | (parser-state-result state)))) | |
| 113 | + | (('blank-node-property-list ('predicate-object-list po ...)) | |
| 114 | + | (let* ((node ((parser-state-blank-node-gen state))) | |
| 115 | + | (new-state (parse-predicate-object | |
| 116 | + | po (update-parser-state state #:cur-subject node)))) | |
| 117 | + | (update-parser-state new-state | |
| 118 | + | #:cur-subject (parser-state-cur-subject state) | |
| 119 | + | #:cur-predicate (parser-state-cur-predicate state) | |
| 120 | + | #:result | |
| 121 | + | (cons | |
| 122 | + | (make-rdf-triple | |
| 123 | + | (parser-state-cur-subject state) | |
| 124 | + | (parser-state-cur-predicate state) | |
| 125 | + | node) | |
| 126 | + | (parser-state-result new-state))))) | |
| 127 | + | (('collection objects ...) | |
| 128 | + | (let loop ((objects objects) (state state)) | |
| 129 | + | (match objects | |
| 130 | + | ('() state) | |
| 131 | + | ((('object object) objects ...) | |
| 132 | + | (loop objects (parse-object object state)))))) | |
| 133 | + | (('iri _) | |
| 134 | + | (let* ((res (parse-iri object state)) | |
| 135 | + | (iri (assoc-ref res "iri")) | |
| 136 | + | (state (assoc-ref res "state"))) | |
| 137 | + | (update-parser-state state | |
| 138 | + | #:result | |
| 139 | + | (cons | |
| 140 | + | (make-rdf-triple | |
| 141 | + | (parser-state-cur-subject state) | |
| 142 | + | (parser-state-cur-predicate state) | |
| 143 | + | iri) | |
| 144 | + | (parser-state-result state))))))) | |
| 145 | + | ||
| 146 | + | (define (parse-object-list ol state) | |
| 147 | + | (let loop ((ol ol) (state state)) | |
| 148 | + | ;; Threads STATE through each (object ...) entry; the last clause unwraps a one-element list. | |
| 149 | + | (match ol | |
| 150 | + | ('() state) | |
| 151 | + | ((('object object) ol ...) | |
| 152 | + | (loop ol (parse-object object state))) | |
| 153 | + | ((ol) | |
| 154 | + | (loop ol state))))) | |
| 155 | + | ||
| 156 | + | (define (parse-predicate-object po state) | |
| 157 | + | (let loop ((po po) (state state)) | |
| 158 | + | ;; PO is one verb/object-list pair, bare or wrapped in a list, optionally followed by more pairs. | |
| 159 | + | (match po | |
| 160 | + | ((('verb verb) ('object-list ol ...) po) | |
| 161 | + | (let* ((verb (parse-verb verb state)) | |
| 162 | + | (state (assoc-ref verb "state")) | |
| 163 | + | (verb (assoc-ref verb "verb")) | |
| 164 | + | (new-state (update-parser-state state #:cur-predicate verb)) | |
| 165 | + | (res (parse-object-list ol new-state))) | |
| 166 | + | (loop po res))) | |
| 167 | + | ((('verb verb) ('object-list ol ...)) | |
| 168 | + | (let* ((verb (parse-verb verb state)) | |
| 169 | + | (state (assoc-ref verb "state")) | |
| 170 | + | (verb (assoc-ref verb "verb")) | |
| 171 | + | (new-state (update-parser-state state #:cur-predicate verb)) | |
| 172 | + | (res (parse-object-list ol new-state))) | |
| 173 | + | res)) | |
| 174 | + | (((('verb verb) ('object-list ol ...)) po ...) | |
| 175 | + | (let* ((verb (parse-verb verb state)) | |
| 176 | + | (state (assoc-ref verb "state")) | |
| 177 | + | (verb (assoc-ref verb "verb")) | |
| 178 | + | (new-state (update-parser-state state #:cur-predicate verb)) | |
| 179 | + | (res (parse-object-list ol new-state))) | |
| 180 | + | (loop po res))) | |
| 181 | + | ('() state) | |
| 182 | + | ((po) | |
| 183 | + | (loop po state))))) | |
| 184 | + | ||
| 185 | + | (define (parse-triples t state) | |
| 186 | + | ;; Handles a subject followed by a predicate-object-list; the only clause there is. | |
| 187 | + | (match t | |
| 188 | + | ((('subject node) ('predicate-object-list po-list ...)) | |
| 189 | + | (let* ((parsed (parse-iri node state)) | |
| 190 | + | (subject (assoc-ref parsed "iri")) | |
| 191 | + | (with-subject (update-parser-state (assoc-ref parsed "state") | |
| 192 | + | #:cur-subject subject))) | |
| 193 | + | (parse-predicate-object po-list with-subject))))) | |
| 194 | + | ||
| 195 | + | (define (parse-turtle-doc parse-tree state) ; returns the triples accumulated in STATE after walking PARSE-TREE | |
| 196 | + | (let loop ((parse-tree parse-tree) (state state)) | |
| 197 | + | (match parse-tree | |
| 198 | + | ('() (parser-state-result state)) | |
| 199 | + | ((('prefix-id ('pname-ns ns) ('iriref iri)) parse-tree ...) | |
| 200 | + | (loop parse-tree | |
| 201 | + | (add-ns-to-state | |
| 202 | + | state ns (resolve-iri (parser-state-base-uri state) iri)))) | |
| 203 | + | ((('prefix-id ('pname-ns ('iriref iri))) parse-tree ...) | |
| 204 | + | (loop parse-tree | |
| 205 | + | (add-ns-to-state | |
| 206 | + | state "" (resolve-iri (parser-state-base-uri state) iri)))) | |
| 207 | + | ((('sparql-prefix ('pname-ns ns) ('iriref iri)) parse-tree ...) | |
| 208 | + | (loop parse-tree | |
| 209 | + | (add-ns-to-state | |
| 210 | + | state ns (resolve-iri (parser-state-base-uri state) iri)))) | |
| 211 | + | ((('sparql-prefix ('pname-ns ('iriref iri))) parse-tree ...) | |
| 212 | + | (loop parse-tree | |
| 213 | + | (add-ns-to-state | |
| 214 | + | state "" (resolve-iri (parser-state-base-uri state) iri)))) | |
| 215 | + | ((('base ('iriref iri)) parse-tree ...) | |
| 216 | + | (loop parse-tree | |
| 217 | + | (update-parser-state | |
| 218 | + | state #:base-uri (resolve-iri (parser-state-base-uri state) iri)))) | |
| 219 | + | ((('sparql-base ('iriref iri)) parse-tree ...) | |
| 220 | + | (loop parse-tree | |
| 221 | + | (update-parser-state | |
| 222 | + | state #:base-uri (resolve-iri (parser-state-base-uri state) iri)))) ; base first, as in the @base clause | |
| 223 | + | ((('triples t ...) parse-tree ...) | |
| 224 | + | ;; Parse the statement exactly once: the blank node generator is stateful. | |
| 225 | + | (let ((res (parse-triples t state))) | |
| 226 | + | (loop parse-tree res))) | |
| 227 | + | ;; otherwise, it's a single element, not a list of statements | |
| 228 | + | (((? symbol? _) _ ...) (loop (list parse-tree) state))))) | |
| 229 | + | ||
| 230 | + | (define (tordf parse-tree base) | |
| 231 | + | ;; Walk PARSE-TREE starting from an empty state whose base IRI is BASE. | |
| 232 | + | (let ((initial (make-parser-state base '() '() #f #f (create-generate-blank-node) '()))) | |
| 233 | + | (parse-turtle-doc parse-tree initial))) | |
| 234 | + | ||
| 235 | + | (define (turtle->rdf str-or-file base) | |
| 236 | + | ;; STR-OR-FILE is read as a file when a file of that name exists; otherwise a | |
| 237 | + | ;; string is taken to be the Turtle text itself. BASE is handed to tordf as is. | |
| 238 | + | (let ((str (cond | |
| 239 | + | ((file-exists? str-or-file) | |
| 240 | + | (call-with-input-file str-or-file get-string-all)) | |
| 241 | + | ((string? str-or-file) str-or-file)))) | |
| 242 | + | (tordf (parse-turtle str) base))) |