Add Unicode and escape-sequence decoding support
turtle/parser.scm
| 78 | 78 | (range #\x5d #\x10ffff) echar uchar))) | |
| 79 | 79 | (ignore "\"\"\""))) | |
| 80 | 80 | ;; [26] UCHAR ::= '\u' HEX HEX HEX HEX | '\U' HEX HEX HEX HEX HEX HEX HEX HEX | |
| 81 | - | (define-peg-pattern uchar body | |
| 82 | - | (or (and "\\u" hex hex hex hex) | |
| 83 | - | (and "\\U" hex hex hex hex hex hex hex hex))) | |
| 81 | + | (define-peg-pattern uchar all | |
| 82 | + | (or (and (ignore "\\u") hex hex hex hex) | |
| 83 | + | (and (ignore "\\U") hex hex hex hex hex hex hex hex))) | |
| 84 | 84 | ;; [159s] ECHAR ::= '\' [tbnrf"'\] | |
| 85 | - | (define-peg-pattern echar body | |
| 85 | + | (define-peg-pattern echar all | |
| 86 | 86 | (or "\\t" "\\b" "\\n" "\\r" "\\f" "\\\"" "\\'" "\\\\")) | |
| 87 | 87 | ;; [161s] WS ::= #x20 | #x9 | #xD | #xA /* #x20=space #x9=character tabulation #xD=carriage return #xA=new line */ | |
| 88 | 88 | (define-peg-pattern ws body (or " " "\t" "\r" "\n")) |
turtle/tordf.scm
| 59 | 59 | (update-parser-state state | |
| 60 | 60 | #:namespaces (cons (cons ns iri) (parser-state-namespaces state)))) | |
| 61 | 61 | ||
| 62 | + | (define (parse-string str) | |
| 63 | + | (match str | |
| 64 | + | ((? string? str) str) | |
| 65 | + | ((component str ...) | |
| 66 | + | (match component | |
| 67 | + | ((? string? str1) | |
| 68 | + | (string-append str1 (parse-string str))) | |
| 69 | + | (('uchar n) | |
| 70 | + | (string-append (string (integer->char (string->number n 16))) | |
| 71 | + | (parse-string str))) | |
| 72 | + | (('echar e) | |
| 73 | + | (string-append | |
| 74 | + | (match e | |
| 75 | + | ("\\t" "\t") | |
| 76 | + | ("\\b" "\b") | |
| 77 | + | ("\\n" "\n") | |
| 78 | + | ("\\r" "\r") | |
| 79 | + | ("\\f" "\f") | |
| 80 | + | ("\\\\" "\\") | |
| 81 | + | ("\\\"" "\"") | |
| 82 | + | ("\\'" "'")) | |
| 83 | + | (parse-string str))))) | |
| 84 | + | (() ""))) | |
| 62 | 85 | ||
| 63 | 86 | (define (parse-iri iri state) | |
| 64 | 87 | (match iri | |
… | |||
| 68 | 91 | ("state" . ,state))) | |
| 69 | 92 | (('iri ('prefixed-name ('pname-ln ('pname-ns suffix)))) | |
| 70 | 93 | `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) "") | |
| 71 | - | suffix)) | |
| 94 | + | suffix)) | |
| 95 | + | ("state" . ,state))) | |
| 96 | + | (('iri ('prefixed-name ('pname-ns suffix))) | |
| 97 | + | `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) "") | |
| 98 | + | suffix)) | |
| 72 | 99 | ("state" . ,state))) | |
| 73 | 100 | (('iri ('prefixed-name 'pname-ns)) | |
| 74 | 101 | `(("iri" . ,(assoc-ref (parser-state-namespaces state) "")) | |
… | |||
| 76 | 103 | (('iri 'iriref) | |
| 77 | 104 | `(("iri" . ,(resolve-iri (parser-state-base-uri state) "")) | |
| 78 | 105 | ("state" . ,state))) | |
| 79 | - | (('iri ('iriref iri)) | |
| 80 | - | `(("iri" . ,(resolve-iri (parser-state-base-uri state) iri)) | |
| 106 | + | (('iri ('iriref iri ...)) | |
| 107 | + | `(("iri" . ,(resolve-iri (parser-state-base-uri state) (parse-string iri))) | |
| 81 | 108 | ("state" . ,state))))) | |
| 82 | 109 | ||
| 83 | 110 | (define (parse-verb verb state) | |
… | |||
| 91 | 118 | ||
| 92 | 119 | (define (parse-object object state) | |
| 93 | 120 | (match object | |
| 94 | - | (('rdf-literal ('string-pat (_ str))) | |
| 121 | + | (('rdf-literal ('string-pat (_ str ...))) | |
| 95 | 122 | (let ((object | |
| 96 | - | (make-rdf-literal str "http://www.w3.org/2001/XMLSchema#string" #f))) | |
| 123 | + | (make-rdf-literal (parse-string str) | |
| 124 | + | "http://www.w3.org/2001/XMLSchema#string" #f))) | |
| 97 | 125 | (update-parser-state state | |
| 98 | 126 | #:cur-object object | |
| 99 | 127 | #:result | |
… | |||
| 103 | 131 | (parser-state-cur-predicate state) | |
| 104 | 132 | (make-rdf-literal str "http://www.w3.org/2001/XMLSchema#string" #f)) | |
| 105 | 133 | (parser-state-result state))))) | |
| 106 | - | (('rdf-literal ('string-pat (_ str)) ("^^" iri)) | |
| 134 | + | (('rdf-literal ('string-pat (_ str ...)) ("^^" iri)) | |
| 107 | 135 | (let* ((res (parse-iri iri state)) | |
| 108 | 136 | (iri (assoc-ref res "iri")) | |
| 109 | 137 | (state (assoc-ref res "state")) | |
| 110 | - | (object (make-rdf-literal str iri #f))) | |
| 138 | + | (object (make-rdf-literal (parse-string str) iri #f))) | |
| 111 | 139 | (update-parser-state state | |
| 112 | 140 | #:cur-object object | |
| 113 | 141 | #:result | |
… | |||
| 117 | 145 | (parser-state-cur-predicate state) | |
| 118 | 146 | (make-rdf-literal str "http://www.w3.org/2001/XMLSchema#string" #f)) | |
| 119 | 147 | (parser-state-result state))))) | |
| 120 | - | (('rdf-literal ('string-pat (_ str)) ('langtag lang)) | |
| 148 | + | (('rdf-literal ('string-pat (_ str ...)) ('langtag lang)) | |
| 121 | 149 | (let ((object | |
| 122 | 150 | (make-rdf-literal | |
| 123 | - | str "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" lang))) | |
| 151 | + | (parse-string str) | |
| 152 | + | "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" | |
| 153 | + | lang))) | |
| 124 | 154 | (update-parser-state state | |
| 125 | 155 | #:cur-object object | |
| 126 | 156 | #:result | |
… | |||
| 385 | 415 | (let loop ((parse-tree parse-tree) (state state)) | |
| 386 | 416 | (match parse-tree | |
| 387 | 417 | ('() (parser-state-result state)) | |
| 388 | - | ((('prefix-id ('pname-ns ns) ('iriref iri)) parse-tree ...) | |
| 418 | + | ((('prefix-id ('pname-ns ns) ('iriref iri ...)) parse-tree ...) | |
| 389 | 419 | (loop parse-tree | |
| 390 | 420 | (add-ns-to-state | |
| 391 | - | state ns (resolve-iri (parser-state-base-uri state) iri)))) | |
| 392 | - | ((('prefix-id ('pname-ns ('iriref iri))) parse-tree ...) | |
| 421 | + | state ns (resolve-iri (parser-state-base-uri state) | |
| 422 | + | (parse-string iri))))) | |
| 423 | + | ((('prefix-id ('pname-ns ('iriref iri ...))) parse-tree ...) | |
| 393 | 424 | (loop parse-tree | |
| 394 | 425 | (add-ns-to-state | |
| 395 | - | state "" (resolve-iri (parser-state-base-uri state) iri)))) | |
| 396 | - | ((('sparql-prefix ('pname-ns ns) ('iriref iri)) parse-tree ...) | |
| 426 | + | state "" (resolve-iri (parser-state-base-uri state) | |
| 427 | + | (parse-string iri))))) | |
| 428 | + | ((('sparql-prefix ('pname-ns ns) ('iriref iri ...)) parse-tree ...) | |
| 397 | 429 | (loop parse-tree | |
| 398 | 430 | (add-ns-to-state | |
| 399 | - | state ns (resolve-iri (parser-state-base-uri state) iri)))) | |
| 400 | - | ((('sparql-prefix ('pname-ns ('iriref iri))) parse-tree ...) | |
| 431 | + | state ns (resolve-iri (parser-state-base-uri state) | |
| 432 | + | (parse-string iri))))) | |
| 433 | + | ((('sparql-prefix ('pname-ns ('iriref iri ...))) parse-tree ...) | |
| 401 | 434 | (loop parse-tree | |
| 402 | 435 | (add-ns-to-state | |
| 403 | - | state "" (resolve-iri (parser-state-base-uri state) iri)))) | |
| 404 | - | ((('base ('iriref iri)) parse-tree ...) | |
| 436 | + | state "" (resolve-iri (parser-state-base-uri state) | |
| 437 | + | (parse-string iri))))) | |
| 438 | + | ((('base ('iriref iri ...)) parse-tree ...) | |
| 405 | 439 | (loop parse-tree | |
| 406 | 440 | (update-parser-state | |
| 407 | - | state #:base-uri (resolve-iri (parser-state-base-uri state) iri)))) | |
| 408 | - | ((('sparql-base ('iriref iri)) parse-tree ...) | |
| 441 | + | state #:base-uri (resolve-iri (parser-state-base-uri state) | |
| 442 | + | (parse-string iri))))) | |
| 443 | + | ((('sparql-base ('iriref iri ...)) parse-tree ...) | |
| 409 | 444 | (loop parse-tree | |
| 410 | 445 | (update-parser-state | |
| 411 | - | state #:base-uri (resolve-iri iri (parser-state-base-uri state))))) | |
| 446 | + | state #:base-uri (resolve-iri (parser-state-base-uri state) | |
| 447 | + | (parse-string iri))))) | |
| 412 | 448 | ((('triples t ...) parse-tree ...) | |
| 413 | 449 | (let ((res (parse-triples t state))) | |
| 414 | 450 | (loop parse-tree (parse-triples t state)))) | |