Add unicode and escape decoding support
turtle/parser.scm
78 | 78 | (range #\x5d #\x10ffff) echar uchar))) | |
79 | 79 | (ignore "\"\"\""))) | |
80 | 80 | ;; [26] UCHAR ::= '\u' HEX HEX HEX HEX | '\U' HEX HEX HEX HEX HEX HEX HEX HEX | |
81 | - | (define-peg-pattern uchar body | |
82 | - | (or (and "\\u" hex hex hex hex) | |
83 | - | (and "\\U" hex hex hex hex hex hex hex hex))) | |
81 | + | (define-peg-pattern uchar all | |
82 | + | (or (and (ignore "\\u") hex hex hex hex) | |
83 | + | (and (ignore "\\U") hex hex hex hex hex hex hex hex))) | |
84 | 84 | ;; [159s] ECHAR ::= '\' [tbnrf"'\] | |
85 | - | (define-peg-pattern echar body | |
85 | + | (define-peg-pattern echar all | |
86 | 86 | (or "\\t" "\\b" "\\n" "\\r" "\\f" "\\\"" "\\'" "\\\\")) | |
87 | 87 | ;; [161s] WS ::= #x20 | #x9 | #xD | #xA /* #x20=space #x9=character tabulation #xD=carriage return #xA=new line */ | |
88 | 88 | (define-peg-pattern ws body (or " " "\t" "\r" "\n")) |
turtle/tordf.scm
59 | 59 | (update-parser-state state | |
60 | 60 | #:namespaces (cons (cons ns iri) (parser-state-namespaces state)))) | |
61 | 61 | ||
62 | + | (define (parse-string str) | |
63 | + | (match str | |
64 | + | ((? string? str) str) | |
65 | + | ((component str ...) | |
66 | + | (match component | |
67 | + | ((? string? str1) | |
68 | + | (string-append str1 (parse-string str))) | |
69 | + | (('uchar n) | |
70 | + | (string-append (string (integer->char (string->number n 16))) | |
71 | + | (parse-string str))) | |
72 | + | (('echar e) | |
73 | + | (string-append | |
74 | + | (match e | |
75 | + | ("\\t" "\t") | |
76 | + | ("\\b" "\b") | |
77 | + | ("\\n" "\n") | |
78 | + | ("\\r" "\r") | |
79 | + | ("\\f" "\f") | |
80 | + | ("\\\\" "\\") | |
81 | + | ("\\\"" "\"") | |
82 | + | ("\\'" "'")) | |
83 | + | (parse-string str))))) | |
84 | + | (() ""))) | |
62 | 85 | ||
63 | 86 | (define (parse-iri iri state) | |
64 | 87 | (match iri | |
… | |||
68 | 91 | ("state" . ,state))) | |
69 | 92 | (('iri ('prefixed-name ('pname-ln ('pname-ns suffix)))) | |
70 | 93 | `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) "") | |
71 | - | suffix)) | |
94 | + | suffix)) | |
95 | + | ("state" . ,state))) | |
96 | + | (('iri ('prefixed-name ('pname-ns suffix))) | |
97 | + | `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) "") | |
98 | + | suffix)) | |
72 | 99 | ("state" . ,state))) | |
73 | 100 | (('iri ('prefixed-name 'pname-ns)) | |
74 | 101 | `(("iri" . ,(assoc-ref (parser-state-namespaces state) "")) | |
… | |||
76 | 103 | (('iri 'iriref) | |
77 | 104 | `(("iri" . ,(resolve-iri (parser-state-base-uri state) "")) | |
78 | 105 | ("state" . ,state))) | |
79 | - | (('iri ('iriref iri)) | |
80 | - | `(("iri" . ,(resolve-iri (parser-state-base-uri state) iri)) | |
106 | + | (('iri ('iriref iri ...)) | |
107 | + | `(("iri" . ,(resolve-iri (parser-state-base-uri state) (parse-string iri))) | |
81 | 108 | ("state" . ,state))))) | |
82 | 109 | ||
83 | 110 | (define (parse-verb verb state) | |
… | |||
91 | 118 | ||
92 | 119 | (define (parse-object object state) | |
93 | 120 | (match object | |
94 | - | (('rdf-literal ('string-pat (_ str))) | |
121 | + | (('rdf-literal ('string-pat (_ str ...))) | |
95 | 122 | (let ((object | |
96 | - | (make-rdf-literal str "http://www.w3.org/2001/XMLSchema#string" #f))) | |
123 | + | (make-rdf-literal (parse-string str) | |
124 | + | "http://www.w3.org/2001/XMLSchema#string" #f))) | |
97 | 125 | (update-parser-state state | |
98 | 126 | #:cur-object object | |
99 | 127 | #:result | |
… | |||
103 | 131 | (parser-state-cur-predicate state) | |
104 | 132 | (make-rdf-literal str "http://www.w3.org/2001/XMLSchema#string" #f)) | |
105 | 133 | (parser-state-result state))))) | |
106 | - | (('rdf-literal ('string-pat (_ str)) ("^^" iri)) | |
134 | + | (('rdf-literal ('string-pat (_ str ...)) ("^^" iri)) | |
107 | 135 | (let* ((res (parse-iri iri state)) | |
108 | 136 | (iri (assoc-ref res "iri")) | |
109 | 137 | (state (assoc-ref res "state")) | |
110 | - | (object (make-rdf-literal str iri #f))) | |
138 | + | (object (make-rdf-literal (parse-string str) iri #f))) | |
111 | 139 | (update-parser-state state | |
112 | 140 | #:cur-object object | |
113 | 141 | #:result | |
… | |||
117 | 145 | (parser-state-cur-predicate state) | |
118 | 146 | (make-rdf-literal str "http://www.w3.org/2001/XMLSchema#string" #f)) | |
119 | 147 | (parser-state-result state))))) | |
120 | - | (('rdf-literal ('string-pat (_ str)) ('langtag lang)) | |
148 | + | (('rdf-literal ('string-pat (_ str ...)) ('langtag lang)) | |
121 | 149 | (let ((object | |
122 | 150 | (make-rdf-literal | |
123 | - | str "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" lang))) | |
151 | + | (parse-string str) | |
152 | + | "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" | |
153 | + | lang))) | |
124 | 154 | (update-parser-state state | |
125 | 155 | #:cur-object object | |
126 | 156 | #:result | |
… | |||
385 | 415 | (let loop ((parse-tree parse-tree) (state state)) | |
386 | 416 | (match parse-tree | |
387 | 417 | ('() (parser-state-result state)) | |
388 | - | ((('prefix-id ('pname-ns ns) ('iriref iri)) parse-tree ...) | |
418 | + | ((('prefix-id ('pname-ns ns) ('iriref iri ...)) parse-tree ...) | |
389 | 419 | (loop parse-tree | |
390 | 420 | (add-ns-to-state | |
391 | - | state ns (resolve-iri (parser-state-base-uri state) iri)))) | |
392 | - | ((('prefix-id ('pname-ns ('iriref iri))) parse-tree ...) | |
421 | + | state ns (resolve-iri (parser-state-base-uri state) | |
422 | + | (parse-string iri))))) | |
423 | + | ((('prefix-id ('pname-ns ('iriref iri ...))) parse-tree ...) | |
393 | 424 | (loop parse-tree | |
394 | 425 | (add-ns-to-state | |
395 | - | state "" (resolve-iri (parser-state-base-uri state) iri)))) | |
396 | - | ((('sparql-prefix ('pname-ns ns) ('iriref iri)) parse-tree ...) | |
426 | + | state "" (resolve-iri (parser-state-base-uri state) | |
427 | + | (parse-string iri))))) | |
428 | + | ((('sparql-prefix ('pname-ns ns) ('iriref iri ...)) parse-tree ...) | |
397 | 429 | (loop parse-tree | |
398 | 430 | (add-ns-to-state | |
399 | - | state ns (resolve-iri (parser-state-base-uri state) iri)))) | |
400 | - | ((('sparql-prefix ('pname-ns ('iriref iri))) parse-tree ...) | |
431 | + | state ns (resolve-iri (parser-state-base-uri state) | |
432 | + | (parse-string iri))))) | |
433 | + | ((('sparql-prefix ('pname-ns ('iriref iri ...))) parse-tree ...) | |
401 | 434 | (loop parse-tree | |
402 | 435 | (add-ns-to-state | |
403 | - | state "" (resolve-iri (parser-state-base-uri state) iri)))) | |
404 | - | ((('base ('iriref iri)) parse-tree ...) | |
436 | + | state "" (resolve-iri (parser-state-base-uri state) | |
437 | + | (parse-string iri))))) | |
438 | + | ((('base ('iriref iri ...)) parse-tree ...) | |
405 | 439 | (loop parse-tree | |
406 | 440 | (update-parser-state | |
407 | - | state #:base-uri (resolve-iri (parser-state-base-uri state) iri)))) | |
408 | - | ((('sparql-base ('iriref iri)) parse-tree ...) | |
441 | + | state #:base-uri (resolve-iri (parser-state-base-uri state) | |
442 | + | (parse-string iri))))) | |
443 | + | ((('sparql-base ('iriref iri ...)) parse-tree ...) | |
409 | 444 | (loop parse-tree | |
410 | 445 | (update-parser-state | |
411 | - | state #:base-uri (resolve-iri iri (parser-state-base-uri state))))) | |
446 | + | state #:base-uri (resolve-iri (parser-state-base-uri state) | |
447 | + | (parse-string iri))))) | |
412 | 448 | ((('triples t ...) parse-tree ...) | |
413 | 449 | (let ((res (parse-triples t state))) | |
414 | 450 | (loop parse-tree (parse-triples t state)))) |