Add unicode and escape decoding support

Julien LepillerThu Apr 02 04:09:30+0200 2020

c1df0f1

Add unicode and escape decoding support

turtle/parser.scm

7878
                   (range #\x5d #\x10ffff) echar uchar)))
7979
       (ignore "\"\"\"")))
8080
;; [26] 	UCHAR 	::= 	'\u' HEX HEX HEX HEX | '\U' HEX HEX HEX HEX HEX HEX HEX HEX
81-
(define-peg-pattern uchar body
82-
  (or (and "\\u" hex hex hex hex)
83-
      (and "\\U" hex hex hex hex hex hex hex hex)))
81+
(define-peg-pattern uchar all
82+
  (or (and (ignore "\\u") hex hex hex hex)
83+
      (and (ignore "\\U") hex hex hex hex hex hex hex hex)))
8484
;; [159s] 	ECHAR 	::= 	'\' [tbnrf"'\]
85-
(define-peg-pattern echar body
85+
(define-peg-pattern echar all
8686
  (or "\\t" "\\b" "\\n" "\\r" "\\f" "\\\"" "\\'" "\\\\"))
8787
;; [161s] 	WS 	::= 	#x20 | #x9 | #xD | #xA /* #x20=space #x9=character tabulation #xD=carriage return #xA=new line */
8888
(define-peg-pattern ws body (or " " "\t" "\r" "\n"))

turtle/tordf.scm

5959
  (update-parser-state state
6060
    #:namespaces (cons (cons ns iri) (parser-state-namespaces state))))
6161
62+
(define (parse-string str)
63+
  (match str
64+
    ((? string? str) str)
65+
    ((component str ...)
66+
     (match component
67+
       ((? string? str1)
68+
        (string-append str1 (parse-string str)))
69+
       (('uchar n)
70+
        (string-append (string (integer->char (string->number n 16)))
71+
                       (parse-string str)))
72+
       (('echar e)
73+
        (string-append
74+
          (match e
75+
            ("\\t" "\t")
76+
            ("\\b" "\b")
77+
            ("\\n" "\n")
78+
            ("\\r" "\r")
79+
            ("\\f" "\f")
80+
            ("\\\\" "\\")
81+
            ("\\\"" "\"")
82+
            ("\\'" "'"))
83+
          (parse-string str)))))
84+
    (() "")))
6285
6386
(define (parse-iri iri state)
6487
  (match iri

6891
       ("state" . ,state)))
6992
    (('iri ('prefixed-name ('pname-ln ('pname-ns suffix))))
7093
     `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) "")
71-
                               suffix))
94+
                                suffix))
95+
       ("state" . ,state)))
96+
    (('iri ('prefixed-name ('pname-ns suffix)))
97+
     `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) "")
98+
                                suffix))
7299
       ("state" . ,state)))
73100
    (('iri ('prefixed-name 'pname-ns))
74101
     `(("iri" . ,(assoc-ref (parser-state-namespaces state) ""))

76103
    (('iri 'iriref)
77104
     `(("iri" . ,(resolve-iri (parser-state-base-uri state) ""))
78105
       ("state" . ,state)))
79-
    (('iri ('iriref iri))
80-
     `(("iri" . ,(resolve-iri (parser-state-base-uri state) iri))
106+
    (('iri ('iriref iri ...))
107+
     `(("iri" . ,(resolve-iri (parser-state-base-uri state) (parse-string iri)))
81108
       ("state" . ,state)))))
82109
83110
(define (parse-verb verb state)

91118
92119
(define (parse-object object state)
93120
  (match object
94-
    (('rdf-literal ('string-pat (_ str)))
121+
    (('rdf-literal ('string-pat (_ str ...)))
95122
     (let ((object
96-
             (make-rdf-literal str "http://www.w3.org/2001/XMLSchema#string" #f)))
123+
             (make-rdf-literal (parse-string str)
124+
                               "http://www.w3.org/2001/XMLSchema#string" #f)))
97125
       (update-parser-state state
98126
         #:cur-object object
99127
         #:result

103131
             (parser-state-cur-predicate state)
104132
             (make-rdf-literal str "http://www.w3.org/2001/XMLSchema#string" #f))
105133
           (parser-state-result state)))))
106-
    (('rdf-literal ('string-pat (_ str)) ("^^" iri))
134+
    (('rdf-literal ('string-pat (_ str ...)) ("^^" iri))
107135
     (let* ((res (parse-iri iri state))
108136
            (iri (assoc-ref res "iri"))
109137
            (state (assoc-ref res "state"))
110-
            (object (make-rdf-literal str iri #f)))
138+
            (object (make-rdf-literal (parse-string str) iri #f)))
111139
       (update-parser-state state
112140
         #:cur-object object
113141
         #:result

117145
             (parser-state-cur-predicate state)
118146
             (make-rdf-literal str "http://www.w3.org/2001/XMLSchema#string" #f))
119147
           (parser-state-result state)))))
120-
    (('rdf-literal ('string-pat (_ str)) ('langtag lang))
148+
    (('rdf-literal ('string-pat (_ str ...)) ('langtag lang))
121149
     (let ((object
122150
           (make-rdf-literal
123-
             str "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" lang)))
151+
             (parse-string str)
152+
             "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"
153+
             lang)))
124154
       (update-parser-state state
125155
         #:cur-object object
126156
         #:result

385415
  (let loop ((parse-tree parse-tree) (state state))
386416
    (match parse-tree
387417
      ('() (parser-state-result state))
388-
      ((('prefix-id ('pname-ns ns) ('iriref iri)) parse-tree ...)
418+
      ((('prefix-id ('pname-ns ns) ('iriref iri ...)) parse-tree ...)
389419
       (loop parse-tree
390420
             (add-ns-to-state
391-
               state ns (resolve-iri (parser-state-base-uri state) iri))))
392-
      ((('prefix-id ('pname-ns ('iriref iri))) parse-tree ...)
421+
               state ns (resolve-iri (parser-state-base-uri state)
422+
                                     (parse-string iri)))))
423+
      ((('prefix-id ('pname-ns ('iriref iri ...))) parse-tree ...)
393424
       (loop parse-tree
394425
             (add-ns-to-state
395-
               state "" (resolve-iri (parser-state-base-uri state) iri))))
396-
      ((('sparql-prefix ('pname-ns ns) ('iriref iri)) parse-tree ...)
426+
               state "" (resolve-iri (parser-state-base-uri state)
427+
                                     (parse-string iri)))))
428+
      ((('sparql-prefix ('pname-ns ns) ('iriref iri ...)) parse-tree ...)
397429
       (loop parse-tree
398430
             (add-ns-to-state
399-
               state ns (resolve-iri (parser-state-base-uri state) iri))))
400-
      ((('sparql-prefix ('pname-ns ('iriref iri))) parse-tree ...)
431+
               state ns (resolve-iri (parser-state-base-uri state)
432+
                                     (parse-string iri)))))
433+
      ((('sparql-prefix ('pname-ns ('iriref iri ...))) parse-tree ...)
401434
       (loop parse-tree
402435
             (add-ns-to-state
403-
               state "" (resolve-iri (parser-state-base-uri state) iri))))
404-
      ((('base ('iriref iri)) parse-tree ...)
436+
               state "" (resolve-iri (parser-state-base-uri state)
437+
                                     (parse-string iri)))))
438+
      ((('base ('iriref iri ...)) parse-tree ...)
405439
       (loop parse-tree
406440
             (update-parser-state
407-
               state #:base-uri (resolve-iri (parser-state-base-uri state) iri))))
408-
      ((('sparql-base ('iriref iri)) parse-tree ...)
441+
               state #:base-uri (resolve-iri (parser-state-base-uri state)
442+
                                             (parse-string iri)))))
443+
      ((('sparql-base ('iriref iri ...)) parse-tree ...)
409444
       (loop parse-tree
410445
             (update-parser-state
411-
               state #:base-uri (resolve-iri iri (parser-state-base-uri state)))))
446+
               state #:base-uri (resolve-iri (parser-state-base-uri state)
447+
                                             (parse-string iri)))))
412448
      ((('triples t ...) parse-tree ...)
413449
       (let ((res (parse-triples t state)))
414450
         (loop parse-tree (parse-triples t state))))