Improve turtle parsing

Julien Lepiller, Wed Apr 01 03:19:56 +0200 2020

944a9c3

Improve turtle parsing

.gitignore

11
*.go
22
*.log
3+
*.trs
34
aclocal.m4
45
autom4te.cache/
56
configure

turtle/parser.scm

3232
;; pname-ln: a sequence -- pname-ns immediately followed by pn-local, with
;; nothing allowed in between.  Declared with capture type `all`
;; (NOTE(review): per Guile's PEG documentation this keeps the match and tags
;; it with the pattern name -- confirm against the consumers of the tree).
(define-peg-pattern pname-ln all (and pname-ns pn-local))
3333
;; [141s] 	BLANK_NODE_LABEL 	::= 	'_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)?
3434
;; blank-node-label: the literal "_:", then one pn-chars-u or one ASCII digit,
;; then a repeated tail.  This hunk carries both sides of the change:
;;   "35-" (removed): each tail step is either pn-chars, or exactly one "."
;;          that must be followed directly by pn-chars.
;;   "35+" (added):   each tail step is any run of "." (possibly empty)
;;          followed by pn-chars, so several dots in a row are accepted.
;; In both forms every tail step finishes on a pn-chars match.
(define-peg-pattern blank-node-label all
35-
  (and "_:" (or pn-chars-u (range #\0 #\9)) (* (or pn-chars (and "." pn-chars)))))
35+
  (and "_:" (or pn-chars-u (range #\0 #\9)) (* (and (* ".") pn-chars))))
3636
;; [144s] 	LANGTAG 	::= 	'@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)*
3737
(define-peg-pattern langtag all
3838
  (and "@" (+ (or (range #\a #\z) (range #\A #\Z)))

103103
      (range #\x203f #\x2040)))
104104
;; [167s] 	PN_PREFIX 	::= 	PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)?
105105
;; pn-prefix: one pn-chars-base followed by a repeated tail.  This hunk
;; carries both sides of the change:
;;   "106-" (removed): each tail step is either pn-chars, or exactly one "."
;;           that must be followed directly by pn-chars.
;;   "106+" (added):   each tail step is any run of "." (possibly empty)
;;           followed by pn-chars, so several dots in a row are accepted.
;; In both forms every tail step finishes on a pn-chars match.
(define-peg-pattern pn-prefix body
106-
  (and pn-chars-base (* (or pn-chars (and "." pn-chars)))))
106+
  (and pn-chars-base (* (and (* ".") pn-chars))))
107107
;; [168s] 	PN_LOCAL 	::= 	(PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))?
108108
;; pn-local: a first element (pn-chars-u, ":", an ASCII digit, or plx)
;; followed by a repeated tail.  This hunk carries both sides of the change:
;;   "110-" (removed): the tail alternatives include the string literal
;;           "plx" (the three characters p, l, x) rather than the plx
;;           pattern, and a "." must be followed directly by pn-chars,
;;           ":" or plx.
;;   "110+" (added):   each tail step is any run of "." (possibly empty)
;;           followed by pn-chars, ":" or the plx pattern.
;; In the added form every tail step finishes on pn-chars, ":" or plx.
(define-peg-pattern pn-local body
109109
  (and (or pn-chars-u ":" (range #\0 #\9) plx)
110-
       (* (or pn-chars ":" "plx" (and "." (or pn-chars ":" plx))))))
110+
       (* (and (* ".") (or pn-chars ":" plx)))))
111111
;; [169s] 	PLX 	::= 	PERCENT | PN_LOCAL_ESC
112112
;; plx: a choice between two alternatives, listed as percent then
;; pn-local-esc.  Declared with capture type `body`
;; (NOTE(review): per Guile's PEG documentation this keeps the matched text
;; without wrapping it in a node named after the pattern -- confirm).
(define-peg-pattern plx body (or percent pn-local-esc))
113113
;; [170s] 	PERCENT 	::= 	'%' HEX HEX

turtle/tordf.scm

2222
  #:use-module (turtle parser)
2323
  #:use-module (srfi srfi-9)
2424
  #:use-module (rdf rdf)
25-
  #:use-module ((rdf xsd) #:prefix xsd:)
2625
  #:export (turtle->rdf))
2726
2827
(define-record-type parser-state

7170
     `(("iri" . ,(string-append (assoc-ref (parser-state-namespaces state) "")
7271
                               suffix))
7372
       ("state" . ,state)))
73+
    (('iri ('prefixed-name 'pname-ns))
74+
     `(("iri" . ,(assoc-ref (parser-state-namespaces state) ""))
75+
       ("state" . ,state)))
7476
    (('iri 'iriref)
7577
     `(("iri" . ,(resolve-iri (parser-state-base-uri state) ""))
7678
       ("state" . ,state)))

101103
(define (parse-object object state)
102104
  (pk 'object object)
103105
  (match object
104-
    (('rdf-literal ('string-pat ('string-literal-quote str)))
106+
    (('rdf-literal ('string-pat (_ str)))
107+
     (update-parser-state state
108+
       #:result
109+
       (cons
110+
         (make-rdf-triple
111+
           (parser-state-cur-subject state)
112+
           (parser-state-cur-predicate state)
113+
           (make-rdf-literal str "http://www.w3.org/2001/XMLSchema#string" #f))
114+
         (parser-state-result state))))
115+
    (('rdf-literal ('string-pat (_ str)) ('langtag lang))
116+
     (update-parser-state state
117+
       #:result
118+
       (cons
119+
         (make-rdf-triple
120+
           (parser-state-cur-subject state)
121+
           (parser-state-cur-predicate state)
122+
           (make-rdf-literal
123+
             str "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" lang))
124+
         (parser-state-result state))))
125+
    (('numeric-literal ('integer int))
126+
     (update-parser-state state
127+
       #:result
128+
       (cons
129+
         (make-rdf-triple
130+
           (parser-state-cur-subject state)
131+
           (parser-state-cur-predicate state)
132+
           (make-rdf-literal int "http://www.w3.org/2001/XMLSchema#integer" #f))
133+
         (parser-state-result state))))
134+
    (('boolean-literal bool)
105135
     (update-parser-state state
106136
       #:result
107137
       (cons
108138
         (make-rdf-triple
109139
           (parser-state-cur-subject state)
110140
           (parser-state-cur-predicate state)
111-
           (make-rdf-literal str xsd:string #f))
141+
           (make-rdf-literal bool "http://www.w3.org/2001/XMLSchema#boolean" #f))
112142
         (parser-state-result state))))
113143
    (('blank-node-property-list ('predicate-object-list po ...))
114144
     (let* ((node ((parser-state-blank-node-gen state)))