guile-rdf/nquads/parser.scm

parser.scm

1
;;;; Copyright (C) 2020 Julien Lepiller <julien@lepiller.eu>
2
;;;; 
3
;;;; This library is free software; you can redistribute it and/or
4
;;;; modify it under the terms of the GNU Lesser General Public
5
;;;; License as published by the Free Software Foundation; either
6
;;;; version 3 of the License, or (at your option) any later version.
7
;;;; 
8
;;;; This library is distributed in the hope that it will be useful,
9
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
10
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
;;;; Lesser General Public License for more details.
12
;;;; 
13
;;;; You should have received a copy of the GNU Lesser General Public
14
;;;; License along with this library; if not, write to the Free Software
15
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
;;;; 
17
18
(define-module (nquads parser)
19
  #:use-module (ice-9 peg)
20
  #:export (parse-nquads))
21
22
;; Productions for terminals
23
(define-peg-pattern langtag all
24
  (and (ignore "@") (+ (or (range #\a #\z) (range #\A #\Z)))
25
       (* (and "-" (+ (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9)))))))
26
(define-peg-pattern eol none (+ (or "\n" "\r")))
27
(define-peg-pattern iriref all
28
  (and (ignore "<")
29
       (* (or "!" (range #\x23 #\x3b) "=" (range #\x3f #\x5b) "]" "_"
30
              (range #\x61 #\x7a) (range #\x7e #\x10ffff) uchar))
31
       (ignore ">")))
32
(define-peg-pattern string-literal-quote all
33
  (and (ignore "\"")
34
       (* (or (range #\x00 #\x09) (range #\x0b #\x0c) (range #\x0d #\x21)
35
              (range #\x23 #\x5b) (range #\x5d #\x10ffff) echar uchar))
36
       (ignore "\"")))
37
(define-peg-pattern blank-node-label all
38
  (and "_:" (or pn-chars-u (range #\0 #\9)) (* (and (* ".") pn-chars))))
39
(define-peg-pattern uchar all
40
  (or (and (ignore "\\u") hex hex hex hex)
41
      (and (ignore "\\U") hex hex hex hex hex hex hex hex)))
42
(define-peg-pattern echar all
43
  (or "\\t" "\\b" "\\n" "\\r" "\\f" "\\\"" "\\'" "\\\\"))
44
(define-peg-pattern pn-chars-base body
45
  (or (range #\A #\Z) (range #\a #\z) (range #\x00c0 #\x00d6)
46
      (range #\x00d8 #\x00f6) (range #\x00f8 #\x02ff) (range #\x0370 #\x037d)
47
      (range #\x037f #\x1fff) (range #\x200c #\x200d) (range #\x2070 #\x218f)
48
      (range #\x2c00 #\x2fef) (range #\x3001 #\xd7ff) (range #\xf900 #\xfdcf)
49
      (range #\xfdf0 #\xfffd) (range #\x10000 #\xeffff)))
50
(define-peg-pattern pn-chars-u body (or pn-chars-base "_"))
51
(define-peg-pattern pn-chars body
52
  (or pn-chars-u "-" (range #\0 #\9) "ยท" (range #\x0300 #\x036f)
53
      (range #\x203f #\x2040)))
54
(define-peg-pattern hex body (or (range #\0 #\9) (range #\a #\f) (range #\A #\F)))
55
56
;; Grammar
57
(define-peg-pattern unrecognized body (range #\x00 #\x10ffff))
58
(define-peg-pattern comment body (and "#" (* (or (range #\x00 #\x09)
59
                                                 (range #\x0B #\x0C)
60
                                                 (range #\x0E #\x10FFFF)))))
61
(define-peg-pattern WS none (* (or comment "\t" " ")))
62
63
(define-peg-pattern nquads-doc body (and (* (and WS (? eol))) (* statement)))
64
(define-peg-pattern statement all
65
  (or
66
    (and WS subject WS iriref WS object WS (? graph-label) WS (? (ignore "."))
67
         (* (and WS eol)))
68
    (* unrecognized)))
69
(define-peg-pattern subject body
70
  (or iriref blank-node-label))
71
(define-peg-pattern object body
72
  (or iriref blank-node-label literal))
73
(define-peg-pattern graph-label body
74
  (or iriref blank-node-label))
75
(define-peg-pattern literal all
76
  (and string-literal-quote (? (or (and "^^" iriref) langtag))))
77
78
79
(define (parse-nquads str)
80
  (peg:tree (match-pattern nquads-doc str)))
81