Add iri.scm from jsonld
Makefile.am
2 | 2 | ||
3 | 3 | SOURCES= \ | |
4 | 4 | rdf/rdf.scm \ | |
5 | - | rdf/xsd.scm | |
5 | + | rdf/xsd.scm \ | |
6 | + | iri/iri.scm \ | |
6 | 7 | ||
7 | 8 | TEST_EXTENSIONS = .scm | |
8 | 9 | SCM_LOG_DRIVER = env AM_TAP_AWK='$(AWK)' $(SHELL) $(top_srcdir)/tap-driver.sh --color-tests yes --ignore-exit |
iri/iri.scm unknown status 1
1 | + | ;;;; Copyright (C) 2019, 2020 Julien Lepiller <julien@lepiller.eu> | |
2 | + | ;;;; | |
3 | + | ;;;; This library is free software; you can redistribute it and/or | |
4 | + | ;;;; modify it under the terms of the GNU Lesser General Public | |
5 | + | ;;;; License as published by the Free Software Foundation; either | |
6 | + | ;;;; version 3 of the License, or (at your option) any later version. | |
7 | + | ;;;; | |
8 | + | ;;;; This library is distributed in the hope that it will be useful, | |
9 | + | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | + | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
11 | + | ;;;; Lesser General Public License for more details. | |
12 | + | ;;;; | |
13 | + | ;;;; You should have received a copy of the GNU Lesser General Public | |
14 | + | ;;;; License along with this library; if not, write to the Free Software | |
15 | + | ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
16 | + | ;;;; | |
17 | + | ||
18 | + | (define-module (iri iri) | |
19 | + | #:use-module (ice-9 match) | |
20 | + | #:use-module (web uri) | |
21 | + | #:export (resolve-iri | |
22 | + | make-relative-iri)) | |
23 | + | ||
24 | + | (define (resolve-iri base reference) | |
25 | + | "Resolve an IRI from a @var{base} IRI and a relative @var{reference}. This | |
26 | + | is an implementation of the URI resolving algorithm described in section 5.2 | |
27 | + | of RFC3986. See @url{https://tools.ietf.org/html/rfc3986}." | |
28 | + | (define (remove-last-component path) | |
29 | + | (string-join | |
30 | + | (reverse (cdr (reverse (string-split path #\/)))) | |
31 | + | "/")) | |
32 | + | ||
33 | + | (define (transform-references base R) | |
34 | + | (if (uri-scheme R) | |
35 | + | (build-uri (uri-scheme R) | |
36 | + | #:host (uri-host R) | |
37 | + | #:path (remove-dot-segments (uri-path R)) | |
38 | + | #:query (uri-query R) | |
39 | + | #:fragment (uri-fragment R)) | |
40 | + | (if (uri-host R) | |
41 | + | (build-uri (uri-scheme base) | |
42 | + | #:host (uri-host R) | |
43 | + | #:path (remove-dot-segments (uri-path R)) | |
44 | + | #:query (uri-query R) | |
45 | + | #:fragment (uri-fragment R)) | |
46 | + | (if (equal? (uri-path R) "") | |
47 | + | (build-uri (uri-scheme base) | |
48 | + | #:host (uri-host base) | |
49 | + | #:path (uri-path base) | |
50 | + | #:query (or (uri-query R) (uri-query base)) | |
51 | + | #:fragment (if (uri-query R) | |
52 | + | (uri-fragment R) | |
53 | + | (or (uri-fragment R) (uri-fragment base)))) | |
54 | + | (build-uri (uri-scheme base) | |
55 | + | #:host (uri-host base) | |
56 | + | #:path (if (equal? (substring (uri-path R) 0 1) "/") | |
57 | + | (remove-dot-segments (uri-path R)) | |
58 | + | (remove-dot-segments | |
59 | + | (merge (uri-path base) (uri-path R)))) | |
60 | + | #:query (uri-query R) | |
61 | + | #:fragment (uri-fragment R)))))) | |
62 | + | ||
63 | + | (define (merge basepath referencepath) | |
64 | + | (string-append | |
65 | + | (string-join | |
66 | + | (append (reverse (cdr (reverse (string-split basepath #\/)))) '("")) | |
67 | + | "/") | |
68 | + | referencepath)) | |
69 | + | ||
70 | + | (define (remove-dot-segments path) | |
71 | + | (let loop ((input path) (output "")) | |
72 | + | (if (equal? input "") | |
73 | + | output | |
74 | + | (cond | |
75 | + | ((and (> (string-length input) 2) (equal? (substring input 0 3) "../")) | |
76 | + | (loop (substring input 3) output)) | |
77 | + | ((and (> (string-length input) 1) (equal? (substring input 0 2) "./")) | |
78 | + | (loop (substring input 2) output)) | |
79 | + | ((and (> (string-length input) 2) (equal? (substring input 0 3) "/./")) | |
80 | + | (loop (substring input 2) output)); keep trailing / | |
81 | + | ((equal? input "/.") | |
82 | + | (loop "/" output)) | |
83 | + | ((and (> (string-length input) 3) (equal? (substring input 0 4) "/../")) | |
84 | + | (loop (substring input 3) (remove-last-component output))); keep trailing / | |
85 | + | ((equal? input "/..") | |
86 | + | (loop "/" (remove-last-component output))) | |
87 | + | ((equal? input ".") | |
88 | + | (loop "" output)) | |
89 | + | ((equal? input "..") | |
90 | + | (loop "" output)) | |
91 | + | (else | |
92 | + | (let* ((segments (string-split input #\/)) | |
93 | + | (segment (car segments)) | |
94 | + | (add-slash? (equal? segment "")) | |
95 | + | (segment (if add-slash? (cadr segments) segment))) | |
96 | + | (loop | |
97 | + | (if add-slash? | |
98 | + | (string-join (cons "" (cdr (cdr segments))) "/") | |
99 | + | (string-join (cdr segments))) | |
100 | + | (if add-slash? | |
101 | + | (string-append output "/" segment) | |
102 | + | (string-append output segment))))))))) | |
103 | + | ||
104 | + | (define (set-iri-path uri) | |
105 | + | (build-uri | |
106 | + | (uri-scheme uri) | |
107 | + | #:host (uri-host uri) | |
108 | + | #:path (if (equal? (uri-path uri) "") "/" (uri-path uri)) | |
109 | + | #:query (uri-query uri) | |
110 | + | #:fragment (uri-fragment uri))) | |
111 | + | ||
112 | + | ;; This algorithm is not always called with sane values, so prevent errors | |
113 | + | ;; in some edge cases. | |
114 | + | (if (and base (string? reference)) | |
115 | + | (uri->string | |
116 | + | (transform-references (set-iri-path (string->uri base)) | |
117 | + | (string->uri-reference reference))) | |
118 | + | reference)) | |
119 | + | ||
120 | + | (define (canonical-iri iri) | |
121 | + | (let* ((iri (string->uri iri)) | |
122 | + | (path (uri-path iri)) | |
123 | + | (start (if (equal? (string-ref path 0) #\/) "/" "")) | |
124 | + | (path (reverse (string-split path #\/)))) | |
125 | + | (let loop ((path path) (result '()) (skip 0)) | |
126 | + | (match path | |
127 | + | ('() (build-uri | |
128 | + | (uri-scheme iri) | |
129 | + | #:host (uri-host iri) | |
130 | + | #:path (string-append start (string-join result "/")) | |
131 | + | #:query (uri-query iri) | |
132 | + | #:fragment (uri-fragment iri))) | |
133 | + | ((component path ...) | |
134 | + | (cond | |
135 | + | ((equal? component "") | |
136 | + | (if (null? result) | |
137 | + | (loop path (cons component result) skip) | |
138 | + | (loop path result skip))) | |
139 | + | ((equal? component ".") | |
140 | + | (loop path result skip)) | |
141 | + | ((equal? component "..") | |
142 | + | (loop path result (+ skip 1))) | |
143 | + | (else | |
144 | + | (if (> skip 0) | |
145 | + | (loop path result (- skip 1)) | |
146 | + | (loop path (cons component result) skip))))))))) | |
147 | + | ||
148 | + | (define (make-relative-iri iri base) | |
149 | + | (let ((iri (canonical-iri iri)) | |
150 | + | (base (canonical-iri base))) | |
151 | + | (uri->string | |
152 | + | (if (equal? (uri-scheme iri) (uri-scheme base)) | |
153 | + | ;; if we have the same scheme, we can skip it | |
154 | + | (if (equal? (uri-host iri) (uri-host base)) | |
155 | + | ;; if the host is the same, we try to compress the paths. There | |
156 | + | ;; are two solutions: keep the path from the iri, or use '..' | |
157 | + | ;; to go back whenever necessary. We select the shortes one. | |
158 | + | (let* ((base-path (string-split (uri-path base) #\/)) | |
159 | + | (iri-path (string-split (uri-path iri) #\/)) | |
160 | + | ;; the base path starts with /, so an empty component. | |
161 | + | ;; we also ignore the last component. | |
162 | + | (parents (cdr (cdr (map (const "..") base-path))))) | |
163 | + | (let loop ((base-path (cdr base-path)) | |
164 | + | (iri-path (cdr iri-path)) | |
165 | + | (parents parents)) | |
166 | + | (match base-path | |
167 | + | ((base-component base-path ...) | |
168 | + | (if (null? base-path) | |
169 | + | ;; we reached the end, we have the same directory structure | |
170 | + | (if (and (= (length iri-path) 1) | |
171 | + | (equal? (car iri-path) base-component) | |
172 | + | (or (uri-query iri) | |
173 | + | (uri-fragment iri))) | |
174 | + | ;; if it's the same path | |
175 | + | (build-uri-reference | |
176 | + | #:query (uri-query iri) | |
177 | + | #:fragment (uri-fragment iri)) | |
178 | + | ;; otherwise | |
179 | + | (let ((path (string-join iri-path "/"))) | |
180 | + | (build-uri-reference | |
181 | + | #:path (if (equal? base-component "") | |
182 | + | path | |
183 | + | (if (equal? path "") | |
184 | + | "." | |
185 | + | path)) | |
186 | + | #:query (uri-query iri) | |
187 | + | #:fragment (uri-fragment iri)))) | |
188 | + | ;; we didn't reach the end of the base iri yet, so | |
189 | + | ;; continue with the main logic | |
190 | + | (if (and (not (null? iri-path)) | |
191 | + | (equal? (car iri-path) base-component)) | |
192 | + | ;; we have the same directory, so we can skip a '..' | |
193 | + | (loop base-path (cdr iri-path) (cdr parents)) | |
194 | + | ;; we have a different directory, file, or we stopped | |
195 | + | ;; there, so return | |
196 | + | (let ((path (string-join (append parents iri-path) "/"))) | |
197 | + | (build-uri-reference | |
198 | + | #:path path | |
199 | + | #:query (uri-query iri) | |
200 | + | #:fragment (uri-fragment iri))))))))) | |
201 | + | ;; but if the host is different, we need to preserve everything | |
202 | + | ;; else, we have a url such as '//example.org' | |
203 | + | (build-uri-reference | |
204 | + | #:host (uri-host iri) | |
205 | + | #:path (uri-path iri) | |
206 | + | #:query (uri-query iri) | |
207 | + | #:fragment (uri-fragment iri))) | |
208 | + | ;; if we don't have the same scheme, we can't use a relative reference, | |
209 | + | ;; return the original iri | |
210 | + | iri)))) |