iri.scm
1 | ;;;; Copyright (C) 2019, 2020 Julien Lepiller <julien@lepiller.eu> |
2 | ;;;; |
3 | ;;;; This library is free software; you can redistribute it and/or |
4 | ;;;; modify it under the terms of the GNU Lesser General Public |
5 | ;;;; License as published by the Free Software Foundation; either |
6 | ;;;; version 3 of the License, or (at your option) any later version. |
7 | ;;;; |
8 | ;;;; This library is distributed in the hope that it will be useful, |
9 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
11 | ;;;; Lesser General Public License for more details. |
12 | ;;;; |
13 | ;;;; You should have received a copy of the GNU Lesser General Public |
14 | ;;;; License along with this library; if not, write to the Free Software |
15 | ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
16 | ;;;; |
17 | |
18 | (define-module (jsonld iri) |
19 | #:use-module (ice-9 match) |
20 | #:use-module (web uri) |
21 | #:export (resolve-iri |
22 | make-relative-iri)) |
23 | |
24 | (define (resolve-iri base reference) |
25 | "Resolve an IRI from a @var{base} IRI and a relative @var{reference}. This |
26 | is an implementation of the URI resolving algorithm described in section 5.2 |
27 | of RFC3986. See @url{https://tools.ietf.org/html/rfc3986}." |
28 | (define (remove-last-component path) |
29 | (string-join |
30 | (reverse (cdr (reverse (string-split path #\/)))) |
31 | "/")) |
32 | |
33 | (define (transform-references base R) |
34 | (if (uri-scheme R) |
35 | (build-uri (uri-scheme R) |
36 | #:host (uri-host R) |
37 | #:path (remove-dot-segments (uri-path R)) |
38 | #:query (uri-query R) |
39 | #:fragment (uri-fragment R)) |
40 | (if (uri-host R) |
41 | (build-uri (uri-scheme base) |
42 | #:host (uri-host R) |
43 | #:path (remove-dot-segments (uri-path R)) |
44 | #:query (uri-query R) |
45 | #:fragment (uri-fragment R)) |
46 | (if (equal? (uri-path R) "") |
47 | (build-uri (uri-scheme base) |
48 | #:host (uri-host base) |
49 | #:path (uri-path base) |
50 | #:query (or (uri-query R) (uri-query base)) |
51 | #:fragment (if (uri-query R) |
52 | (uri-fragment R) |
53 | (or (uri-fragment R) (uri-fragment base)))) |
54 | (build-uri (uri-scheme base) |
55 | #:host (uri-host base) |
56 | #:path (if (equal? (substring (uri-path R) 0 1) "/") |
57 | (remove-dot-segments (uri-path R)) |
58 | (remove-dot-segments |
59 | (merge (uri-path base) (uri-path R)))) |
60 | #:query (uri-query R) |
61 | #:fragment (uri-fragment R)))))) |
62 | |
63 | (define (merge basepath referencepath) |
64 | (string-append |
65 | (string-join |
66 | (append (reverse (cdr (reverse (string-split basepath #\/)))) '("")) |
67 | "/") |
68 | referencepath)) |
69 | |
70 | (define (remove-dot-segments path) |
71 | (let loop ((input path) (output "")) |
72 | (if (equal? input "") |
73 | output |
74 | (cond |
75 | ((and (> (string-length input) 2) (equal? (substring input 0 3) "../")) |
76 | (loop (substring input 3) output)) |
77 | ((and (> (string-length input) 1) (equal? (substring input 0 2) "./")) |
78 | (loop (substring input 2) output)) |
79 | ((and (> (string-length input) 2) (equal? (substring input 0 3) "/./")) |
80 | (loop (substring input 2) output)); keep trailing / |
81 | ((equal? input "/.") |
82 | (loop "/" output)) |
83 | ((and (> (string-length input) 3) (equal? (substring input 0 4) "/../")) |
84 | (loop (substring input 3) (remove-last-component output))); keep trailing / |
85 | ((equal? input "/..") |
86 | (loop "/" (remove-last-component output))) |
87 | ((equal? input ".") |
88 | (loop "" output)) |
89 | ((equal? input "..") |
90 | (loop "" output)) |
91 | (else |
92 | (let* ((segments (string-split input #\/)) |
93 | (segment (car segments)) |
94 | (add-slash? (equal? segment "")) |
95 | (segment (if add-slash? (cadr segments) segment))) |
96 | (loop |
97 | (if add-slash? |
98 | (string-join (cons "" (cdr (cdr segments))) "/") |
99 | (string-join (cdr segments))) |
100 | (if add-slash? |
101 | (string-append output "/" segment) |
102 | (string-append output segment))))))))) |
103 | |
104 | (define (set-iri-path uri) |
105 | (build-uri |
106 | (uri-scheme uri) |
107 | #:host (uri-host uri) |
108 | #:path (if (equal? (uri-path uri) "") "/" (uri-path uri)) |
109 | #:query (uri-query uri) |
110 | #:fragment (uri-fragment uri))) |
111 | |
112 | ;; This algorithm is not always called with sane values, so prevent errors |
113 | ;; in some edge cases. |
114 | (if (and base (string? reference) (not (string->uri reference)) |
115 | (string->uri-reference reference)) |
116 | (uri->string |
117 | (transform-references (set-iri-path (string->uri base)) |
118 | (string->uri-reference reference))) |
119 | reference)) |
120 | |
121 | (define (canonical-iri iri) |
122 | (let* ((iri (string->uri iri)) |
123 | (path (uri-path iri)) |
124 | (start (if (equal? (string-ref path 0) #\/) "/" "")) |
125 | (path (reverse (string-split path #\/)))) |
126 | (let loop ((path path) (result '()) (skip 0)) |
127 | (match path |
128 | ('() (build-uri |
129 | (uri-scheme iri) |
130 | #:host (uri-host iri) |
131 | #:path (string-append start (string-join result "/")) |
132 | #:query (uri-query iri) |
133 | #:fragment (uri-fragment iri))) |
134 | ((component path ...) |
135 | (cond |
136 | ((equal? component "") |
137 | (if (null? result) |
138 | (loop path (cons component result) skip) |
139 | (loop path result skip))) |
140 | ((equal? component ".") |
141 | (loop path result skip)) |
142 | ((equal? component "..") |
143 | (loop path result (+ skip 1))) |
144 | (else |
145 | (if (> skip 0) |
146 | (loop path result (- skip 1)) |
147 | (loop path (cons component result) skip))))))))) |
148 | |
149 | (define (make-relative-iri iri base) |
150 | (let ((iri (canonical-iri iri)) |
151 | (base (canonical-iri base))) |
152 | (uri->string |
153 | (if (equal? (uri-scheme iri) (uri-scheme base)) |
154 | ;; if we have the same scheme, we can skip it |
155 | (if (equal? (uri-host iri) (uri-host base)) |
156 | ;; if the host is the same, we try to compress the paths. There |
157 | ;; are two solutions: keep the path from the iri, or use '..' |
158 | ;; to go back whenever necessary. We select the shortes one. |
159 | (let* ((base-path (string-split (uri-path base) #\/)) |
160 | (iri-path (string-split (uri-path iri) #\/)) |
161 | ;; the base path starts with /, so an empty component. |
162 | ;; we also ignore the last component. |
163 | (parents (cdr (cdr (map (const "..") base-path))))) |
164 | (let loop ((base-path (cdr base-path)) |
165 | (iri-path (cdr iri-path)) |
166 | (parents parents)) |
167 | (match base-path |
168 | ((base-component base-path ...) |
169 | (if (null? base-path) |
170 | ;; we reached the end, we have the same directory structure |
171 | (if (and (= (length iri-path) 1) |
172 | (equal? (car iri-path) base-component) |
173 | (or (uri-query iri) |
174 | (uri-fragment iri))) |
175 | ;; if it's the same path |
176 | (build-uri-reference |
177 | #:query (uri-query iri) |
178 | #:fragment (uri-fragment iri)) |
179 | ;; otherwise |
180 | (let ((path (string-join iri-path "/"))) |
181 | (build-uri-reference |
182 | #:path (if (equal? base-component "") |
183 | path |
184 | (if (equal? path "") |
185 | "." |
186 | path)) |
187 | #:query (uri-query iri) |
188 | #:fragment (uri-fragment iri)))) |
189 | ;; we didn't reach the end of the base iri yet, so |
190 | ;; continue with the main logic |
191 | (if (and (not (null? iri-path)) |
192 | (equal? (car iri-path) base-component)) |
193 | ;; we have the same directory, so we can skip a '..' |
194 | (loop base-path (cdr iri-path) (cdr parents)) |
195 | ;; we have a different directory, file, or we stopped |
196 | ;; there, so return |
197 | (let ((path (string-join (append parents iri-path) "/"))) |
198 | (build-uri-reference |
199 | #:path path |
200 | #:query (uri-query iri) |
201 | #:fragment (uri-fragment iri))))))))) |
202 | ;; but if the host is different, we need to preserve everything |
203 | ;; else, we have a url such as '//example.org' |
204 | (build-uri-reference |
205 | #:host (uri-host iri) |
206 | #:path (uri-path iri) |
207 | #:query (uri-query iri) |
208 | #:fragment (uri-fragment iri))) |
209 | ;; if we don't have the same scheme, we can't use a relative reference, |
210 | ;; return the original iri |
211 | iri)))) |
212 |