iri.scm
| 1 | ;;;; Copyright (C) 2019, 2020 Julien Lepiller <julien@lepiller.eu> |
| 2 | ;;;; |
| 3 | ;;;; This library is free software; you can redistribute it and/or |
| 4 | ;;;; modify it under the terms of the GNU Lesser General Public |
| 5 | ;;;; License as published by the Free Software Foundation; either |
| 6 | ;;;; version 3 of the License, or (at your option) any later version. |
| 7 | ;;;; |
| 8 | ;;;; This library is distributed in the hope that it will be useful, |
| 9 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 11 | ;;;; Lesser General Public License for more details. |
| 12 | ;;;; |
| 13 | ;;;; You should have received a copy of the GNU Lesser General Public |
| 14 | ;;;; License along with this library; if not, write to the Free Software |
| 15 | ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 16 | ;;;; |
| 17 | |
| 18 | (define-module (iri iri) |
| 19 | #:use-module (ice-9 match) |
| 20 | #:use-module (web uri) |
| 21 | #:export (resolve-iri |
| 22 | make-relative-iri)) |
| 23 | |
| 24 | (define (resolve-iri base reference) |
| 25 | "Resolve an IRI from a @var{base} IRI and a relative @var{reference}. This |
| 26 | is an implementation of the URI resolving algorithm described in section 5.2 |
| 27 | of RFC3986. See @url{https://tools.ietf.org/html/rfc3986}." |
| 28 | (define (remove-last-component path) |
| 29 | (string-join |
| 30 | (reverse (cdr (reverse (string-split path #\/)))) |
| 31 | "/")) |
| 32 | |
| 33 | (define (transform-references base R) |
| 34 | (if (uri-scheme R) |
| 35 | (build-uri (uri-scheme R) |
| 36 | #:host (uri-host R) |
| 37 | #:path (remove-dot-segments (uri-path R)) |
| 38 | #:query (uri-query R) |
| 39 | #:fragment (uri-fragment R)) |
| 40 | (if (uri-host R) |
| 41 | (build-uri (uri-scheme base) |
| 42 | #:host (uri-host R) |
| 43 | #:path (remove-dot-segments (uri-path R)) |
| 44 | #:query (uri-query R) |
| 45 | #:fragment (uri-fragment R)) |
| 46 | (if (equal? (uri-path R) "") |
| 47 | (build-uri (uri-scheme base) |
| 48 | #:host (uri-host base) |
| 49 | #:path (uri-path base) |
| 50 | #:query (or (uri-query R) (uri-query base)) |
| 51 | #:fragment (if (uri-query R) |
| 52 | (uri-fragment R) |
| 53 | (or (uri-fragment R) (uri-fragment base)))) |
| 54 | (build-uri (uri-scheme base) |
| 55 | #:host (uri-host base) |
| 56 | #:path (if (equal? (substring (uri-path R) 0 1) "/") |
| 57 | (remove-dot-segments (uri-path R)) |
| 58 | (remove-dot-segments |
| 59 | (merge (uri-path base) (uri-path R)))) |
| 60 | #:query (uri-query R) |
| 61 | #:fragment (uri-fragment R)))))) |
| 62 | |
| 63 | (define (merge basepath referencepath) |
| 64 | (string-append |
| 65 | (string-join |
| 66 | (append (reverse (cdr (reverse (string-split basepath #\/)))) '("")) |
| 67 | "/") |
| 68 | referencepath)) |
| 69 | |
| 70 | (define (remove-dot-segments path) |
| 71 | (let loop ((input path) (output "")) |
| 72 | (if (equal? input "") |
| 73 | output |
| 74 | (cond |
| 75 | ((and (> (string-length input) 2) (equal? (substring input 0 3) "../")) |
| 76 | (loop (substring input 3) output)) |
| 77 | ((and (> (string-length input) 1) (equal? (substring input 0 2) "./")) |
| 78 | (loop (substring input 2) output)) |
| 79 | ((and (> (string-length input) 2) (equal? (substring input 0 3) "/./")) |
| 80 | (loop (substring input 2) output)); keep trailing / |
| 81 | ((equal? input "/.") |
| 82 | (loop "/" output)) |
| 83 | ((and (> (string-length input) 3) (equal? (substring input 0 4) "/../")) |
| 84 | (loop (substring input 3) (remove-last-component output))); keep trailing / |
| 85 | ((equal? input "/..") |
| 86 | (loop "/" (remove-last-component output))) |
| 87 | ((equal? input ".") |
| 88 | (loop "" output)) |
| 89 | ((equal? input "..") |
| 90 | (loop "" output)) |
| 91 | (else |
| 92 | (let* ((segments (string-split input #\/)) |
| 93 | (segment (car segments)) |
| 94 | (add-slash? (equal? segment "")) |
| 95 | (segment (if add-slash? (cadr segments) segment))) |
| 96 | (loop |
| 97 | (if add-slash? |
| 98 | (string-join (cons "" (cdr (cdr segments))) "/") |
| 99 | (string-join (cdr segments))) |
| 100 | (if add-slash? |
| 101 | (string-append output "/" segment) |
| 102 | (string-append output segment))))))))) |
| 103 | |
| 104 | (define (set-iri-path uri) |
| 105 | (build-uri |
| 106 | (uri-scheme uri) |
| 107 | #:host (uri-host uri) |
| 108 | #:path (if (equal? (uri-path uri) "") "/" (uri-path uri)) |
| 109 | #:query (uri-query uri) |
| 110 | #:fragment (uri-fragment uri))) |
| 111 | |
| 112 | ;; This algorithm is not always called with sane values, so prevent errors |
| 113 | ;; in some edge cases. |
| 114 | (if (and base (string? reference)) |
| 115 | (uri->string |
| 116 | (transform-references (set-iri-path (string->uri base)) |
| 117 | (string->uri-reference reference))) |
| 118 | reference)) |
| 119 | |
| 120 | (define (canonical-iri iri) |
| 121 | (let* ((iri (string->uri iri)) |
| 122 | (path (uri-path iri)) |
| 123 | (start (if (equal? (string-ref path 0) #\/) "/" "")) |
| 124 | (path (reverse (string-split path #\/)))) |
| 125 | (let loop ((path path) (result '()) (skip 0)) |
| 126 | (match path |
| 127 | ('() (build-uri |
| 128 | (uri-scheme iri) |
| 129 | #:host (uri-host iri) |
| 130 | #:path (string-append start (string-join result "/")) |
| 131 | #:query (uri-query iri) |
| 132 | #:fragment (uri-fragment iri))) |
| 133 | ((component path ...) |
| 134 | (cond |
| 135 | ((equal? component "") |
| 136 | (if (null? result) |
| 137 | (loop path (cons component result) skip) |
| 138 | (loop path result skip))) |
| 139 | ((equal? component ".") |
| 140 | (loop path result skip)) |
| 141 | ((equal? component "..") |
| 142 | (loop path result (+ skip 1))) |
| 143 | (else |
| 144 | (if (> skip 0) |
| 145 | (loop path result (- skip 1)) |
| 146 | (loop path (cons component result) skip))))))))) |
| 147 | |
| 148 | (define (make-relative-iri iri base) |
| 149 | (let ((iri (canonical-iri iri)) |
| 150 | (base (canonical-iri base))) |
| 151 | (uri->string |
| 152 | (if (equal? (uri-scheme iri) (uri-scheme base)) |
| 153 | ;; if we have the same scheme, we can skip it |
| 154 | (if (equal? (uri-host iri) (uri-host base)) |
| 155 | ;; if the host is the same, we try to compress the paths. There |
| 156 | ;; are two solutions: keep the path from the iri, or use '..' |
| 157 | ;; to go back whenever necessary. We select the shortes one. |
| 158 | (let* ((base-path (string-split (uri-path base) #\/)) |
| 159 | (iri-path (string-split (uri-path iri) #\/)) |
| 160 | ;; the base path starts with /, so an empty component. |
| 161 | ;; we also ignore the last component. |
| 162 | (parents (cdr (cdr (map (const "..") base-path))))) |
| 163 | (let loop ((base-path (cdr base-path)) |
| 164 | (iri-path (cdr iri-path)) |
| 165 | (parents parents)) |
| 166 | (match base-path |
| 167 | ((base-component base-path ...) |
| 168 | (if (null? base-path) |
| 169 | ;; we reached the end, we have the same directory structure |
| 170 | (if (and (= (length iri-path) 1) |
| 171 | (equal? (car iri-path) base-component) |
| 172 | (or (uri-query iri) |
| 173 | (uri-fragment iri))) |
| 174 | ;; if it's the same path |
| 175 | (build-uri-reference |
| 176 | #:query (uri-query iri) |
| 177 | #:fragment (uri-fragment iri)) |
| 178 | ;; otherwise |
| 179 | (let ((path (string-join iri-path "/"))) |
| 180 | (build-uri-reference |
| 181 | #:path (if (equal? base-component "") |
| 182 | path |
| 183 | (if (equal? path "") |
| 184 | "." |
| 185 | path)) |
| 186 | #:query (uri-query iri) |
| 187 | #:fragment (uri-fragment iri)))) |
| 188 | ;; we didn't reach the end of the base iri yet, so |
| 189 | ;; continue with the main logic |
| 190 | (if (and (not (null? iri-path)) |
| 191 | (equal? (car iri-path) base-component)) |
| 192 | ;; we have the same directory, so we can skip a '..' |
| 193 | (loop base-path (cdr iri-path) (cdr parents)) |
| 194 | ;; we have a different directory, file, or we stopped |
| 195 | ;; there, so return |
| 196 | (let ((path (string-join (append parents iri-path) "/"))) |
| 197 | (build-uri-reference |
| 198 | #:path path |
| 199 | #:query (uri-query iri) |
| 200 | #:fragment (uri-fragment iri))))))))) |
| 201 | ;; but if the host is different, we need to preserve everything |
| 202 | ;; else, we have a url such as '//example.org' |
| 203 | (build-uri-reference |
| 204 | #:host (uri-host iri) |
| 205 | #:path (uri-path iri) |
| 206 | #:query (uri-query iri) |
| 207 | #:fragment (uri-fragment iri))) |
| 208 | ;; if we don't have the same scheme, we can't use a relative reference, |
| 209 | ;; return the original iri |
| 210 | iri)))) |
| 211 |