Add iri.scm from jsonld
Makefile.am
| 2 | 2 | ||
| 3 | 3 | SOURCES= \ | |
| 4 | 4 | rdf/rdf.scm \ | |
| 5 | - | rdf/xsd.scm | |
| 5 | + | rdf/xsd.scm \ | |
| 6 | + | iri/iri.scm \ | |
| 6 | 7 | ||
| 7 | 8 | TEST_EXTENSIONS = .scm | |
| 8 | 9 | SCM_LOG_DRIVER = env AM_TAP_AWK='$(AWK)' $(SHELL) $(top_srcdir)/tap-driver.sh --color-tests yes --ignore-exit |
iri/iri.scm unknown status 1
| 1 | + | ;;;; Copyright (C) 2019, 2020 Julien Lepiller <julien@lepiller.eu> | |
| 2 | + | ;;;; | |
| 3 | + | ;;;; This library is free software; you can redistribute it and/or | |
| 4 | + | ;;;; modify it under the terms of the GNU Lesser General Public | |
| 5 | + | ;;;; License as published by the Free Software Foundation; either | |
| 6 | + | ;;;; version 3 of the License, or (at your option) any later version. | |
| 7 | + | ;;;; | |
| 8 | + | ;;;; This library is distributed in the hope that it will be useful, | |
| 9 | + | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 10 | + | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 11 | + | ;;;; Lesser General Public License for more details. | |
| 12 | + | ;;;; | |
| 13 | + | ;;;; You should have received a copy of the GNU Lesser General Public | |
| 14 | + | ;;;; License along with this library; if not, write to the Free Software | |
| 15 | + | ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
| 16 | + | ;;;; | |
| 17 | + | ||
| 18 | + | (define-module (iri iri) | |
| 19 | + | #:use-module (ice-9 match) | |
| 20 | + | #:use-module (web uri) | |
| 21 | + | #:export (resolve-iri | |
| 22 | + | make-relative-iri)) | |
| 23 | + | ||
| 24 | + | (define (resolve-iri base reference) | |
| 25 | + | "Resolve an IRI from a @var{base} IRI and a relative @var{reference}. This | |
| 26 | + | is an implementation of the URI resolving algorithm described in section 5.2 | |
| 27 | + | of RFC3986. See @url{https://tools.ietf.org/html/rfc3986}." | |
| 28 | + | (define (remove-last-component path) | |
| 29 | + | (string-join | |
| 30 | + | (reverse (cdr (reverse (string-split path #\/)))) | |
| 31 | + | "/")) | |
| 32 | + | ||
| 33 | + | (define (transform-references base R) | |
| 34 | + | (if (uri-scheme R) | |
| 35 | + | (build-uri (uri-scheme R) | |
| 36 | + | #:host (uri-host R) | |
| 37 | + | #:path (remove-dot-segments (uri-path R)) | |
| 38 | + | #:query (uri-query R) | |
| 39 | + | #:fragment (uri-fragment R)) | |
| 40 | + | (if (uri-host R) | |
| 41 | + | (build-uri (uri-scheme base) | |
| 42 | + | #:host (uri-host R) | |
| 43 | + | #:path (remove-dot-segments (uri-path R)) | |
| 44 | + | #:query (uri-query R) | |
| 45 | + | #:fragment (uri-fragment R)) | |
| 46 | + | (if (equal? (uri-path R) "") | |
| 47 | + | (build-uri (uri-scheme base) | |
| 48 | + | #:host (uri-host base) | |
| 49 | + | #:path (uri-path base) | |
| 50 | + | #:query (or (uri-query R) (uri-query base)) | |
| 51 | + | #:fragment (if (uri-query R) | |
| 52 | + | (uri-fragment R) | |
| 53 | + | (or (uri-fragment R) (uri-fragment base)))) | |
| 54 | + | (build-uri (uri-scheme base) | |
| 55 | + | #:host (uri-host base) | |
| 56 | + | #:path (if (equal? (substring (uri-path R) 0 1) "/") | |
| 57 | + | (remove-dot-segments (uri-path R)) | |
| 58 | + | (remove-dot-segments | |
| 59 | + | (merge (uri-path base) (uri-path R)))) | |
| 60 | + | #:query (uri-query R) | |
| 61 | + | #:fragment (uri-fragment R)))))) | |
| 62 | + | ||
| 63 | + | (define (merge basepath referencepath) | |
| 64 | + | (string-append | |
| 65 | + | (string-join | |
| 66 | + | (append (reverse (cdr (reverse (string-split basepath #\/)))) '("")) | |
| 67 | + | "/") | |
| 68 | + | referencepath)) | |
| 69 | + | ||
| 70 | + | (define (remove-dot-segments path) | |
| 71 | + | (let loop ((input path) (output "")) | |
| 72 | + | (if (equal? input "") | |
| 73 | + | output | |
| 74 | + | (cond | |
| 75 | + | ((and (> (string-length input) 2) (equal? (substring input 0 3) "../")) | |
| 76 | + | (loop (substring input 3) output)) | |
| 77 | + | ((and (> (string-length input) 1) (equal? (substring input 0 2) "./")) | |
| 78 | + | (loop (substring input 2) output)) | |
| 79 | + | ((and (> (string-length input) 2) (equal? (substring input 0 3) "/./")) | |
| 80 | + | (loop (substring input 2) output)); keep trailing / | |
| 81 | + | ((equal? input "/.") | |
| 82 | + | (loop "/" output)) | |
| 83 | + | ((and (> (string-length input) 3) (equal? (substring input 0 4) "/../")) | |
| 84 | + | (loop (substring input 3) (remove-last-component output))); keep trailing / | |
| 85 | + | ((equal? input "/..") | |
| 86 | + | (loop "/" (remove-last-component output))) | |
| 87 | + | ((equal? input ".") | |
| 88 | + | (loop "" output)) | |
| 89 | + | ((equal? input "..") | |
| 90 | + | (loop "" output)) | |
| 91 | + | (else | |
| 92 | + | (let* ((segments (string-split input #\/)) | |
| 93 | + | (segment (car segments)) | |
| 94 | + | (add-slash? (equal? segment "")) | |
| 95 | + | (segment (if add-slash? (cadr segments) segment))) | |
| 96 | + | (loop | |
| 97 | + | (if add-slash? | |
| 98 | + | (string-join (cons "" (cdr (cdr segments))) "/") | |
| 99 | + | (string-join (cdr segments))) | |
| 100 | + | (if add-slash? | |
| 101 | + | (string-append output "/" segment) | |
| 102 | + | (string-append output segment))))))))) | |
| 103 | + | ||
| 104 | + | (define (set-iri-path uri) | |
| 105 | + | (build-uri | |
| 106 | + | (uri-scheme uri) | |
| 107 | + | #:host (uri-host uri) | |
| 108 | + | #:path (if (equal? (uri-path uri) "") "/" (uri-path uri)) | |
| 109 | + | #:query (uri-query uri) | |
| 110 | + | #:fragment (uri-fragment uri))) | |
| 111 | + | ||
| 112 | + | ;; This algorithm is not always called with sane values, so prevent errors | |
| 113 | + | ;; in some edge cases. | |
| 114 | + | (if (and base (string? reference)) | |
| 115 | + | (uri->string | |
| 116 | + | (transform-references (set-iri-path (string->uri base)) | |
| 117 | + | (string->uri-reference reference))) | |
| 118 | + | reference)) | |
| 119 | + | ||
| 120 | + | (define (canonical-iri iri) | |
| 121 | + | (let* ((iri (string->uri iri)) | |
| 122 | + | (path (uri-path iri)) | |
| 123 | + | (start (if (equal? (string-ref path 0) #\/) "/" "")) | |
| 124 | + | (path (reverse (string-split path #\/)))) | |
| 125 | + | (let loop ((path path) (result '()) (skip 0)) | |
| 126 | + | (match path | |
| 127 | + | ('() (build-uri | |
| 128 | + | (uri-scheme iri) | |
| 129 | + | #:host (uri-host iri) | |
| 130 | + | #:path (string-append start (string-join result "/")) | |
| 131 | + | #:query (uri-query iri) | |
| 132 | + | #:fragment (uri-fragment iri))) | |
| 133 | + | ((component path ...) | |
| 134 | + | (cond | |
| 135 | + | ((equal? component "") | |
| 136 | + | (if (null? result) | |
| 137 | + | (loop path (cons component result) skip) | |
| 138 | + | (loop path result skip))) | |
| 139 | + | ((equal? component ".") | |
| 140 | + | (loop path result skip)) | |
| 141 | + | ((equal? component "..") | |
| 142 | + | (loop path result (+ skip 1))) | |
| 143 | + | (else | |
| 144 | + | (if (> skip 0) | |
| 145 | + | (loop path result (- skip 1)) | |
| 146 | + | (loop path (cons component result) skip))))))))) | |
| 147 | + | ||
| 148 | + | (define (make-relative-iri iri base) | |
| 149 | + | (let ((iri (canonical-iri iri)) | |
| 150 | + | (base (canonical-iri base))) | |
| 151 | + | (uri->string | |
| 152 | + | (if (equal? (uri-scheme iri) (uri-scheme base)) | |
| 153 | + | ;; if we have the same scheme, we can skip it | |
| 154 | + | (if (equal? (uri-host iri) (uri-host base)) | |
| 155 | + | ;; if the host is the same, we try to compress the paths. There | |
| 156 | + | ;; are two solutions: keep the path from the iri, or use '..' | |
| 157 | + | ;; to go back whenever necessary. We select the shortes one. | |
| 158 | + | (let* ((base-path (string-split (uri-path base) #\/)) | |
| 159 | + | (iri-path (string-split (uri-path iri) #\/)) | |
| 160 | + | ;; the base path starts with /, so an empty component. | |
| 161 | + | ;; we also ignore the last component. | |
| 162 | + | (parents (cdr (cdr (map (const "..") base-path))))) | |
| 163 | + | (let loop ((base-path (cdr base-path)) | |
| 164 | + | (iri-path (cdr iri-path)) | |
| 165 | + | (parents parents)) | |
| 166 | + | (match base-path | |
| 167 | + | ((base-component base-path ...) | |
| 168 | + | (if (null? base-path) | |
| 169 | + | ;; we reached the end, we have the same directory structure | |
| 170 | + | (if (and (= (length iri-path) 1) | |
| 171 | + | (equal? (car iri-path) base-component) | |
| 172 | + | (or (uri-query iri) | |
| 173 | + | (uri-fragment iri))) | |
| 174 | + | ;; if it's the same path | |
| 175 | + | (build-uri-reference | |
| 176 | + | #:query (uri-query iri) | |
| 177 | + | #:fragment (uri-fragment iri)) | |
| 178 | + | ;; otherwise | |
| 179 | + | (let ((path (string-join iri-path "/"))) | |
| 180 | + | (build-uri-reference | |
| 181 | + | #:path (if (equal? base-component "") | |
| 182 | + | path | |
| 183 | + | (if (equal? path "") | |
| 184 | + | "." | |
| 185 | + | path)) | |
| 186 | + | #:query (uri-query iri) | |
| 187 | + | #:fragment (uri-fragment iri)))) | |
| 188 | + | ;; we didn't reach the end of the base iri yet, so | |
| 189 | + | ;; continue with the main logic | |
| 190 | + | (if (and (not (null? iri-path)) | |
| 191 | + | (equal? (car iri-path) base-component)) | |
| 192 | + | ;; we have the same directory, so we can skip a '..' | |
| 193 | + | (loop base-path (cdr iri-path) (cdr parents)) | |
| 194 | + | ;; we have a different directory, file, or we stopped | |
| 195 | + | ;; there, so return | |
| 196 | + | (let ((path (string-join (append parents iri-path) "/"))) | |
| 197 | + | (build-uri-reference | |
| 198 | + | #:path path | |
| 199 | + | #:query (uri-query iri) | |
| 200 | + | #:fragment (uri-fragment iri))))))))) | |
| 201 | + | ;; but if the host is different, we need to preserve everything | |
| 202 | + | ;; else, we have a url such as '//example.org' | |
| 203 | + | (build-uri-reference | |
| 204 | + | #:host (uri-host iri) | |
| 205 | + | #:path (uri-path iri) | |
| 206 | + | #:query (uri-query iri) | |
| 207 | + | #:fragment (uri-fragment iri))) | |
| 208 | + | ;; if we don't have the same scheme, we can't use a relative reference, | |
| 209 | + | ;; return the original iri | |
| 210 | + | iri)))) |