;;;; Copyright (C) 2019, 2020 Julien Lepiller ;;;; ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public ;;;; License as published by the Free Software Foundation; either ;;;; version 3 of the License, or (at your option) any later version. ;;;; ;;;; This library is distributed in the hope that it will be useful, ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;;; Lesser General Public License for more details. ;;;; ;;;; You should have received a copy of the GNU Lesser General Public ;;;; License along with this library; if not, write to the Free Software ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ;;;; (define-module (jsonld iri) #:use-module (ice-9 match) #:use-module (web uri) #:export (resolve-iri make-relative-iri)) (define (resolve-iri base reference) "Resolve an IRI from a @var{base} IRI and a relative @var{reference}. This is an implementation of the URI resolving algorithm described in section 5.2 of RFC3986. See @url{https://tools.ietf.org/html/rfc3986}." (define (remove-last-component path) (string-join (reverse (cdr (reverse (string-split path #\/)))) "/")) (define (transform-references base R) (if (uri-scheme R) (build-uri (uri-scheme R) #:host (uri-host R) #:path (remove-dot-segments (uri-path R)) #:query (uri-query R) #:fragment (uri-fragment R)) (if (uri-host R) (build-uri (uri-scheme base) #:host (uri-host R) #:path (remove-dot-segments (uri-path R)) #:query (uri-query R) #:fragment (uri-fragment R)) (if (equal? (uri-path R) "") (build-uri (uri-scheme base) #:host (uri-host base) #:path (uri-path base) #:query (or (uri-query R) (uri-query base)) #:fragment (if (uri-query R) (uri-fragment R) (or (uri-fragment R) (uri-fragment base)))) (build-uri (uri-scheme base) #:host (uri-host base) #:path (if (equal? (substring (uri-path R) 0 1) "/") (remove-dot-segments (uri-path R)) (remove-dot-segments (merge (uri-path base) (uri-path R)))) #:query (uri-query R) #:fragment (uri-fragment R)))))) (define (merge basepath referencepath) (string-append (string-join (append (reverse (cdr (reverse (string-split basepath #\/)))) '("")) "/") referencepath)) (define (remove-dot-segments path) (let loop ((input path) (output "")) (if (equal? input "") output (cond ((and (> (string-length input) 2) (equal? (substring input 0 3) "../")) (loop (substring input 3) output)) ((and (> (string-length input) 1) (equal? (substring input 0 2) "./")) (loop (substring input 2) output)) ((and (> (string-length input) 2) (equal? (substring input 0 3) "/./")) (loop (substring input 2) output)); keep trailing / ((equal? input "/.") (loop "/" output)) ((and (> (string-length input) 3) (equal? (substring input 0 4) "/../")) (loop (substring input 3) (remove-last-component output))); keep trailing / ((equal? input "/..") (loop "/" (remove-last-component output))) ((equal? input ".") (loop "" output)) ((equal? input "..") (loop "" output)) (else (let* ((segments (string-split input #\/)) (segment (car segments)) (add-slash? (equal? segment "")) (segment (if add-slash? (cadr segments) segment))) (loop (if add-slash? (string-join (cons "" (cdr (cdr segments))) "/") (string-join (cdr segments))) (if add-slash? (string-append output "/" segment) (string-append output segment))))))))) (define (set-iri-path uri) (build-uri (uri-scheme uri) #:host (uri-host uri) #:path (if (equal? (uri-path uri) "") "/" (uri-path uri)) #:query (uri-query uri) #:fragment (uri-fragment uri))) ;; This algorithm is not always called with sane values, so prevent errors ;; in some edge cases. (if (and base (string? reference) (not (string->uri reference)) (string->uri-reference reference)) (uri->string (transform-references (set-iri-path (string->uri base)) (string->uri-reference reference))) reference)) (define (canonical-iri iri) (let* ((iri (string->uri iri)) (path (uri-path iri)) (start (if (equal? (string-ref path 0) #\/) "/" "")) (path (reverse (string-split path #\/)))) (let loop ((path path) (result '()) (skip 0)) (match path ('() (build-uri (uri-scheme iri) #:host (uri-host iri) #:path (string-append start (string-join result "/")) #:query (uri-query iri) #:fragment (uri-fragment iri))) ((component path ...) (cond ((equal? component "") (if (null? result) (loop path (cons component result) skip) (loop path result skip))) ((equal? component ".") (loop path result skip)) ((equal? component "..") (loop path result (+ skip 1))) (else (if (> skip 0) (loop path result (- skip 1)) (loop path (cons component result) skip))))))))) (define (make-relative-iri iri base) (let ((iri (canonical-iri iri)) (base (canonical-iri base))) (uri->string (if (equal? (uri-scheme iri) (uri-scheme base)) ;; if we have the same scheme, we can skip it (if (equal? (uri-host iri) (uri-host base)) ;; if the host is the same, we try to compress the paths. There ;; are two solutions: keep the path from the iri, or use '..' ;; to go back whenever necessary. We select the shortes one. (let* ((base-path (string-split (uri-path base) #\/)) (iri-path (string-split (uri-path iri) #\/)) ;; the base path starts with /, so an empty component. ;; we also ignore the last component. (parents (cdr (cdr (map (const "..") base-path))))) (let loop ((base-path (cdr base-path)) (iri-path (cdr iri-path)) (parents parents)) (match base-path ((base-component base-path ...) (if (null? base-path) ;; we reached the end, we have the same directory structure (if (and (= (length iri-path) 1) (equal? (car iri-path) base-component) (or (uri-query iri) (uri-fragment iri))) ;; if it's the same path (build-uri-reference #:query (uri-query iri) #:fragment (uri-fragment iri)) ;; otherwise (let ((path (string-join iri-path "/"))) (build-uri-reference #:path (if (equal? base-component "") path (if (equal? path "") "." path)) #:query (uri-query iri) #:fragment (uri-fragment iri)))) ;; we didn't reach the end of the base iri yet, so ;; continue with the main logic (if (and (not (null? iri-path)) (equal? (car iri-path) base-component)) ;; we have the same directory, so we can skip a '..' (loop base-path (cdr iri-path) (cdr parents)) ;; we have a different directory, file, or we stopped ;; there, so return (let ((path (string-join (append parents iri-path) "/"))) (build-uri-reference #:path path #:query (uri-query iri) #:fragment (uri-fragment iri))))))))) ;; but if the host is different, we need to preserve everything ;; else, we have a url such as '//example.org' (build-uri-reference #:host (uri-host iri) #:path (uri-path iri) #:query (uri-query iri) #:fragment (uri-fragment iri))) ;; if we don't have the same scheme, we can't use a relative reference, ;; return the original iri iri))))