guile-jsonld/jsonld/iri.scm

iri.scm

1
;;;; Copyright (C) 2019, 2020 Julien Lepiller <julien@lepiller.eu>
2
;;;; 
3
;;;; This library is free software; you can redistribute it and/or
4
;;;; modify it under the terms of the GNU Lesser General Public
5
;;;; License as published by the Free Software Foundation; either
6
;;;; version 3 of the License, or (at your option) any later version.
7
;;;; 
8
;;;; This library is distributed in the hope that it will be useful,
9
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
10
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
;;;; Lesser General Public License for more details.
12
;;;; 
13
;;;; You should have received a copy of the GNU Lesser General Public
14
;;;; License along with this library; if not, write to the Free Software
15
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
;;;; 
17
18
(define-module (jsonld iri)
19
  #:use-module (ice-9 match)
20
  #:use-module (web uri)
21
  #:export (resolve-iri
22
            make-relative-iri))
23
24
(define (resolve-iri base reference)
25
  "Resolve an IRI from a @var{base} IRI and a relative @var{reference}.  This
26
is an implementation of the URI resolving algorithm described in section 5.2
27
of RFC3986.  See @url{https://tools.ietf.org/html/rfc3986}."
28
  (define (remove-last-component path)
29
    (string-join
30
      (reverse (cdr (reverse (string-split path #\/))))
31
      "/"))
32
33
  (define (transform-references base R)
34
    (if (uri-scheme R)
35
        (build-uri (uri-scheme R)
36
                   #:host (uri-host R)
37
                   #:path (remove-dot-segments (uri-path R))
38
                   #:query (uri-query R)
39
                   #:fragment (uri-fragment R))
40
        (if (uri-host R)
41
            (build-uri (uri-scheme base)
42
                       #:host (uri-host R)
43
                       #:path (remove-dot-segments (uri-path R))
44
                       #:query (uri-query R)
45
                       #:fragment (uri-fragment R))
46
            (if (equal? (uri-path R) "")
47
                (build-uri (uri-scheme base)
48
                           #:host (uri-host base)
49
                           #:path (uri-path base)
50
                           #:query (or (uri-query R) (uri-query base))
51
                           #:fragment (if (uri-query R)
52
                                          (uri-fragment R)
53
                                          (or (uri-fragment R) (uri-fragment base))))
54
                (build-uri (uri-scheme base)
55
                           #:host (uri-host base)
56
                           #:path (if (equal? (substring (uri-path R) 0 1) "/")
57
                                      (remove-dot-segments (uri-path R))
58
                                      (remove-dot-segments
59
                                        (merge (uri-path base) (uri-path R))))
60
                           #:query (uri-query R)
61
                           #:fragment (uri-fragment R))))))
62
63
  (define (merge basepath referencepath)
64
    (string-append
65
      (string-join
66
        (append (reverse (cdr (reverse (string-split basepath #\/)))) '(""))
67
        "/")
68
      referencepath))
69
70
  (define (remove-dot-segments path)
71
    (let loop ((input path) (output ""))
72
      (if (equal? input "")
73
          output
74
          (cond
75
            ((and (> (string-length input) 2) (equal? (substring input 0 3) "../"))
76
             (loop (substring input 3) output))
77
            ((and (> (string-length input) 1) (equal? (substring input 0 2) "./"))
78
             (loop (substring input 2) output))
79
            ((and (> (string-length input) 2) (equal? (substring input 0 3) "/./"))
80
             (loop (substring input 2) output)); keep trailing /
81
            ((equal? input "/.")
82
             (loop "/" output))
83
            ((and (> (string-length input) 3) (equal? (substring input 0 4) "/../"))
84
             (loop (substring input 3) (remove-last-component output))); keep trailing /
85
            ((equal? input "/..")
86
             (loop "/" (remove-last-component output)))
87
            ((equal? input ".")
88
             (loop "" output))
89
            ((equal? input "..")
90
             (loop "" output))
91
            (else
92
              (let* ((segments (string-split input #\/))
93
                     (segment (car segments))
94
                     (add-slash? (equal? segment ""))
95
                     (segment (if add-slash? (cadr segments) segment)))
96
                (loop
97
                  (if add-slash?
98
                      (string-join (cons "" (cdr (cdr segments))) "/")
99
                      (string-join (cdr segments)))
100
                  (if add-slash?
101
                      (string-append output "/" segment)
102
                      (string-append output segment)))))))))
103
104
  (define (set-iri-path uri)
105
    (build-uri
106
      (uri-scheme uri)
107
      #:host (uri-host uri)
108
      #:path (if (equal? (uri-path uri) "") "/" (uri-path uri))
109
      #:query (uri-query uri)
110
      #:fragment (uri-fragment uri)))
111
112
  ;; This algorithm is not always called with sane values, so prevent errors
113
  ;; in some edge cases.
114
  (if (and base (string? reference))
115
   (uri->string
116
     (transform-references (set-iri-path (string->uri base))
117
                           (string->uri-reference reference)))
118
   reference))
119
120
(define (canonical-iri iri)
121
  (let* ((iri (string->uri iri))
122
         (path (uri-path iri))
123
         (start (if (equal? (string-ref path 0) #\/) "/" ""))
124
         (path (reverse (string-split path #\/))))
125
    (let loop ((path path) (result '()) (skip 0))
126
      (match path
127
        ('() (build-uri
128
               (uri-scheme iri)
129
               #:host (uri-host iri)
130
               #:path (string-append start (string-join result "/"))
131
               #:query (uri-query iri)
132
               #:fragment (uri-fragment iri)))
133
        ((component path ...)
134
         (cond
135
           ((equal? component "")
136
            (if (null? result)
137
                (loop path (cons component result) skip)
138
                (loop path result skip)))
139
           ((equal? component ".")
140
            (loop path result skip))
141
           ((equal? component "..")
142
            (loop path result (+ skip 1)))
143
           (else
144
             (if (> skip 0)
145
                 (loop path result (- skip 1))
146
                 (loop path (cons component result) skip)))))))))
147
148
(define (make-relative-iri iri base)
149
  (let ((iri (canonical-iri iri))
150
        (base (canonical-iri base)))
151
    (uri->string
152
      (if (equal? (uri-scheme iri) (uri-scheme base))
153
          ;; if we have the same scheme, we can skip it
154
          (if (equal? (uri-host iri) (uri-host base))
155
              ;; if the host is the same, we try to compress the paths.  There
156
              ;; are two solutions: keep the path from the iri, or use '..'
157
              ;; to go back whenever necessary.  We select the shortes one.
158
              (let* ((base-path (string-split (uri-path base) #\/))
159
                     (iri-path (string-split (uri-path iri) #\/))
160
                     ;; the base path starts with /, so an empty component.
161
                     ;; we also ignore the last component.
162
                     (parents (cdr (cdr (map (const "..") base-path)))))
163
                (let loop ((base-path (cdr base-path))
164
                           (iri-path (cdr iri-path))
165
                           (parents parents))
166
                  (match base-path
167
                    ((base-component base-path ...)
168
                     (if (null? base-path)
169
                         ;; we reached the end, we have the same directory structure
170
                         (if (and (= (length iri-path) 1)
171
                                  (equal? (car iri-path) base-component)
172
                                  (or (uri-query iri)
173
                                      (uri-fragment iri)))
174
                             ;; if it's the same path
175
                             (build-uri-reference
176
                               #:query (uri-query iri)
177
                               #:fragment (uri-fragment iri))
178
                             ;; otherwise
179
                             (let ((path (string-join iri-path "/")))
180
                               (build-uri-reference
181
                                 #:path (if (equal? base-component "")
182
                                            path
183
                                            (if (equal? path "")
184
                                                "."
185
                                                path))
186
                                 #:query (uri-query iri)
187
                                 #:fragment (uri-fragment iri))))
188
                         ;; we didn't reach the end of the base iri yet, so 
189
                         ;; continue with the main logic
190
                         (if (and (not (null? iri-path))
191
                                  (equal? (car iri-path) base-component))
192
                             ;; we have the same directory, so we can skip a '..'
193
                             (loop base-path (cdr iri-path) (cdr parents))
194
                             ;; we have a different directory, file, or we stopped
195
                             ;; there, so return
196
                             (let ((path (string-join (append parents iri-path) "/")))
197
                               (build-uri-reference
198
                                 #:path path
199
                                 #:query (uri-query iri)
200
                                 #:fragment (uri-fragment iri)))))))))
201
              ;; but if the host is different, we need to preserve everything
202
              ;; else, we have a url such as '//example.org'
203
              (build-uri-reference
204
                #:host (uri-host iri)
205
                #:path (uri-path iri)
206
                #:query (uri-query iri)
207
                #:fragment (uri-fragment iri)))
208
          ;; if we don't have the same scheme, we can't use a relative reference,
209
          ;; return the original iri
210
          iri))))
211