Add iri.scm from jsonld

Julien LepillerTue Mar 31 04:36:16+0200 2020

14392df

Add iri.scm from jsonld

Makefile.am

22
33
SOURCES= \
44
  rdf/rdf.scm \
5-
  rdf/xsd.scm
5+
  rdf/xsd.scm \
6+
  iri/iri.scm \
67
78
TEST_EXTENSIONS = .scm
89
SCM_LOG_DRIVER = env AM_TAP_AWK='$(AWK)' $(SHELL) $(top_srcdir)/tap-driver.sh --color-tests yes --ignore-exit

iri/iri.scm unknown status 1

1+
;;;; Copyright (C) 2019, 2020 Julien Lepiller <julien@lepiller.eu>
2+
;;;; 
3+
;;;; This library is free software; you can redistribute it and/or
4+
;;;; modify it under the terms of the GNU Lesser General Public
5+
;;;; License as published by the Free Software Foundation; either
6+
;;;; version 3 of the License, or (at your option) any later version.
7+
;;;; 
8+
;;;; This library is distributed in the hope that it will be useful,
9+
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
10+
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11+
;;;; Lesser General Public License for more details.
12+
;;;; 
13+
;;;; You should have received a copy of the GNU Lesser General Public
14+
;;;; License along with this library; if not, write to the Free Software
15+
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16+
;;;; 
17+
18+
(define-module (iri iri)
19+
  #:use-module (ice-9 match)
20+
  #:use-module (web uri)
21+
  #:export (resolve-iri
22+
            make-relative-iri))
23+
24+
(define (resolve-iri base reference)
25+
  "Resolve an IRI from a @var{base} IRI and a relative @var{reference}.  This
26+
is an implementation of the URI resolving algorithm described in section 5.2
27+
of RFC3986.  See @url{https://tools.ietf.org/html/rfc3986}."
28+
  (define (remove-last-component path)
29+
    (string-join
30+
      (reverse (cdr (reverse (string-split path #\/))))
31+
      "/"))
32+
33+
  (define (transform-references base R)
34+
    (if (uri-scheme R)
35+
        (build-uri (uri-scheme R)
36+
                   #:host (uri-host R)
37+
                   #:path (remove-dot-segments (uri-path R))
38+
                   #:query (uri-query R)
39+
                   #:fragment (uri-fragment R))
40+
        (if (uri-host R)
41+
            (build-uri (uri-scheme base)
42+
                       #:host (uri-host R)
43+
                       #:path (remove-dot-segments (uri-path R))
44+
                       #:query (uri-query R)
45+
                       #:fragment (uri-fragment R))
46+
            (if (equal? (uri-path R) "")
47+
                (build-uri (uri-scheme base)
48+
                           #:host (uri-host base)
49+
                           #:path (uri-path base)
50+
                           #:query (or (uri-query R) (uri-query base))
51+
                           #:fragment (if (uri-query R)
52+
                                          (uri-fragment R)
53+
                                          (or (uri-fragment R) (uri-fragment base))))
54+
                (build-uri (uri-scheme base)
55+
                           #:host (uri-host base)
56+
                           #:path (if (equal? (substring (uri-path R) 0 1) "/")
57+
                                      (remove-dot-segments (uri-path R))
58+
                                      (remove-dot-segments
59+
                                        (merge (uri-path base) (uri-path R))))
60+
                           #:query (uri-query R)
61+
                           #:fragment (uri-fragment R))))))
62+
63+
  (define (merge basepath referencepath)
64+
    (string-append
65+
      (string-join
66+
        (append (reverse (cdr (reverse (string-split basepath #\/)))) '(""))
67+
        "/")
68+
      referencepath))
69+
70+
  (define (remove-dot-segments path)
71+
    (let loop ((input path) (output ""))
72+
      (if (equal? input "")
73+
          output
74+
          (cond
75+
            ((and (> (string-length input) 2) (equal? (substring input 0 3) "../"))
76+
             (loop (substring input 3) output))
77+
            ((and (> (string-length input) 1) (equal? (substring input 0 2) "./"))
78+
             (loop (substring input 2) output))
79+
            ((and (> (string-length input) 2) (equal? (substring input 0 3) "/./"))
80+
             (loop (substring input 2) output)); keep trailing /
81+
            ((equal? input "/.")
82+
             (loop "/" output))
83+
            ((and (> (string-length input) 3) (equal? (substring input 0 4) "/../"))
84+
             (loop (substring input 3) (remove-last-component output))); keep trailing /
85+
            ((equal? input "/..")
86+
             (loop "/" (remove-last-component output)))
87+
            ((equal? input ".")
88+
             (loop "" output))
89+
            ((equal? input "..")
90+
             (loop "" output))
91+
            (else
92+
              (let* ((segments (string-split input #\/))
93+
                     (segment (car segments))
94+
                     (add-slash? (equal? segment ""))
95+
                     (segment (if add-slash? (cadr segments) segment)))
96+
                (loop
97+
                  (if add-slash?
98+
                      (string-join (cons "" (cdr (cdr segments))) "/")
99+
                      (string-join (cdr segments)))
100+
                  (if add-slash?
101+
                      (string-append output "/" segment)
102+
                      (string-append output segment)))))))))
103+
104+
  (define (set-iri-path uri)
105+
    (build-uri
106+
      (uri-scheme uri)
107+
      #:host (uri-host uri)
108+
      #:path (if (equal? (uri-path uri) "") "/" (uri-path uri))
109+
      #:query (uri-query uri)
110+
      #:fragment (uri-fragment uri)))
111+
112+
  ;; This algorithm is not always called with sane values, so prevent errors
113+
  ;; in some edge cases.
114+
  (if (and base (string? reference))
115+
   (uri->string
116+
     (transform-references (set-iri-path (string->uri base))
117+
                           (string->uri-reference reference)))
118+
   reference))
119+
120+
(define (canonical-iri iri)
121+
  (let* ((iri (string->uri iri))
122+
         (path (uri-path iri))
123+
         (start (if (equal? (string-ref path 0) #\/) "/" ""))
124+
         (path (reverse (string-split path #\/))))
125+
    (let loop ((path path) (result '()) (skip 0))
126+
      (match path
127+
        ('() (build-uri
128+
               (uri-scheme iri)
129+
               #:host (uri-host iri)
130+
               #:path (string-append start (string-join result "/"))
131+
               #:query (uri-query iri)
132+
               #:fragment (uri-fragment iri)))
133+
        ((component path ...)
134+
         (cond
135+
           ((equal? component "")
136+
            (if (null? result)
137+
                (loop path (cons component result) skip)
138+
                (loop path result skip)))
139+
           ((equal? component ".")
140+
            (loop path result skip))
141+
           ((equal? component "..")
142+
            (loop path result (+ skip 1)))
143+
           (else
144+
             (if (> skip 0)
145+
                 (loop path result (- skip 1))
146+
                 (loop path (cons component result) skip)))))))))
147+
148+
(define (make-relative-iri iri base)
149+
  (let ((iri (canonical-iri iri))
150+
        (base (canonical-iri base)))
151+
    (uri->string
152+
      (if (equal? (uri-scheme iri) (uri-scheme base))
153+
          ;; if we have the same scheme, we can skip it
154+
          (if (equal? (uri-host iri) (uri-host base))
155+
              ;; if the host is the same, we try to compress the paths.  There
156+
              ;; are two solutions: keep the path from the iri, or use '..'
157+
              ;; to go back whenever necessary.  We select the shortes one.
158+
              (let* ((base-path (string-split (uri-path base) #\/))
159+
                     (iri-path (string-split (uri-path iri) #\/))
160+
                     ;; the base path starts with /, so an empty component.
161+
                     ;; we also ignore the last component.
162+
                     (parents (cdr (cdr (map (const "..") base-path)))))
163+
                (let loop ((base-path (cdr base-path))
164+
                           (iri-path (cdr iri-path))
165+
                           (parents parents))
166+
                  (match base-path
167+
                    ((base-component base-path ...)
168+
                     (if (null? base-path)
169+
                         ;; we reached the end, we have the same directory structure
170+
                         (if (and (= (length iri-path) 1)
171+
                                  (equal? (car iri-path) base-component)
172+
                                  (or (uri-query iri)
173+
                                      (uri-fragment iri)))
174+
                             ;; if it's the same path
175+
                             (build-uri-reference
176+
                               #:query (uri-query iri)
177+
                               #:fragment (uri-fragment iri))
178+
                             ;; otherwise
179+
                             (let ((path (string-join iri-path "/")))
180+
                               (build-uri-reference
181+
                                 #:path (if (equal? base-component "")
182+
                                            path
183+
                                            (if (equal? path "")
184+
                                                "."
185+
                                                path))
186+
                                 #:query (uri-query iri)
187+
                                 #:fragment (uri-fragment iri))))
188+
                         ;; we didn't reach the end of the base iri yet, so 
189+
                         ;; continue with the main logic
190+
                         (if (and (not (null? iri-path))
191+
                                  (equal? (car iri-path) base-component))
192+
                             ;; we have the same directory, so we can skip a '..'
193+
                             (loop base-path (cdr iri-path) (cdr parents))
194+
                             ;; we have a different directory, file, or we stopped
195+
                             ;; there, so return
196+
                             (let ((path (string-join (append parents iri-path) "/")))
197+
                               (build-uri-reference
198+
                                 #:path path
199+
                                 #:query (uri-query iri)
200+
                                 #:fragment (uri-fragment iri)))))))))
201+
              ;; but if the host is different, we need to preserve everything
202+
              ;; else, we have a url such as '//example.org'
203+
              (build-uri-reference
204+
                #:host (uri-host iri)
205+
                #:path (uri-path iri)
206+
                #:query (uri-query iri)
207+
                #:fragment (uri-fragment iri)))
208+
          ;; if we don't have the same scheme, we can't use a relative reference,
209+
          ;; return the original iri
210+
          iri))))