;;;; Copyright (C) 2019, 2020 Julien Lepiller ;;;; ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public ;;;; License as published by the Free Software Foundation; either ;;;; version 3 of the License, or (at your option) any later version. ;;;; ;;;; This library is distributed in the hope that it will be useful, ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;;; Lesser General Public License for more details. ;;;; ;;;; You should have received a copy of the GNU Lesser General Public ;;;; License along with this library; if not, write to the Free Software ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ;;;; (define-module (jsonld download) #:use-module (ice-9 match) #:use-module (iri iri) #:use-module (json) #:use-module (jsonld json) #:use-module (web client) #:use-module (web response) #:use-module (web uri) #:use-module (rnrs bytevectors) #:use-module (srfi srfi-1) #:use-module (srfi srfi-9) #:use-module (sxml simple) #:export (json-document make-json-document new-json-document json-document? json-document-context-url json-document-document-url json-document-document json-document-content-type json-document-profile download-json)) (define-record-type json-document (make-json-document context-url document-url document content-type profile) json-document? (context-url json-document-context-url) (document-url json-document-document-url) (document json-document-document) (content-type json-document-content-type) (profile json-document-profile)) (define* (new-json-document #:key context-url document-url document content-type profile) (make-json-document context-url document-url document content-type profile)) (define-record-type link-header (make-link-header uri params) link-header? (uri link-header-uri) (params link-header-params)) (define-record-type content-type-header (make-content-type-header type params) content-type-header? (type content-type-header-type) (params content-type-header-params)) (define (parse-link-header link) (let* ((header (string-split link #\;)) (uri (car header)) (uri (substring uri 1 (- (string-length uri) 1))) (params (cdr header)) (params (map (lambda (param) (match (map (lambda (l) (string-trim l #\space)) (string-split param #\=)) ((type val) (cons type (substring val 1 (- (string-length val) 1)))))) params))) (make-link-header uri params))) (define (parse-type type) (let* ((mime-type (string-split type #\;)) (type (car mime-type)) (params (map (lambda (param) (match (string-split param #\=) ((type val ...) (cons (string-trim type #\space) (string-trim (string-join val "=") #\space))))) (cdr mime-type)))) (make-content-type-header type params))) (define (html-document-base base document) (define (find-base document) (match document (('base ('@ attr ...)) (let loop ((attr attr)) (if (null? attr) #f (match (car attr) (('href base) base) (_ (loop (cdr attr))))))) ('() #f) ((tag ('@ opt ...) rest ...) (fold (lambda (element result) (or result (find-base element))) #f rest)) ((tag rest ...) (fold (lambda (element result) (or result (find-base element))) #f rest)) (_ #f))) (let ((new-base (find-base document))) (if new-base (resolve-iri base new-base) base))) (define (get-script-elements document) (define (find-scripts document) (match document (('script t ...) (list document)) ('() '()) ((tag ('@ opt ...) rest ...) (append-map get-script-elements rest)) ((tag rest ...) (append-map get-script-elements rest)) (_ '()))) (define (jsonld-script? element) (match element (('script ('@ opt ...) content) (let* ((types (filter (lambda (opt) (equal? (car opt) 'type)) opt)) (type (if (null? types) #f (cadr (car types))))) (and type (equal? (content-type-header-type (parse-type type)) "application/ld+json")))) (_ #f))) (filter jsonld-script? (find-scripts document))) (define (get-script-content-by-id document id) (define (has-correct-id? element) (match element ((tag ('@ opts ...) content) (let* ((ids (filter (lambda (o) (equal? (car o) 'id)) opts)) (element-id (if (null? ids) #f (cadr (car ids))))) (equal? element-id id))))) (let ((id-scripts (filter has-correct-id? (get-script-elements document)))) (when (null? id-scripts) (throw 'loading-document-failed)) (get-script-content (car id-scripts)))) (define (get-script-content-by-profile document profile) (define (has-correct-profile? element) (match element ((tag ('@ opts ...) content) (let* ((types (filter (lambda (o) (equal? (car o) 'type)) opts)) (type (if (null? types) #f (cadr (car types))))) (and type (equal? (assoc-ref (content-type-header-params (parse-type type)) "profile") profile)))))) (let ((profile-scripts (filter has-correct-profile? (get-script-elements document)))) (when (null? profile-scripts) (throw 'loading-document-failed)) (get-script-content (car profile-scripts)))) (define (get-first-script-content document) (let ((scripts (get-script-elements document))) (when (null? scripts) (throw 'loading-document-failed)) (get-script-content (car scripts)))) (define (get-script-content element) (match element (('script ('@ opts ...) content) content))) (define (reverse-document document) "Reverse the order of elements in @var{document}, a Json object, array or scalar. This recursively reverses the order of any array or key-pair association." (match document ((? json-array? document) (list->array 1 (map reverse-document (array->list document)))) ((? json-object? document) (fold (lambda (pair result) (cons (cons (car pair) (reverse-document (cdr pair))) result)) '() document)) (_ document))) (define (links-with-rel links rel) (filter (lambda (link) (equal? (assoc-ref (link-header-params link) "rel") rel)) links)) (define (html->sxml document) "Convert document (a string) into an sxml representation that preserves entities" (xml->sxml document #:entities '((lt . "&lt;") (gt . "&gt;") ;(amp . "&amp;") (apos . "&apos;")) #:default-entity-handler (lambda (port name) (string-append "&" (symbol->string name) ";")))) (define* (download-json uri #:key (mime-type "application/ld+json") (extract-all-scripts? #f) (profile #f) (request-profile #f) ;; Used for tests (http-get http-get)) "Download a JsonLD document from @var{uri}, using the HTTP protocol. The Accept HTTP header can be modified with @var{mime-type}." (define complete-mime-type (string-append (if request-profile (string-append mime-type ";profile=" request-profile) mime-type) (if (equal? request-profile "http://www.w3.org/ns/json-ld#context") "" ", text/html;q=0.8; application/xhtml+xml;q=0.5"))) (catch #t (lambda () (call-with-values (lambda () (http-get uri #:headers `((Accept . ,complete-mime-type)))) (lambda (hdr body) (cond ((member (response-code hdr) '(301 302 303 307)) (download-json (uri->string (response-location hdr)) #:mime-type mime-type #:extract-all-scripts? extract-all-scripts? #:profile profile #:request-profile request-profile)) ((equal? (response-code hdr) 200) (let* ((content-type (response-content-type hdr)) (type (symbol->string (car content-type))) (link-headers (map cdr (filter (lambda (p) (equal? (car p) 'link)) (response-headers hdr)))) (links (map parse-link-header link-headers)) (alternates (links-with-rel links "alternate")) (alternates (filter (lambda (alternate) (equal? (assoc-ref (link-header-params alternate) "type") "application/ld+json")) alternates)) (contexts (links-with-rel links "http://www.w3.org/ns/json-ld#context")) (context-url #f) (document #f) (base (if (string? uri) uri (uri->string uri)))) (if (and (not (equal? type "application/json")) (or (< (string-length type) 5) (not (equal? (substring type (- (string-length type) 5)) "+json"))) (not (null? alternates))) (download-json (resolve-iri base (link-header-uri (car alternates))) #:mime-type mime-type #:extract-all-scripts? extract-all-scripts? #:profile profile #:request-profile request-profile) (begin (when (and (or (equal? type "application/json") (and (> (string-length type) 5) (equal? (substring type (- (string-length type) 5)) "+json"))) (not (equal? type "application/ld+json")) (not (null? contexts))) (set! context-url (resolve-iri base (link-header-uri (car contexts)))) (when (> (length contexts) 1) (throw 'multiple-context-link-headers))) (cond ((or (equal? type "application/json") (and (> (string-length type) 5) (equal? (substring type (- (string-length type) 5)) "+json"))) (set! document (reverse-document (json-string->scm (if (string? body) body (utf8->string body)))))) ((or (equal? type "text/html") (equal? type "application/xhtml+xml")) (let* ((content (html->sxml (if (string? body) body (utf8->string body)))) (uri (string->uri base)) (source #f)) (set! base (html-document-base base content)) (when (uri-fragment uri) (set! source (get-script-content-by-id content (uri-fragment uri)))) (when (and (equal? source #f) profile) (set! source (get-script-content-by-profile content profile))) (when (and (equal? source #f) (not extract-all-scripts?)) (set! source (get-first-script-content content))) (if source (catch #t (lambda () (set! document (reverse-document (json-string->scm source)))) (lambda _ (throw 'invalid-script-element source))) (begin (unless extract-all-scripts? (throw 'loading-document-failed)) (set! document '()) (for-each (lambda (el) (catch #t (lambda () (let ((script-content (reverse-document (json-string->scm (get-script-content el))))) (set! document (append document (if (json-array? script-content) (array->list script-content) (list script-content)))))) (lambda _ (throw 'invalid-script-element el)))) (get-script-elements content)) (set! document (list->array 1 document)))))) (else (throw 'loading-remote-document-failed))) (new-json-document #:document-url base #:document document #:content-type type #:context-url context-url))))) (else (throw 'not-found)))))) (lambda (key . value) (cond ((equal? key 'multiple-context-link-headers) (apply throw 'multiple-context-link-headers value)) ((equal? key 'invalid-script-element) (apply throw 'invalid-script-element value)) (else (apply throw 'loading-remote-document-failed key value))))))