;;;; Copyright (C) 2020 Julien Lepiller <julien@lepiller.eu>
;;;; 
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
;;;; License as published by the Free Software Foundation; either
;;;; version 3 of the License, or (at your option) any later version.
;;;; 
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;;; Lesser General Public License for more details.
;;;; 
;;;; You should have received a copy of the GNU Lesser General Public
;;;; License along with this library; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;;;; 

(define-module (rdf rdf)
  #:use-module (srfi srfi-1)
  #:use-module (srfi srfi-9)
  #:use-module (ice-9 match)
  #:export (rdf-datatype
            make-rdf-datatype
            rdf-datatype?
            rdf-datatype-iris
            rdf-datatype-description
            rdf-datatype-lexical?
            rdf-datatype-value?
            rdf-datatype-lexical->value
            rdf-datatype-value->lexical

            rdf-dataset
            make-rdf-dataset
            rdf-dataset?
            rdf-dataset-default-graph
            rdf-dataset-named-graphs

            rdf-triple
            make-rdf-triple
            rdf-triple?
            rdf-triple-subject
            rdf-triple-predicate
            rdf-triple-object

            rdf-literal
            make-rdf-literal
            rdf-literal?
            rdf-literal-lexical-form
            rdf-literal-type
            rdf-literal-langtag

            blank-node?
            rdf-graph?

            merge-graphs
            rdf-isomorphic?))

;; From the specification:
;;   Datatypes are used with RDF literals to represent values such as strings,
;;   numbers and dates.  A datatype consists of a lexical space, a value space
;;   and a lexical-to-value mapping, and is denoted by one or more IRIs.
;;
;;   The lexical space of a datatype is a set of Unicode [UNICODE] strings.
;;
;;   The lexical-to-value mapping of a datatype is a set of pairs whose first
;;   element belongs to the lexical space, and the second element belongs to the
;;   value space of the datatype.  Each member of the lexical space is paired
;;   with exactly one value, and is a lexical representation of that value.  The
;;   mapping can be seen as a function from the lexical space to the value space.
;;
;; In addition to the specification, we introduce value->lexical, a canonical
;; function to map values to the lexical space.  An important property is that
;;   for any val, (value? val) implies:
;;           (equal? (lexical->value (value->lexical val)) val)
;;
;; We also introduce a list of IRIs that denote this type, as more than one
;; IRI can denote a type.  This is set to a list of IRIs, but may be changed
;; to a function to denote a set in the future.
;;
;; We also introduce a description, a text that helps humans understand the
;; purpose of the datatype.

(define-record-type rdf-datatype
  (make-rdf-datatype iris description lexical? value? lexical->value value->lexical)
  rdf-datatype?
  (iris           rdf-datatype-iris)
  (description    rdf-datatype-description)
  (lexical?       rdf-datatype-lexical?)
  (value?         rdf-datatype-value?)
  (lexical->value rdf-datatype-lexical->value)
  (value->lexical rdf-datatype-value->lexical))

;; From the specification:
;;   An RDF dataset is a collection of RDF graphs, and comprises:
;;
;;   * Exactly one default graph, being an RDF graph.  The default graph does
;;     not have a name and MAY be empty.
;;   * Zero or more named graphs.  Each named graph is a pair consisting of an
;;     IRI or a blank node (the graph name), and an RDF graph.  Graph names are
;;     unique within an RDF dataset.
;;
;; We represent named graphs with a association list whose keys are IRIs or
;; blank nodes, and values are RDF graphs.

(define-record-type rdf-dataset
  (make-rdf-dataset default-graph named-graphs)
  rdf-dataset?
  (default-graph rdf-dataset-default-graph)
  (named-graphs  rdf-dataset-named-graphs))

;; From the specification:
;;   An RDF triple consists of three components:
;;
;;   * the subject, which is an IRI or a blank node
;;   * the predicate, which is an IRI
;;   * the object, which is an IRI, a literal or a blank node

(define-record-type rdf-triple
  (make-rdf-triple subject predicate object)
  rdf-triple?
  (subject   rdf-triple-subject)
  (predicate rdf-triple-predicate)
  (object    rdf-triple-object))

;; From the specification:
;;   A literal in an RDF graph consists of two or three elements:
;;
;;   * a lexical form, being a Unicode [UNICODE] string, which SHOULD be in
;;     Normal Form C [NFC],
;;   * a datatype IRI, being an IRI identifying a datatype that determines how
;;     the lexical form maps to a literal value, and
;;   * if and only if the datatype IRI is `http://www.w3.org/1999/02/22-rdf-syntax-ns#langString`,
;;     a non-empty language tag as defined by [BCP47].  The language tag MUST
;;     be well-formed according to section 2.2.9 of [BCP47].

(define-record-type rdf-literal
  (make-rdf-literal lexical-form datatype language-tag)
  rdf-literal?
  (lexical-form rdf-literal-lexical-form)
  (datatype     rdf-literal-datatype)
  (language-tag rdf-literal-language-tag))

;; From the specification:
;;   Blank nodes are disjoint from IRIs and literals.  Otherwise, the set of
;;   possible blank nodes is arbitrary.  RDF makes no reference to any internal
;;   structure of blank nodes.
;;
;; Here, we will use integers as blank nodes

(define blank-node? integer?)

;; From the specification:
;;   An RDF graph is a set of RDF triples.
;;
;; We represent a graph as a list of RDF triples

(define (rdf-graph? graph)
  (and (list? graph) (null? (filter (lambda (t) (not (rdf-triple? t))) graph))))

(define (last-blank g)
  "Retun the biggest blank node identifier in g"
  (let loop ((g g) (m 0))
    (match g
      ('() m)
      ((triple g ...)
       (loop g (max m
                   (if (blank-node? (rdf-triple-subject triple))
                       (rdf-triple-subject triple)
                       0)
                   (if (blank-node? (rdf-triple-object triple))
                       (rdf-triple-object triple)
                       0)))))))

(define (rename-blanks g num)
  "Return the same graph, but blank nodes are renamed from num"
  (let loop ((g g) (renamings '()) (num num) (result '()))
    (match g
      ('() result)
      ((triple g ...)
       (let* ((subject (rdf-triple-subject triple))
              (num (if (and (blank-node? subject)
                            (assoc-ref renamings subject))
                       num
                       (+ num 1)))
              (renamings
                (if (and (blank-node? subject)
                         (assoc-ref renamings subject))
                    renamings
                    (cons (cons subject num) renamings)))
              (subject
                (if (blank-node? subject)
                    (assoc-ref renamings subject)
                    subject))
              (predicate (rdf-triple-predicate triple))
              (object (rdf-triple-object triple))
              (num (if (and (blank-node? object)
                            (assoc-ref renamings object))
                       num
                       (+ num 1)))
              (renamings
                (if (and (blank-node? object)
                         (assoc-ref renamings object))
                    renamings
                    (cons (cons object num) renamings)))
              (object
                (if (blank-node? object)
                    (assoc-ref renamings object)
                    object)))
           (loop g renamings num (cons (make-rdf-triple subject predicate object)
                                       result)))))))

(define (merge-graphs g1 g2)
  "Merge two graphs g1 and g2.  This is the same as append, but we need to make
sure we rename blank nodes, or some nodes will be merged when they shouldn't."
  (append g1 (rename-blanks g2 (last-blank g1))))

;; Next, a predicate on isomorphisms between two graphs.  Two graphs are isomorphic
;; when each triple has a corresponding triple in the other graph.
;;
;; To take blank nodes into account, there needs to be a mapping from blank nodes
;; of the first graph to blank nodes of the other graph in order to prove
;; isomorphism.
;;
;; First, we compare the two graphs and find possible constraints on that mapping.
;; for instance, if one graph has (_:1, p, o) and the other (_:2, p, o), then
;; a possible constraint is that _:1 maps to _:2. If the other graph also has
;; (_:3, p, o) then maybe _:1 actually maps to _:3.
;;
;; Constraints are either "none" (no constraint), "equiv" (a mapping between two
;; blank node identifiers), "or" (a disjunction) or "and" (a conjunction).
;; By comparing the triples of the first graph, we create an conjunction between
;; the constraints collected from each triple. The constraints of a triple is
;; a disjunction between every case where it matches a triple from the other graph.
;; That creates zero, one or two constraints (depending on the number of blank
;; nodes).
;;
;; These constraints are transformed in a normal form, as a list of lists of
;; conjunctions. Each list is a candidate mapping. sat? is used to evaluate the
;; candidate mapping and ensure it is an isomorphism between the two sets of
;; blank nodes. For every sat? equivalences, we check that the mapping actually
;; maps triples of g1 to triples of g2, and its reverse mapping maps triples of
;; g2 to triples of g1. Whenever one mapping works, the two graphs are equivalent.
;; If no mapping works, the two graphs are not equivalent.

(define (sat? equivalences)
  "Return whether the set of equivalences satisfies the condition that it represents
an isomorphism between two blank node sets: for every equality, check that the
first component is always associated to the same second component, and that the
second component is always associated with the first."
  (match equivalences
    ('() #t)
    (((first . second) equivalences ...)
     (if (and (null? (filter
                       (lambda (eq)
                         (and (equal? (car eq) first)
                              (not (equal? (cdr eq) second))))
                       equivalences))
              (null? (filter
                       (lambda (eq)
                         (and (not (equal? (car eq) first))
                              (equal? (cdr eq) second)))
                       equivalences)))
         (sat? equivalences)
         #f))))

(define (merge-joins l1 l2)
  (match l1
    ('() l2)
    ((e1 l1 ...)
     (merge-joins l1 (map (lambda (e2) (append e1 e2)) l2)))))

(define (to-disjunctions constraints)
  (match constraints
    (('equiv b1 b2) (list (list (cons b1 b2))))
    ('none (list (list)))
    (('or e1 e2) (append (to-disjunctions e1) (to-disjunctions e2)))
    (('and e1 e2)
     (merge-joins (to-disjunctions e1) (to-disjunctions e2)))))

(define (generate-triple-constraints t1 t2)
  (match t1
    (($ rdf-triple s1 p1 o1)
     (match t2
       (($ rdf-triple s2 p2 o2)
        (if (and (or (equal? s1 s2) (and (blank-node? s1) (blank-node? s2)))
                 (equal? p1 p2)
                 (or (equal? o1 o2) (and (blank-node? o1) (blank-node? o2))))
            (list 'and
                  (if (blank-node? s1)
                      (list 'equiv s1 s2)
                      'none)
                  (if (blank-node? o1)
                      (list 'equiv o1 o2)
                      'none))
            #f))))))

(define (generate-constraints t1 g2)
  (match g2
    ('() 'none)
    ((t2 g2 ...)
     (let ((c (generate-triple-constraints t1 t2)))
       (if c
         (list 'or c (generate-constraints t1 g2))
         (generate-constraints t1 g2))))))

(define (reverse-mapping mapping)
  (let loop ((mapping mapping) (result '()))
  (match mapping
    ('() result)
    (((first . second) mapping ...)
     (loop mapping (cons (cons second first) result))))))

(define (validate-mapping mapping g1 g2)
  (match g1
    ('() #t)
    ((t1 g1 ...)
     (and (not (null? (filter
                        (lambda (t2)
                          (let ((s1 (rdf-triple-subject t1))
                                (s2 (rdf-triple-subject t2))
                                (p1 (rdf-triple-predicate t1))
                                (p2 (rdf-triple-predicate t2))
                                (o1 (rdf-triple-object t1))
                                (o2 (rdf-triple-object t2)))
                            (and
                              (if (blank-node? s1)
                                  (equal? (assoc-ref mapping s1) s2)
                                  (equal? s1 s2))
                              (equal? p1 p2)
                              (if (blank-node? o1)
                                  (equal? (assoc-ref mapping o1) o2)
                                  (equal? o1 o2)))))
                        g2)))
          (validate-mapping mapping g1 g2)))))

(define (rdf-isomorphic? g1 g2)
  "Compare two graphs and return whether they are isomorphic."
  (let* ((constraints (fold (lambda (t constraints)
                              (list 'and (generate-constraints t g2) constraints))
                            'none g1))
         (disjunctions (to-disjunctions constraints)))
    (pk 'dis disjunctions)
    (let loop ((disjunctions disjunctions))
      (match disjunctions
        ('() (and (null? g1) (null? g2)))
        ((mapping disjunctions ...)
         (if (and (validate-mapping mapping g1 g2)
                  (validate-mapping (reverse-mapping mapping) g2 g1))
           #t
           (loop disjunctions)))))))