Add rdf dataset isomorphism check

Julien LepillerWed Apr 08 00:56:05+0200 2020

5e138a4

Add rdf dataset isomorphism check

README.md

252252
253253
Because the literal representation of `10` differs.
254254
255+
#### **Scheme Procedure**: `rdf-dataset-isomorphic? d1 d2`
256+
257+
Returns whether two datasets are the same.  Two datasets can have a different
258+
representation because of order and because of differing blank node
259+
representations.  They are isomorphic when there is a one-to-one mapping
260+
between them, such that blank nodes from one map to blank nodes of the other,
261+
and vice-versa.  Note that the mapping can map differently named blank nodes
262+
even when they name a named graph.
263+
255264
#### **Scheme Procedure**: `recognize graph vocabulary`
256265
257266
Transforms a graph to replace every instance of recognized IRIs in the

rdf/rdf.scm

6565
6666
            merge-graphs
6767
            rdf-isomorphic?
68+
            rdf-dataset-isomorphic?
6869
            recognize))
6970
7071
;; From the specification:

385386
         (list 'or c (generate-constraints t1 g2))
386387
         (generate-constraints t1 g2))))))
387388
389+
(define (generate-graph-constraints g1 g2)
  "Return the constraint tree obtained by chaining, with 'and, the constraints
that generate-constraints produces for each triple of G1 against G2.  The chain
ends with 'none, which is also the result when G1 is empty."
  ;; Walk G1 front to back, wrapping the accumulated tree each time so the
  ;; constraints of the last triple end up outermost.
  (let loop ((triples g1)
             (acc 'none))
    (if (null? triples)
        acc
        (loop (cdr triples)
              (list 'and (generate-constraints (car triples) g2) acc)))))
393+
388394
(define (reverse-mapping mapping)
389395
  (let loop ((mapping mapping) (result '()))
390396
  (match mapping

417423
418424
(define (rdf-isomorphic? g1 g2)
419425
  "Compare two graphs and return whether they are isomorph."
420-
  (let* ((constraints (fold (lambda (t constraints)
421-
                              (list 'and (generate-constraints t g2) constraints))
422-
                            'none g1))
426+
  (let* ((constraints (generate-graph-constraints g1 g2))
423427
         (disjunctions (to-disjunctions constraints)))
424-
    (let loop ((disjunctions disjunctions))
425-
      (match (filter sat? disjunctions)
428+
    (let loop ((disjunctions (filter sat? disjunctions)))
429+
      (match disjunctions
426430
        ('() (and (null? g1) (null? g2)))
427431
        ((mapping disjunctions ...)
428432
         (if (and (validate-mapping mapping g1 g2)

430434
           #t
431435
           (loop disjunctions)))))))
432436
437+
(define (generate-dataset-constraints d1 d2)
  "Return the constraint tree a blank node mapping must satisfy for dataset D1
to map onto dataset D2.  The tree is the conjunction of the constraints between
the two default graphs and, for every named graph of D1, the constraints that
pair it with a named graph of D2."
  (let ((default1 (rdf-dataset-default-graph d1))
        (default2 (rdf-dataset-default-graph d2))
        (named1 (rdf-dataset-named-graphs d1))
        (named2 (rdf-dataset-named-graphs d2)))
    ;; Named graphs of D2 whose name is a blank node: the only candidates a
    ;; blank-named graph of D1 can be paired with.
    (define blank-named2
      (filter (lambda (entry) (blank-node? (car entry))) named2))

    ;; Constraints for a single named graph (NAME . GRAPH) of D1.
    (define (named-graph-constraints name graph)
      (if (blank-node? name)
          ;; A blank name may correspond to any blank name of D2: build a
          ;; disjunction with one alternative per candidate.  Each alternative
          ;; ties the two names together and compares GRAPH with the triples
          ;; of that candidate (not with the default graph of D2).
          ;; 'bot seeds the 'or chain -- presumably the unsatisfiable
          ;; constraint, so no candidate means no solution.
          (fold (lambda (candidate alternatives)
                  (match candidate
                    ((name2 . graph2)
                     (list 'or
                           (list 'and (list 'equiv name name2)
                                 (generate-graph-constraints graph graph2))
                           alternatives))))
                'bot
                blank-named2)
          ;; Any other name must be present under the same name in D2.
          (let ((graph2 (assoc-ref named2 name)))
            (if graph2
                (generate-graph-constraints graph graph2)
                'bot))))

    (list 'and (generate-graph-constraints default1 default2)
          ;; Conjunction over every named graph of D1.  The chain is seeded
          ;; with 'none, like the 'and chain in generate-graph-constraints, and
          ;; each step keeps the constraints accumulated so far.
          (fold (lambda (entry constraints)
                  (match entry
                    ((name . graph)
                     (list 'and (named-graph-constraints name graph)
                           constraints))))
                'none
                named1))))
461+
462+
(define (validate-dataset-mapping mapping d1 d2)
  "Return whether MAPPING, an association list from blank nodes of dataset D1
to blank nodes of dataset D2, is accepted by validate-mapping for the default
graphs and for every named graph of D1 paired with its counterpart in D2.
Return #f when a named graph of D1 has no counterpart in D2."
  (define named2 (rdf-dataset-named-graphs d2))

  (define (validate-named-graph name graph)
    ;; A blank name is translated through MAPPING before the lookup; any
    ;; other name is looked up unchanged.
    (let* ((name2 (if (blank-node? name)
                      (assoc-ref mapping name)
                      name))
           (graph2 (assoc-ref named2 name2)))
      ;; assoc-ref yields #f when D2 has no graph under that name; treat this
      ;; as a failed validation instead of handing #f to validate-mapping.
      (and graph2 (validate-mapping mapping graph graph2))))

  (and (validate-mapping mapping (rdf-dataset-default-graph d1)
                         (rdf-dataset-default-graph d2))
       ;; Valid only if no named graph of D1 fails its validation.
       (null? (filter
                (lambda (entry)
                  (match entry
                    ((name . graph)
                     (not (validate-named-graph name graph)))))
                (rdf-dataset-named-graphs d1)))))
478+
479+
(define (rdf-dataset-isomorphic? d1 d2)
  "Compare two datasets and return whether they are isomorphic."
  (let* ((constraints (generate-dataset-constraints d1 d2))
         (disjunctions (to-disjunctions constraints)))
    ;; Try each candidate mapping kept by sat? in turn.  The loop variable is
    ;; the list that gets matched, so every iteration consumes one candidate
    ;; and the search always terminates.
    (let loop ((candidates (filter sat? disjunctions)))
      (match candidates
        ;; No candidate left: succeed only if both datasets are empty.
        ('() (and (null? (rdf-dataset-default-graph d1))
                  (null? (rdf-dataset-default-graph d2))
                  (null? (rdf-dataset-named-graphs d1))
                  (null? (rdf-dataset-named-graphs d2))))
        ((mapping rest ...)
         ;; The mapping must hold from D1 to D2 and, reversed, from D2 to D1.
         (or (and (validate-dataset-mapping mapping d1 d2)
                  (validate-dataset-mapping (reverse-mapping mapping) d2 d1))
             (loop rest)))))))
493+
433494
;; Recognizing datatypes is a transformation on the graph to add the proper
434495
;; datatype to literals, and replace IRIs that represent a datatype with the
435496
;; datatype it represents.  This is useful for some entailment regimes, such