Add rdf dataset isomorphism check
README.md
252 | 252 | ||
253 | 253 | Because the literal representation of `10` differs. | |
254 | 254 | ||
255 | + | #### **Scheme Procedure**: `rdf-dataset-isomorphic? d1 d2` | |
256 | + | ||
257 | + | Returns whether two datasets are the same. Two datasets can have a different | |
258 | + | representation because of order and because of differing blank node | |
259 | + | representations. They are isomorphic when there is a one-to-one mapping | |
260 | + | between them, such that blank nodes from one map to blank nodes of the other, | |
261 | + | and vice-versa. Note that the mapping can map differently named blank nodes | |
262 | + | even when they name a named graph. | |
263 | + | ||
255 | 264 | #### **Scheme Procedure**: `recognize graph vocabulary` | |
256 | 265 | ||
257 | 266 | Transforms a graph to replace every instance of recognized IRIs in the |
rdf/rdf.scm
65 | 65 | ||
66 | 66 | merge-graphs | |
67 | 67 | rdf-isomorphic? | |
68 | + | rdf-dataset-isomorphic? | |
68 | 69 | recognize)) | |
69 | 70 | ||
70 | 71 | ;; From the specification: | |
… | |||
385 | 386 | (list 'or c (generate-constraints t1 g2)) | |
386 | 387 | (generate-constraints t1 g2)))))) | |
387 | 388 | ||
(define (generate-graph-constraints g1 g2)
  "Return the constraint tree relating graph G1 to graph G2.

Each triple of G1 contributes the result of `generate-constraints' for that
triple against G2.  The contributions are chained as nested
(and TRIPLE-CONSTRAINT REST) lists; the innermost REST is the symbol `none',
which is also the result when G1 is empty."
  ;; Walk G1 front to back, wrapping the accumulator once per triple, so the
  ;; constraint of the last triple ends up outermost.
  (let loop ((triples g1)
             (acc 'none))
    (if (null? triples)
        acc
        (loop (cdr triples)
              (list 'and (generate-constraints (car triples) g2) acc)))))
393 | + | ||
388 | 394 | (define (reverse-mapping mapping) | |
389 | 395 | (let loop ((mapping mapping) (result '())) | |
390 | 396 | (match mapping | |
… | |||
417 | 423 | ||
418 | 424 | (define (rdf-isomorphic? g1 g2) | |
419 | 425 | "Compare two graphs and return whether they are isomorph." | |
420 | - | (let* ((constraints (fold (lambda (t constraints) | |
421 | - | (list 'and (generate-constraints t g2) constraints)) | |
422 | - | 'none g1)) | |
426 | + | (let* ((constraints (generate-graph-constraints g1 g2)) | |
423 | 427 | (disjunctions (to-disjunctions constraints))) | |
424 | - | (let loop ((disjunctions disjunctions)) | |
425 | - | (match (filter sat? disjunctions) | |
428 | + | (let loop ((disjunctions (filter sat? disjunctions))) | |
429 | + | (match disjunctions | |
426 | 430 | ('() (and (null? g1) (null? g2))) | |
427 | 431 | ((mapping disjunctions ...) | |
428 | 432 | (if (and (validate-mapping mapping g1 g2) | |
… | |||
430 | 434 | #t | |
431 | 435 | (loop disjunctions))))))) | |
432 | 436 | ||
(define (generate-dataset-constraints d1 d2)
  "Return the constraint tree relating dataset D1 to dataset D2.

The result is the conjunction of the constraints between the two default
graphs (see `generate-graph-constraints') and one constraint per named graph
of D1:

- a named graph whose name is not a blank node must have a graph with the
  same name in D2, and is constrained against that graph; when D2 has no such
  graph its constraint is the symbol `bot';
- a named graph whose name is a blank node yields an `or' chain with one
  alternative per blank-node-named graph of D2.  Each alternative requires
  the two names to be equivalent and the two graphs to satisfy their graph
  constraints.  The chain ends with `bot', so it is `bot' alone when D2 has
  no blank-node-named graph."
  (let ((g1 (rdf-dataset-default-graph d1))
        (g2 (rdf-dataset-default-graph d2))
        (ng1 (rdf-dataset-named-graphs d1))
        (ng2 (rdf-dataset-named-graphs d2)))

    ;; Constraint for a single named graph (NAME . GRAPH) of D1.
    (define (named-graph-constraints name graph)
      (if (blank-node? name)
          (fold (lambda (candidate alternatives)
                  (match candidate
                    ((name2 . graph2)
                     ;; Compare against the candidate's own graph, not
                     ;; against the default graph of D2.
                     (list 'or
                           (list 'and
                                 (list 'equiv name name2)
                                 (generate-graph-constraints graph graph2))
                           alternatives))))
                'bot
                (filter (lambda (candidate) (blank-node? (car candidate)))
                        ng2))
          ;; assoc-ref returns #f when the name is absent; an empty graph is
          ;; '() and therefore still counts as present.
          (let ((graph2 (assoc-ref ng2 name)))
            (if graph2
                (generate-graph-constraints graph graph2)
                'bot))))

    (list 'and
          (generate-graph-constraints g1 g2)
          ;; Seed the conjunction with 'none, as generate-graph-constraints
          ;; does; every named graph's constraint is conjoined with the ones
          ;; accumulated so far, whatever the kind of its name.
          (fold (lambda (named constraints)
                  (match named
                    ((name . graph)
                     (list 'and
                           (named-graph-constraints name graph)
                           constraints))))
                'none
                ng1))))
(define (validate-dataset-mapping mapping d1 d2)
  "Return #t when MAPPING validates dataset D1 against dataset D2.

MAPPING is an association list keyed by the blank nodes of D1.  The default
graph of D1 must validate against the default graph of D2 with
`validate-mapping', and every named graph of D1 must validate against its
counterpart in D2.  The counterpart is the graph of D2 with the same name,
or, when the name is a blank node, the graph named by the image of that
blank node under MAPPING.  A named graph with no counterpart in D2 fails
validation."
  (define named-graphs2 (rdf-dataset-named-graphs d2))

  (define (validate-named-graph name graph)
    (let ((graph2 (assoc-ref named-graphs2
                             (if (blank-node? name)
                                 (assoc-ref mapping name)
                                 name))))
      ;; assoc-ref yields #f when there is no counterpart; report a failed
      ;; validation instead of handing #f to validate-mapping as a graph.
      (and graph2
           (validate-mapping mapping graph graph2))))

  (and (validate-mapping mapping
                         (rdf-dataset-default-graph d1)
                         (rdf-dataset-default-graph d2))
       ;; Valid only when no named graph of D1 fails validation.
       (null? (filter
                (lambda (ng1)
                  (match ng1
                    ((name . graph)
                     (not (validate-named-graph name graph)))))
                (rdf-dataset-named-graphs d1)))))
478 | + | ||
(define (rdf-dataset-isomorphic? d1 d2)
  "Compare two datasets and return whether they are isomorphic.

The constraints produced by `generate-dataset-constraints' are expanded with
`to-disjunctions' and filtered with `sat?'.  Each remaining candidate mapping
is tried in turn: D1 and D2 are isomorphic as soon as one candidate validates
D1 against D2 and its reverse validates D2 against D1.  When no candidate is
left, the result is #t only if both datasets have an empty default graph and
no named graphs."
  (let* ((constraints (generate-dataset-constraints d1 d2))
         (disjunctions (to-disjunctions constraints)))
    ;; The loop variable is the list that gets matched, so the sat? filter is
    ;; honoured and every iteration works on a strictly shorter list.
    (let loop ((candidates (filter sat? disjunctions)))
      (match candidates
        ('() (and (null? (rdf-dataset-default-graph d1))
                  (null? (rdf-dataset-default-graph d2))
                  (null? (rdf-dataset-named-graphs d1))
                  (null? (rdf-dataset-named-graphs d2))))
        ((mapping rest ...)
         (or (and (validate-dataset-mapping mapping d1 d2)
                  (validate-dataset-mapping (reverse-mapping mapping) d2 d1))
             (loop rest)))))))
493 | + | ||
433 | 494 | ;; Recognizing datatypes is a transformation on the graph to add the proper | |
434 | 495 | ;; datatype to literals, and replace IRIs that represent a datatype with the | |
435 | 496 | ;; datatype it represents. This is useful for some entailment regimes, such |