// Graph2Entries.java
/*
* Copyright (c) 2007-2024 MetaSolutions AB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.entrystore.impl.converters;
import org.eclipse.rdf4j.model.BNode;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Model;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.impl.LinkedHashModel;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.entrystore.Context;
import org.entrystore.Entry;
import org.entrystore.GraphType;
import org.entrystore.ResourceType;
import org.entrystore.impl.ContextImpl;
import org.entrystore.impl.RDFResource;
import org.entrystore.impl.RepositoryProperties;
import org.entrystore.repository.util.NS;
import org.entrystore.repository.util.URISplit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.URI;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
/**
* Converts an RDF graph to a set of Entries
*
* @author Matthias Palmér
*/
public class Graph2Entries {
private static final ValueFactory valueFactory = SimpleValueFactory.getInstance();
private static final IRI mergeResourceId = valueFactory.createIRI(NS.entrystore, "mergeResourceId");
private static final IRI referenceResourceId = valueFactory.createIRI(NS.entrystore, "referenceResourceId");
private static final Logger log = LoggerFactory.getLogger(Graph2Entries.class);
private final Context context;
public Graph2Entries(Context context) {
this.context = context;
}
/**
* Detects and adds a set of entries from the graph via the anonymous closure algorithm starting from resources
* indicated with either of the two following properties that both indicate which entryId to use:<ul>
* <li>http://entrystore.org/terms/mergeResourceId or the</li>
* <li>http://entrystore.org/terms/referenceResourceId</li>
* </ul>
* The mergeResourceId indicates that the corresponding entry should be merged or created if it does not exist.
* The referenceResourceId only indicates that the relevant resource should be referenced.
*
* @param graph the RDF to merge
* @param destinationEntryId an entryId whose resource (resourcetype Graph) the graph should be stored in.
* A: If the id does not yet correspond to an existing entry, it will be created.
* B: An empty string indicates that a new entry should be created,
* C: null indicates that the graph should end up in multiple entries as indicated
* in the graph via the mergeResourceId properties on blank nodes.
* @param destinationListURI a list where the destinationEntryId will be created if a new entry is to be created.
* @return a collection of the merged entries (updated or created), the referenced entries are not included in the collection.
*/
public Set<Entry> merge(Model graph, String destinationEntryId, URI destinationListURI) {
if (graph == null) {
log.info("Supplied null instead of a graph.");
return null;
}
log.info("About to update/create entries in context {}.", this.context.getEntry().getId());
Set<Entry> entries = new HashSet<>();
HashMap<String, Resource> newResources = new HashMap<>();
HashMap<String, Resource> oldResources = new HashMap<>();
HashMap<Value, Resource> translate = new HashMap<>();
Iterator<Statement> statements = graph.filter(null, referenceResourceId, null).iterator();
// populate the translate Map with entries from statements
while (statements.hasNext()) {
Statement statement = statements.next();
String entryId = statement.getObject().stringValue();
Resource newResource = newResources.get(entryId);
if (newResource == null) {
URI uri = URISplit.createURI(
context.getEntry().getRepositoryManager().getRepositoryURL().toString(),
context.getEntry().getId(), RepositoryProperties.DATA_PATH, entryId);
newResource = valueFactory.createIRI(uri.toString());
}
translate.put(statement.getSubject(), newResource);
}
if (destinationEntryId != null) {
URI uri = URISplit.createURI(
context.getEntry().getRepositoryManager().getRepositoryURL().toString(),
context.getEntry().getId(), RepositoryProperties.DATA_PATH, destinationEntryId);
Resource newResource = valueFactory.createIRI(uri.toString());
statements = graph.filter(null, mergeResourceId, null).iterator();
while (statements.hasNext()) {
Statement statement = statements.next();
translate.put(statement.getSubject(), newResource);
}
Entry entry;
boolean entryCreated = false;
if (destinationEntryId.isEmpty()) {
entry = this.context.createResource(null, GraphType.Graph, ResourceType.InformationResource, destinationListURI);
entryCreated = true;
((ContextImpl) this.context).setMetadata(entry, "RDF Graph created at " + new Date(), null);
} else {
entry = this.context.get(destinationEntryId); //Try to fetch existing entry.
if (entry == null) {
entry = this.context.createResource(destinationEntryId, GraphType.Graph, ResourceType.InformationResource, destinationListURI);
entryCreated = true;
}
}
Model resourceGraph = this.translate(graph, translate);
((RDFResource) entry.getResource()).setGraph(resourceGraph);
Model subGraph = this.extract(resourceGraph, newResource, new HashSet<>(), new HashMap<>());
if (!subGraph.isEmpty()) {
entry.getLocalMetadata().setGraph(subGraph);
} else if (entryCreated) {
((ContextImpl) this.context).setMetadata(entry, "RDF Graph created at " + new Date(), null);
}
entries.add(entry);
return entries;
}
statements = graph.filter(null, mergeResourceId, null).iterator();
while (statements.hasNext()) {
Statement statement = statements.next();
String entryId = statement.getObject().stringValue();
URI uri = URISplit.createURI(
context.getEntry().getRepositoryManager().getRepositoryURL().toString(),
context.getEntry().getId(), RepositoryProperties.DATA_PATH, entryId);
Resource newRe = valueFactory.createIRI(uri.toString());
newResources.put(entryId, newRe);
oldResources.put(entryId, statement.getSubject());
translate.put(statement.getSubject(), newRe);
}
log.info("Found {} resources that will be updated/created.", oldResources.size());
int newResCounter = 0;
int updResCounter = 0;
Collection<Resource> ignore = newResources.values();
for (String entryId : newResources.keySet()) {
Model subGraph = this.extract(graph, oldResources.get(entryId), ignore, translate);
Entry entry = this.context.get(entryId); // Try to fetch existing entry.
if (entry == null) { // If none exists, create it.
entry = this.context.createResource(entryId, GraphType.None, ResourceType.NamedResource, null);
newResCounter++;
} else {
updResCounter++;
}
entry.getLocalMetadata().setGraph(subGraph);
entries.add(entry);
}
log.info("Updated {} existing entries and created {} new entries.", updResCounter, newResCounter);
log.info("Finished updating/creating entries in context {}.", this.context.getEntry().getId());
return entries;
}
private boolean checkPredicate(IRI predicate) {
return !mergeResourceId.equals(predicate) && !referenceResourceId.equals(predicate);
}
private void populateModel(Model model, Statement statement, Map<Value, Resource> translate) {
Resource subject = statement.getSubject();
IRI predicate = statement.getPredicate();
Value object = statement.getObject();
if (translate.get(subject) != null) {
subject = translate.get(subject);
}
if (translate.get(object) != null) {
object = translate.get(object);
}
model.add(subject, predicate, object);
}
private Model translate(Model from, Map<Value, Resource> translate) {
Model to = new LinkedHashModel();
for (Statement statement : from) {
if (checkPredicate(statement.getPredicate())) {
populateModel(to, statement, translate);
}
}
return to;
}
/**
* Extracts a smaller graph by starting from a given resource and collects all direct and indirect outgoing triples,
* only stopping when non-blank nodes or resources in the "ignore" set are encountered. It also replaces resources
* found in the "translate" map.
*
* @param from the graph to extract triples from
* @param subject the resource to start detecting triples from
* @param ignore a set of resources that when encountered no further triples (outgoing) from that resource should be included
* @param translate a map of resources, the keys should be replaced with the values when encountered.
* @return the extracted subgraph may be empty, not null.
*/
private Model extract(Model from, Resource subject, Collection<Resource> ignore, Map<Value, Resource> translate) {
Model to = new LinkedHashModel();
HashSet<Resource> collected = new HashSet<>(ignore);
this._extract(from, to, subject, collected, translate);
return to;
}
private void _extract(Model from, Model to, Resource resource, Set<Resource> collected, Map<Value, Resource> translate) {
for (Statement statement : from.filter(resource, null, null)) {
Value object = statement.getObject();
// Recursive step.
if (object instanceof BNode && !collected.contains((Resource) object)) {
collected.add((BNode) object);
this._extract(from, to, (BNode) object, collected, translate);
}
if (checkPredicate(statement.getPredicate())) {
populateModel(to, statement, translate);
}
}
}
}