OAI_DC2RDFGraphConverter.java

/*
 * Copyright (c) 2007-2024 MetaSolutions AB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.entrystore.impl.converters;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Model;
import org.eclipse.rdf4j.model.impl.LinkedHashModel;
import org.entrystore.Converter;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import java.net.URI;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

import static org.eclipse.rdf4j.model.util.Values.iri;
import static org.eclipse.rdf4j.model.util.Values.literal;


public class OAI_DC2RDFGraphConverter implements Converter {

	private static final Log log = LogFactory.getLog(OAI_DC2RDFGraphConverter.class);

	static Map<String, Locale> localeMap;

	static {
		String[] languages = Locale.getISOLanguages();
		localeMap = new HashMap<>(languages.length);
		for (String language : languages) {
			Locale locale = new Locale.Builder().setLanguage(language).build();
			localeMap.put(locale.getISO3Language(), locale);
		}
	}

	/**
	 * Converts an oai_dc xml document tag metadata to a graph.
	 *
	 * @param from An XML Node.
	 * @param resourceURI Root URI of the resource's metadata.
	 * @return the new metadata graph Model.
	 */
	public Model convertToModel(Node from, URI resourceURI) {
		NodeList metadataList;

		if (from != null && from.getChildNodes().getLength() > 0) {
			metadataList = from.getChildNodes();
		} else {
			log.warn("Unable to convert Node to Model graph, as the Node is null or has empty childNodes");
			return null;
		}

		Model model = new LinkedHashModel();
		IRI root = iri(resourceURI.toString());

		for (int i = 0; i < metadataList.getLength(); i++) {
			Node n = metadataList.item(i);
			if (n == null || "#text".equals(n.getNodeName())) {
				continue;
			}

			String nodeNS = n.getNamespaceURI();
			String nodeName = n.getNodeName();
			String predicate;
			if (nodeName.contains(":") && (nodeNS != null)) {
				nodeName = nodeName.substring(nodeName.indexOf(":") + 1);
				predicate = nodeNS + nodeName;
			} else {
				predicate = nodeName;
			}
			String nodeContent = n.getTextContent();
			if (nodeContent == null) {
				continue;
			}
			nodeContent = nodeContent.trim();

			// fix to create a valid language literal with a 2-letter ISO code
			// this is about the language value as a literal, attributes are treated further down
			if ("language".equalsIgnoreCase(nodeName)) {
				// convert 3-letter to 2-letter ISO code
				if (nodeContent.length() == 3) {
					nodeContent = getISO2Language(nodeContent);

					if (nodeContent == null) {
						continue;
					}
				}
			}
			// <- fix

			// fix to convert ISO 3-letter lang codes to 2-letter codes
			// this is about LangStrings in general
			NamedNodeMap nodeAttributes = n.getAttributes();
			String lang = null;
			if (nodeAttributes != null) {
				Node langNode = nodeAttributes.getNamedItem("xml:lang");
				if (langNode != null) {
					lang = langNode.getNodeValue();
					if (lang != null) {
						lang = lang.trim();
						if (lang.length() == 3) {
							lang = getISO2Language(lang.toLowerCase());
						}
					}
				}
			}
			// <- fix

			Literal lit;
			if (lang != null) {
				lit = literal(nodeContent, lang);
			} else {
				lit = literal(nodeContent);
			}

			model.add(root, iri(predicate), lit);
		}

		return model;
	}

	private String getISO2Language(String iso3Language) {
		if (localeMap == null) {
			String[] languages = Locale.getISOLanguages();
			localeMap = new HashMap<>(languages.length);
			for (String language : languages) {
				Locale locale = new Locale.Builder().setLanguage(language).build();
				localeMap.put(locale.getISO3Language(), locale);
			}
		}

		Locale locale = localeMap.get(iso3Language);
		if (locale != null) {
			return locale.getLanguage();
		}

		return null;
	}

}