// MetadataUtil.java

/*
 * Copyright (c) 2007-2024 MetaSolutions AB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.entrystore.repository.util;

import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.base.CoreDatatype;
import org.eclipse.rdf4j.rio.RDFFormat;

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Static helpers for classifying RDF literals by their datatype and for
 * resolving an RDF serialization format from its name.
 *
 * @author Hannes Ebner
 */
public class MetadataUtil {

	/** Type key that selects {@link #integerDataTypes} when classifying a literal. */
	public static final String INTEGER_TYPE = "integer";

	/** Type key that selects {@link #dateDataTypes} when classifying a literal. */
	public static final String DATE_TYPE = "date";

	/** Type key that selects {@link #stringDataTypes} when classifying a literal. */
	public static final String STRING_TYPE = "string";

	// NOTE(review): the three sets below are public, non-final and mutable, so
	// any caller can replace or modify them. They are left that way here to
	// keep the existing interface; consider making them final/unmodifiable
	// once it is confirmed that no external code writes to them.

	/** Datatypes for which {@link #isIntegerLiteral(Literal)} returns {@code true}. */
	public static Set<CoreDatatype> integerDataTypes;

	/** Datatypes for which {@link #isDateLiteral(Literal)} returns {@code true}. */
	public static Set<CoreDatatype> dateDataTypes;

	/** Datatypes for which {@link #isStringLiteral(Literal)} returns {@code true}. */
	public static Set<CoreDatatype> stringDataTypes;

	/**
	 * Formats that {@link #getRDFFormat(String)} can resolve, in lookup order.
	 * Built once instead of on every call.
	 */
	private static final List<RDFFormat> KNOWN_FORMATS = Arrays.asList(
		RDFFormat.RDFXML,
		RDFFormat.NTRIPLES,
		RDFFormat.TURTLE,
		RDFFormat.N3,
		RDFFormat.TRIX,
		RDFFormat.TRIG,
		RDFFormat.BINARY,
		RDFFormat.NQUADS,
		RDFFormat.JSONLD,
		RDFFormat.RDFJSON,
		RDFFormat.RDFA);

	static {
		integerDataTypes = new HashSet<>();
		integerDataTypes.add(CoreDatatype.XSD.BYTE);
		integerDataTypes.add(CoreDatatype.XSD.INT);
		integerDataTypes.add(CoreDatatype.XSD.INTEGER);
		integerDataTypes.add(CoreDatatype.XSD.LONG);
		integerDataTypes.add(CoreDatatype.XSD.NEGATIVE_INTEGER);
		integerDataTypes.add(CoreDatatype.XSD.NON_NEGATIVE_INTEGER);
		integerDataTypes.add(CoreDatatype.XSD.NON_POSITIVE_INTEGER);
		integerDataTypes.add(CoreDatatype.XSD.POSITIVE_INTEGER);
		integerDataTypes.add(CoreDatatype.XSD.SHORT);
		integerDataTypes.add(CoreDatatype.XSD.UNSIGNED_LONG);
		integerDataTypes.add(CoreDatatype.XSD.UNSIGNED_INT);
		integerDataTypes.add(CoreDatatype.XSD.UNSIGNED_SHORT);
		integerDataTypes.add(CoreDatatype.XSD.UNSIGNED_BYTE);
		// NOTE(review): xsd:gYear is listed both here and in dateDataTypes,
		// so such a literal counts as integer AND date. Presumably
		// intentional -- confirm before changing.
		integerDataTypes.add(CoreDatatype.XSD.GYEAR);

		dateDataTypes = new HashSet<>();
		dateDataTypes.add(CoreDatatype.XSD.DATE);
		dateDataTypes.add(CoreDatatype.XSD.DATETIME);
		dateDataTypes.add(CoreDatatype.XSD.GYEAR);
		dateDataTypes.add(CoreDatatype.XSD.GYEARMONTH);
		dateDataTypes.add(CoreDatatype.XSD.GDAY);
		dateDataTypes.add(CoreDatatype.XSD.GMONTH);
		dateDataTypes.add(CoreDatatype.XSD.DATETIMESTAMP);

		stringDataTypes = new HashSet<>();
		stringDataTypes.add(CoreDatatype.RDF.LANGSTRING);
		stringDataTypes.add(CoreDatatype.XSD.STRING);
	}

	/*
	 * NEVER USED -- disabled code kept for reference only. Its description is
	 * deliberately part of this plain comment (not a Javadoc comment) so that
	 * it cannot be attributed to the next declaration in the class.
	 *
	 * Filters all invalid XML characters out of the string.
	 *
	 * @param s
	 *            string to be filtered.
	 * @return A valid XML string.

	public static String removeInvalidXMLCharacters(String s) {
		StringBuilder out = new StringBuilder(); // Used to hold the output.
		// used to reference the current char
		int codePoint;

		// unicode character, represented by two code units
		// String ss = "\ud801\udc00";
		// System.out.println(ss.codePointCount(0, ss.length())); // See: 1

		int i = 0;
		while (i < s.length()) {
			// unicode code of the character
			codePoint = s.codePointAt(i);
			if ((codePoint == 0x9) || (codePoint == 0xA) || (codePoint == 0xD)
					|| ((codePoint >= 0x20) && (codePoint <= 0xD7FF))
					|| ((codePoint >= 0xE000) && (codePoint <= 0xFFFD))
					|| ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF))) {
				out.append(Character.toChars(codePoint));
			}
			// increment with number of code units (java chars) needed to
			// represent a unicode char
			i += Character.charCount(codePoint);
		}

		return out.toString();
	}
	*/

	/**
	 * Checks whether the literal's datatype is one of {@link #integerDataTypes}.
	 *
	 * @param l the literal to inspect; must not be {@code null}.
	 * @return {@code true} if the literal's datatype is contained in
	 *         {@link #integerDataTypes}, {@code false} otherwise.
	 * @throws IllegalArgumentException if {@code l} is {@code null}.
	 */
	public static boolean isIntegerLiteral(Literal l) {
		return isTypedLiteral(l, INTEGER_TYPE);
	}

	/**
	 * Checks whether the literal's datatype is one of {@link #dateDataTypes}.
	 *
	 * @param l the literal to inspect; must not be {@code null}.
	 * @return {@code true} if the literal's datatype is contained in
	 *         {@link #dateDataTypes}, {@code false} otherwise.
	 * @throws IllegalArgumentException if {@code l} is {@code null}.
	 */
	public static boolean isDateLiteral(Literal l) {
		return isTypedLiteral(l, DATE_TYPE);
	}

	/**
	 * Checks whether the literal's datatype is one of {@link #stringDataTypes}.
	 *
	 * @param l the literal to inspect; must not be {@code null}.
	 * @return {@code true} if the literal's datatype is contained in
	 *         {@link #stringDataTypes}, {@code false} otherwise.
	 * @throws IllegalArgumentException if {@code l} is {@code null}.
	 */
	public static boolean isStringLiteral(Literal l) {
		return isTypedLiteral(l, STRING_TYPE);
	}

	/**
	 * Shared implementation behind the public {@code is...Literal} checks.
	 *
	 * @param l    the literal to inspect; must not be {@code null}.
	 * @param type one of {@link #INTEGER_TYPE}, {@link #DATE_TYPE} or
	 *             {@link #STRING_TYPE}; any other value is handled like
	 *             {@link #STRING_TYPE}.
	 * @return {@code true} if the literal's datatype is a member of the set
	 *         selected by {@code type}; {@code false} if it is not, or if the
	 *         datatype resolves to {@link CoreDatatype#NONE}.
	 * @throws IllegalArgumentException if {@code l} is {@code null}.
	 */
	private static boolean isTypedLiteral(Literal l, String type) {

		if (l == null) {
			throw new IllegalArgumentException("Literal must not be null.");
		}

		CoreDatatype datatype = CoreDatatype.from(l.getDatatype());

		// NONE is a constant of the CoreDatatype interface itself; reference it
		// directly rather than through the XSD enum that merely inherits it.
		if (datatype == CoreDatatype.NONE) {
			return false;
		}

		return switch (type) {
			case INTEGER_TYPE -> integerDataTypes.contains(datatype);
			case DATE_TYPE -> dateDataTypes.contains(datatype);
			// STRING_TYPE and any unrecognised key fall through to the string set.
			default -> stringDataTypes.contains(datatype);
		};
	}

	/**
	 * Resolves an {@link RDFFormat} from its name.
	 * <p>
	 * The argument is compared case-insensitively against
	 * {@link RDFFormat#getName()} of each format this class knows about; the
	 * first match wins.
	 *
	 * @param formatString the format name to look up; may be {@code null}.
	 * @return the matching format, or {@code null} if {@code formatString} is
	 *         {@code null} or matches none of the known formats.
	 */
	public static RDFFormat getRDFFormat(String formatString) {
		if (formatString == null) {
			// Same result the name comparison would give for null, made explicit.
			return null;
		}

		for (RDFFormat format : KNOWN_FORMATS) {
			if (format.getName().equalsIgnoreCase(formatString)) {
				return format;
			}
		}

		return null;
	}

}