ProxyResource.java
/*
* Copyright (c) 2007-2017 MetaSolutions AB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.entrystore.rest.resources;
import com.google.common.base.Joiner;
import org.apache.commons.lang.StringEscapeUtils;
import org.entrystore.AuthorizationException;
import org.entrystore.Entry;
import org.entrystore.PrincipalManager;
import org.entrystore.repository.config.Settings;
import org.restlet.Client;
import org.restlet.Context;
import org.restlet.Request;
import org.restlet.Response;
import org.restlet.data.Method;
import org.restlet.data.Protocol;
import org.restlet.data.Reference;
import org.restlet.data.Status;
import org.restlet.representation.Representation;
import org.restlet.resource.Get;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.InetAddress;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLDecoder;
import java.net.UnknownHostException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
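/**
* This class provides support for proxying requests to web services on other
* servers.
*
* The URL of the external resource is read from the URL parameter "url" and is
* fetched with an HTTP GET request; the content is proxied without conversion.
*
* NOTE(review): earlier documentation stated that a request carrying the
* URL parameter fromFormat has its response converted into RDF/JSON. No such
* handling is visible in this class; confirm whether it still exists elsewhere.
*
* @author Hannes Ebner
*/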
public class ProxyResource extends BaseResource {
static final Logger log = LoggerFactory.getLogger(ProxyResource.class);

/** HTTP client for the outgoing request; created lazily by getResourceFromURL. */
private Client client;

/** Response of the outgoing request that is performed on behalf of the caller. */
private Response clientResponse;

/**
 * Lower-cased host names that may be requested through the proxy without
 * authentication. Initialized once by the first call to {@link #doInit()};
 * volatile so that a fully populated list is published to all request threads.
 */
private static volatile List<String> whitelistAnon;

/**
 * Host name patterns that must never be fetched through the proxy. They are
 * applied with {@code Matcher.find()}, so patterns without anchors match
 * anywhere inside the host name.
 */
private static final List<Pattern> blacklistRegEx;

static {
    blacklistRegEx = Arrays.asList(
        Pattern.compile("localhost"), // localhost
        Pattern.compile("(.+)\\.local"), // any local domains
        Pattern.compile("^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$"), // IPv4 in dotted notation
        Pattern.compile("^\\d+$"), // purely numeric host, e.g. IPv4 written as a single integer
        Pattern.compile(":") // IPv6
    );
    log.info("Proxy blacklist consists of following regular expressions: {}", Joiner.on(", ").join(blacklistRegEx));
}

/**
 * Loads the whitelist of hosts that guest users may access from the
 * configuration the first time a proxy resource is initialized.
 *
 * The list is built completely before it is assigned to the shared static
 * field, and initialization is guarded by a lock, so that concurrent requests
 * never observe a partially filled list.
 */
@Override
public void doInit() {
    if (whitelistAnon != null) {
        return;
    }
    synchronized (ProxyResource.class) {
        if (whitelistAnon != null) {
            return;
        }
        List<String> configured = getRM().getConfiguration().getStringList(Settings.PROXY_WHITELIST_ANONYMOUS);
        // we normalize the list to lower case and to not contain null;
        // Locale.ROOT keeps the result independent of the JVM default locale
        List<String> normalized = new ArrayList<>();
        for (String domain : configured) {
            if (domain != null) {
                normalized.add(domain.toLowerCase(Locale.ROOT));
            }
        }
        if (normalized.isEmpty()) {
            log.info("No domains provided for proxy whitelist; only authenticated users are allowed to perform proxy requests");
        } else {
            log.info("Proxy whitelist for guest users initialized with following domains: {}; Requests to other domains require authentication",
                    Joiner.on(", ").join(normalized));
        }
        // publish only after the list is complete
        whitelistAnon = normalized;
    }
}
/**
 * Fetches the resource referenced by the URL parameter "url" and returns it
 * to the caller.
 *
 * Response status:
 * 400 if the parameter is missing, cannot be decoded, is not a valid URI,
 * has no host, or if the request could not be completed (e.g. too many
 * redirects);
 * 404 if a context id was given but the context does not exist;
 * 403 if the caller lacks read access to the context, or is the guest user
 * and the host is not whitelisted;
 * 504 if the connector failed to reach the remote server;
 * otherwise the status of the remote response.
 *
 * @return the remote entity on success, otherwise null
 */
@Get
public Representation represent() {
    String extResourceURL = null;
    String rawURL = parameters.get("url");
    if (rawURL != null) {
        try {
            extResourceURL = URLDecoder.decode(rawURL, StandardCharsets.UTF_8);
        } catch (IllegalArgumentException e) {
            // malformed percent-encoding is a client error, not a server failure
            log.debug("Unable to decode proxy URL parameter: {}", e.getMessage());
        }
    }

    if (extResourceURL == null) {
        getResponse().setStatus(Status.CLIENT_ERROR_BAD_REQUEST);
        return null;
    }

    if (contextId != null && context == null) {
        getResponse().setStatus(Status.CLIENT_ERROR_NOT_FOUND);
        return null;
    }

    // for /{context-id}/proxy only principals with read access may access the context's proxy resource
    if (context != null && !canReadContextResource(context.getEntry())) {
        getResponse().setStatus(Status.CLIENT_ERROR_FORBIDDEN);
        return null;
    }

    // For /proxy in general: any user, including _guest may access hosts that are whitelisted,
    // otherwise access is restricted to logged-in users.
    // If the host is blacklisted, nobody is allowed to fetch the URL via the proxy
    String host;
    try {
        host = new URI(extResourceURL).getHost();
    } catch (URISyntaxException e) {
        getResponse().setStatus(Status.CLIENT_ERROR_BAD_REQUEST);
        return null;
    }
    if (host == null) {
        // e.g. relative or opaque URIs
        getResponse().setStatus(Status.CLIENT_ERROR_BAD_REQUEST);
        return null;
    }
    host = host.toLowerCase(Locale.ROOT);

    boolean isGuest = getPM().getGuestUser().getURI().equals(getPM().getAuthenticatedUserURI());
    if (isGuest && !whitelistAnon.contains(host)) {
        getResponse().setStatus(Status.CLIENT_ERROR_FORBIDDEN);
        return null;
    }

    log.info("Received proxy request for {}", extResourceURL);

    clientResponse = getResourceFromURL(extResourceURL, 0);

    if (clientResponse == null) {
        releaseProxyConnection();
        getResponse().setStatus(Status.CLIENT_ERROR_BAD_REQUEST);
        return null;
    }

    Status upstreamStatus = clientResponse.getStatus();

    // Connector errors are not HTTP status codes of the remote server and are
    // never "successful", so they have to be handled before the success check.
    if (Status.isConnectorError(upstreamStatus.getCode())) {
        log.debug("Proxy request to {} timed out", extResourceURL);
        releaseProxyConnection();
        getResponse().setStatus(Status.SERVER_ERROR_GATEWAY_TIMEOUT);
        return null;
    }

    if (!upstreamStatus.isSuccess()) {
        // nothing is streamed back to the caller, so the connection can be freed right away
        releaseProxyConnection();
        getResponse().setStatus(upstreamStatus);
        return null;
    }

    Representation representation = clientResponse.getEntity();
    getResponse().getHeaders().set("Content-Security-Policy", "script-src 'none'; form-action 'none';"); // XSS and SSRF protection
    // the entity is streamed to the caller, resources may only be freed after it has been sent
    getResponse().setOnSent((request, response) -> releaseProxyConnection());
    getResponse().setStatus(upstreamStatus);
    return representation;
}

/**
 * Releases the response of the outgoing request and stops the HTTP client.
 * Safe to call when either of them has not been created.
 */
private void releaseProxyConnection() {
    try {
        if (clientResponse != null) {
            clientResponse.release();
        }
    } catch (Exception e) {
        log.error(e.getMessage());
    }
    try {
        if (client != null) {
            client.stop();
        }
    } catch (Exception e) {
        log.error(e.getMessage());
    } finally {
        client = null;
    }
}
/**
 * Tells whether the authenticated user may read the resource of the given
 * context entry.
 *
 * @param contextEntry entry of the context whose proxy resource is requested
 * @return false if the principal manager signals an AuthorizationException,
 *         true otherwise
 */
private boolean canReadContextResource(Entry contextEntry) {
    boolean readable = true;
    try {
        getPM().checkAuthenticatedUserAuthorized(contextEntry, PrincipalManager.AccessProperty.ReadResource);
    } catch (AuthorizationException denied) {
        readable = false;
    }
    return readable;
}
/**
 * Performs a GET request against the given URL and follows redirects.
 *
 * Every URL in a redirect chain is checked against the blacklist before it
 * is requested. The media types accepted by the original caller are
 * forwarded to the remote server.
 *
 * NOTE(review): the blacklist check resolves the host name itself and the
 * HTTP client presumably resolves it again when connecting; confirm whether
 * this gap matters for the deployment.
 *
 * @param url the absolute URL to fetch
 * @param loopCount number of redirects followed so far
 * @return the remote response; a synthetic response with status 400 if the
 *         URL is invalid or has no host; a synthetic response with status
 *         403 if the host is blacklisted; null if more than 15 redirects
 *         were followed
 */
private Response getResourceFromURL(String url, int loopCount) {
    String host;
    try {
        host = new URI(url).getHost();
    } catch (URISyntaxException e) {
        log.debug(e.getMessage());
        return newProxyErrorResponse(Status.CLIENT_ERROR_BAD_REQUEST);
    }

    if (host == null) {
        // relative or opaque URI, e.g. from a malformed Location header
        log.debug("No host found in URL {}", url);
        return newProxyErrorResponse(Status.CLIENT_ERROR_BAD_REQUEST);
    }

    if (isBlacklisted(host)) {
        return newProxyErrorResponse(Status.CLIENT_ERROR_FORBIDDEN);
    }

    if (loopCount > 15) {
        log.warn("More than 15 redirect loops detected, aborting");
        return null;
    }

    if (client == null) {
        client = new Client(Arrays.asList(Protocol.HTTP, Protocol.HTTPS));
        client.setContext(new Context());
        client.getContext().getParameters().set("connectTimeout", "30000");
        client.getContext().getParameters().set("socketConnectTimeoutMs", "30000");
        client.getContext().getParameters().set("socketTimeout", "60000");
        client.getContext().getParameters().set("readTimeout", "60000");
        log.info("Initialized HTTP client for proxy requests");
    }

    Request request = new Request(Method.GET, url);
    request.getClientInfo().setAcceptedMediaTypes(getRequest().getClientInfo().getAcceptedMediaTypes());
    Response response = client.handle(request);

    if (response.getStatus().isRedirection()) {
        Reference ref = response.getLocationRef();
        // a redirect does not necessarily carry a body
        if (response.getEntity() != null) {
            response.getEntity().release();
        }
        if (ref != null) {
            String refURL = ref.getIdentifier();
            log.debug("Request redirected to {}", refURL);
            return getResourceFromURL(refURL, loopCount + 1);
        }
    }

    if (response.getEntity() != null) {
        Reference entityLocation = response.getEntity().getLocationRef();
        if (entityLocation != null && entityLocation.getBaseRef() == null) {
            // use the "directory" of the requested URL as base for a location without base reference
            entityLocation.setBaseRef(url.substring(0, url.lastIndexOf("/") + 1));
        }
    }

    return response;
}

/**
 * Creates a response that is not backed by a remote request and only
 * carries the given status.
 */
private Response newProxyErrorResponse(Status status) {
    Response errorResponse = new Response(new Request());
    errorResponse.setStatus(status);
    return errorResponse;
}
/**
 * Extracts the content of the first title element of an HTML document.
 *
 * All runs of whitespace are collapsed into a single space before matching,
 * HTML entities in the title are unescaped and the result is trimmed.
 *
 * @param htmlString the HTML document
 * @return the title, or null if no title element was found
 */
private String getTitle(String htmlString) {
    final String collapsed = htmlString.replaceAll("\\s+", " ");
    final Matcher titleMatcher = Pattern
            .compile("<title>(.*?)</title>", Pattern.CASE_INSENSITIVE)
            .matcher(collapsed);
    if (!titleMatcher.find()) {
        return null;
    }
    final String rawTitle = titleMatcher.group(1);
    return StringEscapeUtils.unescapeHtml(rawTitle).trim();
}
/**
 * Extracts the content attribute of the first meta element with the given
 * name.
 *
 * Only elements written exactly as
 * {@code <meta name="NAME" content="VALUE" />} are recognized (matching is
 * case-insensitive). All runs of whitespace are collapsed into a single
 * space before matching, HTML entities in the value are unescaped and the
 * result is trimmed.
 *
 * @param metaName the value of the name attribute to look for; it is
 *        matched literally
 * @param htmlString the HTML to search
 * @return the content value, or null if no matching element was found
 */
private String getMetaValue(String metaName, String htmlString) {
    htmlString = htmlString.replaceAll("\\s+", " ");
    // quote the name so that regex metacharacters in it cannot alter the pattern
    Pattern p = Pattern.compile("<meta name=\"" + Pattern.quote(metaName) + "\" content=\"(.*?)\" />", Pattern.CASE_INSENSITIVE);
    Matcher m = p.matcher(htmlString);
    if (m.find()) {
        return StringEscapeUtils.unescapeHtml(m.group(1)).trim();
    }
    return null;
}
/**
 * Looks up the "description" meta value of an HTML document.
 *
 * The document is examined line by line and the first line that yields a
 * value wins.
 *
 * @param htmlString the HTML document
 * @return the description, or null if none of the lines contains one
 */
private String getDescription(String htmlString) {
    return Arrays.stream(htmlString.split("\\r?\\n"))
            .map(line -> getMetaValue("description", line))
            .filter(found -> found != null)
            .findFirst()
            .orElse(null);
}
/**
 * Collects the values of all "keywords" meta elements of an HTML document.
 *
 * The document is examined line by line; every value found is split at
 * commas and each part is trimmed before it is added to the result.
 *
 * @param htmlString the HTML document
 * @return the set of keywords, empty if none were found
 */
private Set<String> getKeywords(String htmlString) {
    final Set<String> collected = new HashSet<>();
    for (String line : htmlString.split("\\r?\\n")) {
        final String rawKeywords = getMetaValue("keywords", line);
        if (rawKeywords == null) {
            continue;
        }
        Arrays.stream(rawKeywords.split(","))
                .map(String::trim)
                .forEach(collected::add);
    }
    return collected;
}
/**
 * Decides whether requests to the given host must be refused.
 *
 * A host is blacklisted if it is null, if any of the blacklist patterns is
 * found in its lower-cased name, if it cannot be resolved, or if it
 * resolves to a wildcard, site-local, loopback, link-local or multicast
 * address.
 *
 * @param host the host name to check, may be null
 * @return true if the host must not be accessed via the proxy
 */
private boolean isBlacklisted(String host) {
    if (host == null) {
        // nothing to verify, refuse rather than fail
        return true;
    }

    // Locale.ROOT keeps the comparison independent of the JVM default locale
    String normalizedHost = host.toLowerCase(Locale.ROOT);
    for (Pattern p : blacklistRegEx) {
        if (p.matcher(normalizedHost).find()) {
            return true;
        }
    }

    // all hosts that do not resolve into a "regular" Unicast address are automatically
    // blacklisted, among other reasons to avoid access to local networks
    try {
        InetAddress ia = InetAddress.getByName(normalizedHost);
        return ia.isAnyLocalAddress() ||
                ia.isSiteLocalAddress() ||
                ia.isLoopbackAddress() ||
                ia.isLinkLocalAddress() ||
                ia.isMulticastAddress();
    } catch (UnknownHostException e) {
        log.warn(e.getMessage());
        return true;
    }
}
}