Files
Athou_commafeed/src/main/java/com/commafeed/backend/HttpGetter.java

255 lines
9.0 KiB
Java
Raw Normal View History

2013-04-03 15:53:57 +02:00
package com.commafeed.backend;
2013-04-11 12:49:54 +02:00
import java.io.IOException;
2013-04-19 13:24:46 +02:00
import java.security.SecureRandom;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
2013-04-19 13:24:46 +02:00
2014-08-17 14:16:30 +02:00
import javax.inject.Inject;
import javax.inject.Singleton;
2013-04-19 13:24:46 +02:00
import javax.net.ssl.KeyManager;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
2013-04-11 12:49:54 +02:00
2014-08-19 00:56:21 +02:00
import lombok.Getter;
2014-08-17 14:16:30 +02:00
import lombok.RequiredArgsConstructor;
2013-08-11 11:45:32 +02:00
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
2013-11-05 15:10:09 +01:00
import org.apache.http.Consts;
import org.apache.http.Header;
import org.apache.http.HeaderElement;
2013-04-03 15:53:57 +02:00
import org.apache.http.HttpEntity;
import org.apache.http.HttpException;
import org.apache.http.HttpHeaders;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.HttpResponseInterceptor;
import org.apache.http.HttpStatus;
2013-04-11 12:49:54 +02:00
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpResponseException;
2013-11-05 15:10:09 +01:00
import org.apache.http.client.config.CookieSpecs;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
2013-04-03 15:53:57 +02:00
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpUriRequest;
2013-11-05 15:10:09 +01:00
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.config.ConnectionConfig;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.entity.HttpEntityWrapper;
2013-11-05 15:10:09 +01:00
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.protocol.HttpContext;
2013-04-03 15:53:57 +02:00
import org.apache.http.util.EntityUtils;
import com.commafeed.CommaFeedConfiguration;
2014-08-14 16:19:06 +02:00
2013-07-26 16:00:02 +02:00
/**
2013-08-13 12:15:30 +02:00
* Smart HTTP getter: handles gzip, ssl, last modified and etag headers
2013-07-26 16:00:02 +02:00
*
*/
2013-08-11 11:45:32 +02:00
@Slf4j
2014-08-17 14:16:30 +02:00
@RequiredArgsConstructor(onConstructor = @__({ @Inject }))
@Singleton
2013-04-03 15:53:57 +02:00
public class HttpGetter {
2013-07-02 16:17:34 +02:00
/** Value sent in the Accept-Language request header. */
private static final String ACCEPT_LANGUAGE = "en";
/** Value sent in the Pragma request header. */
private static final String PRAGMA_NO_CACHE = "No-cache";
/** Value sent in the Cache-Control request header. */
private static final String CACHE_CONTROL_NO_CACHE = "no-cache";
/** Content-Encoding names (lower case) that the response interceptor below leaves untouched. */
private static final List<String> ALLOWED_CONTENT_ENCODINGS = Arrays.asList("gzip", "x-gzip", "deflate", "identity");
/**
 * Response interceptor that hides an unexpected Content-Encoding.
 * <p>
 * When a non-empty response entity declares at least one codec that is not listed in
 * {@link #ALLOWED_CONTENT_ENCODINGS}, the entity is replaced by a wrapper whose
 * {@code getContentEncoding()} returns {@code null}. Presumably this keeps later processing from
 * failing on an encoding it cannot decode (NOTE(review): confirm against the HttpClient version in use).
 */
private static final HttpResponseInterceptor REMOVE_INCORRECT_CONTENT_ENCODING = new HttpResponseInterceptor() {
	@Override
	public void process(HttpResponse response, HttpContext context) throws HttpException, IOException {
		final HttpEntity original = response.getEntity();
		if (original == null || original.getContentLength() == 0) {
			// nothing to decode
			return;
		}
		final Header declaredEncoding = original.getContentEncoding();
		if (declaredEncoding == null) {
			return;
		}
		for (final HeaderElement element : declaredEncoding.getElements()) {
			final String encodingName = element.getName().toLowerCase(Locale.US);
			if (ALLOWED_CONTENT_ENCODINGS.contains(encodingName)) {
				continue;
			}
			// always wrap the original entity, so repeated matches never stack wrappers
			response.setEntity(new HttpEntityWrapper(original) {
				@Override
				public Header getContentEncoding() {
					return null;
				}
			});
		}
	}
};
2013-04-19 13:24:46 +02:00
/**
 * SSL context shared by every client created through newClient(int). It is initialised with
 * DefaultTrustManager, i.e. without certificate validation. Stays {@code null} when the
 * context could not be set up.
 */
private static SSLContext SSL_CONTEXT = null;
static {
	try {
		// Build into a local first so that a failure in init() never publishes a half-configured context.
		SSLContext trustAllContext = SSLContext.getInstance("TLS");
		trustAllContext.init(new KeyManager[0], new TrustManager[] { new DefaultTrustManager() }, new SecureRandom());
		SSL_CONTEXT = trustAllContext;
	} catch (Exception e) {
		// Pass the exception along so the cause of the failure ends up in the log.
		log.error("Could not configure ssl context", e);
	}
}
2014-08-17 14:16:30 +02:00
/** Value sent in the User-Agent header of every request issued by this getter. */
private String userAgent;

/**
 * Creates a getter whose User-Agent embeds the version reported by the configuration.
 *
 * @param config
 *            application configuration; only {@code getVersion()} is read
 */
public HttpGetter(CommaFeedConfiguration config) {
	userAgent = String.format("CommaFeed/%s (https://www.commafeed.com)", config.getVersion());
}
2013-07-25 09:17:33 +02:00
public HttpResult getBinary(String url, int timeout) throws ClientProtocolException, IOException, NotModifiedException {
return getBinary(url, null, null, timeout);
2013-04-03 15:53:57 +02:00
}
/**
*
* @param url
* the url to retrive
* @param lastModified
* header we got last time we queried that url, or null
* @param eTag
* header we got last time we queried that url, or null
* @return
* @throws ClientProtocolException
* @throws IOException
* @throws NotModifiedException
* if the url hasn't changed since we asked for it last time
*/
2013-07-25 09:17:33 +02:00
public HttpResult getBinary(String url, String lastModified, String eTag, int timeout) throws ClientProtocolException, IOException,
NotModifiedException {
HttpResult result = null;
2013-04-19 09:37:07 +02:00
long start = System.currentTimeMillis();
2013-04-03 15:53:57 +02:00
2013-11-05 15:10:09 +01:00
CloseableHttpClient client = newClient(timeout);
CloseableHttpResponse response = null;
2013-04-03 15:53:57 +02:00
try {
HttpGet httpget = new HttpGet(url);
HttpClientContext context = HttpClientContext.create();
2013-07-02 16:17:34 +02:00
httpget.addHeader(HttpHeaders.ACCEPT_LANGUAGE, ACCEPT_LANGUAGE);
httpget.addHeader(HttpHeaders.PRAGMA, PRAGMA_NO_CACHE);
httpget.addHeader(HttpHeaders.CACHE_CONTROL, CACHE_CONTROL_NO_CACHE);
2014-08-14 16:19:06 +02:00
httpget.addHeader(HttpHeaders.USER_AGENT, userAgent);
if (lastModified != null) {
httpget.addHeader(HttpHeaders.IF_MODIFIED_SINCE, lastModified);
}
if (eTag != null) {
httpget.addHeader(HttpHeaders.IF_NONE_MATCH, eTag);
}
try {
response = client.execute(httpget, context);
int code = response.getStatusLine().getStatusCode();
if (code == HttpStatus.SC_NOT_MODIFIED) {
2013-08-13 12:15:30 +02:00
throw new NotModifiedException("'304 - not modified' http code received");
} else if (code >= 300) {
2013-07-25 09:17:33 +02:00
throw new HttpResponseException(code, "Server returned HTTP error code " + code);
}
} catch (HttpResponseException e) {
if (e.getStatusCode() == HttpStatus.SC_NOT_MODIFIED) {
2013-08-13 12:15:30 +02:00
throw new NotModifiedException("'304 - not modified' http code received");
} else {
throw e;
}
}
2013-07-25 09:17:33 +02:00
Header lastModifiedHeader = response.getFirstHeader(HttpHeaders.LAST_MODIFIED);
String lastModifiedHeaderValue = lastModifiedHeader == null ? null : StringUtils.trimToNull(lastModifiedHeader.getValue());
if (lastModifiedHeaderValue != null && StringUtils.equals(lastModified, lastModifiedHeaderValue)) {
2013-07-03 07:56:52 +02:00
throw new NotModifiedException("lastModifiedHeader is the same");
}
Header eTagHeader = response.getFirstHeader(HttpHeaders.ETAG);
String eTagHeaderValue = eTagHeader == null ? null : StringUtils.trimToNull(eTagHeader.getValue());
if (eTag != null && StringUtils.equals(eTag, eTagHeaderValue)) {
2013-07-03 07:56:52 +02:00
throw new NotModifiedException("eTagHeader is the same");
}
2013-04-19 11:49:22 +02:00
HttpEntity entity = response.getEntity();
byte[] content = null;
2013-07-26 15:34:02 +02:00
String contentType = null;
2013-04-03 15:53:57 +02:00
if (entity != null) {
content = EntityUtils.toByteArray(entity);
2013-07-26 15:34:02 +02:00
if (entity.getContentType() != null) {
contentType = entity.getContentType().getValue();
}
2013-04-03 15:53:57 +02:00
}
2013-11-05 15:10:09 +01:00
HttpUriRequest req = (HttpUriRequest) context.getRequest();
HttpHost host = context.getTargetHost();
String urlAfterRedirect = req.getURI().isAbsolute() ? req.getURI().toString() : host.toURI() + req.getURI();
2013-04-19 13:24:46 +02:00
2013-04-19 09:37:07 +02:00
long duration = System.currentTimeMillis() - start;
result = new HttpResult(content, contentType, lastModifiedHeaderValue, eTagHeaderValue, duration, urlAfterRedirect);
2013-04-03 15:53:57 +02:00
} finally {
2013-11-05 15:10:09 +01:00
IOUtils.closeQuietly(response);
IOUtils.closeQuietly(client);
2013-04-03 15:53:57 +02:00
}
return result;
}
2014-08-19 00:56:21 +02:00
@Getter
@RequiredArgsConstructor
public static class HttpResult {
2014-08-19 00:56:21 +02:00
private final byte[] content;
private final String contentType;
private final String lastModifiedSince;
private final String eTag;
private final long duration;
private final String urlAfterRedirect;
}
2013-11-05 15:10:09 +01:00
public static CloseableHttpClient newClient(int timeout) {
HttpClientBuilder builder = HttpClients.custom();
builder.useSystemProperties();
builder.addInterceptorFirst(REMOVE_INCORRECT_CONTENT_ENCODING);
2013-11-05 15:10:09 +01:00
builder.disableAutomaticRetries();
builder.setSslcontext(SSL_CONTEXT);
builder.setHostnameVerifier(SSLConnectionSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER);
RequestConfig.Builder configBuilder = RequestConfig.custom();
configBuilder.setCookieSpec(CookieSpecs.IGNORE_COOKIES);
configBuilder.setSocketTimeout(timeout);
configBuilder.setConnectTimeout(timeout);
configBuilder.setConnectionRequestTimeout(timeout);
builder.setDefaultRequestConfig(configBuilder.build());
builder.setDefaultConnectionConfig(ConnectionConfig.custom().setCharset(Consts.ISO_8859_1).build());
2013-11-05 15:10:09 +01:00
return builder.build();
2013-04-19 13:24:46 +02:00
}
public static class NotModifiedException extends Exception {
private static final long serialVersionUID = 1L;
2013-07-03 08:09:42 +02:00
public NotModifiedException(String message) {
2013-07-03 07:56:52 +02:00
super(message);
}
2013-04-03 15:53:57 +02:00
}
2013-04-19 13:24:46 +02:00
private static class DefaultTrustManager implements X509TrustManager {
@Override
2013-07-25 09:17:33 +02:00
public void checkClientTrusted(X509Certificate[] arg0, String arg1) throws CertificateException {
2013-04-19 13:24:46 +02:00
}
@Override
2013-07-25 09:17:33 +02:00
public void checkServerTrusted(X509Certificate[] arg0, String arg1) throws CertificateException {
2013-04-19 13:24:46 +02:00
}
@Override
public X509Certificate[] getAcceptedIssuers() {
return null;
}
}
2013-04-03 15:53:57 +02:00
}