Files
Athou_commafeed/commafeed-server/src/main/java/com/commafeed/backend/HttpGetter.java

260 lines
9.8 KiB
Java
Raw Normal View History

2013-04-03 15:53:57 +02:00
package com.commafeed.backend;
2013-04-11 12:49:54 +02:00
import java.io.IOException;
import java.io.InputStream;
2023-12-25 19:41:14 +01:00
import java.net.URI;
2024-08-16 21:12:54 +02:00
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
2013-04-19 13:24:46 +02:00
2014-10-28 16:36:09 +01:00
import org.apache.commons.lang3.StringUtils;
2023-12-25 19:41:14 +01:00
import org.apache.hc.client5.http.config.ConnectionConfig;
import org.apache.hc.client5.http.config.RequestConfig;
2024-08-16 14:40:16 +02:00
import org.apache.hc.client5.http.config.TlsConfig;
2023-12-25 19:41:14 +01:00
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.HttpClientBuilder;
import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManager;
import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManagerBuilder;
import org.apache.hc.client5.http.io.HttpClientConnectionManager;
2023-12-25 19:41:14 +01:00
import org.apache.hc.client5.http.protocol.HttpClientContext;
import org.apache.hc.client5.http.protocol.RedirectLocations;
import org.apache.hc.core5.http.ClassicHttpRequest;
import org.apache.hc.core5.http.Header;
import org.apache.hc.core5.http.HttpEntity;
2024-08-07 08:10:14 +02:00
import org.apache.hc.core5.http.HttpStatus;
2023-12-25 19:41:14 +01:00
import org.apache.hc.core5.http.NameValuePair;
import org.apache.hc.core5.http.io.support.ClassicRequestBuilder;
import org.apache.hc.core5.http.message.BasicHeader;
import org.apache.hc.core5.util.TimeValue;
import org.apache.hc.core5.util.Timeout;
2013-04-03 15:53:57 +02:00
import com.codahale.metrics.MetricRegistry;
import com.commafeed.CommaFeedConfiguration;
2024-08-07 08:10:14 +02:00
import com.commafeed.CommaFeedVersion;
import com.google.common.collect.Iterables;
import com.google.common.io.ByteStreams;
import com.google.common.net.HttpHeaders;
2023-12-17 14:11:15 +01:00
import jakarta.inject.Singleton;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import nl.altindag.ssl.SSLFactory;
2023-12-25 19:41:14 +01:00
import nl.altindag.ssl.apache5.util.Apache5SslUtils;
2013-07-26 16:00:02 +02:00
/**
2013-08-13 12:15:30 +02:00
* Smart HTTP getter: handles gzip, ssl, last modified and etag headers
2013-07-26 16:00:02 +02:00
*/
2014-08-17 14:16:30 +02:00
@Singleton
@Slf4j
public class HttpGetter {
2024-08-16 21:12:54 +02:00
// application configuration; read again on every request for the response timeout and the maximum response size
private final CommaFeedConfiguration config;
// http client built once in the constructor and used by getBinary for every request
private final CloseableHttpClient client;
2024-08-07 08:10:14 +02:00
/**
 * Builds the pooled http client used to fetch urls and registers gauges describing the state of its connection pool.
 *
 * @param config
 *            application configuration, source of the http client settings
 * @param version
 *            application version, only used to build the default user agent
 * @param metrics
 *            registry that receives the connection pool gauges
 */
public HttpGetter(CommaFeedConfiguration config, CommaFeedVersion version, MetricRegistry metrics) {
    this.config = config;

    PoolingHttpClientConnectionManager pool = newConnectionManager(config);

    // a configured user agent wins, otherwise fall back to one that advertises the running version
    String agent = config.httpClient()
            .userAgent()
            .orElseGet(() -> String.format("CommaFeed/%s (https://github.com/Athou/commafeed)", version.getVersion()));
    Duration evictionInterval = config.httpClient().idleConnectionsEvictionInterval();
    this.client = newClient(pool, agent, evictionInterval);

    // gauges are evaluated lazily, each read takes a fresh snapshot of the pool statistics
    Class<?> owner = getClass();
    metrics.registerGauge(MetricRegistry.name(owner, "pool", "max"), () -> pool.getTotalStats().getMax());
    metrics.registerGauge(MetricRegistry.name(owner, "pool", "size"),
            () -> pool.getTotalStats().getAvailable() + pool.getTotalStats().getLeased());
    metrics.registerGauge(MetricRegistry.name(owner, "pool", "leased"), () -> pool.getTotalStats().getLeased());
    metrics.registerGauge(MetricRegistry.name(owner, "pool", "pending"), () -> pool.getTotalStats().getPending());
}
2024-08-16 21:12:54 +02:00
public HttpResult getBinary(String url) throws IOException, NotModifiedException {
return getBinary(url, null, null);
2013-04-03 15:53:57 +02:00
}
/**
 * @param url
 *            the url to retrieve
 * @param lastModified
 *            value of the Last-Modified header we got the last time we queried that url, or null
 * @param eTag
 *            value of the ETag header we got the last time we queried that url, or null
 * @throws NotModifiedException
 *             if the url hasn't changed since we asked for it last time
 */
2024-08-16 21:12:54 +02:00
public HttpResult getBinary(String url, String lastModified, String eTag) throws IOException, NotModifiedException {
log.debug("fetching {}", url);
2013-04-03 15:53:57 +02:00
2023-12-25 19:41:14 +01:00
ClassicHttpRequest request = ClassicRequestBuilder.get(url).build();
if (lastModified != null) {
request.addHeader(HttpHeaders.IF_MODIFIED_SINCE, lastModified);
}
if (eTag != null) {
request.addHeader(HttpHeaders.IF_NONE_MATCH, eTag);
}
2013-11-05 15:10:09 +01:00
HttpClientContext context = HttpClientContext.create();
2024-08-16 21:12:54 +02:00
context.setRequestConfig(RequestConfig.custom().setResponseTimeout(Timeout.of(config.httpClient().responseTimeout())).build());
2023-12-25 19:41:14 +01:00
HttpResponse response = client.execute(request, context, resp -> {
2024-08-16 21:12:54 +02:00
byte[] content = resp.getEntity() == null ? null
: toByteArray(resp.getEntity(), config.httpClient().maxResponseSize().asLongValue());
2023-12-25 19:41:14 +01:00
int code = resp.getCode();
String lastModifiedHeader = Optional.ofNullable(resp.getFirstHeader(HttpHeaders.LAST_MODIFIED))
.map(NameValuePair::getValue)
.map(StringUtils::trimToNull)
.orElse(null);
2023-12-25 19:41:14 +01:00
String eTagHeader = Optional.ofNullable(resp.getFirstHeader(HttpHeaders.ETAG))
.map(NameValuePair::getValue)
.map(StringUtils::trimToNull)
.orElse(null);
2023-12-25 19:41:14 +01:00
String contentType = Optional.ofNullable(resp.getEntity()).map(HttpEntity::getContentType).orElse(null);
String urlAfterRedirect = Optional.ofNullable(context.getRedirectLocations())
.map(RedirectLocations::getAll)
.map(l -> Iterables.getLast(l, null))
.map(URI::toString)
.orElse(url);
return new HttpResponse(code, lastModifiedHeader, eTagHeader, content, contentType, urlAfterRedirect);
});
int code = response.getCode();
2024-08-07 08:10:14 +02:00
if (code == HttpStatus.SC_NOT_MODIFIED) {
2023-12-25 19:41:14 +01:00
throw new NotModifiedException("'304 - not modified' http code received");
} else if (code >= 300) {
throw new HttpResponseException(code, "Server returned HTTP error code " + code);
}
2023-12-25 19:41:14 +01:00
String lastModifiedHeader = response.getLastModifiedHeader();
if (lastModifiedHeader != null && lastModifiedHeader.equals(lastModified)) {
throw new NotModifiedException("lastModifiedHeader is the same");
}
String eTagHeader = response.getETagHeader();
if (eTagHeader != null && eTagHeader.equals(eTag)) {
throw new NotModifiedException("eTagHeader is the same");
}
2024-08-16 21:12:54 +02:00
return new HttpResult(response.getContent(), response.getContentType(), lastModifiedHeader, eTagHeader,
2023-12-25 19:41:14 +01:00
response.getUrlAfterRedirect());
}
2013-11-05 15:10:09 +01:00
/**
 * Reads the whole body of an entity into memory, refusing bodies larger than the given limit.
 *
 * @param entity
 *            the entity to read
 * @param maxBytes
 *            the maximum number of bytes a body may have; a body of exactly this size is accepted
 * @return the body, or null if the entity has no content stream
 * @throws IOException
 *             if the declared or the actual size of the body exceeds maxBytes, or if reading fails
 */
private static byte[] toByteArray(HttpEntity entity, long maxBytes) throws IOException {
    // fail fast when the server announces a size that is already too large
    long declaredLength = entity.getContentLength();
    if (declaredLength > maxBytes) {
        throw new IOException(
                "Response size (%s bytes) exceeds the maximum allowed size (%s bytes)".formatted(declaredLength, maxBytes));
    }

    try (InputStream input = entity.getContent()) {
        if (input == null) {
            return null;
        }

        // read at most one byte past the limit: this is enough to tell a body of exactly maxBytes (allowed, consistent
        // with the declared-length check above) apart from a longer one, without buffering an unbounded response
        long readLimit = maxBytes == Long.MAX_VALUE ? maxBytes : maxBytes + 1;
        byte[] bytes = ByteStreams.limit(input, readLimit).readAllBytes();
        if (bytes.length > maxBytes) {
            throw new IOException("Response size exceeds the maximum allowed size (%s bytes)".formatted(maxBytes));
        }
        return bytes;
    }
}
2024-08-16 21:12:54 +02:00
private static PoolingHttpClientConnectionManager newConnectionManager(CommaFeedConfiguration config) {
SSLFactory sslFactory = SSLFactory.builder().withUnsafeTrustMaterial().withUnsafeHostnameVerifier().build();
2024-08-16 21:12:54 +02:00
int poolSize = config.feedRefresh().httpThreads();
return PoolingHttpClientConnectionManagerBuilder.create()
2023-12-25 19:41:14 +01:00
.setSSLSocketFactory(Apache5SslUtils.toSocketFactory(sslFactory))
2024-08-16 21:12:54 +02:00
.setDefaultConnectionConfig(ConnectionConfig.custom()
.setConnectTimeout(Timeout.of(config.httpClient().connectTimeout()))
.setSocketTimeout(Timeout.of(config.httpClient().socketTimeout()))
.setTimeToLive(Timeout.of(config.httpClient().connectionTimeToLive()))
.build())
.setDefaultTlsConfig(TlsConfig.custom().setHandshakeTimeout(Timeout.of(config.httpClient().sslHandshakeTimeout())).build())
2023-12-25 19:41:14 +01:00
.setMaxConnPerRoute(poolSize)
.setMaxConnTotal(poolSize)
.build();
}
2024-08-16 21:12:54 +02:00
private static CloseableHttpClient newClient(HttpClientConnectionManager connectionManager, String userAgent,
Duration idleConnectionsEvictionInterval) {
List<Header> headers = new ArrayList<>();
headers.add(new BasicHeader(HttpHeaders.ACCEPT_LANGUAGE, "en"));
headers.add(new BasicHeader(HttpHeaders.PRAGMA, "No-cache"));
headers.add(new BasicHeader(HttpHeaders.CACHE_CONTROL, "no-cache"));
return HttpClientBuilder.create()
.useSystemProperties()
.disableAutomaticRetries()
.disableCookieManagement()
.setUserAgent(userAgent)
.setDefaultHeaders(headers)
2023-12-25 19:41:14 +01:00
.setConnectionManager(connectionManager)
2024-08-05 08:41:12 +02:00
.evictExpiredConnections()
2024-08-16 21:12:54 +02:00
.evictIdleConnections(TimeValue.of(idleConnectionsEvictionInterval))
.build();
2013-04-19 13:24:46 +02:00
}
@Getter
public static class NotModifiedException extends Exception {
private static final long serialVersionUID = 1L;
2013-07-03 08:09:42 +02:00
/**
* if the value of this header changed, this is its new value
*/
private final String newLastModifiedHeader;
/**
* if the value of this header changed, this is its new value
*/
private final String newEtagHeader;
2013-07-03 08:09:42 +02:00
public NotModifiedException(String message) {
this(message, null, null);
}
public NotModifiedException(String message, String newLastModifiedHeader, String newEtagHeader) {
2013-07-03 07:56:52 +02:00
super(message);
this.newLastModifiedHeader = newLastModifiedHeader;
this.newEtagHeader = newEtagHeader;
2013-07-03 07:56:52 +02:00
}
}
@Getter
public static class HttpResponseException extends IOException {
private static final long serialVersionUID = 1L;
private final int code;
public HttpResponseException(int code, String message) {
super(message);
this.code = code;
}
2013-04-03 15:53:57 +02:00
}
2013-04-19 13:24:46 +02:00
2023-12-25 19:41:14 +01:00
/**
 * Internal snapshot of a response, built inside the response handler of getBinary.
 *
 * The constructor is generated from the fields and is called positionally in getBinary, so the declaration order of the
 * fields must not change.
 */
@Getter
@RequiredArgsConstructor
private static class HttpResponse {
// http status code of the response
private final int code;
// trimmed value of the Last-Modified response header, null if absent or blank
private final String lastModifiedHeader;
// trimmed value of the ETag response header, null if absent or blank
private final String eTagHeader;
// response body, null if the response has no entity
private final byte[] content;
// content type reported by the response entity, null if the response has no entity
private final String contentType;
// last redirect location that was followed, or the requested url if there was none
private final String urlAfterRedirect;
}
@Getter
@RequiredArgsConstructor
public static class HttpResult {
private final byte[] content;
private final String contentType;
private final String lastModifiedSince;
private final String eTag;
private final String urlAfterRedirect;
2013-04-19 13:24:46 +02:00
}
2018-02-06 15:17:37 +01:00
2013-04-03 15:53:57 +02:00
}