package com.commafeed.backend; import java.io.IOException; import java.io.InputStream; import java.net.InetAddress; import java.net.URI; import java.net.UnknownHostException; import java.time.Duration; import java.time.Instant; import java.time.InstantSource; import java.util.ArrayList; import java.util.List; import java.util.Optional; import java.util.concurrent.ExecutionException; import java.util.stream.Stream; import org.apache.commons.lang3.StringUtils; import org.apache.hc.client5.http.DnsResolver; import org.apache.hc.client5.http.SystemDefaultDnsResolver; import org.apache.hc.client5.http.config.ConnectionConfig; import org.apache.hc.client5.http.config.RequestConfig; import org.apache.hc.client5.http.config.TlsConfig; import org.apache.hc.client5.http.impl.classic.CloseableHttpClient; import org.apache.hc.client5.http.impl.classic.HttpClientBuilder; import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManager; import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManagerBuilder; import org.apache.hc.client5.http.io.HttpClientConnectionManager; import org.apache.hc.client5.http.protocol.HttpClientContext; import org.apache.hc.client5.http.protocol.RedirectLocations; import org.apache.hc.client5.http.utils.DateUtils; import org.apache.hc.core5.http.ClassicHttpRequest; import org.apache.hc.core5.http.Header; import org.apache.hc.core5.http.HttpEntity; import org.apache.hc.core5.http.HttpStatus; import org.apache.hc.core5.http.NameValuePair; import org.apache.hc.core5.http.io.support.ClassicRequestBuilder; import org.apache.hc.core5.http.message.BasicHeader; import org.apache.hc.core5.util.TimeValue; import org.apache.hc.core5.util.Timeout; import org.jboss.resteasy.reactive.common.headers.CacheControlDelegate; import com.codahale.metrics.MetricRegistry; import com.commafeed.CommaFeedConfiguration; import com.commafeed.CommaFeedConfiguration.HttpClientCache; import com.commafeed.CommaFeedVersion; import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; import com.google.common.collect.Iterables; import com.google.common.io.ByteStreams; import com.google.common.net.HttpHeaders; import jakarta.inject.Singleton; import jakarta.ws.rs.core.CacheControl; import lombok.Builder; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.Value; import lombok.extern.slf4j.Slf4j; import nl.altindag.ssl.SSLFactory; import nl.altindag.ssl.apache5.util.Apache5SslUtils; /** * Smart HTTP getter: handles gzip, ssl, last modified and etag headers */ @Singleton @Slf4j public class HttpGetter { private final CommaFeedConfiguration config; private final InstantSource instantSource; private final CloseableHttpClient client; private final Cache cache; private final DnsResolver dnsResolver = SystemDefaultDnsResolver.INSTANCE; public HttpGetter(CommaFeedConfiguration config, InstantSource instantSource, CommaFeedVersion version, MetricRegistry metrics) { this.config = config; this.instantSource = instantSource; PoolingHttpClientConnectionManager connectionManager = newConnectionManager(config); String userAgent = config.httpClient() .userAgent() .orElseGet(() -> String.format("CommaFeed/%s (https://github.com/Athou/commafeed)", version.getVersion())); this.client = newClient(connectionManager, userAgent, config.httpClient().idleConnectionsEvictionInterval()); this.cache = newCache(config); metrics.registerGauge(MetricRegistry.name(getClass(), "pool", "max"), () -> connectionManager.getTotalStats().getMax()); metrics.registerGauge(MetricRegistry.name(getClass(), "pool", "size"), () -> connectionManager.getTotalStats().getAvailable() + connectionManager.getTotalStats().getLeased()); metrics.registerGauge(MetricRegistry.name(getClass(), "pool", "leased"), () -> connectionManager.getTotalStats().getLeased()); metrics.registerGauge(MetricRegistry.name(getClass(), "pool", "pending"), () -> connectionManager.getTotalStats().getPending()); metrics.registerGauge(MetricRegistry.name(getClass(), "cache", "size"), () -> cache == null ? 0 : cache.size()); metrics.registerGauge(MetricRegistry.name(getClass(), "cache", "memoryUsage"), () -> cache == null ? 0 : cache.asMap().values().stream().mapToInt(e -> e.content != null ? e.content.length : 0).sum()); } public HttpResult get(String url) throws IOException, NotModifiedException, TooManyRequestsException, SchemeNotAllowedException, HostNotAllowedException { return get(HttpRequest.builder(url).build()); } public HttpResult get(HttpRequest request) throws IOException, NotModifiedException, TooManyRequestsException, SchemeNotAllowedException, HostNotAllowedException { URI uri = URI.create(request.getUrl()); ensureHttpScheme(uri.getScheme()); if (config.httpClient().blockLocalAddresses()) { ensurePublicAddress(uri.getHost()); } final HttpResponse response; if (cache == null) { response = invoke(request); } else { try { response = cache.get(request, () -> invoke(request)); } catch (ExecutionException e) { if (e.getCause() instanceof IOException ioe) { throw ioe; } else { throw new RuntimeException(e); } } } int code = response.getCode(); if (code == HttpStatus.SC_TOO_MANY_REQUESTS || code == HttpStatus.SC_SERVICE_UNAVAILABLE && response.getRetryAfter() != null) { throw new TooManyRequestsException(response.getRetryAfter()); } if (code == HttpStatus.SC_NOT_MODIFIED) { throw new NotModifiedException("'304 - not modified' http code received"); } if (code >= 300) { throw new HttpResponseException(code, "Server returned HTTP error code " + code); } String lastModifiedHeader = response.getLastModifiedHeader(); if (lastModifiedHeader != null && lastModifiedHeader.equals(request.getLastModified())) { throw new NotModifiedException("lastModifiedHeader is the same"); } String eTagHeader = response.getETagHeader(); if (eTagHeader != null && eTagHeader.equals(request.getETag())) { throw new NotModifiedException("eTagHeader is the same"); } Duration validFor = Optional.ofNullable(response.getCacheControl()) .filter(cc -> cc.getMaxAge() >= 0) .map(cc -> Duration.ofSeconds(cc.getMaxAge())) .orElse(Duration.ZERO); return new HttpResult(response.getContent(), response.getContentType(), lastModifiedHeader, eTagHeader, response.getUrlAfterRedirect(), validFor); } private void ensureHttpScheme(String scheme) throws SchemeNotAllowedException { if (!"http".equals(scheme) && !"https".equals(scheme)) { throw new SchemeNotAllowedException(scheme); } } private void ensurePublicAddress(String host) throws HostNotAllowedException, UnknownHostException { if (host == null) { throw new HostNotAllowedException(null); } InetAddress[] addresses = dnsResolver.resolve(host); if (Stream.of(addresses).anyMatch(this::isPrivateAddress)) { throw new HostNotAllowedException(host); } } private boolean isPrivateAddress(InetAddress address) { return address.isSiteLocalAddress() || address.isAnyLocalAddress() || address.isLinkLocalAddress() || address.isLoopbackAddress() || address.isMulticastAddress(); } private HttpResponse invoke(HttpRequest request) throws IOException { log.debug("fetching {}", request.getUrl()); HttpClientContext context = HttpClientContext.create(); context.setRequestConfig(RequestConfig.custom() .setResponseTimeout(Timeout.of(config.httpClient().responseTimeout())) // causes issues with some feeds // see https://github.com/Athou/commafeed/issues/1572 // and https://issues.apache.org/jira/browse/HTTPCLIENT-2344 .setProtocolUpgradeEnabled(false) .build()); return client.execute(request.toClassicHttpRequest(), context, resp -> { byte[] content = resp.getEntity() == null ? null : toByteArray(resp.getEntity(), config.httpClient().maxResponseSize().asLongValue()); int code = resp.getCode(); String lastModifiedHeader = Optional.ofNullable(resp.getFirstHeader(HttpHeaders.LAST_MODIFIED)) .map(NameValuePair::getValue) .map(StringUtils::trimToNull) .orElse(null); String eTagHeader = Optional.ofNullable(resp.getFirstHeader(HttpHeaders.ETAG)) .map(NameValuePair::getValue) .map(StringUtils::trimToNull) .orElse(null); CacheControl cacheControl = Optional.ofNullable(resp.getFirstHeader(HttpHeaders.CACHE_CONTROL)) .map(NameValuePair::getValue) .map(StringUtils::trimToNull) .map(HttpGetter::toCacheControl) .orElse(null); Instant retryAfter = Optional.ofNullable(resp.getFirstHeader(HttpHeaders.RETRY_AFTER)) .map(NameValuePair::getValue) .map(StringUtils::trimToNull) .map(this::toInstant) .orElse(null); String contentType = Optional.ofNullable(resp.getEntity()).map(HttpEntity::getContentType).orElse(null); String urlAfterRedirect = Optional.ofNullable(context.getRedirectLocations()) .map(RedirectLocations::getAll) .map(l -> Iterables.getLast(l, null)) .map(URI::toString) .orElse(request.getUrl()); return new HttpResponse(code, lastModifiedHeader, eTagHeader, cacheControl, retryAfter, content, contentType, urlAfterRedirect); }); } private static CacheControl toCacheControl(String headerValue) { try { return CacheControlDelegate.INSTANCE.fromString(headerValue); } catch (Exception e) { log.debug("Invalid Cache-Control header: {}", headerValue); return null; } } private Instant toInstant(String headerValue) { if (headerValue == null) { return null; } if (StringUtils.isNumeric(headerValue)) { return instantSource.instant().plusSeconds(Long.parseLong(headerValue)); } return DateUtils.parseStandardDate(headerValue); } private static byte[] toByteArray(HttpEntity entity, long maxBytes) throws IOException { if (entity.getContentLength() > maxBytes) { throw new IOException( "Response size (%s bytes) exceeds the maximum allowed size (%s bytes)".formatted(entity.getContentLength(), maxBytes)); } try (InputStream input = entity.getContent()) { if (input == null) { return null; } byte[] bytes = ByteStreams.limit(input, maxBytes).readAllBytes(); if (bytes.length == maxBytes) { throw new IOException("Response size exceeds the maximum allowed size (%s bytes)".formatted(maxBytes)); } return bytes; } } private PoolingHttpClientConnectionManager newConnectionManager(CommaFeedConfiguration config) { SSLFactory sslFactory = SSLFactory.builder().withUnsafeTrustMaterial().withUnsafeHostnameVerifier().build(); int poolSize = config.feedRefresh().httpThreads(); return PoolingHttpClientConnectionManagerBuilder.create() .setTlsSocketStrategy(Apache5SslUtils.toTlsSocketStrategy(sslFactory)) .setDefaultConnectionConfig(ConnectionConfig.custom() .setConnectTimeout(Timeout.of(config.httpClient().connectTimeout())) .setSocketTimeout(Timeout.of(config.httpClient().socketTimeout())) .setTimeToLive(Timeout.of(config.httpClient().connectionTimeToLive())) .build()) .setDefaultTlsConfig(TlsConfig.custom().setHandshakeTimeout(Timeout.of(config.httpClient().sslHandshakeTimeout())).build()) .setMaxConnPerRoute(poolSize) .setMaxConnTotal(poolSize) .setDnsResolver(dnsResolver) .build(); } private static CloseableHttpClient newClient(HttpClientConnectionManager connectionManager, String userAgent, Duration idleConnectionsEvictionInterval) { List
headers = new ArrayList<>(); headers.add(new BasicHeader(HttpHeaders.ACCEPT_LANGUAGE, "en")); headers.add(new BasicHeader(HttpHeaders.PRAGMA, "No-cache")); headers.add(new BasicHeader(HttpHeaders.CACHE_CONTROL, "no-cache")); return HttpClientBuilder.create() .useSystemProperties() .disableAutomaticRetries() .disableCookieManagement() .setUserAgent(userAgent) .setDefaultHeaders(headers) .setConnectionManager(connectionManager) .evictExpiredConnections() .evictIdleConnections(TimeValue.of(idleConnectionsEvictionInterval)) .build(); } private static Cache newCache(CommaFeedConfiguration config) { HttpClientCache cacheConfig = config.httpClient().cache(); if (!cacheConfig.enabled()) { return null; } return CacheBuilder.newBuilder() .weigher((HttpRequest key, HttpResponse value) -> value.getContent() != null ? value.getContent().length : 0) .maximumWeight(cacheConfig.maximumMemorySize().asLongValue()) .expireAfterWrite(cacheConfig.expiration()) .build(); } public static class SchemeNotAllowedException extends Exception { private static final long serialVersionUID = 1L; public SchemeNotAllowedException(String scheme) { super("Scheme not allowed: " + scheme); } } public static class HostNotAllowedException extends Exception { private static final long serialVersionUID = 1L; public HostNotAllowedException(String host) { super("Host not allowed: " + host); } } @Getter public static class NotModifiedException extends Exception { private static final long serialVersionUID = 1L; /** * if the value of this header changed, this is its new value */ private final String newLastModifiedHeader; /** * if the value of this header changed, this is its new value */ private final String newEtagHeader; public NotModifiedException(String message) { this(message, null, null); } public NotModifiedException(String message, String newLastModifiedHeader, String newEtagHeader) { super(message); this.newLastModifiedHeader = newLastModifiedHeader; this.newEtagHeader = newEtagHeader; } } @RequiredArgsConstructor @Getter public static class TooManyRequestsException extends Exception { private static final long serialVersionUID = 1L; private final Instant retryAfter; } @Getter public static class HttpResponseException extends IOException { private static final long serialVersionUID = 1L; private final int code; public HttpResponseException(int code, String message) { super(message); this.code = code; } } @Builder(builderMethodName = "") @EqualsAndHashCode @Getter public static class HttpRequest { private String url; private String lastModified; private String eTag; public static HttpRequestBuilder builder(String url) { return new HttpRequestBuilder().url(url); } public ClassicHttpRequest toClassicHttpRequest() { ClassicHttpRequest req = ClassicRequestBuilder.get(url).build(); if (lastModified != null) { req.addHeader(HttpHeaders.IF_MODIFIED_SINCE, lastModified); } if (eTag != null) { req.addHeader(HttpHeaders.IF_NONE_MATCH, eTag); } return req; } } @Value private static class HttpResponse { int code; String lastModifiedHeader; String eTagHeader; CacheControl cacheControl; Instant retryAfter; byte[] content; String contentType; String urlAfterRedirect; } @Value public static class HttpResult { byte[] content; String contentType; String lastModifiedSince; String eTag; String urlAfterRedirect; Duration validFor; } }