mirror of
https://github.com/Athou/commafeed.git
synced 2026-03-21 21:37:29 +00:00
add http cache to avoid fetching feeds too often (#1431)
This commit is contained in:
@@ -137,6 +137,33 @@ public interface CommaFeedConfiguration {
|
||||
*/
|
||||
@WithDefault("5M")
|
||||
MemorySize maxResponseSize();
|
||||
|
||||
/**
|
||||
* HTTP client cache configuration
|
||||
*/
|
||||
@ConfigDocSection
|
||||
HttpClientCache cache();
|
||||
}
|
||||
|
||||
/**
 * Settings of the HTTP client cache: whether it is enabled, how much memory it may use and how long entries are kept.
 */
interface HttpClientCache {
|
||||
/**
|
||||
* Whether to enable the cache. This cache is used to avoid spamming feeds too often (e.g. when subscribing to a feed for the first
|
||||
* time or when clicking "fetch all my feeds now").
|
||||
*/
|
||||
@WithDefault("true")
|
||||
boolean enabled();
|
||||
|
||||
/**
|
||||
* Maximum amount of memory the cache can use.
|
||||
*/
|
||||
@WithDefault("10M")
|
||||
MemorySize maximumMemorySize();
|
||||
|
||||
/**
|
||||
* Duration after which an entry is removed from the cache.
|
||||
*/
|
||||
@WithDefault("1m")
|
||||
Duration expiration();
|
||||
}
|
||||
|
||||
interface FeedRefresh {
|
||||
|
||||
@@ -7,6 +7,7 @@ import java.time.Duration;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.hc.client5.http.config.ConnectionConfig;
|
||||
@@ -31,14 +32,19 @@ import org.apache.hc.core5.util.Timeout;
|
||||
|
||||
import com.codahale.metrics.MetricRegistry;
|
||||
import com.commafeed.CommaFeedConfiguration;
|
||||
import com.commafeed.CommaFeedConfiguration.HttpClientCache;
|
||||
import com.commafeed.CommaFeedVersion;
|
||||
import com.google.common.cache.Cache;
|
||||
import com.google.common.cache.CacheBuilder;
|
||||
import com.google.common.collect.Iterables;
|
||||
import com.google.common.io.ByteStreams;
|
||||
import com.google.common.net.HttpHeaders;
|
||||
|
||||
import jakarta.inject.Singleton;
|
||||
import lombok.Builder;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.Getter;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.Value;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import nl.altindag.ssl.SSLFactory;
|
||||
import nl.altindag.ssl.apache5.util.Apache5SslUtils;
|
||||
@@ -52,6 +58,7 @@ public class HttpGetter {
|
||||
|
||||
private final CommaFeedConfiguration config;
|
||||
private final CloseableHttpClient client;
|
||||
private final Cache<HttpRequest, HttpResponse> cache;
|
||||
|
||||
public HttpGetter(CommaFeedConfiguration config, CommaFeedVersion version, MetricRegistry metrics) {
|
||||
this.config = config;
|
||||
@@ -62,42 +69,66 @@ public class HttpGetter {
|
||||
.orElseGet(() -> String.format("CommaFeed/%s (https://github.com/Athou/commafeed)", version.getVersion()));
|
||||
|
||||
this.client = newClient(connectionManager, userAgent, config.httpClient().idleConnectionsEvictionInterval());
|
||||
this.cache = newCache(config);
|
||||
|
||||
metrics.registerGauge(MetricRegistry.name(getClass(), "pool", "max"), () -> connectionManager.getTotalStats().getMax());
|
||||
metrics.registerGauge(MetricRegistry.name(getClass(), "pool", "size"),
|
||||
() -> connectionManager.getTotalStats().getAvailable() + connectionManager.getTotalStats().getLeased());
|
||||
metrics.registerGauge(MetricRegistry.name(getClass(), "pool", "leased"), () -> connectionManager.getTotalStats().getLeased());
|
||||
metrics.registerGauge(MetricRegistry.name(getClass(), "pool", "pending"), () -> connectionManager.getTotalStats().getPending());
|
||||
metrics.registerGauge(MetricRegistry.name(getClass(), "cache", "size"), () -> cache == null ? 0 : cache.size());
|
||||
metrics.registerGauge(MetricRegistry.name(getClass(), "cache", "memoryUsage"),
|
||||
() -> cache == null ? 0 : cache.asMap().values().stream().mapToInt(e -> e.content != null ? e.content.length : 0).sum());
|
||||
}
|
||||
|
||||
public HttpResult getBinary(String url) throws IOException, NotModifiedException {
|
||||
return getBinary(url, null, null);
|
||||
public HttpResult get(String url) throws IOException, NotModifiedException {
|
||||
return get(HttpRequest.builder(url).build());
|
||||
}
|
||||
|
||||
/**
|
||||
* @param url
|
||||
* the url to retrieve
|
||||
* @param lastModified
|
||||
* header we got last time we queried that url, or null
|
||||
* @param eTag
|
||||
* header we got last time we queried that url, or null
|
||||
* @throws NotModifiedException
|
||||
* if the url hasn't changed since we asked for it last time
|
||||
*/
|
||||
public HttpResult getBinary(String url, String lastModified, String eTag) throws IOException, NotModifiedException {
|
||||
log.debug("fetching {}", url);
|
||||
public HttpResult get(HttpRequest request) throws IOException, NotModifiedException {
|
||||
final HttpResponse response;
|
||||
if (cache == null) {
|
||||
response = invoke(request);
|
||||
} else {
|
||||
try {
|
||||
response = cache.get(request, () -> invoke(request));
|
||||
} catch (ExecutionException e) {
|
||||
if (e.getCause() instanceof IOException ioe) {
|
||||
throw ioe;
|
||||
} else {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ClassicHttpRequest request = ClassicRequestBuilder.get(url).build();
|
||||
if (lastModified != null) {
|
||||
request.addHeader(HttpHeaders.IF_MODIFIED_SINCE, lastModified);
|
||||
int code = response.getCode();
|
||||
if (code == HttpStatus.SC_NOT_MODIFIED) {
|
||||
throw new NotModifiedException("'304 - not modified' http code received");
|
||||
} else if (code >= 300) {
|
||||
throw new HttpResponseException(code, "Server returned HTTP error code " + code);
|
||||
}
|
||||
if (eTag != null) {
|
||||
request.addHeader(HttpHeaders.IF_NONE_MATCH, eTag);
|
||||
|
||||
String lastModifiedHeader = response.getLastModifiedHeader();
|
||||
if (lastModifiedHeader != null && lastModifiedHeader.equals(request.getLastModified())) {
|
||||
throw new NotModifiedException("lastModifiedHeader is the same");
|
||||
}
|
||||
|
||||
String eTagHeader = response.getETagHeader();
|
||||
if (eTagHeader != null && eTagHeader.equals(request.getETag())) {
|
||||
throw new NotModifiedException("eTagHeader is the same");
|
||||
}
|
||||
|
||||
return new HttpResult(response.getContent(), response.getContentType(), lastModifiedHeader, eTagHeader,
|
||||
response.getUrlAfterRedirect());
|
||||
}
|
||||
|
||||
private HttpResponse invoke(HttpRequest request) throws IOException {
|
||||
log.debug("fetching {}", request.getUrl());
|
||||
|
||||
HttpClientContext context = HttpClientContext.create();
|
||||
context.setRequestConfig(RequestConfig.custom().setResponseTimeout(Timeout.of(config.httpClient().responseTimeout())).build());
|
||||
HttpResponse response = client.execute(request, context, resp -> {
|
||||
|
||||
return client.execute(request.toClassicHttpRequest(), context, resp -> {
|
||||
byte[] content = resp.getEntity() == null ? null
|
||||
: toByteArray(resp.getEntity(), config.httpClient().maxResponseSize().asLongValue());
|
||||
int code = resp.getCode();
|
||||
@@ -115,30 +146,10 @@ public class HttpGetter {
|
||||
.map(RedirectLocations::getAll)
|
||||
.map(l -> Iterables.getLast(l, null))
|
||||
.map(URI::toString)
|
||||
.orElse(url);
|
||||
.orElse(request.getUrl());
|
||||
|
||||
return new HttpResponse(code, lastModifiedHeader, eTagHeader, content, contentType, urlAfterRedirect);
|
||||
});
|
||||
|
||||
int code = response.getCode();
|
||||
if (code == HttpStatus.SC_NOT_MODIFIED) {
|
||||
throw new NotModifiedException("'304 - not modified' http code received");
|
||||
} else if (code >= 300) {
|
||||
throw new HttpResponseException(code, "Server returned HTTP error code " + code);
|
||||
}
|
||||
|
||||
String lastModifiedHeader = response.getLastModifiedHeader();
|
||||
if (lastModifiedHeader != null && lastModifiedHeader.equals(lastModified)) {
|
||||
throw new NotModifiedException("lastModifiedHeader is the same");
|
||||
}
|
||||
|
||||
String eTagHeader = response.getETagHeader();
|
||||
if (eTagHeader != null && eTagHeader.equals(eTag)) {
|
||||
throw new NotModifiedException("eTagHeader is the same");
|
||||
}
|
||||
|
||||
return new HttpResult(response.getContent(), response.getContentType(), lastModifiedHeader, eTagHeader,
|
||||
response.getUrlAfterRedirect());
|
||||
}
|
||||
|
||||
private static byte[] toByteArray(HttpEntity entity, long maxBytes) throws IOException {
|
||||
@@ -197,6 +208,19 @@ public class HttpGetter {
|
||||
.build();
|
||||
}
|
||||
|
||||
private static Cache<HttpRequest, HttpResponse> newCache(CommaFeedConfiguration config) {
|
||||
HttpClientCache cacheConfig = config.httpClient().cache();
|
||||
if (!cacheConfig.enabled()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return CacheBuilder.newBuilder()
|
||||
.weigher((HttpRequest key, HttpResponse value) -> value.getContent() != null ? value.getContent().length : 0)
|
||||
.maximumWeight(cacheConfig.maximumMemorySize().asLongValue())
|
||||
.expireAfterWrite(cacheConfig.expiration())
|
||||
.build();
|
||||
}
|
||||
|
||||
@Getter
|
||||
public static class NotModifiedException extends Exception {
|
||||
private static final long serialVersionUID = 1L;
|
||||
@@ -232,28 +256,49 @@ public class HttpGetter {
|
||||
super(message);
|
||||
this.code = code;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Builder(builderMethodName = "")
|
||||
@EqualsAndHashCode
|
||||
@Getter
|
||||
@RequiredArgsConstructor
|
||||
public static class HttpRequest {
|
||||
private String url;
|
||||
private String lastModified;
|
||||
private String eTag;
|
||||
|
||||
public static HttpRequestBuilder builder(String url) {
|
||||
return new HttpRequestBuilder().url(url);
|
||||
}
|
||||
|
||||
public ClassicHttpRequest toClassicHttpRequest() {
|
||||
ClassicHttpRequest req = ClassicRequestBuilder.get(url).build();
|
||||
if (lastModified != null) {
|
||||
req.addHeader(HttpHeaders.IF_MODIFIED_SINCE, lastModified);
|
||||
}
|
||||
if (eTag != null) {
|
||||
req.addHeader(HttpHeaders.IF_NONE_MATCH, eTag);
|
||||
}
|
||||
return req;
|
||||
}
|
||||
}
|
||||
|
||||
@Value
|
||||
private static class HttpResponse {
|
||||
private final int code;
|
||||
private final String lastModifiedHeader;
|
||||
private final String eTagHeader;
|
||||
private final byte[] content;
|
||||
private final String contentType;
|
||||
private final String urlAfterRedirect;
|
||||
int code;
|
||||
String lastModifiedHeader;
|
||||
String eTagHeader;
|
||||
byte[] content;
|
||||
String contentType;
|
||||
String urlAfterRedirect;
|
||||
}
|
||||
|
||||
@Getter
|
||||
@RequiredArgsConstructor
|
||||
@Value
|
||||
public static class HttpResult {
|
||||
private final byte[] content;
|
||||
private final String contentType;
|
||||
private final String lastModifiedSince;
|
||||
private final String eTag;
|
||||
private final String urlAfterRedirect;
|
||||
byte[] content;
|
||||
String contentType;
|
||||
String lastModifiedSince;
|
||||
String eTag;
|
||||
String urlAfterRedirect;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -69,7 +69,7 @@ public class DefaultFaviconFetcher extends AbstractFaviconFetcher {
|
||||
try {
|
||||
url = FeedUtils.removeTrailingSlash(url) + "/favicon.ico";
|
||||
log.debug("getting root icon at {}", url);
|
||||
HttpResult result = getter.getBinary(url);
|
||||
HttpResult result = getter.get(url);
|
||||
bytes = result.getContent();
|
||||
contentType = result.getContentType();
|
||||
} catch (Exception e) {
|
||||
@@ -87,7 +87,7 @@ public class DefaultFaviconFetcher extends AbstractFaviconFetcher {
|
||||
|
||||
Document doc;
|
||||
try {
|
||||
HttpResult result = getter.getBinary(url);
|
||||
HttpResult result = getter.get(url);
|
||||
doc = Jsoup.parse(new String(result.getContent()), url);
|
||||
} catch (Exception e) {
|
||||
log.debug("Failed to retrieve page to find icon");
|
||||
@@ -113,7 +113,7 @@ public class DefaultFaviconFetcher extends AbstractFaviconFetcher {
|
||||
byte[] bytes;
|
||||
String contentType;
|
||||
try {
|
||||
HttpResult result = getter.getBinary(href);
|
||||
HttpResult result = getter.get(href);
|
||||
bytes = result.getContent();
|
||||
contentType = result.getContentType();
|
||||
} catch (Exception e) {
|
||||
|
||||
@@ -43,7 +43,7 @@ public class FacebookFaviconFetcher extends AbstractFaviconFetcher {
|
||||
try {
|
||||
log.debug("Getting Facebook user's icon, {}", url);
|
||||
|
||||
HttpResult iconResult = getter.getBinary(iconUrl);
|
||||
HttpResult iconResult = getter.get(iconUrl);
|
||||
bytes = iconResult.getContent();
|
||||
contentType = iconResult.getContentType();
|
||||
} catch (Exception e) {
|
||||
|
||||
@@ -78,7 +78,7 @@ public class YoutubeFaviconFetcher extends AbstractFaviconFetcher {
|
||||
return null;
|
||||
}
|
||||
|
||||
HttpResult iconResult = getter.getBinary(thumbnailUrl.asText());
|
||||
HttpResult iconResult = getter.get(thumbnailUrl.asText());
|
||||
bytes = iconResult.getContent();
|
||||
contentType = iconResult.getContentType();
|
||||
} catch (Exception e) {
|
||||
@@ -97,7 +97,7 @@ public class YoutubeFaviconFetcher extends AbstractFaviconFetcher {
|
||||
.queryParam("key", googleAuthKey)
|
||||
.queryParam("forUsername", userId)
|
||||
.build();
|
||||
return getter.getBinary(uri.toString()).getContent();
|
||||
return getter.get(uri.toString()).getContent();
|
||||
}
|
||||
|
||||
private byte[] fetchForChannel(String googleAuthKey, String channelId) throws IOException, NotModifiedException {
|
||||
@@ -106,7 +106,7 @@ public class YoutubeFaviconFetcher extends AbstractFaviconFetcher {
|
||||
.queryParam("key", googleAuthKey)
|
||||
.queryParam("id", channelId)
|
||||
.build();
|
||||
return getter.getBinary(uri.toString()).getContent();
|
||||
return getter.get(uri.toString()).getContent();
|
||||
}
|
||||
|
||||
private byte[] fetchForPlaylist(String googleAuthKey, String playlistId) throws IOException, NotModifiedException {
|
||||
@@ -115,7 +115,7 @@ public class YoutubeFaviconFetcher extends AbstractFaviconFetcher {
|
||||
.queryParam("key", googleAuthKey)
|
||||
.queryParam("id", playlistId)
|
||||
.build();
|
||||
byte[] playlistBytes = getter.getBinary(uri.toString()).getContent();
|
||||
byte[] playlistBytes = getter.get(uri.toString()).getContent();
|
||||
|
||||
JsonNode channelId = objectMapper.readTree(playlistBytes).at(PLAYLIST_CHANNEL_ID);
|
||||
if (channelId.isMissingNode()) {
|
||||
|
||||
@@ -9,6 +9,7 @@ import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import com.commafeed.backend.Digests;
|
||||
import com.commafeed.backend.HttpGetter;
|
||||
import com.commafeed.backend.HttpGetter.HttpRequest;
|
||||
import com.commafeed.backend.HttpGetter.HttpResult;
|
||||
import com.commafeed.backend.HttpGetter.NotModifiedException;
|
||||
import com.commafeed.backend.feed.parser.FeedParser;
|
||||
@@ -41,7 +42,7 @@ public class FeedFetcher {
|
||||
Instant lastPublishedDate, String lastContentHash) throws FeedException, IOException, NotModifiedException {
|
||||
log.debug("Fetching feed {}", feedUrl);
|
||||
|
||||
HttpResult result = getter.getBinary(feedUrl, lastModified, eTag);
|
||||
HttpResult result = getter.get(HttpRequest.builder(feedUrl).lastModified(lastModified).eTag(eTag).build());
|
||||
byte[] content = result.getContent();
|
||||
|
||||
FeedParserResult parserResult;
|
||||
@@ -53,7 +54,7 @@ public class FeedFetcher {
|
||||
if (org.apache.commons.lang3.StringUtils.isNotBlank(extractedUrl)) {
|
||||
feedUrl = extractedUrl;
|
||||
|
||||
result = getter.getBinary(extractedUrl, lastModified, eTag);
|
||||
result = getter.get(HttpRequest.builder(extractedUrl).lastModified(lastModified).eTag(eTag).build());
|
||||
content = result.getContent();
|
||||
parserResult = parser.parse(result.getUrlAfterRedirect(), content);
|
||||
} else {
|
||||
|
||||
@@ -40,9 +40,14 @@ public class FeedSubscriptionService {
|
||||
this.feedRefreshEngine = feedRefreshEngine;
|
||||
this.config = config;
|
||||
|
||||
// automatically refresh feeds after they are subscribed to
|
||||
// we need to use this hook because the feed needs to have been persisted because the queue processing is asynchronous
|
||||
feedSubscriptionDAO.onPostCommitInsert(sub -> feedRefreshEngine.refreshImmediately(sub.getFeed()));
|
||||
// automatically refresh new feeds after they are subscribed to
|
||||
// we need to use this hook because the feed needs to have been persisted before being processed by the feed engine
|
||||
feedSubscriptionDAO.onPostCommitInsert(sub -> {
|
||||
Feed feed = sub.getFeed();
|
||||
if (feed.getDisabledUntil() == null || feed.getDisabledUntil().isBefore(Instant.now())) {
|
||||
feedRefreshEngine.refreshImmediately(feed);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public long subscribe(User user, String url, String title) {
|
||||
|
||||
@@ -77,7 +77,7 @@ public class ServerREST {
|
||||
|
||||
url = FeedUtils.imageProxyDecoder(url);
|
||||
try {
|
||||
HttpResult result = httpGetter.getBinary(url);
|
||||
HttpResult result = httpGetter.get(url);
|
||||
return Response.ok(result.getContent()).build();
|
||||
} catch (Exception e) {
|
||||
return Response.status(Status.SERVICE_UNAVAILABLE).entity(e.getMessage()).build();
|
||||
|
||||
Reference in New Issue
Block a user