on fetch error and not under heavy load, don't increase refresh interval exponentially

This commit is contained in:
Athou
2023-01-14 22:04:09 +01:00
parent f7d34983e0
commit 2b4ff4a8a5
3 changed files with 91 additions and 69 deletions

View File

@@ -0,0 +1,84 @@
package com.commafeed.backend.feed;
import java.util.Date;
import javax.inject.Inject;
import javax.inject.Singleton;
import org.apache.commons.lang3.time.DateUtils;
import com.commafeed.CommaFeedConfiguration;
import com.commafeed.backend.model.Feed;
@Singleton
public class FeedRefreshIntervalCalculator {
private boolean heavyLoad;
private int refreshIntervalMinutes;
@Inject
public FeedRefreshIntervalCalculator(CommaFeedConfiguration config) {
this.heavyLoad = config.getApplicationSettings().getHeavyLoad();
this.refreshIntervalMinutes = config.getApplicationSettings().getRefreshIntervalMinutes();
}
public Date onFetchSuccess(FetchedFeed fetchedFeed) {
Date defaultRefreshInterval = getDefaultRefreshInterval();
return heavyLoad ? computeRefreshIntervalForHeavyLoad(fetchedFeed.getFeed(), defaultRefreshInterval) : defaultRefreshInterval;
}
public Date onFeedNotModified(Feed feed) {
Date defaultRefreshInterval = getDefaultRefreshInterval();
return heavyLoad ? computeRefreshIntervalForHeavyLoad(feed, defaultRefreshInterval) : defaultRefreshInterval;
}
public Date onFetchError(Feed feed) {
int errorCount = feed.getErrorCount();
int retriesBeforeDisable = 3;
if (errorCount < retriesBeforeDisable || !heavyLoad) {
return getDefaultRefreshInterval();
}
int disabledHours = Math.min(24 * 7, errorCount - retriesBeforeDisable + 1);
return DateUtils.addHours(new Date(), disabledHours);
}
private Date getDefaultRefreshInterval() {
return DateUtils.addMinutes(new Date(), refreshIntervalMinutes);
}
private Date computeRefreshIntervalForHeavyLoad(Feed feed, Date defaultRefreshInterval) {
Date now = new Date();
Date publishedDate = feed.getLastEntryDate();
Long averageEntryInterval = feed.getAverageEntryInterval();
if (publishedDate == null) {
// feed with no entries, recheck in 24 hours
return DateUtils.addHours(now, 24);
} else if (publishedDate.before(DateUtils.addMonths(now, -1))) {
// older than a month, recheck in 24 hours
return DateUtils.addHours(now, 24);
} else if (publishedDate.before(DateUtils.addDays(now, -14))) {
// older than two weeks, recheck in 12 hours
return DateUtils.addHours(now, 12);
} else if (publishedDate.before(DateUtils.addDays(now, -7))) {
// older than a week, recheck in 6 hours
return DateUtils.addHours(now, 6);
} else if (averageEntryInterval != null) {
// use average time between entries to decide when to refresh next, divided by factor
int factor = 2;
// not more than 6 hours
long date = Math.min(DateUtils.addHours(now, 6).getTime(), now.getTime() + averageEntryInterval / factor);
// not less than default refresh interval
date = Math.max(defaultRefreshInterval.getTime(), date);
return new Date(date);
} else {
// unknown case, recheck in 24 hours
return DateUtils.addHours(now, 24);
}
}
}

View File

@@ -1,6 +1,5 @@
package com.commafeed.backend.feed;
import java.util.Date;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
@@ -10,7 +9,6 @@ import javax.inject.Singleton;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.DateUtils;
import com.codahale.metrics.MetricRegistry;
import com.commafeed.CommaFeedConfiguration;
@@ -31,15 +29,17 @@ import lombok.extern.slf4j.Slf4j;
public class FeedRefreshWorker implements Managed {
private final FeedRefreshUpdater feedRefreshUpdater;
private final FeedRefreshIntervalCalculator refreshIntervalCalculator;
private final FeedFetcher fetcher;
private final FeedQueues queues;
private final CommaFeedConfiguration config;
private final FeedRefreshExecutor pool;
@Inject
public FeedRefreshWorker(FeedRefreshUpdater feedRefreshUpdater, FeedFetcher fetcher, FeedQueues queues, CommaFeedConfiguration config,
MetricRegistry metrics) {
public FeedRefreshWorker(FeedRefreshUpdater feedRefreshUpdater, FeedRefreshIntervalCalculator refreshIntervalCalculator,
FeedFetcher fetcher, FeedQueues queues, CommaFeedConfiguration config, MetricRegistry metrics) {
this.feedRefreshUpdater = feedRefreshUpdater;
this.refreshIntervalCalculator = refreshIntervalCalculator;
this.fetcher = fetcher;
this.config = config;
this.queues = queues;
@@ -62,8 +62,6 @@ public class FeedRefreshWorker implements Managed {
private void update(FeedRefreshContext context) {
Feed feed = context.getFeed();
int refreshInterval = config.getApplicationSettings().getRefreshIntervalMinutes();
Date disabledUntil = DateUtils.addMinutes(new Date(), refreshInterval);
try {
String url = Optional.ofNullable(feed.getUrlAfterRedirect()).orElse(feed.getUrl());
FetchedFeed fetchedFeed = fetcher.fetch(url, false, feed.getLastModifiedHeader(), feed.getEtagHeader(),
@@ -76,10 +74,6 @@ public class FeedRefreshWorker implements Managed {
entries = entries.stream().limit(maxFeedCapacity).collect(Collectors.toList());
}
if (config.getApplicationSettings().getHeavyLoad()) {
disabledUntil = FeedUtils.buildDisabledUntil(fetchedFeed.getFeed().getLastEntryDate(),
fetchedFeed.getFeed().getAverageEntryInterval(), disabledUntil);
}
String urlAfterRedirect = fetchedFeed.getUrlAfterRedirect();
if (StringUtils.equals(url, urlAfterRedirect)) {
urlAfterRedirect = null;
@@ -95,7 +89,7 @@ public class FeedRefreshWorker implements Managed {
feed.setErrorCount(0);
feed.setMessage(null);
feed.setDisabledUntil(disabledUntil);
feed.setDisabledUntil(refreshIntervalCalculator.onFetchSuccess(fetchedFeed));
handlePubSub(feed, fetchedFeed.getFeed());
context.setEntries(entries);
@@ -104,12 +98,9 @@ public class FeedRefreshWorker implements Managed {
} catch (NotModifiedException e) {
log.debug("Feed not modified : {} - {}", feed.getUrl(), e.getMessage());
if (config.getApplicationSettings().getHeavyLoad()) {
disabledUntil = FeedUtils.buildDisabledUntil(feed.getLastEntryDate(), feed.getAverageEntryInterval(), disabledUntil);
}
feed.setErrorCount(0);
feed.setMessage(e.getMessage());
feed.setDisabledUntil(disabledUntil);
feed.setDisabledUntil(refreshIntervalCalculator.onFeedNotModified(feed));
queues.giveBack(feed);
} catch (Exception e) {
@@ -118,7 +109,7 @@ public class FeedRefreshWorker implements Managed {
feed.setErrorCount(feed.getErrorCount() + 1);
feed.setMessage(message);
feed.setDisabledUntil(FeedUtils.buildDisabledUntil(feed.getErrorCount()));
feed.setDisabledUntil(refreshIntervalCalculator.onFetchError(feed));
queues.giveBack(feed);
}

View File

@@ -8,7 +8,6 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
@@ -20,7 +19,6 @@ import org.ahocorasick.trie.Trie.TrieBuilder;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.DateUtils;
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
@@ -359,57 +357,6 @@ public class FeedUtils {
return sb.toString();
}
/**
* When there was an error fetching the feed
*
*/
public static Date buildDisabledUntil(int errorCount) {
Date now = new Date();
int retriesBeforeDisable = 3;
if (errorCount >= retriesBeforeDisable) {
int disabledHours = errorCount - retriesBeforeDisable + 1;
disabledHours = Math.min(24 * 7, disabledHours);
return DateUtils.addHours(now, disabledHours);
}
return now;
}
/**
* When the feed was refreshed successfully
*/
public static Date buildDisabledUntil(Date publishedDate, Long averageEntryInterval, Date defaultRefreshInterval) {
Date now = new Date();
if (publishedDate == null) {
// feed with no entries, recheck in 24 hours
return DateUtils.addHours(now, 24);
} else if (publishedDate.before(DateUtils.addMonths(now, -1))) {
// older than a month, recheck in 24 hours
return DateUtils.addHours(now, 24);
} else if (publishedDate.before(DateUtils.addDays(now, -14))) {
// older than two weeks, recheck in 12 hours
return DateUtils.addHours(now, 12);
} else if (publishedDate.before(DateUtils.addDays(now, -7))) {
// older than a week, recheck in 6 hours
return DateUtils.addHours(now, 6);
} else if (averageEntryInterval != null) {
// use average time between entries to decide when to refresh next, divided by factor
int factor = 2;
// not more than 6 hours
long date = Math.min(DateUtils.addHours(now, 6).getTime(), now.getTime() + averageEntryInterval / factor);
// not less than default refresh interval
date = Math.max(defaultRefreshInterval.getTime(), date);
return new Date(date);
} else {
// unknown case, recheck in 24 hours
return DateUtils.addHours(now, 24);
}
}
public static Long averageTimeBetweenEntries(List<FeedEntry> entries) {
if (entries.isEmpty() || entries.size() == 1) {
return null;