From 2b4ff4a8a5019c81517696b9d615bc5755a1668e Mon Sep 17 00:00:00 2001 From: Athou Date: Sat, 14 Jan 2023 22:04:09 +0100 Subject: [PATCH] on fetch error and not under heavy load, don't increase refresh interval exponentially --- .../feed/FeedRefreshIntervalCalculator.java | 84 +++++++++++++++++++ .../backend/feed/FeedRefreshWorker.java | 23 ++--- .../com/commafeed/backend/feed/FeedUtils.java | 53 ------------ 3 files changed, 91 insertions(+), 69 deletions(-) create mode 100644 commafeed-server/src/main/java/com/commafeed/backend/feed/FeedRefreshIntervalCalculator.java diff --git a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedRefreshIntervalCalculator.java b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedRefreshIntervalCalculator.java new file mode 100644 index 00000000..f630722e --- /dev/null +++ b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedRefreshIntervalCalculator.java @@ -0,0 +1,84 @@ +package com.commafeed.backend.feed; + +import java.util.Date; + +import javax.inject.Inject; +import javax.inject.Singleton; + +import org.apache.commons.lang3.time.DateUtils; + +import com.commafeed.CommaFeedConfiguration; +import com.commafeed.backend.model.Feed; + +@Singleton +public class FeedRefreshIntervalCalculator { + + private boolean heavyLoad; + private int refreshIntervalMinutes; + + @Inject + public FeedRefreshIntervalCalculator(CommaFeedConfiguration config) { + this.heavyLoad = config.getApplicationSettings().getHeavyLoad(); + this.refreshIntervalMinutes = config.getApplicationSettings().getRefreshIntervalMinutes(); + } + + public Date onFetchSuccess(FetchedFeed fetchedFeed) { + Date defaultRefreshInterval = getDefaultRefreshInterval(); + return heavyLoad ? computeRefreshIntervalForHeavyLoad(fetchedFeed.getFeed(), defaultRefreshInterval) : defaultRefreshInterval; + } + + public Date onFeedNotModified(Feed feed) { + Date defaultRefreshInterval = getDefaultRefreshInterval(); + return heavyLoad ? computeRefreshIntervalForHeavyLoad(feed, defaultRefreshInterval) : defaultRefreshInterval; + } + + public Date onFetchError(Feed feed) { + int errorCount = feed.getErrorCount(); + int retriesBeforeDisable = 3; + if (errorCount < retriesBeforeDisable || !heavyLoad) { + return getDefaultRefreshInterval(); + } + + int disabledHours = Math.min(24 * 7, errorCount - retriesBeforeDisable + 1); + return DateUtils.addHours(new Date(), disabledHours); + } + + private Date getDefaultRefreshInterval() { + return DateUtils.addMinutes(new Date(), refreshIntervalMinutes); + } + + private Date computeRefreshIntervalForHeavyLoad(Feed feed, Date defaultRefreshInterval) { + Date now = new Date(); + Date publishedDate = feed.getLastEntryDate(); + Long averageEntryInterval = feed.getAverageEntryInterval(); + + if (publishedDate == null) { + // feed with no entries, recheck in 24 hours + return DateUtils.addHours(now, 24); + } else if (publishedDate.before(DateUtils.addMonths(now, -1))) { + // older than a month, recheck in 24 hours + return DateUtils.addHours(now, 24); + } else if (publishedDate.before(DateUtils.addDays(now, -14))) { + // older than two weeks, recheck in 12 hours + return DateUtils.addHours(now, 12); + } else if (publishedDate.before(DateUtils.addDays(now, -7))) { + // older than a week, recheck in 6 hours + return DateUtils.addHours(now, 6); + } else if (averageEntryInterval != null) { + // use average time between entries to decide when to refresh next, divided by factor + int factor = 2; + + // not more than 6 hours + long date = Math.min(DateUtils.addHours(now, 6).getTime(), now.getTime() + averageEntryInterval / factor); + + // not less than default refresh interval + date = Math.max(defaultRefreshInterval.getTime(), date); + + return new Date(date); + } else { + // unknown case, recheck in 24 hours + return DateUtils.addHours(now, 24); + } + } + +} diff --git a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedRefreshWorker.java b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedRefreshWorker.java index 8c83e51e..643fff75 100644 --- a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedRefreshWorker.java +++ b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedRefreshWorker.java @@ -1,6 +1,5 @@ package com.commafeed.backend.feed; -import java.util.Date; import java.util.List; import java.util.Optional; import java.util.stream.Collectors; @@ -10,7 +9,6 @@ import javax.inject.Singleton; import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.time.DateUtils; import com.codahale.metrics.MetricRegistry; import com.commafeed.CommaFeedConfiguration; @@ -31,15 +29,17 @@ import lombok.extern.slf4j.Slf4j; public class FeedRefreshWorker implements Managed { private final FeedRefreshUpdater feedRefreshUpdater; + private final FeedRefreshIntervalCalculator refreshIntervalCalculator; private final FeedFetcher fetcher; private final FeedQueues queues; private final CommaFeedConfiguration config; private final FeedRefreshExecutor pool; @Inject - public FeedRefreshWorker(FeedRefreshUpdater feedRefreshUpdater, FeedFetcher fetcher, FeedQueues queues, CommaFeedConfiguration config, - MetricRegistry metrics) { + public FeedRefreshWorker(FeedRefreshUpdater feedRefreshUpdater, FeedRefreshIntervalCalculator refreshIntervalCalculator, + FeedFetcher fetcher, FeedQueues queues, CommaFeedConfiguration config, MetricRegistry metrics) { this.feedRefreshUpdater = feedRefreshUpdater; + this.refreshIntervalCalculator = refreshIntervalCalculator; this.fetcher = fetcher; this.config = config; this.queues = queues; @@ -62,8 +62,6 @@ public class FeedRefreshWorker implements Managed { private void update(FeedRefreshContext context) { Feed feed = context.getFeed(); - int refreshInterval = config.getApplicationSettings().getRefreshIntervalMinutes(); - Date disabledUntil = DateUtils.addMinutes(new Date(), refreshInterval); try { String url = Optional.ofNullable(feed.getUrlAfterRedirect()).orElse(feed.getUrl()); FetchedFeed fetchedFeed = fetcher.fetch(url, false, feed.getLastModifiedHeader(), feed.getEtagHeader(), @@ -76,10 +74,6 @@ public class FeedRefreshWorker implements Managed { entries = entries.stream().limit(maxFeedCapacity).collect(Collectors.toList()); } - if (config.getApplicationSettings().getHeavyLoad()) { - disabledUntil = FeedUtils.buildDisabledUntil(fetchedFeed.getFeed().getLastEntryDate(), - fetchedFeed.getFeed().getAverageEntryInterval(), disabledUntil); - } String urlAfterRedirect = fetchedFeed.getUrlAfterRedirect(); if (StringUtils.equals(url, urlAfterRedirect)) { urlAfterRedirect = null; @@ -95,7 +89,7 @@ public class FeedRefreshWorker implements Managed { feed.setErrorCount(0); feed.setMessage(null); - feed.setDisabledUntil(disabledUntil); + feed.setDisabledUntil(refreshIntervalCalculator.onFetchSuccess(fetchedFeed)); handlePubSub(feed, fetchedFeed.getFeed()); context.setEntries(entries); @@ -104,12 +98,9 @@ public class FeedRefreshWorker implements Managed { } catch (NotModifiedException e) { log.debug("Feed not modified : {} - {}", feed.getUrl(), e.getMessage()); - if (config.getApplicationSettings().getHeavyLoad()) { - disabledUntil = FeedUtils.buildDisabledUntil(feed.getLastEntryDate(), feed.getAverageEntryInterval(), disabledUntil); - } feed.setErrorCount(0); feed.setMessage(e.getMessage()); - feed.setDisabledUntil(disabledUntil); + feed.setDisabledUntil(refreshIntervalCalculator.onFeedNotModified(feed)); queues.giveBack(feed); } catch (Exception e) { @@ -118,7 +109,7 @@ public class FeedRefreshWorker implements Managed { feed.setErrorCount(feed.getErrorCount() + 1); feed.setMessage(message); - feed.setDisabledUntil(FeedUtils.buildDisabledUntil(feed.getErrorCount())); + feed.setDisabledUntil(refreshIntervalCalculator.onFetchError(feed)); queues.giveBack(feed); } diff --git a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedUtils.java b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedUtils.java index 65f99ac6..5de8adf8 100644 --- a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedUtils.java +++ b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedUtils.java @@ -8,7 +8,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; -import java.util.Date; import java.util.Iterator; import java.util.List; import java.util.regex.Pattern; @@ -20,7 +19,6 @@ import org.ahocorasick.trie.Trie.TrieBuilder; import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.time.DateUtils; import org.apache.commons.math3.stat.descriptive.SummaryStatistics; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; @@ -359,57 +357,6 @@ public class FeedUtils { return sb.toString(); } - /** - * When there was an error fetching the feed - * - */ - public static Date buildDisabledUntil(int errorCount) { - Date now = new Date(); - int retriesBeforeDisable = 3; - - if (errorCount >= retriesBeforeDisable) { - int disabledHours = errorCount - retriesBeforeDisable + 1; - disabledHours = Math.min(24 * 7, disabledHours); - return DateUtils.addHours(now, disabledHours); - } - return now; - } - - /** - * When the feed was refreshed successfully - */ - public static Date buildDisabledUntil(Date publishedDate, Long averageEntryInterval, Date defaultRefreshInterval) { - Date now = new Date(); - - if (publishedDate == null) { - // feed with no entries, recheck in 24 hours - return DateUtils.addHours(now, 24); - } else if (publishedDate.before(DateUtils.addMonths(now, -1))) { - // older than a month, recheck in 24 hours - return DateUtils.addHours(now, 24); - } else if (publishedDate.before(DateUtils.addDays(now, -14))) { - // older than two weeks, recheck in 12 hours - return DateUtils.addHours(now, 12); - } else if (publishedDate.before(DateUtils.addDays(now, -7))) { - // older than a week, recheck in 6 hours - return DateUtils.addHours(now, 6); - } else if (averageEntryInterval != null) { - // use average time between entries to decide when to refresh next, divided by factor - int factor = 2; - - // not more than 6 hours - long date = Math.min(DateUtils.addHours(now, 6).getTime(), now.getTime() + averageEntryInterval / factor); - - // not less than default refresh interval - date = Math.max(defaultRefreshInterval.getTime(), date); - - return new Date(date); - } else { - // unknown case, recheck in 24 hours - return DateUtils.addHours(now, 24); - } - } - public static Long averageTimeBetweenEntries(List entries) { if (entries.isEmpty() || entries.size() == 1) { return null;