forked from Archives/Athou_commafeed
on fetch error and not under heavy load, don't increase refresh interval exponentially
This commit is contained in:
@@ -0,0 +1,84 @@
|
|||||||
|
package com.commafeed.backend.feed;
|
||||||
|
|
||||||
|
import java.util.Date;
|
||||||
|
|
||||||
|
import javax.inject.Inject;
|
||||||
|
import javax.inject.Singleton;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.time.DateUtils;
|
||||||
|
|
||||||
|
import com.commafeed.CommaFeedConfiguration;
|
||||||
|
import com.commafeed.backend.model.Feed;
|
||||||
|
|
||||||
|
@Singleton
|
||||||
|
public class FeedRefreshIntervalCalculator {
|
||||||
|
|
||||||
|
private boolean heavyLoad;
|
||||||
|
private int refreshIntervalMinutes;
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public FeedRefreshIntervalCalculator(CommaFeedConfiguration config) {
|
||||||
|
this.heavyLoad = config.getApplicationSettings().getHeavyLoad();
|
||||||
|
this.refreshIntervalMinutes = config.getApplicationSettings().getRefreshIntervalMinutes();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Date onFetchSuccess(FetchedFeed fetchedFeed) {
|
||||||
|
Date defaultRefreshInterval = getDefaultRefreshInterval();
|
||||||
|
return heavyLoad ? computeRefreshIntervalForHeavyLoad(fetchedFeed.getFeed(), defaultRefreshInterval) : defaultRefreshInterval;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Date onFeedNotModified(Feed feed) {
|
||||||
|
Date defaultRefreshInterval = getDefaultRefreshInterval();
|
||||||
|
return heavyLoad ? computeRefreshIntervalForHeavyLoad(feed, defaultRefreshInterval) : defaultRefreshInterval;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Date onFetchError(Feed feed) {
|
||||||
|
int errorCount = feed.getErrorCount();
|
||||||
|
int retriesBeforeDisable = 3;
|
||||||
|
if (errorCount < retriesBeforeDisable || !heavyLoad) {
|
||||||
|
return getDefaultRefreshInterval();
|
||||||
|
}
|
||||||
|
|
||||||
|
int disabledHours = Math.min(24 * 7, errorCount - retriesBeforeDisable + 1);
|
||||||
|
return DateUtils.addHours(new Date(), disabledHours);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Date getDefaultRefreshInterval() {
|
||||||
|
return DateUtils.addMinutes(new Date(), refreshIntervalMinutes);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Date computeRefreshIntervalForHeavyLoad(Feed feed, Date defaultRefreshInterval) {
|
||||||
|
Date now = new Date();
|
||||||
|
Date publishedDate = feed.getLastEntryDate();
|
||||||
|
Long averageEntryInterval = feed.getAverageEntryInterval();
|
||||||
|
|
||||||
|
if (publishedDate == null) {
|
||||||
|
// feed with no entries, recheck in 24 hours
|
||||||
|
return DateUtils.addHours(now, 24);
|
||||||
|
} else if (publishedDate.before(DateUtils.addMonths(now, -1))) {
|
||||||
|
// older than a month, recheck in 24 hours
|
||||||
|
return DateUtils.addHours(now, 24);
|
||||||
|
} else if (publishedDate.before(DateUtils.addDays(now, -14))) {
|
||||||
|
// older than two weeks, recheck in 12 hours
|
||||||
|
return DateUtils.addHours(now, 12);
|
||||||
|
} else if (publishedDate.before(DateUtils.addDays(now, -7))) {
|
||||||
|
// older than a week, recheck in 6 hours
|
||||||
|
return DateUtils.addHours(now, 6);
|
||||||
|
} else if (averageEntryInterval != null) {
|
||||||
|
// use average time between entries to decide when to refresh next, divided by factor
|
||||||
|
int factor = 2;
|
||||||
|
|
||||||
|
// not more than 6 hours
|
||||||
|
long date = Math.min(DateUtils.addHours(now, 6).getTime(), now.getTime() + averageEntryInterval / factor);
|
||||||
|
|
||||||
|
// not less than default refresh interval
|
||||||
|
date = Math.max(defaultRefreshInterval.getTime(), date);
|
||||||
|
|
||||||
|
return new Date(date);
|
||||||
|
} else {
|
||||||
|
// unknown case, recheck in 24 hours
|
||||||
|
return DateUtils.addHours(now, 24);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -1,6 +1,5 @@
|
|||||||
package com.commafeed.backend.feed;
|
package com.commafeed.backend.feed;
|
||||||
|
|
||||||
import java.util.Date;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
@@ -10,7 +9,6 @@ import javax.inject.Singleton;
|
|||||||
|
|
||||||
import org.apache.commons.codec.digest.DigestUtils;
|
import org.apache.commons.codec.digest.DigestUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.commons.lang3.time.DateUtils;
|
|
||||||
|
|
||||||
import com.codahale.metrics.MetricRegistry;
|
import com.codahale.metrics.MetricRegistry;
|
||||||
import com.commafeed.CommaFeedConfiguration;
|
import com.commafeed.CommaFeedConfiguration;
|
||||||
@@ -31,15 +29,17 @@ import lombok.extern.slf4j.Slf4j;
|
|||||||
public class FeedRefreshWorker implements Managed {
|
public class FeedRefreshWorker implements Managed {
|
||||||
|
|
||||||
private final FeedRefreshUpdater feedRefreshUpdater;
|
private final FeedRefreshUpdater feedRefreshUpdater;
|
||||||
|
private final FeedRefreshIntervalCalculator refreshIntervalCalculator;
|
||||||
private final FeedFetcher fetcher;
|
private final FeedFetcher fetcher;
|
||||||
private final FeedQueues queues;
|
private final FeedQueues queues;
|
||||||
private final CommaFeedConfiguration config;
|
private final CommaFeedConfiguration config;
|
||||||
private final FeedRefreshExecutor pool;
|
private final FeedRefreshExecutor pool;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public FeedRefreshWorker(FeedRefreshUpdater feedRefreshUpdater, FeedFetcher fetcher, FeedQueues queues, CommaFeedConfiguration config,
|
public FeedRefreshWorker(FeedRefreshUpdater feedRefreshUpdater, FeedRefreshIntervalCalculator refreshIntervalCalculator,
|
||||||
MetricRegistry metrics) {
|
FeedFetcher fetcher, FeedQueues queues, CommaFeedConfiguration config, MetricRegistry metrics) {
|
||||||
this.feedRefreshUpdater = feedRefreshUpdater;
|
this.feedRefreshUpdater = feedRefreshUpdater;
|
||||||
|
this.refreshIntervalCalculator = refreshIntervalCalculator;
|
||||||
this.fetcher = fetcher;
|
this.fetcher = fetcher;
|
||||||
this.config = config;
|
this.config = config;
|
||||||
this.queues = queues;
|
this.queues = queues;
|
||||||
@@ -62,8 +62,6 @@ public class FeedRefreshWorker implements Managed {
|
|||||||
|
|
||||||
private void update(FeedRefreshContext context) {
|
private void update(FeedRefreshContext context) {
|
||||||
Feed feed = context.getFeed();
|
Feed feed = context.getFeed();
|
||||||
int refreshInterval = config.getApplicationSettings().getRefreshIntervalMinutes();
|
|
||||||
Date disabledUntil = DateUtils.addMinutes(new Date(), refreshInterval);
|
|
||||||
try {
|
try {
|
||||||
String url = Optional.ofNullable(feed.getUrlAfterRedirect()).orElse(feed.getUrl());
|
String url = Optional.ofNullable(feed.getUrlAfterRedirect()).orElse(feed.getUrl());
|
||||||
FetchedFeed fetchedFeed = fetcher.fetch(url, false, feed.getLastModifiedHeader(), feed.getEtagHeader(),
|
FetchedFeed fetchedFeed = fetcher.fetch(url, false, feed.getLastModifiedHeader(), feed.getEtagHeader(),
|
||||||
@@ -76,10 +74,6 @@ public class FeedRefreshWorker implements Managed {
|
|||||||
entries = entries.stream().limit(maxFeedCapacity).collect(Collectors.toList());
|
entries = entries.stream().limit(maxFeedCapacity).collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (config.getApplicationSettings().getHeavyLoad()) {
|
|
||||||
disabledUntil = FeedUtils.buildDisabledUntil(fetchedFeed.getFeed().getLastEntryDate(),
|
|
||||||
fetchedFeed.getFeed().getAverageEntryInterval(), disabledUntil);
|
|
||||||
}
|
|
||||||
String urlAfterRedirect = fetchedFeed.getUrlAfterRedirect();
|
String urlAfterRedirect = fetchedFeed.getUrlAfterRedirect();
|
||||||
if (StringUtils.equals(url, urlAfterRedirect)) {
|
if (StringUtils.equals(url, urlAfterRedirect)) {
|
||||||
urlAfterRedirect = null;
|
urlAfterRedirect = null;
|
||||||
@@ -95,7 +89,7 @@ public class FeedRefreshWorker implements Managed {
|
|||||||
|
|
||||||
feed.setErrorCount(0);
|
feed.setErrorCount(0);
|
||||||
feed.setMessage(null);
|
feed.setMessage(null);
|
||||||
feed.setDisabledUntil(disabledUntil);
|
feed.setDisabledUntil(refreshIntervalCalculator.onFetchSuccess(fetchedFeed));
|
||||||
|
|
||||||
handlePubSub(feed, fetchedFeed.getFeed());
|
handlePubSub(feed, fetchedFeed.getFeed());
|
||||||
context.setEntries(entries);
|
context.setEntries(entries);
|
||||||
@@ -104,12 +98,9 @@ public class FeedRefreshWorker implements Managed {
|
|||||||
} catch (NotModifiedException e) {
|
} catch (NotModifiedException e) {
|
||||||
log.debug("Feed not modified : {} - {}", feed.getUrl(), e.getMessage());
|
log.debug("Feed not modified : {} - {}", feed.getUrl(), e.getMessage());
|
||||||
|
|
||||||
if (config.getApplicationSettings().getHeavyLoad()) {
|
|
||||||
disabledUntil = FeedUtils.buildDisabledUntil(feed.getLastEntryDate(), feed.getAverageEntryInterval(), disabledUntil);
|
|
||||||
}
|
|
||||||
feed.setErrorCount(0);
|
feed.setErrorCount(0);
|
||||||
feed.setMessage(e.getMessage());
|
feed.setMessage(e.getMessage());
|
||||||
feed.setDisabledUntil(disabledUntil);
|
feed.setDisabledUntil(refreshIntervalCalculator.onFeedNotModified(feed));
|
||||||
|
|
||||||
queues.giveBack(feed);
|
queues.giveBack(feed);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
@@ -118,7 +109,7 @@ public class FeedRefreshWorker implements Managed {
|
|||||||
|
|
||||||
feed.setErrorCount(feed.getErrorCount() + 1);
|
feed.setErrorCount(feed.getErrorCount() + 1);
|
||||||
feed.setMessage(message);
|
feed.setMessage(message);
|
||||||
feed.setDisabledUntil(FeedUtils.buildDisabledUntil(feed.getErrorCount()));
|
feed.setDisabledUntil(refreshIntervalCalculator.onFetchError(feed));
|
||||||
|
|
||||||
queues.giveBack(feed);
|
queues.giveBack(feed);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ import java.util.ArrayList;
|
|||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Date;
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
@@ -20,7 +19,6 @@ import org.ahocorasick.trie.Trie.TrieBuilder;
|
|||||||
import org.apache.commons.codec.binary.Base64;
|
import org.apache.commons.codec.binary.Base64;
|
||||||
import org.apache.commons.lang3.ArrayUtils;
|
import org.apache.commons.lang3.ArrayUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.commons.lang3.time.DateUtils;
|
|
||||||
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
|
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
|
||||||
import org.jsoup.Jsoup;
|
import org.jsoup.Jsoup;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
@@ -359,57 +357,6 @@ public class FeedUtils {
|
|||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* When there was an error fetching the feed
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
public static Date buildDisabledUntil(int errorCount) {
|
|
||||||
Date now = new Date();
|
|
||||||
int retriesBeforeDisable = 3;
|
|
||||||
|
|
||||||
if (errorCount >= retriesBeforeDisable) {
|
|
||||||
int disabledHours = errorCount - retriesBeforeDisable + 1;
|
|
||||||
disabledHours = Math.min(24 * 7, disabledHours);
|
|
||||||
return DateUtils.addHours(now, disabledHours);
|
|
||||||
}
|
|
||||||
return now;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* When the feed was refreshed successfully
|
|
||||||
*/
|
|
||||||
public static Date buildDisabledUntil(Date publishedDate, Long averageEntryInterval, Date defaultRefreshInterval) {
|
|
||||||
Date now = new Date();
|
|
||||||
|
|
||||||
if (publishedDate == null) {
|
|
||||||
// feed with no entries, recheck in 24 hours
|
|
||||||
return DateUtils.addHours(now, 24);
|
|
||||||
} else if (publishedDate.before(DateUtils.addMonths(now, -1))) {
|
|
||||||
// older than a month, recheck in 24 hours
|
|
||||||
return DateUtils.addHours(now, 24);
|
|
||||||
} else if (publishedDate.before(DateUtils.addDays(now, -14))) {
|
|
||||||
// older than two weeks, recheck in 12 hours
|
|
||||||
return DateUtils.addHours(now, 12);
|
|
||||||
} else if (publishedDate.before(DateUtils.addDays(now, -7))) {
|
|
||||||
// older than a week, recheck in 6 hours
|
|
||||||
return DateUtils.addHours(now, 6);
|
|
||||||
} else if (averageEntryInterval != null) {
|
|
||||||
// use average time between entries to decide when to refresh next, divided by factor
|
|
||||||
int factor = 2;
|
|
||||||
|
|
||||||
// not more than 6 hours
|
|
||||||
long date = Math.min(DateUtils.addHours(now, 6).getTime(), now.getTime() + averageEntryInterval / factor);
|
|
||||||
|
|
||||||
// not less than default refresh interval
|
|
||||||
date = Math.max(defaultRefreshInterval.getTime(), date);
|
|
||||||
|
|
||||||
return new Date(date);
|
|
||||||
} else {
|
|
||||||
// unknown case, recheck in 24 hours
|
|
||||||
return DateUtils.addHours(now, 24);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Long averageTimeBetweenEntries(List<FeedEntry> entries) {
|
public static Long averageTimeBetweenEntries(List<FeedEntry> entries) {
|
||||||
if (entries.isEmpty() || entries.size() == 1) {
|
if (entries.isEmpty() || entries.size() == 1) {
|
||||||
return null;
|
return null;
|
||||||
|
|||||||
Reference in New Issue
Block a user