From b11fd7897d7e47c302858c725bd608c542df73ce Mon Sep 17 00:00:00 2001 From: Athou Date: Mon, 20 May 2013 10:56:20 +0200 Subject: [PATCH] first try at #44 --- .../backend/feeds/FeedRefreshWorker.java | 46 ++++++-------- .../commafeed/backend/feeds/FeedUtils.java | 63 ++++++++++++++++++- 2 files changed, 79 insertions(+), 30 deletions(-) diff --git a/src/main/java/com/commafeed/backend/feeds/FeedRefreshWorker.java b/src/main/java/com/commafeed/backend/feeds/FeedRefreshWorker.java index f6721901..d5892ed6 100644 --- a/src/main/java/com/commafeed/backend/feeds/FeedRefreshWorker.java +++ b/src/main/java/com/commafeed/backend/feeds/FeedRefreshWorker.java @@ -13,7 +13,6 @@ import javax.transaction.RollbackException; import javax.transaction.SystemException; import org.apache.commons.lang.mutable.MutableBoolean; -import org.apache.commons.lang.time.DateUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -69,19 +68,33 @@ public class FeedRefreshWorker { RollbackException, HeuristicMixedException, HeuristicRollbackException, JMSException { + FetchedFeed fetchedFeed = null; + Collection entries = null; + String message = null; int errorCount = 0; Date disabledUntil = null; - FetchedFeed fetchedFeed = null; - boolean modified = true; try { fetchedFeed = fetcher.fetch(feed.getUrl(), false, feed.getLastModifiedHeader(), feed.getEtagHeader()); + + // stops here if NotModifiedException is thrown + entries = fetchedFeed.getEntries(); + disabledUntil = FeedUtils.calculateDisabledDate(fetchedFeed); + feed.setLastUpdateSuccess(Calendar.getInstance().getTime()); + feed.setLink(fetchedFeed.getFeed().getLink()); + feed.setLastModifiedHeader(fetchedFeed.getFeed() + .getLastModifiedHeader()); + feed.setEtagHeader(fetchedFeed.getFeed().getEtagHeader()); + } catch (NotModifiedException e) { - modified = false; log.debug("Feed not modified (304) : " + feed.getUrl()); + if (feed.getErrorCount() == 0) { + // not modified and had no error before, do nothing + return; + } } catch 
(Exception e) { message = "Unable to refresh feed " + feed.getUrl() + " : " + e.getMessage(); @@ -92,34 +105,13 @@ public class FeedRefreshWorker { } errorCount = feed.getErrorCount() + 1; - - int retriesBeforeDisable = 3; - if (feed.getErrorCount() >= retriesBeforeDisable) { - int disabledMinutes = 10 * (feed.getErrorCount() - - retriesBeforeDisable + 1); - disabledMinutes = Math.min(60 * 12, disabledMinutes); - disabledUntil = DateUtils.addMinutes(Calendar.getInstance() - .getTime(), disabledMinutes); - } - } - - if (modified == false && feed.getErrorCount() == 0) { - // not modified - return; + disabledUntil = FeedUtils.calculateDisabledDate(errorCount); } feed.setErrorCount(errorCount); feed.setMessage(message); feed.setDisabledUntil(disabledUntil); - - Collection entries = null; - if (fetchedFeed != null) { - feed.setLink(fetchedFeed.getFeed().getLink()); - feed.setLastModifiedHeader(fetchedFeed.getFeed() - .getLastModifiedHeader()); - feed.setEtagHeader(fetchedFeed.getFeed().getEtagHeader()); - entries = fetchedFeed.getEntries(); - } + log.info(feed.getUrl() + " disabledUntil " + disabledUntil); feedRefreshUpdater.updateEntries(feed, entries); } diff --git a/src/main/java/com/commafeed/backend/feeds/FeedUtils.java b/src/main/java/com/commafeed/backend/feeds/FeedUtils.java index adcec9cf..eed46ca9 100644 --- a/src/main/java/com/commafeed/backend/feeds/FeedUtils.java +++ b/src/main/java/com/commafeed/backend/feeds/FeedUtils.java @@ -1,8 +1,13 @@ package com.commafeed.backend.feeds; +import java.util.Calendar; +import java.util.Collections; +import java.util.Date; import java.util.List; +import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.time.DateUtils; import org.apache.commons.math.stat.descriptive.SummaryStatistics; import org.jsoup.Jsoup; import org.jsoup.nodes.Document.OutputSettings; @@ -11,6 +16,7 @@ import org.jsoup.safety.Whitelist; import 
org.mozilla.universalchardet.UniversalDetector; import com.commafeed.backend.model.FeedEntry; +import com.google.api.client.util.Lists; public class FeedUtils { @@ -102,11 +108,62 @@ public class FeedUtils { return sb.toString(); } + /** + * When there was an error fetching the feed + * returns null below 3 errors, otherwise now + 10 minutes per error counted from the third one, capped at 12 hours + */ + public static Date calculateDisabledDate(int errorCount) { + Date now = Calendar.getInstance().getTime(); + int retriesBeforeDisable = 3; + + if (errorCount >= retriesBeforeDisable) { + int disabledMinutes = 10 * (errorCount - retriesBeforeDisable + 1); + disabledMinutes = Math.min(60 * 12, disabledMinutes); + return DateUtils.addMinutes(now, disabledMinutes); + } + return null; + } + + /** + * When the feed was refreshed successfully + */ + public static Date calculateDisabledDate(FetchedFeed feed) { + Date now = Calendar.getInstance().getTime(); + Date publishedDate = feed.getPublishedDate(); + + if (publishedDate.before(DateUtils.addMonths(now, -1))) { + // older than a month, recheck in 24 hours + return DateUtils.addHours(now, 24); + } else if (publishedDate.before(DateUtils.addDays(now, -14))) { + // older than two weeks, recheck in 12 hours + return DateUtils.addHours(now, 12); + } else if (publishedDate.before(DateUtils.addDays(now, -7))) { + // older than a week, recheck in 6 hours + return DateUtils.addHours(now, 6); + } else if (CollectionUtils.isNotEmpty(feed.getEntries())) { + // published within the last week: recheck after a third of the average interval between entries, at most 6 hours + long average = average(feed.getEntries()); + return new Date(Math.min(DateUtils.addHours(now, 6).getTime(), + now.getTime() + average / 3)); + } else { + return null; + } + } + public static long average(List entries) { + List timestamps = Lists.newArrayList(); + int i = 0; + for (FeedEntry entry : entries) { + timestamps.add(entry.getUpdated().getTime()); + i++; + if (i >= 10) + break; + } + Collections.sort(timestamps); + SummaryStatistics stats = new SummaryStatistics(); - for (int i = 0; i < entries.size() - 1; i++) { - long diff = Math.abs(entries.get(i).getUpdated().getTime() -
- entries.get(i + 1).getUpdated().getTime()); + for (i = 0; i < timestamps.size() - 1; i++) { + long diff = Math.abs(timestamps.get(i) - timestamps.get(i + 1)); stats.addValue(diff); } return (long) stats.getMean();