From 2baa2bbfe56b6d9e3f9e1a4907929c5585941ebb Mon Sep 17 00:00:00 2001 From: Athou Date: Mon, 10 Jun 2013 12:53:46 +0200 Subject: [PATCH] store average entry interval and use it to decide when to reschedule feed refresh when it returns 304 --- .../commafeed/backend/feeds/FeedParser.java | 10 ++-- .../backend/feeds/FeedRefreshWorker.java | 38 +++++-------- .../commafeed/backend/feeds/FeedUtils.java | 14 ++--- .../commafeed/backend/feeds/FetchedFeed.java | 37 ------------- .../com/commafeed/backend/model/Feed.java | 54 +++++++++++++++++++ .../resources/PubSubHubbubCallbackREST.java | 2 +- 6 files changed, 81 insertions(+), 74 deletions(-) diff --git a/src/main/java/com/commafeed/backend/feeds/FeedParser.java b/src/main/java/com/commafeed/backend/feeds/FeedParser.java index f669d9c7..b8b56061 100644 --- a/src/main/java/com/commafeed/backend/feeds/FeedParser.java +++ b/src/main/java/com/commafeed/backend/feeds/FeedParser.java @@ -68,8 +68,8 @@ public class FeedParser { handleForeignMarkup(rss); fetchedFeed.setTitle(rss.getTitle()); - fetchedFeed.setHub(findHub(rss)); - fetchedFeed.setTopic(findSelf(rss)); + feed.setPushHub(findHub(rss)); + feed.setPushTopic(findSelf(rss)); feed.setUrl(feedUrl); feed.setLink(rss.getLink()); List items = rss.getEntries(); @@ -117,7 +117,9 @@ public class FeedParser { publishedDate = getFeedPublishedDate(publishedDate, entries); } feed.setLastPublishedDate(publishedDate); - fetchedFeed.setLastEntryDate(lastEntryDate); + feed.setAverageEntryInterval(FeedUtils + .averageTimeBetweenEntries(entries)); + feed.setLastEntryDate(lastEntryDate); } catch (Exception e) { throw new FeedException(String.format( @@ -158,7 +160,7 @@ public class FeedParser { if (publishedDate == null) { return null; } - + for (FeedEntry entry : entries) { if (entry.getUpdated().getTime() > publishedDate.getTime()) { publishedDate = entry.getUpdated(); diff --git a/src/main/java/com/commafeed/backend/feeds/FeedRefreshWorker.java 
b/src/main/java/com/commafeed/backend/feeds/FeedRefreshWorker.java index d40f2f0a..64f7a267 100644 --- a/src/main/java/com/commafeed/backend/feeds/FeedRefreshWorker.java +++ b/src/main/java/com/commafeed/backend/feeds/FeedRefreshWorker.java @@ -90,8 +90,9 @@ public class FeedRefreshWorker { Date disabledUntil = null; if (applicationSettingsService.get().isHeavyLoad()) { - disabledUntil = FeedUtils.buildDisabledUntil( - fetchedFeed.getLastEntryDate(), entries); + disabledUntil = FeedUtils.buildDisabledUntil(fetchedFeed + .getFeed().getLastEntryDate(), fetchedFeed.getFeed() + .getAverageEntryInterval()); } feed.setLastUpdateSuccess(now); @@ -102,12 +103,14 @@ public class FeedRefreshWorker { feed.setLastContentHash(fetchedFeed.getFeed().getLastContentHash()); feed.setLastPublishedDate(fetchedFeed.getFeed() .getLastPublishedDate()); + feed.setAverageEntryInterval(fetchedFeed.getFeed() + .getAverageEntryInterval()); feed.setErrorCount(0); feed.setMessage(null); feed.setDisabledUntil(disabledUntil); - handlePubSub(feed, fetchedFeed); + handlePubSub(feed); feedRefreshUpdater.updateFeed(feed, entries); } catch (NotModifiedException e) { @@ -115,26 +118,9 @@ public class FeedRefreshWorker { Date disabledUntil = null; if (applicationSettingsService.get().isHeavyLoad()) { - - Date lastUpdateSuccess = feed.getLastUpdateSuccess(); - Date lastDisabledUntil = feed.getDisabledUntil(); - if (feed.getErrorCount() == 0 && lastUpdateSuccess != null - && lastDisabledUntil != null - && lastUpdateSuccess.before(lastDisabledUntil)) { - long millis = now.getTime() + lastDisabledUntil.getTime() - - lastUpdateSuccess.getTime(); - disabledUntil = new Date(millis); - } else { - List feedEntries = feedEntryDAO.findByFeed(feed, - 0, 10); - - Date publishedDate = null; - if (feedEntries.size() > 0) { - publishedDate = feedEntries.get(0).getInserted(); - } - disabledUntil = FeedUtils.buildDisabledUntil(publishedDate, - feedEntries); - } + disabledUntil = FeedUtils + 
.buildDisabledUntil(feed.getLastEntryDate(), + feed.getAverageEntryInterval()); } feed.setErrorCount(0); feed.setMessage(null); @@ -159,9 +145,9 @@ public class FeedRefreshWorker { } } - private void handlePubSub(Feed feed, FetchedFeed fetchedFeed) { - String hub = fetchedFeed.getHub(); - String topic = fetchedFeed.getTopic(); + private void handlePubSub(Feed feed) { + String hub = feed.getPushHub(); + String topic = feed.getPushTopic(); if (hub != null && topic != null) { if (hub.contains("hubbub.api.typepad.com")) { // that hub does not exist anymore diff --git a/src/main/java/com/commafeed/backend/feeds/FeedUtils.java b/src/main/java/com/commafeed/backend/feeds/FeedUtils.java index 5cff969d..88aee7e9 100644 --- a/src/main/java/com/commafeed/backend/feeds/FeedUtils.java +++ b/src/main/java/com/commafeed/backend/feeds/FeedUtils.java @@ -8,7 +8,6 @@ import java.util.Collections; import java.util.Date; import java.util.List; -import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang.ObjectUtils; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.time.DateUtils; @@ -194,7 +193,7 @@ public class FeedUtils { * When the feed was refreshed successfully */ public static Date buildDisabledUntil(Date publishedDate, - List entries) { + Long averageEntryInterval) { Date now = Calendar.getInstance().getTime(); if (publishedDate == null) { @@ -209,19 +208,22 @@ public class FeedUtils { } else if (publishedDate.before(DateUtils.addDays(now, -7))) { // older than a week, recheck in 6 hours return DateUtils.addHours(now, 6); - } else if (CollectionUtils.isNotEmpty(entries)) { + } else if (averageEntryInterval != null) { // use average time between entries to decide when to refresh next - long average = averageTimeBetweenEntries(entries); int factor = 2; return new Date(Math.min(DateUtils.addHours(now, 6).getTime(), - now.getTime() + average / factor)); + now.getTime() + averageEntryInterval / factor)); } else { // unknown case, 
recheck in 24 hours return DateUtils.addHours(now, 24); } } - public static long averageTimeBetweenEntries(List entries) { + public static Long averageTimeBetweenEntries(List entries) { + if (entries.isEmpty() || entries.size() == 1) { + return null; + } + List timestamps = getSortedTimestamps(entries); SummaryStatistics stats = new SummaryStatistics(); diff --git a/src/main/java/com/commafeed/backend/feeds/FetchedFeed.java b/src/main/java/com/commafeed/backend/feeds/FetchedFeed.java index 45f72bfb..77fab91f 100644 --- a/src/main/java/com/commafeed/backend/feeds/FetchedFeed.java +++ b/src/main/java/com/commafeed/backend/feeds/FetchedFeed.java @@ -1,6 +1,5 @@ package com.commafeed.backend.feeds; -import java.util.Date; import java.util.List; import com.commafeed.backend.model.Feed; @@ -14,17 +13,6 @@ public class FetchedFeed { private String title; private long fetchDuration; - private Date lastEntryDate; - - /** - * pubsubhubbub hub url - */ - private String hub; - - /** - * pubsubhubbub topic - */ - private String topic; public Feed getFeed() { return feed; @@ -57,29 +45,4 @@ public class FetchedFeed { public void setFetchDuration(long fetchDuration) { this.fetchDuration = fetchDuration; } - - public String getHub() { - return hub; - } - - public void setHub(String hub) { - this.hub = hub; - } - - public String getTopic() { - return topic; - } - - public void setTopic(String topic) { - this.topic = topic; - } - - public Date getLastEntryDate() { - return lastEntryDate; - } - - public void setLastEntryDate(Date lastEntryDate) { - this.lastEntryDate = lastEntryDate; - } - } diff --git a/src/main/java/com/commafeed/backend/model/Feed.java b/src/main/java/com/commafeed/backend/model/Feed.java index aa54e332..3af09bd7 100644 --- a/src/main/java/com/commafeed/backend/model/Feed.java +++ b/src/main/java/com/commafeed/backend/model/Feed.java @@ -51,27 +51,56 @@ public class Feed extends AbstractModel { @Temporal(TemporalType.TIMESTAMP) private Date lastPublishedDate; + 
/** + * date of the last entry of the feed + */ + @Temporal(TemporalType.TIMESTAMP) + private Date lastEntryDate; + /** * Last time we successfully refreshed the feed */ @Temporal(TemporalType.TIMESTAMP) private Date lastUpdateSuccess; + /** + * error message while retrieving the feed + */ @Column(length = 1024) private String message; + /** + * times we failed to retrieve the feed + */ private int errorCount; + /** + * feed refresh is disabled until this date + */ @Temporal(TemporalType.TIMESTAMP) @Index(name = "disableduntil_index") private Date disabledUntil; + /** + * Last-Modified http header returned by the feed + */ @Column(length = 64) private String lastModifiedHeader; + /** + * ETag http header returned by the feed + */ @Column(length = 255) private String etagHeader; + /** + * average time between entries in the feed, in milliseconds + */ + private Long averageEntryInterval; + + /** + * last hash of the content of the feed xml + */ @Column(length = 40) private String lastContentHash; @@ -81,13 +110,22 @@ public class Feed extends AbstractModel { @OneToMany(mappedBy = "feed") private Set subscriptions; + /** + * detected hub for pubsubhubbub + */ @Column(length = 2048) private String pushHub; + /** + * detected topic for pubsubhubbub + */ @Column(length = 2048) @Index(name = "topic_index") private String pushTopic; + /** + * last time we subscribed for that topic on that hub + */ @Temporal(TemporalType.TIMESTAMP) private Date pushLastPing; @@ -235,4 +273,20 @@ public class Feed extends AbstractModel { this.lastContentHash = lastContentHash; } + public Long getAverageEntryInterval() { + return averageEntryInterval; + } + + public void setAverageEntryInterval(Long averageEntryInterval) { + this.averageEntryInterval = averageEntryInterval; + } + + public Date getLastEntryDate() { + return lastEntryDate; + } + + public void setLastEntryDate(Date lastEntryDate) { + this.lastEntryDate = lastEntryDate; + } + } diff --git
a/src/main/java/com/commafeed/frontend/rest/resources/PubSubHubbubCallbackREST.java b/src/main/java/com/commafeed/frontend/rest/resources/PubSubHubbubCallbackREST.java index 954b928f..d5d5cd50 100644 --- a/src/main/java/com/commafeed/frontend/rest/resources/PubSubHubbubCallbackREST.java +++ b/src/main/java/com/commafeed/frontend/rest/resources/PubSubHubbubCallbackREST.java @@ -82,7 +82,7 @@ public class PubSubHubbubCallbackREST { try { byte[] bytes = IOUtils.toByteArray(request.getInputStream()); FetchedFeed fetchedFeed = parser.parse(null, bytes); - String topic = fetchedFeed.getTopic(); + String topic = fetchedFeed.getFeed().getPushTopic(); if (topic != null) { log.debug("content callback received for {}", topic); List feeds = feedDAO.findByTopic(topic);