Store the average entry interval on the feed and use it to decide when to reschedule the next refresh when the feed's server responds with HTTP 304 (Not Modified)

This commit is contained in:
Athou
2013-06-10 12:53:46 +02:00
parent 3cfc37c841
commit 2baa2bbfe5
6 changed files with 81 additions and 74 deletions

View File

@@ -68,8 +68,8 @@ public class FeedParser {
handleForeignMarkup(rss);
fetchedFeed.setTitle(rss.getTitle());
fetchedFeed.setHub(findHub(rss));
fetchedFeed.setTopic(findSelf(rss));
feed.setPushHub(findHub(rss));
feed.setPushTopic(findSelf(rss));
feed.setUrl(feedUrl);
feed.setLink(rss.getLink());
List<SyndEntry> items = rss.getEntries();
@@ -117,7 +117,9 @@ public class FeedParser {
publishedDate = getFeedPublishedDate(publishedDate, entries);
}
feed.setLastPublishedDate(publishedDate);
fetchedFeed.setLastEntryDate(lastEntryDate);
feed.setAverageEntryInterval(FeedUtils
.averageTimeBetweenEntries(entries));
feed.setLastEntryDate(lastEntryDate);
} catch (Exception e) {
throw new FeedException(String.format(
@@ -158,7 +160,7 @@ public class FeedParser {
if (publishedDate == null) {
return null;
}
for (FeedEntry entry : entries) {
if (entry.getUpdated().getTime() > publishedDate.getTime()) {
publishedDate = entry.getUpdated();

View File

@@ -90,8 +90,9 @@ public class FeedRefreshWorker {
Date disabledUntil = null;
if (applicationSettingsService.get().isHeavyLoad()) {
disabledUntil = FeedUtils.buildDisabledUntil(
fetchedFeed.getLastEntryDate(), entries);
disabledUntil = FeedUtils.buildDisabledUntil(fetchedFeed
.getFeed().getLastEntryDate(), fetchedFeed.getFeed()
.getAverageEntryInterval());
}
feed.setLastUpdateSuccess(now);
@@ -102,12 +103,14 @@ public class FeedRefreshWorker {
feed.setLastContentHash(fetchedFeed.getFeed().getLastContentHash());
feed.setLastPublishedDate(fetchedFeed.getFeed()
.getLastPublishedDate());
feed.setAverageEntryInterval(fetchedFeed.getFeed()
.getAverageEntryInterval());
feed.setErrorCount(0);
feed.setMessage(null);
feed.setDisabledUntil(disabledUntil);
handlePubSub(feed, fetchedFeed);
handlePubSub(feed);
feedRefreshUpdater.updateFeed(feed, entries);
} catch (NotModifiedException e) {
@@ -115,26 +118,9 @@ public class FeedRefreshWorker {
Date disabledUntil = null;
if (applicationSettingsService.get().isHeavyLoad()) {
Date lastUpdateSuccess = feed.getLastUpdateSuccess();
Date lastDisabledUntil = feed.getDisabledUntil();
if (feed.getErrorCount() == 0 && lastUpdateSuccess != null
&& lastDisabledUntil != null
&& lastUpdateSuccess.before(lastDisabledUntil)) {
long millis = now.getTime() + lastDisabledUntil.getTime()
- lastUpdateSuccess.getTime();
disabledUntil = new Date(millis);
} else {
List<FeedEntry> feedEntries = feedEntryDAO.findByFeed(feed,
0, 10);
Date publishedDate = null;
if (feedEntries.size() > 0) {
publishedDate = feedEntries.get(0).getInserted();
}
disabledUntil = FeedUtils.buildDisabledUntil(publishedDate,
feedEntries);
}
disabledUntil = FeedUtils
.buildDisabledUntil(feed.getLastEntryDate(),
feed.getAverageEntryInterval());
}
feed.setErrorCount(0);
feed.setMessage(null);
@@ -159,9 +145,9 @@ public class FeedRefreshWorker {
}
}
private void handlePubSub(Feed feed, FetchedFeed fetchedFeed) {
String hub = fetchedFeed.getHub();
String topic = fetchedFeed.getTopic();
private void handlePubSub(Feed feed) {
String hub = feed.getPushHub();
String topic = feed.getPushTopic();
if (hub != null && topic != null) {
if (hub.contains("hubbub.api.typepad.com")) {
// that hub does not exist anymore

View File

@@ -8,7 +8,6 @@ import java.util.Collections;
import java.util.Date;
import java.util.List;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.ObjectUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.time.DateUtils;
@@ -194,7 +193,7 @@ public class FeedUtils {
* When the feed was refreshed successfully
*/
public static Date buildDisabledUntil(Date publishedDate,
List<FeedEntry> entries) {
Long averageEntryInterval) {
Date now = Calendar.getInstance().getTime();
if (publishedDate == null) {
@@ -209,19 +208,22 @@ public class FeedUtils {
} else if (publishedDate.before(DateUtils.addDays(now, -7))) {
// older than a week, recheck in 6 hours
return DateUtils.addHours(now, 6);
} else if (CollectionUtils.isNotEmpty(entries)) {
} else if (averageEntryInterval != null) {
// use average time between entries to decide when to refresh next
long average = averageTimeBetweenEntries(entries);
int factor = 2;
return new Date(Math.min(DateUtils.addHours(now, 6).getTime(),
now.getTime() + average / factor));
now.getTime() + averageEntryInterval / factor));
} else {
// unknown case, recheck in 24 hours
return DateUtils.addHours(now, 24);
}
}
public static long averageTimeBetweenEntries(List<FeedEntry> entries) {
public static Long averageTimeBetweenEntries(List<FeedEntry> entries) {
if (entries.isEmpty() || entries.size() == 1) {
return null;
}
List<Long> timestamps = getSortedTimestamps(entries);
SummaryStatistics stats = new SummaryStatistics();

View File

@@ -1,6 +1,5 @@
package com.commafeed.backend.feeds;
import java.util.Date;
import java.util.List;
import com.commafeed.backend.model.Feed;
@@ -14,17 +13,6 @@ public class FetchedFeed {
private String title;
private long fetchDuration;
private Date lastEntryDate;
/**
* pubsubhubbub hub url
*/
private String hub;
/**
* pubsubhubbub topic
*/
private String topic;
public Feed getFeed() {
return feed;
@@ -57,29 +45,4 @@ public class FetchedFeed {
public void setFetchDuration(long fetchDuration) {
this.fetchDuration = fetchDuration;
}
public String getHub() {
return hub;
}
public void setHub(String hub) {
this.hub = hub;
}
public String getTopic() {
return topic;
}
public void setTopic(String topic) {
this.topic = topic;
}
public Date getLastEntryDate() {
return lastEntryDate;
}
public void setLastEntryDate(Date lastEntryDate) {
this.lastEntryDate = lastEntryDate;
}
}

View File

@@ -51,27 +51,56 @@ public class Feed extends AbstractModel {
@Temporal(TemporalType.TIMESTAMP)
private Date lastPublishedDate;
/**
* date of the last entry of the feed
*/
@Temporal(TemporalType.TIMESTAMP)
private Date lastEntryDate;
/**
* Last time we successfully refreshed the feed
*/
@Temporal(TemporalType.TIMESTAMP)
private Date lastUpdateSuccess;
/**
* error message while retrieving the feed
*/
@Column(length = 1024)
private String message;
/**
* times we failed to retrieve the feed
*/
private int errorCount;
/**
* feed refresh is disabled until this date
*/
@Temporal(TemporalType.TIMESTAMP)
@Index(name = "disableduntil_index")
private Date disabledUntil;
/**
* http header returned by the feed
*/
@Column(length = 64)
private String lastModifiedHeader;
/**
* http header returned by the feed
*/
@Column(length = 255)
private String etagHeader;
/**
* average time between entries in the feed
*/
private Long averageEntryInterval;
/**
* last hash of the content of the feed xml
*/
@Column(length = 40)
private String lastContentHash;
@@ -81,13 +110,22 @@ public class Feed extends AbstractModel {
@OneToMany(mappedBy = "feed")
private Set<FeedSubscription> subscriptions;
/**
* detected hub for pubsubhubbub
*/
@Column(length = 2048)
private String pushHub;
/**
* detected topic for pubsubhubbub
*/
@Column(length = 2048)
@Index(name = "topic_index")
private String pushTopic;
/**
* last time we subscribed for that topic on that hub
*/
@Temporal(TemporalType.TIMESTAMP)
private Date pushLastPing;
@@ -235,4 +273,20 @@ public class Feed extends AbstractModel {
this.lastContentHash = lastContentHash;
}
/**
 * Returns the stored average time between entries of this feed.
 *
 * @return the average entry interval, or {@code null} when none has been
 *         stored (NOTE(review): presumably milliseconds, since callers add
 *         it to {@code Date.getTime()} values; confirm)
 */
public Long getAverageEntryInterval() {
return averageEntryInterval;
}
/**
 * Stores the average time between entries of this feed.
 *
 * @param averageEntryInterval
 *            the new average entry interval; the field is a boxed
 *            {@code Long}, so {@code null} is accepted and stored as-is
 */
public void setAverageEntryInterval(Long averageEntryInterval) {
this.averageEntryInterval = averageEntryInterval;
}
/**
 * Returns the date of the last entry of this feed.
 *
 * @return the stored last entry date, or {@code null} if none has been set
 */
public Date getLastEntryDate() {
return lastEntryDate;
}
/**
 * Stores the date of the last entry of this feed.
 *
 * @param lastEntryDate
 *            the date of the last entry; stored as-is, without copying
 */
public void setLastEntryDate(Date lastEntryDate) {
this.lastEntryDate = lastEntryDate;
}
}

View File

@@ -82,7 +82,7 @@ public class PubSubHubbubCallbackREST {
try {
byte[] bytes = IOUtils.toByteArray(request.getInputStream());
FetchedFeed fetchedFeed = parser.parse(null, bytes);
String topic = fetchedFeed.getTopic();
String topic = fetchedFeed.getFeed().getPushTopic();
if (topic != null) {
log.debug("content callback received for {}", topic);
List<Feed> feeds = feedDAO.findByTopic(topic);