diff --git a/src/main/java/com/commafeed/backend/feeds/FeedParser.java b/src/main/java/com/commafeed/backend/feeds/FeedParser.java index 60d57904..2a8349b3 100644 --- a/src/main/java/com/commafeed/backend/feeds/FeedParser.java +++ b/src/main/java/com/commafeed/backend/feeds/FeedParser.java @@ -2,7 +2,6 @@ package com.commafeed.backend.feeds; import java.io.StringReader; import java.util.Calendar; -import java.util.Collection; import java.util.Date; import java.util.List; @@ -39,7 +38,7 @@ public class FeedParser { public FetchedFeed parse(String feedUrl, byte[] xml) throws FeedException { FetchedFeed fetchedFeed = new FetchedFeed(); Feed feed = fetchedFeed.getFeed(); - Collection entries = fetchedFeed.getEntries(); + List entries = fetchedFeed.getEntries(); feed.setLastUpdated(Calendar.getInstance().getTime()); try { @@ -75,10 +74,10 @@ public class FeedParser { entries.add(entry); } - Date publishedDate = validateDate(rss.getPublishedDate()); - if (publishedDate == null && !feed.getEntries().isEmpty()) { - FeedEntry first = entries.iterator().next(); - publishedDate = first.getUpdated(); + Date publishedDate = null; + if (!entries.isEmpty()) { + Long timestamp = FeedUtils.getSortedTimestamps(entries).get(0); + publishedDate = new Date(timestamp); } fetchedFeed.setPublishedDate(publishedDate); diff --git a/src/main/java/com/commafeed/backend/feeds/FeedUtils.java b/src/main/java/com/commafeed/backend/feeds/FeedUtils.java index 1109f32e..3591bf41 100644 --- a/src/main/java/com/commafeed/backend/feeds/FeedUtils.java +++ b/src/main/java/com/commafeed/backend/feeds/FeedUtils.java @@ -131,7 +131,10 @@ public class FeedUtils { Date now = Calendar.getInstance().getTime(); Date publishedDate = feed.getPublishedDate(); - if (publishedDate.before(DateUtils.addMonths(now, -1))) { + if (publishedDate == null) { + // feed with no entries, recheck in 24 hours + return DateUtils.addHours(now, 24); + } else if (publishedDate.before(DateUtils.addMonths(now, -1))) { // older tahn a month, recheck in 24 hours return DateUtils.addHours(now, 24); } else if (publishedDate.before(DateUtils.addDays(now, -14))) { @@ -141,16 +144,28 @@ public class FeedUtils { // older than a week, recheck in 6 hours return DateUtils.addHours(now, 6); } else if (CollectionUtils.isNotEmpty(feed.getEntries())) { + // use average time between entries to decide when to refresh next long average = averageTimeBetweenEntries(feed.getEntries()); return new Date(Math.min(DateUtils.addHours(now, 6).getTime(), now.getTime() + average / 3)); } else { - // no entries in the feed, recheck in 24 hours + // unknown case, recheck in 24 hours return DateUtils.addHours(now, 24); } } public static long averageTimeBetweenEntries(List entries) { + List timestamps = getSortedTimestamps(entries); + + SummaryStatistics stats = new SummaryStatistics(); + for (int i = 0; i < timestamps.size() - 1; i++) { + long diff = Math.abs(timestamps.get(i) - timestamps.get(i + 1)); + stats.addValue(diff); + } + return (long) stats.getMean(); + } + + public static List getSortedTimestamps(List entries) { List timestamps = Lists.newArrayList(); int i = 0; for (FeedEntry entry : entries) { @@ -161,12 +176,6 @@ public class FeedUtils { } Collections.sort(timestamps); Collections.reverse(timestamps); - - SummaryStatistics stats = new SummaryStatistics(); - for (i = 0; i < timestamps.size() - 1; i++) { - long diff = Math.abs(timestamps.get(i) - timestamps.get(i + 1)); - stats.addValue(diff); - } - return (long) stats.getMean(); + return timestamps; } }