Check whether the feed has changed by using the published date of the feed or, when the feed has none, the updated date of its first entry

This commit is contained in:
Athou
2013-04-19 11:51:40 +02:00
parent f8a7241bc1
commit 7502707930
5 changed files with 73 additions and 6 deletions

View File

@@ -1,6 +1,7 @@
package com.commafeed.backend.dao;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import javax.annotation.PostConstruct;
@@ -38,13 +39,13 @@ public abstract class GenericDAO<T extends AbstractModel> {
em.persist(object);
}
public void save(List<T> objects) {
public void save(Collection<T> objects) {
for (Object object : objects) {
em.persist(object);
}
}
public void update(List<T> objects) {
public void update(Collection<T> objects) {
for (Object object : objects) {
em.merge(object);
}

View File

@@ -36,16 +36,18 @@ public class FeedFetcher {
HttpResult result = getter.getBinary(feedUrl, lastModified, eTag);
if (extractFeedUrlFromHtml) {
String extractedUrl = extractFeedUrl(StringUtils
.newStringUtf8(result.getContent()), feedUrl);
String extractedUrl = extractFeedUrl(
StringUtils.newStringUtf8(result.getContent()), feedUrl);
if (org.apache.commons.lang.StringUtils.isNotBlank(extractedUrl)) {
result = getter.getBinary(extractedUrl, lastModified, eTag);
feedUrl = extractedUrl;
}
}
feed = parser.parse(feedUrl, result.getContent());
feed.setLastModifiedHeader(result.getLastModifiedSince());
feed.setEtagHeader(result.geteTag());
feed.setFetchDuration(result.getDuration());
return feed;
}

View File

@@ -39,8 +39,9 @@ public class FeedParser {
try {
InputSource source = new InputSource(new ByteArrayInputStream(xml));
if (new String(ArrayUtils.subarray(xml, 0, 100)).split(SystemUtils.LINE_SEPARATOR)[0]
.toUpperCase().contains("ISO-8859-1")) {
if (new String(ArrayUtils.subarray(xml, 0, 100))
.split(SystemUtils.LINE_SEPARATOR)[0].toUpperCase()
.contains("ISO-8859-1")) {
// they probably use Word, we need to handle curly quotes and
// other Word special characters
source.setEncoding("windows-1252");
@@ -70,6 +71,13 @@ public class FeedParser {
feed.getEntries().add(entry);
}
Date publishedDate = rss.getPublishedDate();
if (publishedDate == null && !feed.getEntries().isEmpty()) {
FeedEntry first = feed.getEntries().iterator().next();
publishedDate = first.getUpdated();
}
feed.setPublishedDate(publishedDate);
} catch (Exception e) {
throw new FeedException(String.format(
"Could not parse feed from %s : %s", feedUrl,

View File

@@ -86,6 +86,13 @@ public class FeedRefreshWorker {
try {
fetchedFeed = fetcher.fetch(feed.getUrl(), false,
feed.getLastModifiedHeader(), feed.getEtagHeader());
if (fetchedFeed.getPublishedDate() != null
&& feed.getLastUpdateSuccess() != null
&& fetchedFeed.getPublishedDate().before(
feed.getLastUpdateSuccess())) {
throw new NotModifiedException();
}
feed.setLastUpdateSuccess(Calendar.getInstance().getTime());
} catch (NotModifiedException e) {
modified = false;
log.debug("Feed not modified (304) : " + feed.getUrl());

View File

@@ -31,19 +31,44 @@ public class Feed extends AbstractModel {
@Index(name = "urlHash_index")
private String urlHash;
/**
* title of the feed, used only when fetching, not stored
*/
@Transient
private String title;
/**
* time it took to fetch the feed, used only when fetching, not stored
*/
@Transient
private long fetchDuration;
/**
* extracted published date from the feed, used only when fetching, not
* stored
*/
@Transient
private Date publishedDate;
/**
* The url of the website, extracted from the feed
*/
@Column(length = 2048)
private String link;
/**
* Last time we tried to fetch the feed
*/
@Temporal(TemporalType.TIMESTAMP)
@Index(name = "lastupdated_index")
private Date lastUpdated;
/**
* Last time we successfully refreshed the feed
*/
@Temporal(TemporalType.TIMESTAMP)
private Date lastUpdateSuccess;
@Column(length = 1024)
private String message;
@@ -169,4 +194,28 @@ public class Feed extends AbstractModel {
this.etagHeader = etagHeader;
}
/**
 * Returns the time it took to fetch the feed.
 * <p>
 * The backing field is {@code @Transient}: it is used only when fetching
 * and is not stored.
 *
 * @return the fetch duration (unit not visible here -- TODO confirm against
 *         the HTTP result's duration)
 */
public long getFetchDuration() {
return fetchDuration;
}
/**
 * Records the time it took to fetch the feed.
 * <p>
 * The backing field is {@code @Transient}: it is used only when fetching
 * and is not stored.
 *
 * @param fetchDuration
 *            the fetch duration (unit not visible here -- TODO confirm)
 */
public void setFetchDuration(long fetchDuration) {
this.fetchDuration = fetchDuration;
}
/**
 * Returns the published date extracted from the feed.
 * <p>
 * The backing field is {@code @Transient}: it is used only when fetching
 * and is not stored.
 *
 * @return the extracted published date; may be {@code null} (callers
 *         null-check it before comparing)
 */
public Date getPublishedDate() {
return publishedDate;
}
/**
 * Sets the published date extracted from the feed.
 * <p>
 * The backing field is {@code @Transient}: it is used only when fetching
 * and is not stored.
 *
 * @param publishedDate
 *            the extracted published date; may be {@code null} when none
 *            could be determined
 */
public void setPublishedDate(Date publishedDate) {
this.publishedDate = publishedDate;
}
/**
 * Returns the last time the feed was successfully refreshed.
 *
 * @return the timestamp of the last successful refresh; may be
 *         {@code null} (callers null-check it before comparing)
 */
public Date getLastUpdateSuccess() {
return lastUpdateSuccess;
}
/**
 * Sets the last time the feed was successfully refreshed.
 *
 * @param lastUpdateSuccess
 *            the timestamp of the last successful refresh
 */
public void setLastUpdateSuccess(Date lastUpdateSuccess) {
this.lastUpdateSuccess = lastUpdateSuccess;
}
}