forked from Archives/Athou_commafeed
Check whether the feed has changed by using the publishedDate of the feed or of its first entry
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
package com.commafeed.backend.dao;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
import javax.annotation.PostConstruct;
|
||||
@@ -38,13 +39,13 @@ public abstract class GenericDAO<T extends AbstractModel> {
|
||||
em.persist(object);
|
||||
}
|
||||
|
||||
public void save(List<T> objects) {
|
||||
public void save(Collection<T> objects) {
|
||||
for (Object object : objects) {
|
||||
em.persist(object);
|
||||
}
|
||||
}
|
||||
|
||||
public void update(List<T> objects) {
|
||||
public void update(Collection<T> objects) {
|
||||
for (Object object : objects) {
|
||||
em.merge(object);
|
||||
}
|
||||
|
||||
@@ -36,16 +36,18 @@ public class FeedFetcher {
|
||||
|
||||
HttpResult result = getter.getBinary(feedUrl, lastModified, eTag);
|
||||
if (extractFeedUrlFromHtml) {
|
||||
String extractedUrl = extractFeedUrl(StringUtils
|
||||
.newStringUtf8(result.getContent()), feedUrl);
|
||||
String extractedUrl = extractFeedUrl(
|
||||
StringUtils.newStringUtf8(result.getContent()), feedUrl);
|
||||
if (org.apache.commons.lang.StringUtils.isNotBlank(extractedUrl)) {
|
||||
result = getter.getBinary(extractedUrl, lastModified, eTag);
|
||||
feedUrl = extractedUrl;
|
||||
}
|
||||
}
|
||||
feed = parser.parse(feedUrl, result.getContent());
|
||||
|
||||
feed.setLastModifiedHeader(result.getLastModifiedSince());
|
||||
feed.setEtagHeader(result.geteTag());
|
||||
feed.setFetchDuration(result.getDuration());
|
||||
return feed;
|
||||
}
|
||||
|
||||
|
||||
@@ -39,8 +39,9 @@ public class FeedParser {
|
||||
|
||||
try {
|
||||
InputSource source = new InputSource(new ByteArrayInputStream(xml));
|
||||
if (new String(ArrayUtils.subarray(xml, 0, 100)).split(SystemUtils.LINE_SEPARATOR)[0]
|
||||
.toUpperCase().contains("ISO-8859-1")) {
|
||||
if (new String(ArrayUtils.subarray(xml, 0, 100))
|
||||
.split(SystemUtils.LINE_SEPARATOR)[0].toUpperCase()
|
||||
.contains("ISO-8859-1")) {
|
||||
// they probably use word, we need to handle curly quotes and
|
||||
// other word special characters
|
||||
source.setEncoding("windows-1252");
|
||||
@@ -70,6 +71,13 @@ public class FeedParser {
|
||||
|
||||
feed.getEntries().add(entry);
|
||||
}
|
||||
Date publishedDate = rss.getPublishedDate();
|
||||
if (publishedDate == null && !feed.getEntries().isEmpty()) {
|
||||
FeedEntry first = feed.getEntries().iterator().next();
|
||||
publishedDate = first.getUpdated();
|
||||
}
|
||||
feed.setPublishedDate(publishedDate);
|
||||
|
||||
} catch (Exception e) {
|
||||
throw new FeedException(String.format(
|
||||
"Could not parse feed from %s : %s", feedUrl,
|
||||
|
||||
@@ -86,6 +86,13 @@ public class FeedRefreshWorker {
|
||||
try {
|
||||
fetchedFeed = fetcher.fetch(feed.getUrl(), false,
|
||||
feed.getLastModifiedHeader(), feed.getEtagHeader());
|
||||
if (fetchedFeed.getPublishedDate() != null
|
||||
&& feed.getLastUpdateSuccess() != null
|
||||
&& fetchedFeed.getPublishedDate().before(
|
||||
feed.getLastUpdateSuccess())) {
|
||||
throw new NotModifiedException();
|
||||
}
|
||||
feed.setLastUpdateSuccess(Calendar.getInstance().getTime());
|
||||
} catch (NotModifiedException e) {
|
||||
modified = false;
|
||||
log.debug("Feed not modified (304) : " + feed.getUrl());
|
||||
|
||||
@@ -31,19 +31,44 @@ public class Feed extends AbstractModel {
|
||||
@Index(name = "urlHash_index")
|
||||
private String urlHash;
|
||||
|
||||
/**
|
||||
* title of the feed, used only when fetching, not stored
|
||||
*/
|
||||
@Transient
|
||||
private String title;
|
||||
|
||||
/**
|
||||
* time it took to fetch the feed, used only when fetching, not stored
|
||||
*/
|
||||
@Transient
|
||||
private long fetchDuration;
|
||||
|
||||
/**
|
||||
* extracted published date from the feed, used only when fetching, not
|
||||
* stored
|
||||
*/
|
||||
@Transient
|
||||
private Date publishedDate;
|
||||
|
||||
/**
|
||||
* The url of the website, extracted from the feed
|
||||
*/
|
||||
@Column(length = 2048)
|
||||
private String link;
|
||||
|
||||
/**
|
||||
* Last time we tried to fetch the feed
|
||||
*/
|
||||
@Temporal(TemporalType.TIMESTAMP)
|
||||
@Index(name = "lastupdated_index")
|
||||
private Date lastUpdated;
|
||||
|
||||
/**
|
||||
* Last time we successfully refreshed the feed
|
||||
*/
|
||||
@Temporal(TemporalType.TIMESTAMP)
|
||||
private Date lastUpdateSuccess;
|
||||
|
||||
@Column(length = 1024)
|
||||
private String message;
|
||||
|
||||
@@ -169,4 +194,28 @@ public class Feed extends AbstractModel {
|
||||
this.etagHeader = etagHeader;
|
||||
}
|
||||
|
||||
/**
 * Returns the time it took to fetch the feed. The backing field is
 * {@code @Transient}: it is only meaningful while fetching and is not stored.
 *
 * @return the recorded fetch duration (unit not visible here — TODO confirm
 *         against the HTTP result it is copied from)
 */
public long getFetchDuration() {
|
||||
return fetchDuration;
|
||||
}
|
||||
|
||||
/**
 * Records the time it took to fetch the feed. The backing field is
 * {@code @Transient}, so the value is not stored.
 *
 * @param fetchDuration the fetch duration to record
 */
public void setFetchDuration(long fetchDuration) {
|
||||
this.fetchDuration = fetchDuration;
|
||||
}
|
||||
|
||||
/**
 * Returns the published date extracted from the feed. The backing field is
 * {@code @Transient}: it is only used while fetching and is not stored.
 *
 * @return the extracted published date, or {@code null} if none was set
 */
public Date getPublishedDate() {
|
||||
return publishedDate;
|
||||
}
|
||||
|
||||
/**
 * Sets the published date extracted from the feed. The backing field is
 * {@code @Transient}, so the value is not stored.
 *
 * @param publishedDate the extracted published date; may be {@code null}
 */
public void setPublishedDate(Date publishedDate) {
|
||||
this.publishedDate = publishedDate;
|
||||
}
|
||||
|
||||
/**
 * Returns the last time the feed was successfully refreshed.
 *
 * @return the timestamp of the last successful refresh, or {@code null} if
 *         none has been recorded
 */
public Date getLastUpdateSuccess() {
|
||||
return lastUpdateSuccess;
|
||||
}
|
||||
|
||||
/**
 * Sets the last time the feed was successfully refreshed.
 *
 * @param lastUpdateSuccess the timestamp of the last successful refresh
 */
public void setLastUpdateSuccess(Date lastUpdateSuccess) {
|
||||
this.lastUpdateSuccess = lastUpdateSuccess;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user