forked from Archives/Athou_commafeed
Check whether the feed has changed by using the feed's publishedDate, falling back to the updated date of its first entry
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
package com.commafeed.backend.dao;
|
package com.commafeed.backend.dao;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import javax.annotation.PostConstruct;
|
import javax.annotation.PostConstruct;
|
||||||
@@ -38,13 +39,13 @@ public abstract class GenericDAO<T extends AbstractModel> {
|
|||||||
em.persist(object);
|
em.persist(object);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void save(List<T> objects) {
|
public void save(Collection<T> objects) {
|
||||||
for (Object object : objects) {
|
for (Object object : objects) {
|
||||||
em.persist(object);
|
em.persist(object);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void update(List<T> objects) {
|
public void update(Collection<T> objects) {
|
||||||
for (Object object : objects) {
|
for (Object object : objects) {
|
||||||
em.merge(object);
|
em.merge(object);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -36,16 +36,18 @@ public class FeedFetcher {
|
|||||||
|
|
||||||
HttpResult result = getter.getBinary(feedUrl, lastModified, eTag);
|
HttpResult result = getter.getBinary(feedUrl, lastModified, eTag);
|
||||||
if (extractFeedUrlFromHtml) {
|
if (extractFeedUrlFromHtml) {
|
||||||
String extractedUrl = extractFeedUrl(StringUtils
|
String extractedUrl = extractFeedUrl(
|
||||||
.newStringUtf8(result.getContent()), feedUrl);
|
StringUtils.newStringUtf8(result.getContent()), feedUrl);
|
||||||
if (org.apache.commons.lang.StringUtils.isNotBlank(extractedUrl)) {
|
if (org.apache.commons.lang.StringUtils.isNotBlank(extractedUrl)) {
|
||||||
result = getter.getBinary(extractedUrl, lastModified, eTag);
|
result = getter.getBinary(extractedUrl, lastModified, eTag);
|
||||||
feedUrl = extractedUrl;
|
feedUrl = extractedUrl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
feed = parser.parse(feedUrl, result.getContent());
|
feed = parser.parse(feedUrl, result.getContent());
|
||||||
|
|
||||||
feed.setLastModifiedHeader(result.getLastModifiedSince());
|
feed.setLastModifiedHeader(result.getLastModifiedSince());
|
||||||
feed.setEtagHeader(result.geteTag());
|
feed.setEtagHeader(result.geteTag());
|
||||||
|
feed.setFetchDuration(result.getDuration());
|
||||||
return feed;
|
return feed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -39,8 +39,9 @@ public class FeedParser {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
InputSource source = new InputSource(new ByteArrayInputStream(xml));
|
InputSource source = new InputSource(new ByteArrayInputStream(xml));
|
||||||
if (new String(ArrayUtils.subarray(xml, 0, 100)).split(SystemUtils.LINE_SEPARATOR)[0]
|
if (new String(ArrayUtils.subarray(xml, 0, 100))
|
||||||
.toUpperCase().contains("ISO-8859-1")) {
|
.split(SystemUtils.LINE_SEPARATOR)[0].toUpperCase()
|
||||||
|
.contains("ISO-8859-1")) {
|
||||||
// they probably use word, we need to handle curly quotes and
|
// they probably use word, we need to handle curly quotes and
|
||||||
// other word special characters
|
// other word special characters
|
||||||
source.setEncoding("windows-1252");
|
source.setEncoding("windows-1252");
|
||||||
@@ -70,6 +71,13 @@ public class FeedParser {
|
|||||||
|
|
||||||
feed.getEntries().add(entry);
|
feed.getEntries().add(entry);
|
||||||
}
|
}
|
||||||
|
Date publishedDate = rss.getPublishedDate();
|
||||||
|
if (publishedDate == null && !feed.getEntries().isEmpty()) {
|
||||||
|
FeedEntry first = feed.getEntries().iterator().next();
|
||||||
|
publishedDate = first.getUpdated();
|
||||||
|
}
|
||||||
|
feed.setPublishedDate(publishedDate);
|
||||||
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new FeedException(String.format(
|
throw new FeedException(String.format(
|
||||||
"Could not parse feed from %s : %s", feedUrl,
|
"Could not parse feed from %s : %s", feedUrl,
|
||||||
|
|||||||
@@ -86,6 +86,13 @@ public class FeedRefreshWorker {
|
|||||||
try {
|
try {
|
||||||
fetchedFeed = fetcher.fetch(feed.getUrl(), false,
|
fetchedFeed = fetcher.fetch(feed.getUrl(), false,
|
||||||
feed.getLastModifiedHeader(), feed.getEtagHeader());
|
feed.getLastModifiedHeader(), feed.getEtagHeader());
|
||||||
|
if (fetchedFeed.getPublishedDate() != null
|
||||||
|
&& feed.getLastUpdateSuccess() != null
|
||||||
|
&& fetchedFeed.getPublishedDate().before(
|
||||||
|
feed.getLastUpdateSuccess())) {
|
||||||
|
throw new NotModifiedException();
|
||||||
|
}
|
||||||
|
feed.setLastUpdateSuccess(Calendar.getInstance().getTime());
|
||||||
} catch (NotModifiedException e) {
|
} catch (NotModifiedException e) {
|
||||||
modified = false;
|
modified = false;
|
||||||
log.debug("Feed not modified (304) : " + feed.getUrl());
|
log.debug("Feed not modified (304) : " + feed.getUrl());
|
||||||
|
|||||||
@@ -31,19 +31,44 @@ public class Feed extends AbstractModel {
|
|||||||
@Index(name = "urlHash_index")
|
@Index(name = "urlHash_index")
|
||||||
private String urlHash;
|
private String urlHash;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* title of the feed, used only when fetching, not stored
|
||||||
|
*/
|
||||||
@Transient
|
@Transient
|
||||||
private String title;
|
private String title;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* time it took to fetch the feed, used only when fetching, not stored
|
||||||
|
*/
|
||||||
|
@Transient
|
||||||
|
private long fetchDuration;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* extracted published date from the feed, used only when fetching, not
|
||||||
|
* stored
|
||||||
|
*/
|
||||||
|
@Transient
|
||||||
|
private Date publishedDate;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The url of the website, extracted from the feed
|
* The url of the website, extracted from the feed
|
||||||
*/
|
*/
|
||||||
@Column(length = 2048)
|
@Column(length = 2048)
|
||||||
private String link;
|
private String link;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Last time we tried to fetch the feed
|
||||||
|
*/
|
||||||
@Temporal(TemporalType.TIMESTAMP)
|
@Temporal(TemporalType.TIMESTAMP)
|
||||||
@Index(name = "lastupdated_index")
|
@Index(name = "lastupdated_index")
|
||||||
private Date lastUpdated;
|
private Date lastUpdated;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Last time we successfully refreshed the feed
|
||||||
|
*/
|
||||||
|
@Temporal(TemporalType.TIMESTAMP)
|
||||||
|
private Date lastUpdateSuccess;
|
||||||
|
|
||||||
@Column(length = 1024)
|
@Column(length = 1024)
|
||||||
private String message;
|
private String message;
|
||||||
|
|
||||||
@@ -169,4 +194,28 @@ public class Feed extends AbstractModel {
|
|||||||
this.etagHeader = etagHeader;
|
this.etagHeader = etagHeader;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public long getFetchDuration() {
|
||||||
|
return fetchDuration;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setFetchDuration(long fetchDuration) {
|
||||||
|
this.fetchDuration = fetchDuration;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Date getPublishedDate() {
|
||||||
|
return publishedDate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPublishedDate(Date publishedDate) {
|
||||||
|
this.publishedDate = publishedDate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Date getLastUpdateSuccess() {
|
||||||
|
return lastUpdateSuccess;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLastUpdateSuccess(Date lastUpdateSuccess) {
|
||||||
|
this.lastUpdateSuccess = lastUpdateSuccess;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user