mirror of
https://github.com/Athou/commafeed.git
synced 2026-03-21 21:37:29 +00:00
store feed content hash
This commit is contained in:
@@ -6,6 +6,7 @@ import java.util.Date;
|
|||||||
import javax.inject.Inject;
|
import javax.inject.Inject;
|
||||||
|
|
||||||
import org.apache.commons.codec.binary.StringUtils;
|
import org.apache.commons.codec.binary.StringUtils;
|
||||||
|
import org.apache.commons.codec.digest.DigestUtils;
|
||||||
import org.apache.http.client.ClientProtocolException;
|
import org.apache.http.client.ClientProtocolException;
|
||||||
import org.jsoup.Jsoup;
|
import org.jsoup.Jsoup;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
@@ -30,9 +31,9 @@ public class FeedFetcher {
|
|||||||
HttpGetter getter;
|
HttpGetter getter;
|
||||||
|
|
||||||
public FetchedFeed fetch(String feedUrl, boolean extractFeedUrlFromHtml,
|
public FetchedFeed fetch(String feedUrl, boolean extractFeedUrlFromHtml,
|
||||||
String lastModified, String eTag, Date lastPublishedDate)
|
String lastModified, String eTag, Date lastPublishedDate,
|
||||||
throws FeedException, ClientProtocolException, IOException,
|
String lastContentHash) throws FeedException,
|
||||||
NotModifiedException {
|
ClientProtocolException, IOException, NotModifiedException {
|
||||||
log.debug("Fetching feed {}", feedUrl);
|
log.debug("Fetching feed {}", feedUrl);
|
||||||
FetchedFeed fetchedFeed = null;
|
FetchedFeed fetchedFeed = null;
|
||||||
|
|
||||||
@@ -45,24 +46,33 @@ public class FeedFetcher {
|
|||||||
feedUrl = extractedUrl;
|
feedUrl = extractedUrl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (result.getContent() == null) {
|
byte[] content = result.getContent();
|
||||||
|
|
||||||
|
if (content == null) {
|
||||||
throw new IOException("Feed content is empty.");
|
throw new IOException("Feed content is empty.");
|
||||||
}
|
}
|
||||||
|
|
||||||
fetchedFeed = parser.parse(feedUrl, result.getContent());
|
String hash = DigestUtils.sha1Hex(content);
|
||||||
|
if (lastContentHash != null && hash != null
|
||||||
|
&& lastContentHash.equals(hash)) {
|
||||||
|
log.debug("content hash not modified: {}", feedUrl);
|
||||||
|
throw new NotModifiedException();
|
||||||
|
}
|
||||||
|
|
||||||
|
fetchedFeed = parser.parse(feedUrl, content);
|
||||||
|
|
||||||
if (lastPublishedDate != null
|
if (lastPublishedDate != null
|
||||||
&& fetchedFeed.getFeed().getLastPublishedDate() != null
|
&& fetchedFeed.getFeed().getLastPublishedDate() != null
|
||||||
&& lastPublishedDate.getTime() == fetchedFeed.getFeed()
|
&& lastPublishedDate.getTime() == fetchedFeed.getFeed()
|
||||||
.getLastPublishedDate().getTime()) {
|
.getLastPublishedDate().getTime()) {
|
||||||
log.debug("publishedDate not modified: {}", fetchedFeed.getFeed()
|
log.debug("publishedDate not modified: {}", feedUrl);
|
||||||
.getUrl());
|
|
||||||
throw new NotModifiedException();
|
throw new NotModifiedException();
|
||||||
}
|
}
|
||||||
|
|
||||||
Feed feed = fetchedFeed.getFeed();
|
Feed feed = fetchedFeed.getFeed();
|
||||||
feed.setLastModifiedHeader(result.getLastModifiedSince());
|
feed.setLastModifiedHeader(result.getLastModifiedSince());
|
||||||
feed.setEtagHeader(FeedUtils.truncate(result.geteTag(), 255));
|
feed.setEtagHeader(FeedUtils.truncate(result.geteTag(), 255));
|
||||||
|
feed.setLastContentHash(hash);
|
||||||
fetchedFeed.setFetchDuration(result.getDuration());
|
fetchedFeed.setFetchDuration(result.getDuration());
|
||||||
return fetchedFeed;
|
return fetchedFeed;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -83,7 +83,7 @@ public class FeedRefreshWorker {
|
|||||||
try {
|
try {
|
||||||
FetchedFeed fetchedFeed = fetcher.fetch(feed.getUrl(), false,
|
FetchedFeed fetchedFeed = fetcher.fetch(feed.getUrl(), false,
|
||||||
feed.getLastModifiedHeader(), feed.getEtagHeader(),
|
feed.getLastModifiedHeader(), feed.getEtagHeader(),
|
||||||
feed.getLastPublishedDate());
|
feed.getLastPublishedDate(), feed.getLastContentHash());
|
||||||
// stops here if NotModifiedException or any other exception is
|
// stops here if NotModifiedException or any other exception is
|
||||||
// thrown
|
// thrown
|
||||||
List<FeedEntry> entries = fetchedFeed.getEntries();
|
List<FeedEntry> entries = fetchedFeed.getEntries();
|
||||||
@@ -99,6 +99,7 @@ public class FeedRefreshWorker {
|
|||||||
feed.setLastModifiedHeader(fetchedFeed.getFeed()
|
feed.setLastModifiedHeader(fetchedFeed.getFeed()
|
||||||
.getLastModifiedHeader());
|
.getLastModifiedHeader());
|
||||||
feed.setEtagHeader(fetchedFeed.getFeed().getEtagHeader());
|
feed.setEtagHeader(fetchedFeed.getFeed().getEtagHeader());
|
||||||
|
feed.setLastContentHash(fetchedFeed.getFeed().getLastContentHash());
|
||||||
feed.setLastPublishedDate(fetchedFeed.getFeed()
|
feed.setLastPublishedDate(fetchedFeed.getFeed()
|
||||||
.getLastPublishedDate());
|
.getLastPublishedDate());
|
||||||
|
|
||||||
@@ -144,9 +145,9 @@ public class FeedRefreshWorker {
|
|||||||
String message = "Unable to refresh feed " + feed.getUrl() + " : "
|
String message = "Unable to refresh feed " + feed.getUrl() + " : "
|
||||||
+ e.getMessage();
|
+ e.getMessage();
|
||||||
if (e instanceof FeedException) {
|
if (e instanceof FeedException) {
|
||||||
log.debug(e.getClass().getName() + " " + message);
|
log.debug(e.getClass().getName() + " " + message, e);
|
||||||
} else {
|
} else {
|
||||||
log.debug(e.getClass().getName() + " " + message);
|
log.debug(e.getClass().getName() + " " + message, e);
|
||||||
}
|
}
|
||||||
|
|
||||||
feed.setErrorCount(feed.getErrorCount() + 1);
|
feed.setErrorCount(feed.getErrorCount() + 1);
|
||||||
|
|||||||
@@ -72,6 +72,9 @@ public class Feed extends AbstractModel {
|
|||||||
@Column(length = 255)
|
@Column(length = 255)
|
||||||
private String etagHeader;
|
private String etagHeader;
|
||||||
|
|
||||||
|
@Column(length = 40)
|
||||||
|
private String lastContentHash;
|
||||||
|
|
||||||
@ManyToMany(mappedBy = "feeds")
|
@ManyToMany(mappedBy = "feeds")
|
||||||
private Set<FeedEntry> entries = Sets.newHashSet();
|
private Set<FeedEntry> entries = Sets.newHashSet();
|
||||||
|
|
||||||
@@ -224,4 +227,12 @@ public class Feed extends AbstractModel {
|
|||||||
this.lastPublishedDate = lastPublishedDate;
|
this.lastPublishedDate = lastPublishedDate;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getLastContentHash() {
|
||||||
|
return lastContentHash;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLastContentHash(String lastContentHash) {
|
||||||
|
this.lastContentHash = lastContentHash;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -163,7 +163,8 @@ public class FeedREST extends AbstractResourceREST {
|
|||||||
url = StringUtils.trimToEmpty(url);
|
url = StringUtils.trimToEmpty(url);
|
||||||
url = prependHttp(url);
|
url = prependHttp(url);
|
||||||
try {
|
try {
|
||||||
FetchedFeed feed = feedFetcher.fetch(url, true, null, null, null);
|
FetchedFeed feed = feedFetcher.fetch(url, true, null, null, null,
|
||||||
|
null);
|
||||||
info = new FeedInfo();
|
info = new FeedInfo();
|
||||||
info.setUrl(feed.getFeed().getUrl());
|
info.setUrl(feed.getFeed().getUrl());
|
||||||
info.setTitle(feed.getTitle());
|
info.setTitle(feed.getTitle());
|
||||||
|
|||||||
Reference in New Issue
Block a user