store feed content hash

This commit is contained in:
Athou
2013-06-09 16:22:38 +02:00
parent d212cf66c1
commit d855455b54
4 changed files with 34 additions and 11 deletions

View File

@@ -6,6 +6,7 @@ import java.util.Date;
import javax.inject.Inject;
import org.apache.commons.codec.binary.StringUtils;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.http.client.ClientProtocolException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
@@ -30,9 +31,9 @@ public class FeedFetcher {
HttpGetter getter;
public FetchedFeed fetch(String feedUrl, boolean extractFeedUrlFromHtml,
String lastModified, String eTag, Date lastPublishedDate)
throws FeedException, ClientProtocolException, IOException,
NotModifiedException {
String lastModified, String eTag, Date lastPublishedDate,
String lastContentHash) throws FeedException,
ClientProtocolException, IOException, NotModifiedException {
log.debug("Fetching feed {}", feedUrl);
FetchedFeed fetchedFeed = null;
@@ -45,24 +46,33 @@ public class FeedFetcher {
feedUrl = extractedUrl;
}
}
if (result.getContent() == null) {
byte[] content = result.getContent();
if (content == null) {
throw new IOException("Feed content is empty.");
}
fetchedFeed = parser.parse(feedUrl, result.getContent());
String hash = DigestUtils.sha1Hex(content);
if (lastContentHash != null && hash != null
&& lastContentHash.equals(hash)) {
log.debug("content hash not modified: {}", feedUrl);
throw new NotModifiedException();
}
fetchedFeed = parser.parse(feedUrl, content);
if (lastPublishedDate != null
&& fetchedFeed.getFeed().getLastPublishedDate() != null
&& lastPublishedDate.getTime() == fetchedFeed.getFeed()
.getLastPublishedDate().getTime()) {
log.debug("publishedDate not modified: {}", fetchedFeed.getFeed()
.getUrl());
log.debug("publishedDate not modified: {}", feedUrl);
throw new NotModifiedException();
}
Feed feed = fetchedFeed.getFeed();
feed.setLastModifiedHeader(result.getLastModifiedSince());
feed.setEtagHeader(FeedUtils.truncate(result.geteTag(), 255));
feed.setLastContentHash(hash);
fetchedFeed.setFetchDuration(result.getDuration());
return fetchedFeed;
}

View File

@@ -83,7 +83,7 @@ public class FeedRefreshWorker {
try {
FetchedFeed fetchedFeed = fetcher.fetch(feed.getUrl(), false,
feed.getLastModifiedHeader(), feed.getEtagHeader(),
feed.getLastPublishedDate());
feed.getLastPublishedDate(), feed.getLastContentHash());
// stops here if NotModifiedException or any other exception is
// thrown
List<FeedEntry> entries = fetchedFeed.getEntries();
@@ -99,6 +99,7 @@ public class FeedRefreshWorker {
feed.setLastModifiedHeader(fetchedFeed.getFeed()
.getLastModifiedHeader());
feed.setEtagHeader(fetchedFeed.getFeed().getEtagHeader());
feed.setLastContentHash(fetchedFeed.getFeed().getLastContentHash());
feed.setLastPublishedDate(fetchedFeed.getFeed()
.getLastPublishedDate());
@@ -144,9 +145,9 @@ public class FeedRefreshWorker {
String message = "Unable to refresh feed " + feed.getUrl() + " : "
+ e.getMessage();
if (e instanceof FeedException) {
log.debug(e.getClass().getName() + " " + message);
log.debug(e.getClass().getName() + " " + message, e);
} else {
log.debug(e.getClass().getName() + " " + message);
log.debug(e.getClass().getName() + " " + message, e);
}
feed.setErrorCount(feed.getErrorCount() + 1);

View File

@@ -72,6 +72,9 @@ public class Feed extends AbstractModel {
@Column(length = 255)
private String etagHeader;
@Column(length = 40)
private String lastContentHash;
@ManyToMany(mappedBy = "feeds")
private Set<FeedEntry> entries = Sets.newHashSet();
@@ -224,4 +227,12 @@ public class Feed extends AbstractModel {
this.lastPublishedDate = lastPublishedDate;
}
/**
 * Returns the content hash stored on this feed.
 *
 * FeedFetcher.fetch receives this value as its lastContentHash argument
 * and compares it with the SHA-1 hex digest of the freshly downloaded
 * content to decide whether the feed is unchanged.
 *
 * @return the stored hash; callers null-check it, so it is presumably
 *         null until a hash has been recorded
 */
public String getLastContentHash() {
return lastContentHash;
}
/**
 * Stores the content hash for this feed.
 *
 * The backing column is declared with length 40, which matches the
 * SHA-1 hex digest that FeedFetcher.fetch computes for the content.
 *
 * @param lastContentHash the hash to store; no validation or truncation
 *                        is performed here
 */
public void setLastContentHash(String lastContentHash) {
this.lastContentHash = lastContentHash;
}
}

View File

@@ -163,7 +163,8 @@ public class FeedREST extends AbstractResourceREST {
url = StringUtils.trimToEmpty(url);
url = prependHttp(url);
try {
FetchedFeed feed = feedFetcher.fetch(url, true, null, null, null);
FetchedFeed feed = feedFetcher.fetch(url, true, null, null, null,
null);
info = new FeedInfo();
info.setUrl(feed.getFeed().getUrl());
info.setTitle(feed.getTitle());