From d855455b543c6ae804fa6d6577308a210d8c4b2a Mon Sep 17 00:00:00 2001 From: Athou Date: Sun, 9 Jun 2013 16:22:38 +0200 Subject: [PATCH] store feed content hash --- .../commafeed/backend/feeds/FeedFetcher.java | 24 +++++++++++++------ .../backend/feeds/FeedRefreshWorker.java | 7 +++--- .../com/commafeed/backend/model/Feed.java | 11 +++++++++ .../frontend/rest/resources/FeedREST.java | 3 ++- 4 files changed, 34 insertions(+), 11 deletions(-) diff --git a/src/main/java/com/commafeed/backend/feeds/FeedFetcher.java b/src/main/java/com/commafeed/backend/feeds/FeedFetcher.java index 77050058..9030d5f1 100644 --- a/src/main/java/com/commafeed/backend/feeds/FeedFetcher.java +++ b/src/main/java/com/commafeed/backend/feeds/FeedFetcher.java @@ -6,6 +6,7 @@ import java.util.Date; import javax.inject.Inject; import org.apache.commons.codec.binary.StringUtils; +import org.apache.commons.codec.digest.DigestUtils; import org.apache.http.client.ClientProtocolException; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; @@ -30,9 +31,9 @@ public class FeedFetcher { HttpGetter getter; public FetchedFeed fetch(String feedUrl, boolean extractFeedUrlFromHtml, - String lastModified, String eTag, Date lastPublishedDate) - throws FeedException, ClientProtocolException, IOException, - NotModifiedException { + String lastModified, String eTag, Date lastPublishedDate, + String lastContentHash) throws FeedException, + ClientProtocolException, IOException, NotModifiedException { log.debug("Fetching feed {}", feedUrl); FetchedFeed fetchedFeed = null; @@ -45,24 +46,33 @@ public class FeedFetcher { feedUrl = extractedUrl; } } - if (result.getContent() == null) { + byte[] content = result.getContent(); + + if (content == null) { throw new IOException("Feed content is empty."); } - fetchedFeed = parser.parse(feedUrl, result.getContent()); + String hash = DigestUtils.sha1Hex(content); + if (lastContentHash != null && hash != null + && lastContentHash.equals(hash)) { + log.debug("content hash not modified: {}", feedUrl); + throw new NotModifiedException(); + } + + fetchedFeed = parser.parse(feedUrl, content); if (lastPublishedDate != null && fetchedFeed.getFeed().getLastPublishedDate() != null && lastPublishedDate.getTime() == fetchedFeed.getFeed() .getLastPublishedDate().getTime()) { - log.debug("publishedDate not modified: {}", fetchedFeed.getFeed() - .getUrl()); + log.debug("publishedDate not modified: {}", feedUrl); throw new NotModifiedException(); } Feed feed = fetchedFeed.getFeed(); feed.setLastModifiedHeader(result.getLastModifiedSince()); feed.setEtagHeader(FeedUtils.truncate(result.geteTag(), 255)); + feed.setLastContentHash(hash); fetchedFeed.setFetchDuration(result.getDuration()); return fetchedFeed; } diff --git a/src/main/java/com/commafeed/backend/feeds/FeedRefreshWorker.java b/src/main/java/com/commafeed/backend/feeds/FeedRefreshWorker.java index 14c2f73f..d40f2f0a 100644 --- a/src/main/java/com/commafeed/backend/feeds/FeedRefreshWorker.java +++ b/src/main/java/com/commafeed/backend/feeds/FeedRefreshWorker.java @@ -83,7 +83,7 @@ public class FeedRefreshWorker { try { FetchedFeed fetchedFeed = fetcher.fetch(feed.getUrl(), false, feed.getLastModifiedHeader(), feed.getEtagHeader(), - feed.getLastPublishedDate()); + feed.getLastPublishedDate(), feed.getLastContentHash()); // stops here if NotModifiedException or any other exception is // thrown List entries = fetchedFeed.getEntries(); @@ -99,6 +99,7 @@ public class FeedRefreshWorker { feed.setLastModifiedHeader(fetchedFeed.getFeed() .getLastModifiedHeader()); feed.setEtagHeader(fetchedFeed.getFeed().getEtagHeader()); + feed.setLastContentHash(fetchedFeed.getFeed().getLastContentHash()); feed.setLastPublishedDate(fetchedFeed.getFeed() .getLastPublishedDate()); @@ -144,9 +145,9 @@ public class FeedRefreshWorker { String message = "Unable to refresh feed " + feed.getUrl() + " : " + e.getMessage(); if (e instanceof FeedException) { - log.debug(e.getClass().getName() + " " + message); + log.debug(e.getClass().getName() + " " + message, e); } else { - log.debug(e.getClass().getName() + " " + message); + log.debug(e.getClass().getName() + " " + message, e); } feed.setErrorCount(feed.getErrorCount() + 1); diff --git a/src/main/java/com/commafeed/backend/model/Feed.java b/src/main/java/com/commafeed/backend/model/Feed.java index 633c8717..aa54e332 100644 --- a/src/main/java/com/commafeed/backend/model/Feed.java +++ b/src/main/java/com/commafeed/backend/model/Feed.java @@ -72,6 +72,9 @@ public class Feed extends AbstractModel { @Column(length = 255) private String etagHeader; + @Column(length = 40) + private String lastContentHash; + @ManyToMany(mappedBy = "feeds") private Set entries = Sets.newHashSet(); @@ -224,4 +227,12 @@ public class Feed extends AbstractModel { this.lastPublishedDate = lastPublishedDate; } + public String getLastContentHash() { + return lastContentHash; + } + + public void setLastContentHash(String lastContentHash) { + this.lastContentHash = lastContentHash; + } + } diff --git a/src/main/java/com/commafeed/frontend/rest/resources/FeedREST.java b/src/main/java/com/commafeed/frontend/rest/resources/FeedREST.java index 81d83d01..6066300e 100644 --- a/src/main/java/com/commafeed/frontend/rest/resources/FeedREST.java +++ b/src/main/java/com/commafeed/frontend/rest/resources/FeedREST.java @@ -163,7 +163,8 @@ public class FeedREST extends AbstractResourceREST { url = StringUtils.trimToEmpty(url); url = prependHttp(url); try { - FetchedFeed feed = feedFetcher.fetch(url, true, null, null, null); + FetchedFeed feed = feedFetcher.fetch(url, true, null, null, null, + null); info = new FeedInfo(); info.setUrl(feed.getFeed().getUrl()); info.setTitle(feed.getTitle());