speed up feed refreshes by using HTTP caching headers (Last-Modified / ETag)

This commit is contained in:
Athou
2013-04-17 12:49:03 +02:00
parent 328aa8c019
commit ef3508f393
7 changed files with 150 additions and 25 deletions

View File

@@ -13,6 +13,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.commafeed.backend.HttpGetter;
import com.commafeed.backend.HttpGetter.HttpResult;
import com.commafeed.backend.HttpGetter.NotModifiedException;
import com.commafeed.backend.model.Feed;
import com.sun.syndication.io.FeedException;
@@ -26,29 +28,31 @@ public class FeedFetcher {
@Inject
HttpGetter getter;
public Feed fetch(String feedUrl, boolean extractFeedUrlFromHtml)
throws FeedException, ClientProtocolException, IOException {
public Feed fetch(String feedUrl, boolean extractFeedUrlFromHtml,
String lastModified, String eTag) throws FeedException,
ClientProtocolException, IOException, NotModifiedException {
log.debug("Fetching feed {}", feedUrl);
Feed feed = null;
byte[] content = getter.getBinary(feedUrl);
HttpResult result = getter.getBinary(feedUrl, lastModified, eTag);
if (extractFeedUrlFromHtml) {
String extractedUrl = extractFeedUrl(StringUtils
.newStringUtf8(content));
if (extractedUrl != null) {
content = getter.getBinary(extractedUrl);
.newStringUtf8(result.getContent()), feedUrl);
if (org.apache.commons.lang.StringUtils.isNotBlank(extractedUrl)) {
result = getter.getBinary(extractedUrl, lastModified, eTag);
feedUrl = extractedUrl;
}
}
feed = parser.parse(feedUrl, content);
feed = parser.parse(feedUrl, result.getContent());
feed.setLastModifiedHeader(result.getLastModifiedSince());
feed.setEtagHeader(result.geteTag());
return feed;
}
private String extractFeedUrl(String html) {
private String extractFeedUrl(String html, String baseUri) {
String foundUrl = null;
Document doc = Jsoup.parse(html);
Document doc = Jsoup.parse(html, baseUri);
String root = doc.children().get(0).tagName();
if ("html".equals(root)) {
Elements rss = doc.select("link[type=application/rss+xml]");

View File

@@ -21,6 +21,7 @@ import org.apache.commons.lang.time.DateUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.commafeed.backend.HttpGetter.NotModifiedException;
import com.commafeed.backend.dao.FeedDAO;
import com.commafeed.backend.model.Feed;
import com.commafeed.backend.services.FeedUpdateService;
@@ -82,7 +83,10 @@ public class FeedRefreshWorker {
Feed fetchedFeed = null;
try {
fetchedFeed = fetcher.fetch(feed.getUrl(), false);
fetchedFeed = fetcher.fetch(feed.getUrl(), false,
feed.getLastModifiedHeader(), feed.getEtagHeader());
} catch (NotModifiedException e) {
log.debug("Feed not modified (304) : " + feed.getUrl());
} catch (Exception e) {
message = "Unable to refresh feed " + feed.getUrl() + " : "
+ e.getMessage();
@@ -105,10 +109,10 @@ public class FeedRefreshWorker {
feed.setDisabledUntil(disabledUntil);
if (fetchedFeed != null) {
feed.setLink(fetchedFeed.getLink());
feed.setLastModifiedHeader(fetchedFeed.getLastModifiedHeader());
feed.setEtagHeader(fetchedFeed.getEtagHeader());
feedUpdateService.updateEntries(feed, fetchedFeed.getEntries());
if (feed.getLink() == null) {
feed.setLink(fetchedFeed.getLink());
}
}
feedDAO.update(feed);