diff --git a/src/main/java/com/commafeed/backend/HttpGetter.java b/src/main/java/com/commafeed/backend/HttpGetter.java index 9a9a6e10..d09bbf09 100644 --- a/src/main/java/com/commafeed/backend/HttpGetter.java +++ b/src/main/java/com/commafeed/backend/HttpGetter.java @@ -2,9 +2,14 @@ package com.commafeed.backend; import java.io.IOException; +import org.apache.commons.lang.StringUtils; +import org.apache.http.Header; import org.apache.http.HttpEntity; +import org.apache.http.HttpHeaders; import org.apache.http.HttpResponse; +import org.apache.http.HttpStatus; import org.apache.http.client.ClientProtocolException; +import org.apache.http.client.HttpResponseException; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.params.CookiePolicy; import org.apache.http.client.params.HttpClientParams; @@ -16,13 +21,28 @@ import org.apache.http.util.EntityUtils; public class HttpGetter { - public String get(String url) throws Exception { - return new String(getBinary(url), "UTF-8"); + public HttpResult getBinary(String url) throws ClientProtocolException, + IOException, NotModifiedException { + return getBinary(url, null, null); } - public byte[] getBinary(String url) throws ClientProtocolException, - IOException { - byte[] content = null; + /** + * + * @param url + * the url to retrieve + * @param lastModified + * header we got last time we queried that url, or null + * @param eTag + * header we got last time we queried that url, or null + * @return + * @throws ClientProtocolException + * @throws IOException + * @throws NotModifiedException + * if the url hasn't changed since we asked for it last time + */ + public HttpResult getBinary(String url, String lastModified, String eTag) + throws ClientProtocolException, IOException, NotModifiedException { + HttpResult result = null; DefaultHttpClient httpclient = new DefaultHttpClient(); HttpParams params = httpclient.getParams(); @@ -35,14 +55,85 @@ public class HttpGetter { HttpGet httpget = new 
HttpGet(url); httpget.addHeader("Pragma", "No-cache"); httpget.addHeader("Cache-Control", "no-cache"); - HttpResponse response = httpclient.execute(httpget); + + if (lastModified != null) { + httpget.addHeader(HttpHeaders.IF_MODIFIED_SINCE, lastModified); + } + if (eTag != null) { + httpget.addHeader(HttpHeaders.IF_NONE_MATCH, eTag); + } + + HttpResponse response = null; + try { + response = httpclient.execute(httpget); + if (response.getStatusLine().getStatusCode() == HttpStatus.SC_NOT_MODIFIED) { + throw new NotModifiedException(); + } + } catch (HttpResponseException e) { + if (e.getStatusCode() == HttpStatus.SC_NOT_MODIFIED) { + throw new NotModifiedException(); + } + } + Header lastModifiedHeader = response + .getFirstHeader(HttpHeaders.LAST_MODIFIED); + Header eTagHeader = response.getFirstHeader(HttpHeaders.ETAG); HttpEntity entity = response.getEntity(); + + String lastModifiedResponse = lastModifiedHeader == null ? null + : lastModifiedHeader.getValue(); + String eTagResponse = eTagHeader == null ? null : eTagHeader + .getValue(); + + if (lastModified != null + && StringUtils.equals(lastModified, lastModifiedResponse)) { + throw new NotModifiedException(); + } + + if (eTag != null && StringUtils.equals(eTag, eTagResponse)) { + throw new NotModifiedException(); + } + + byte[] content = null; if (entity != null) { content = EntityUtils.toByteArray(entity); } + result = new HttpResult(content, lastModifiedHeader == null ? null + : lastModifiedHeader.getValue(), eTagHeader == null ? 
null + : eTagHeader.getValue()); } finally { httpclient.getConnectionManager().shutdown(); } - return content; + return result; + } + + public static class HttpResult { + + private byte[] content; + private String lastModifiedSince; + private String eTag; + + public HttpResult(byte[] content, String lastModifiedSince, String eTag) { + this.content = content; + this.lastModifiedSince = lastModifiedSince; + this.eTag = eTag; + } + + public byte[] getContent() { + return content; + } + + public String getLastModifiedSince() { + return lastModifiedSince; + } + + public String geteTag() { + return eTag; + } + + } + + public static class NotModifiedException extends Exception { + private static final long serialVersionUID = 1L; + } } diff --git a/src/main/java/com/commafeed/backend/feeds/FeedFetcher.java b/src/main/java/com/commafeed/backend/feeds/FeedFetcher.java index f1f75268..ff053eb3 100644 --- a/src/main/java/com/commafeed/backend/feeds/FeedFetcher.java +++ b/src/main/java/com/commafeed/backend/feeds/FeedFetcher.java @@ -13,6 +13,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.commafeed.backend.HttpGetter; +import com.commafeed.backend.HttpGetter.HttpResult; +import com.commafeed.backend.HttpGetter.NotModifiedException; import com.commafeed.backend.model.Feed; import com.sun.syndication.io.FeedException; @@ -26,29 +28,31 @@ public class FeedFetcher { @Inject HttpGetter getter; - public Feed fetch(String feedUrl, boolean extractFeedUrlFromHtml) - throws FeedException, ClientProtocolException, IOException { + public Feed fetch(String feedUrl, boolean extractFeedUrlFromHtml, + String lastModified, String eTag) throws FeedException, + ClientProtocolException, IOException, NotModifiedException { log.debug("Fetching feed {}", feedUrl); Feed feed = null; - byte[] content = getter.getBinary(feedUrl); + HttpResult result = getter.getBinary(feedUrl, lastModified, eTag); if (extractFeedUrlFromHtml) { String extractedUrl = extractFeedUrl(StringUtils - 
.newStringUtf8(content)); - if (extractedUrl != null) { - content = getter.getBinary(extractedUrl); + .newStringUtf8(result.getContent()), feedUrl); + if (org.apache.commons.lang.StringUtils.isNotBlank(extractedUrl)) { + result = getter.getBinary(extractedUrl, lastModified, eTag); feedUrl = extractedUrl; } } - feed = parser.parse(feedUrl, content); - + feed = parser.parse(feedUrl, result.getContent()); + feed.setLastModifiedHeader(result.getLastModifiedSince()); + feed.setEtagHeader(result.geteTag()); return feed; } - private String extractFeedUrl(String html) { + private String extractFeedUrl(String html, String baseUri) { String foundUrl = null; - Document doc = Jsoup.parse(html); + Document doc = Jsoup.parse(html, baseUri); String root = doc.children().get(0).tagName(); if ("html".equals(root)) { Elements rss = doc.select("link[type=application/rss+xml]"); diff --git a/src/main/java/com/commafeed/backend/feeds/FeedRefreshWorker.java b/src/main/java/com/commafeed/backend/feeds/FeedRefreshWorker.java index ee583720..65a97e95 100644 --- a/src/main/java/com/commafeed/backend/feeds/FeedRefreshWorker.java +++ b/src/main/java/com/commafeed/backend/feeds/FeedRefreshWorker.java @@ -21,6 +21,7 @@ import org.apache.commons.lang.time.DateUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.commafeed.backend.HttpGetter.NotModifiedException; import com.commafeed.backend.dao.FeedDAO; import com.commafeed.backend.model.Feed; import com.commafeed.backend.services.FeedUpdateService; @@ -82,7 +83,10 @@ public class FeedRefreshWorker { Feed fetchedFeed = null; try { - fetchedFeed = fetcher.fetch(feed.getUrl(), false); + fetchedFeed = fetcher.fetch(feed.getUrl(), false, + feed.getLastModifiedHeader(), feed.getEtagHeader()); + } catch (NotModifiedException e) { + log.debug("Feed not modified (304) : " + feed.getUrl()); } catch (Exception e) { message = "Unable to refresh feed " + feed.getUrl() + " : " + e.getMessage(); @@ -105,10 +109,10 @@ public class 
FeedRefreshWorker { feed.setDisabledUntil(disabledUntil); if (fetchedFeed != null) { + feed.setLink(fetchedFeed.getLink()); + feed.setLastModifiedHeader(fetchedFeed.getLastModifiedHeader()); + feed.setEtagHeader(fetchedFeed.getEtagHeader()); feedUpdateService.updateEntries(feed, fetchedFeed.getEntries()); - if (feed.getLink() == null) { - feed.setLink(fetchedFeed.getLink()); - } } feedDAO.update(feed); diff --git a/src/main/java/com/commafeed/backend/model/Feed.java b/src/main/java/com/commafeed/backend/model/Feed.java index 51ce05bd..d111fc1f 100644 --- a/src/main/java/com/commafeed/backend/model/Feed.java +++ b/src/main/java/com/commafeed/backend/model/Feed.java @@ -53,6 +53,12 @@ public class Feed extends AbstractModel { @Index(name = "disableduntil_index") private Date disabledUntil; + @Column(length = 64) + private String lastModifiedHeader; + + @Column(length = 128) + private String etagHeader; + @ManyToMany(mappedBy = "feeds") private Set entries = Sets.newHashSet(); @@ -147,4 +153,20 @@ public class Feed extends AbstractModel { this.urlHash = urlHash; } + public String getLastModifiedHeader() { + return lastModifiedHeader; + } + + public void setLastModifiedHeader(String lastModifiedHeader) { + this.lastModifiedHeader = lastModifiedHeader; + } + + public String getEtagHeader() { + return etagHeader; + } + + public void setEtagHeader(String etagHeader) { + this.etagHeader = etagHeader; + } + } diff --git a/src/main/java/com/commafeed/frontend/pages/FaviconPage.java b/src/main/java/com/commafeed/frontend/pages/FaviconPage.java index 2d5abbbb..cfb3760e 100644 --- a/src/main/java/com/commafeed/frontend/pages/FaviconPage.java +++ b/src/main/java/com/commafeed/frontend/pages/FaviconPage.java @@ -20,6 +20,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.commafeed.backend.HttpGetter; +import com.commafeed.backend.HttpGetter.HttpResult; import com.commafeed.backend.StartupBean; import com.commafeed.backend.model.UserRole.Role; import 
com.commafeed.frontend.SecurityCheck; @@ -74,7 +75,10 @@ public class FaviconPage extends BasePage { String iconUrl = "http://g.etfv.co/" + URLEncoder.encode(url, "UTF-8") + "?defaulticon=none"; - img = getter.getBinary(iconUrl); + HttpResult result = getter.getBinary(iconUrl); + if (result != null) { + img = result.getContent(); + } } } catch (Exception e) { log.error(e.getMessage(), e); diff --git a/src/main/java/com/commafeed/frontend/rest/resources/AbstractREST.java b/src/main/java/com/commafeed/frontend/rest/resources/AbstractREST.java index cfc65ed5..17b496ff 100644 --- a/src/main/java/com/commafeed/frontend/rest/resources/AbstractREST.java +++ b/src/main/java/com/commafeed/frontend/rest/resources/AbstractREST.java @@ -3,7 +3,6 @@ package com.commafeed.frontend.rest.resources; import java.lang.reflect.Method; import javax.annotation.PostConstruct; -import javax.annotation.Resource; import javax.inject.Inject; import javax.interceptor.AroundInvoke; import javax.interceptor.InvocationContext; diff --git a/src/main/java/com/commafeed/frontend/rest/resources/SubscriptionsREST.java b/src/main/java/com/commafeed/frontend/rest/resources/SubscriptionsREST.java index bcbbe801..d9de9972 100644 --- a/src/main/java/com/commafeed/frontend/rest/resources/SubscriptionsREST.java +++ b/src/main/java/com/commafeed/frontend/rest/resources/SubscriptionsREST.java @@ -21,6 +21,7 @@ import org.apache.commons.fileupload.disk.DiskFileItemFactory; import org.apache.commons.fileupload.servlet.ServletFileUpload; import org.apache.commons.io.IOUtils; import org.apache.commons.lang.ObjectUtils; +import org.apache.commons.lang.StringUtils; import com.commafeed.backend.model.Feed; import com.commafeed.backend.model.FeedCategory; @@ -42,11 +43,11 @@ public class SubscriptionsREST extends AbstractREST { @ApiOperation(value = "Fetch a feed", notes = "Fetch a feed by its url", responseClass = "com.commafeed.backend.model.Feed") public Feed fetchFeed(@QueryParam("url") String url) { 
Preconditions.checkNotNull(url); - + url = StringUtils.trimToEmpty(url); url = prependHttp(url); Feed feed = null; try { - feed = feedFetcher.fetch(url, true); + feed = feedFetcher.fetch(url, true, null, null); } catch (Exception e) { throw new WebApplicationException(e, Response .status(Status.INTERNAL_SERVER_ERROR)