mirror of
https://github.com/Athou/commafeed.git
synced 2026-03-21 21:37:29 +00:00
speed feeds refresh up by using http headers
This commit is contained in:
@@ -2,9 +2,14 @@ package com.commafeed.backend;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
import org.apache.http.Header;
|
||||||
import org.apache.http.HttpEntity;
|
import org.apache.http.HttpEntity;
|
||||||
|
import org.apache.http.HttpHeaders;
|
||||||
import org.apache.http.HttpResponse;
|
import org.apache.http.HttpResponse;
|
||||||
|
import org.apache.http.HttpStatus;
|
||||||
import org.apache.http.client.ClientProtocolException;
|
import org.apache.http.client.ClientProtocolException;
|
||||||
|
import org.apache.http.client.HttpResponseException;
|
||||||
import org.apache.http.client.methods.HttpGet;
|
import org.apache.http.client.methods.HttpGet;
|
||||||
import org.apache.http.client.params.CookiePolicy;
|
import org.apache.http.client.params.CookiePolicy;
|
||||||
import org.apache.http.client.params.HttpClientParams;
|
import org.apache.http.client.params.HttpClientParams;
|
||||||
@@ -16,13 +21,28 @@ import org.apache.http.util.EntityUtils;
|
|||||||
|
|
||||||
public class HttpGetter {
|
public class HttpGetter {
|
||||||
|
|
||||||
public String get(String url) throws Exception {
|
public HttpResult getBinary(String url) throws ClientProtocolException,
|
||||||
return new String(getBinary(url), "UTF-8");
|
IOException, NotModifiedException {
|
||||||
|
return getBinary(url, null, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
public byte[] getBinary(String url) throws ClientProtocolException,
|
/**
|
||||||
IOException {
|
*
|
||||||
byte[] content = null;
|
* @param url
|
||||||
|
* the url to retrieve
|
||||||
|
* @param lastModified
|
||||||
|
* header we got last time we queried that url, or null
|
||||||
|
* @param eTag
|
||||||
|
* header we got last time we queried that url, or null
|
||||||
|
* @return
|
||||||
|
* @throws ClientProtocolException
|
||||||
|
* @throws IOException
|
||||||
|
* @throws NotModifiedException
|
||||||
|
* if the url hasn't changed since we asked for it last time
|
||||||
|
*/
|
||||||
|
public HttpResult getBinary(String url, String lastModified, String eTag)
|
||||||
|
throws ClientProtocolException, IOException, NotModifiedException {
|
||||||
|
HttpResult result = null;
|
||||||
|
|
||||||
DefaultHttpClient httpclient = new DefaultHttpClient();
|
DefaultHttpClient httpclient = new DefaultHttpClient();
|
||||||
HttpParams params = httpclient.getParams();
|
HttpParams params = httpclient.getParams();
|
||||||
@@ -35,14 +55,85 @@ public class HttpGetter {
|
|||||||
HttpGet httpget = new HttpGet(url);
|
HttpGet httpget = new HttpGet(url);
|
||||||
httpget.addHeader("Pragma", "No-cache");
|
httpget.addHeader("Pragma", "No-cache");
|
||||||
httpget.addHeader("Cache-Control", "no-cache");
|
httpget.addHeader("Cache-Control", "no-cache");
|
||||||
HttpResponse response = httpclient.execute(httpget);
|
|
||||||
|
if (lastModified != null) {
|
||||||
|
httpget.addHeader(HttpHeaders.IF_MODIFIED_SINCE, lastModified);
|
||||||
|
}
|
||||||
|
if (eTag != null) {
|
||||||
|
httpget.addHeader(HttpHeaders.IF_NONE_MATCH, eTag);
|
||||||
|
}
|
||||||
|
|
||||||
|
HttpResponse response = null;
|
||||||
|
try {
|
||||||
|
response = httpclient.execute(httpget);
|
||||||
|
if (response.getStatusLine().getStatusCode() == HttpStatus.SC_NOT_MODIFIED) {
|
||||||
|
throw new NotModifiedException();
|
||||||
|
}
|
||||||
|
} catch (HttpResponseException e) {
|
||||||
|
if (e.getStatusCode() == HttpStatus.SC_NOT_MODIFIED) {
|
||||||
|
throw new NotModifiedException();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Header lastModifiedHeader = response
|
||||||
|
.getFirstHeader(HttpHeaders.LAST_MODIFIED);
|
||||||
|
Header eTagHeader = response.getFirstHeader(HttpHeaders.ETAG);
|
||||||
HttpEntity entity = response.getEntity();
|
HttpEntity entity = response.getEntity();
|
||||||
|
|
||||||
|
String lastModifiedResponse = lastModifiedHeader == null ? null
|
||||||
|
: lastModifiedHeader.getValue();
|
||||||
|
String eTagResponse = eTagHeader == null ? null : eTagHeader
|
||||||
|
.getValue();
|
||||||
|
|
||||||
|
if (lastModified != null
|
||||||
|
&& StringUtils.equals(lastModified, lastModifiedResponse)) {
|
||||||
|
throw new NotModifiedException();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (eTag != null && StringUtils.equals(eTag, eTagResponse)) {
|
||||||
|
throw new NotModifiedException();
|
||||||
|
}
|
||||||
|
|
||||||
|
byte[] content = null;
|
||||||
if (entity != null) {
|
if (entity != null) {
|
||||||
content = EntityUtils.toByteArray(entity);
|
content = EntityUtils.toByteArray(entity);
|
||||||
}
|
}
|
||||||
|
result = new HttpResult(content, lastModifiedHeader == null ? null
|
||||||
|
: lastModifiedHeader.getValue(), eTagHeader == null ? null
|
||||||
|
: eTagHeader.getValue());
|
||||||
} finally {
|
} finally {
|
||||||
httpclient.getConnectionManager().shutdown();
|
httpclient.getConnectionManager().shutdown();
|
||||||
}
|
}
|
||||||
return content;
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class HttpResult {
|
||||||
|
|
||||||
|
private byte[] content;
|
||||||
|
private String lastModifiedSince;
|
||||||
|
private String eTag;
|
||||||
|
|
||||||
|
public HttpResult(byte[] content, String lastModifiedSince, String eTag) {
|
||||||
|
this.content = content;
|
||||||
|
this.lastModifiedSince = lastModifiedSince;
|
||||||
|
this.eTag = eTag;
|
||||||
|
}
|
||||||
|
|
||||||
|
public byte[] getContent() {
|
||||||
|
return content;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getLastModifiedSince() {
|
||||||
|
return lastModifiedSince;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String geteTag() {
|
||||||
|
return eTag;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class NotModifiedException extends Exception {
|
||||||
|
private static final long serialVersionUID = 1L;
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,6 +13,8 @@ import org.slf4j.Logger;
|
|||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.commafeed.backend.HttpGetter;
|
import com.commafeed.backend.HttpGetter;
|
||||||
|
import com.commafeed.backend.HttpGetter.HttpResult;
|
||||||
|
import com.commafeed.backend.HttpGetter.NotModifiedException;
|
||||||
import com.commafeed.backend.model.Feed;
|
import com.commafeed.backend.model.Feed;
|
||||||
import com.sun.syndication.io.FeedException;
|
import com.sun.syndication.io.FeedException;
|
||||||
|
|
||||||
@@ -26,29 +28,31 @@ public class FeedFetcher {
|
|||||||
@Inject
|
@Inject
|
||||||
HttpGetter getter;
|
HttpGetter getter;
|
||||||
|
|
||||||
public Feed fetch(String feedUrl, boolean extractFeedUrlFromHtml)
|
public Feed fetch(String feedUrl, boolean extractFeedUrlFromHtml,
|
||||||
throws FeedException, ClientProtocolException, IOException {
|
String lastModified, String eTag) throws FeedException,
|
||||||
|
ClientProtocolException, IOException, NotModifiedException {
|
||||||
log.debug("Fetching feed {}", feedUrl);
|
log.debug("Fetching feed {}", feedUrl);
|
||||||
Feed feed = null;
|
Feed feed = null;
|
||||||
|
|
||||||
byte[] content = getter.getBinary(feedUrl);
|
HttpResult result = getter.getBinary(feedUrl, lastModified, eTag);
|
||||||
if (extractFeedUrlFromHtml) {
|
if (extractFeedUrlFromHtml) {
|
||||||
String extractedUrl = extractFeedUrl(StringUtils
|
String extractedUrl = extractFeedUrl(StringUtils
|
||||||
.newStringUtf8(content));
|
.newStringUtf8(result.getContent()), feedUrl);
|
||||||
if (extractedUrl != null) {
|
if (org.apache.commons.lang.StringUtils.isNotBlank(extractedUrl)) {
|
||||||
content = getter.getBinary(extractedUrl);
|
result = getter.getBinary(extractedUrl, lastModified, eTag);
|
||||||
feedUrl = extractedUrl;
|
feedUrl = extractedUrl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
feed = parser.parse(feedUrl, content);
|
feed = parser.parse(feedUrl, result.getContent());
|
||||||
|
feed.setLastModifiedHeader(result.getLastModifiedSince());
|
||||||
|
feed.setEtagHeader(result.geteTag());
|
||||||
return feed;
|
return feed;
|
||||||
}
|
}
|
||||||
|
|
||||||
private String extractFeedUrl(String html) {
|
private String extractFeedUrl(String html, String baseUri) {
|
||||||
String foundUrl = null;
|
String foundUrl = null;
|
||||||
|
|
||||||
Document doc = Jsoup.parse(html);
|
Document doc = Jsoup.parse(html, baseUri);
|
||||||
String root = doc.children().get(0).tagName();
|
String root = doc.children().get(0).tagName();
|
||||||
if ("html".equals(root)) {
|
if ("html".equals(root)) {
|
||||||
Elements rss = doc.select("link[type=application/rss+xml]");
|
Elements rss = doc.select("link[type=application/rss+xml]");
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ import org.apache.commons.lang.time.DateUtils;
|
|||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.commafeed.backend.HttpGetter.NotModifiedException;
|
||||||
import com.commafeed.backend.dao.FeedDAO;
|
import com.commafeed.backend.dao.FeedDAO;
|
||||||
import com.commafeed.backend.model.Feed;
|
import com.commafeed.backend.model.Feed;
|
||||||
import com.commafeed.backend.services.FeedUpdateService;
|
import com.commafeed.backend.services.FeedUpdateService;
|
||||||
@@ -82,7 +83,10 @@ public class FeedRefreshWorker {
|
|||||||
|
|
||||||
Feed fetchedFeed = null;
|
Feed fetchedFeed = null;
|
||||||
try {
|
try {
|
||||||
fetchedFeed = fetcher.fetch(feed.getUrl(), false);
|
fetchedFeed = fetcher.fetch(feed.getUrl(), false,
|
||||||
|
feed.getLastModifiedHeader(), feed.getEtagHeader());
|
||||||
|
} catch (NotModifiedException e) {
|
||||||
|
log.debug("Feed not modified (304) : " + feed.getUrl());
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
message = "Unable to refresh feed " + feed.getUrl() + " : "
|
message = "Unable to refresh feed " + feed.getUrl() + " : "
|
||||||
+ e.getMessage();
|
+ e.getMessage();
|
||||||
@@ -105,10 +109,10 @@ public class FeedRefreshWorker {
|
|||||||
feed.setDisabledUntil(disabledUntil);
|
feed.setDisabledUntil(disabledUntil);
|
||||||
|
|
||||||
if (fetchedFeed != null) {
|
if (fetchedFeed != null) {
|
||||||
|
feed.setLink(fetchedFeed.getLink());
|
||||||
|
feed.setLastModifiedHeader(fetchedFeed.getLastModifiedHeader());
|
||||||
|
feed.setEtagHeader(fetchedFeed.getEtagHeader());
|
||||||
feedUpdateService.updateEntries(feed, fetchedFeed.getEntries());
|
feedUpdateService.updateEntries(feed, fetchedFeed.getEntries());
|
||||||
if (feed.getLink() == null) {
|
|
||||||
feed.setLink(fetchedFeed.getLink());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
feedDAO.update(feed);
|
feedDAO.update(feed);
|
||||||
|
|
||||||
|
|||||||
@@ -53,6 +53,12 @@ public class Feed extends AbstractModel {
|
|||||||
@Index(name = "disableduntil_index")
|
@Index(name = "disableduntil_index")
|
||||||
private Date disabledUntil;
|
private Date disabledUntil;
|
||||||
|
|
||||||
|
@Column(length = 64)
|
||||||
|
private String lastModifiedHeader;
|
||||||
|
|
||||||
|
@Column(length = 128)
|
||||||
|
private String etagHeader;
|
||||||
|
|
||||||
@ManyToMany(mappedBy = "feeds")
|
@ManyToMany(mappedBy = "feeds")
|
||||||
private Set<FeedEntry> entries = Sets.newHashSet();
|
private Set<FeedEntry> entries = Sets.newHashSet();
|
||||||
|
|
||||||
@@ -147,4 +153,20 @@ public class Feed extends AbstractModel {
|
|||||||
this.urlHash = urlHash;
|
this.urlHash = urlHash;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getLastModifiedHeader() {
|
||||||
|
return lastModifiedHeader;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLastModifiedHeader(String lastModifiedHeader) {
|
||||||
|
this.lastModifiedHeader = lastModifiedHeader;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getEtagHeader() {
|
||||||
|
return etagHeader;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setEtagHeader(String etagHeader) {
|
||||||
|
this.etagHeader = etagHeader;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ import org.slf4j.Logger;
|
|||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.commafeed.backend.HttpGetter;
|
import com.commafeed.backend.HttpGetter;
|
||||||
|
import com.commafeed.backend.HttpGetter.HttpResult;
|
||||||
import com.commafeed.backend.StartupBean;
|
import com.commafeed.backend.StartupBean;
|
||||||
import com.commafeed.backend.model.UserRole.Role;
|
import com.commafeed.backend.model.UserRole.Role;
|
||||||
import com.commafeed.frontend.SecurityCheck;
|
import com.commafeed.frontend.SecurityCheck;
|
||||||
@@ -74,7 +75,10 @@ public class FaviconPage extends BasePage {
|
|||||||
|
|
||||||
String iconUrl = "http://g.etfv.co/"
|
String iconUrl = "http://g.etfv.co/"
|
||||||
+ URLEncoder.encode(url, "UTF-8") + "?defaulticon=none";
|
+ URLEncoder.encode(url, "UTF-8") + "?defaulticon=none";
|
||||||
img = getter.getBinary(iconUrl);
|
HttpResult result = getter.getBinary(iconUrl);
|
||||||
|
if (result != null) {
|
||||||
|
img = result.getContent();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.error(e.getMessage(), e);
|
log.error(e.getMessage(), e);
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ package com.commafeed.frontend.rest.resources;
|
|||||||
import java.lang.reflect.Method;
|
import java.lang.reflect.Method;
|
||||||
|
|
||||||
import javax.annotation.PostConstruct;
|
import javax.annotation.PostConstruct;
|
||||||
import javax.annotation.Resource;
|
|
||||||
import javax.inject.Inject;
|
import javax.inject.Inject;
|
||||||
import javax.interceptor.AroundInvoke;
|
import javax.interceptor.AroundInvoke;
|
||||||
import javax.interceptor.InvocationContext;
|
import javax.interceptor.InvocationContext;
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ import org.apache.commons.fileupload.disk.DiskFileItemFactory;
|
|||||||
import org.apache.commons.fileupload.servlet.ServletFileUpload;
|
import org.apache.commons.fileupload.servlet.ServletFileUpload;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang.ObjectUtils;
|
import org.apache.commons.lang.ObjectUtils;
|
||||||
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
|
||||||
import com.commafeed.backend.model.Feed;
|
import com.commafeed.backend.model.Feed;
|
||||||
import com.commafeed.backend.model.FeedCategory;
|
import com.commafeed.backend.model.FeedCategory;
|
||||||
@@ -42,11 +43,11 @@ public class SubscriptionsREST extends AbstractREST {
|
|||||||
@ApiOperation(value = "Fetch a feed", notes = "Fetch a feed by its url", responseClass = "com.commafeed.backend.model.Feed")
|
@ApiOperation(value = "Fetch a feed", notes = "Fetch a feed by its url", responseClass = "com.commafeed.backend.model.Feed")
|
||||||
public Feed fetchFeed(@QueryParam("url") String url) {
|
public Feed fetchFeed(@QueryParam("url") String url) {
|
||||||
Preconditions.checkNotNull(url);
|
Preconditions.checkNotNull(url);
|
||||||
|
url = StringUtils.trimToEmpty(url);
|
||||||
url = prependHttp(url);
|
url = prependHttp(url);
|
||||||
Feed feed = null;
|
Feed feed = null;
|
||||||
try {
|
try {
|
||||||
feed = feedFetcher.fetch(url, true);
|
feed = feedFetcher.fetch(url, true, null, null);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new WebApplicationException(e, Response
|
throw new WebApplicationException(e, Response
|
||||||
.status(Status.INTERNAL_SERVER_ERROR)
|
.status(Status.INTERNAL_SERVER_ERROR)
|
||||||
|
|||||||
Reference in New Issue
Block a user