refactoring favicon fetcher

This commit is contained in:
Athou
2013-06-11 07:18:04 +02:00
parent 3ee4f9807e
commit 17a1783789
5 changed files with 181 additions and 167 deletions

View File

@@ -141,7 +141,9 @@ public class HttpGetter {
}
long duration = System.currentTimeMillis() - start;
result = new HttpResult(content, lastModifiedHeader == null ? null
Header contentType = entity.getContentType();
result = new HttpResult(content, contentType == null ? null
: contentType.getValue(), lastModifiedHeader == null ? null
: lastModifiedHeader.getValue(), eTagHeader == null ? null
: eTagHeader.getValue(), duration);
} finally {
@@ -153,13 +155,15 @@ public class HttpGetter {
public static class HttpResult {
private byte[] content;
private String contentType;
private String lastModifiedSince;
private String eTag;
private long duration;
public HttpResult(byte[] content, String lastModifiedSince,
String eTag, long duration) {
public HttpResult(byte[] content, String contentType,
String lastModifiedSince, String eTag, long duration) {
this.content = content;
this.contentType = contentType;
this.lastModifiedSince = lastModifiedSince;
this.eTag = eTag;
this.duration = duration;
@@ -169,6 +173,10 @@ public class HttpGetter {
return content;
}
public String getContentType() {
return contentType;
}
public String getLastModifiedSince() {
return lastModifiedSince;
}

View File

@@ -0,0 +1,144 @@
package com.commafeed.backend.feeds;
import java.util.Arrays;
import java.util.List;
import javax.inject.Inject;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.commafeed.backend.HttpGetter;
import com.commafeed.backend.HttpGetter.HttpResult;
/**
 * Retrieves the favicon of a web site.
 * <p>
 * Two strategies are tried in order: first {@code /favicon.ico} is appended
 * to the given URL and downloaded; if that does not yield a usable icon, the
 * page at the given URL is downloaded and searched for a
 * {@code <link rel="icon">} / {@code <link rel="shortcut icon">} element.
 * <p>
 * Inspired/Ported from https://github.com/potatolondon/getfavicon
 */
public class FaviconFetcher {

    private static final Logger log = LoggerFactory.getLogger(FaviconFetcher.class);

    /** Responses smaller than this many bytes are rejected. */
    private static final long MIN_ICON_LENGTH = 100;

    /** Responses larger than this many bytes are rejected. */
    private static final long MAX_ICON_LENGTH = 20000;

    /** Known icon MIME types (not consulted by the validation below). */
    protected static List<String> ICON_MIMETYPES = Arrays.asList(
            "image/x-icon", "image/vnd.microsoft.icon", "image/ico",
            "image/icon", "text/ico", "application/ico", "image/x-ms-bmp",
            "image/x-bmp", "image/gif", "image/png", "image/jpeg");

    /** MIME types that are rejected as icons. */
    private static final List<String> ICON_MIMETYPE_BLACKLIST = Arrays.asList(
            "application/xml", "text/html");

    @Inject
    HttpGetter getter;

    /**
     * Fetches the icon for the given URL.
     *
     * @param targetPath
     *            URL of the site (or feed) whose icon is wanted
     * @return the raw icon bytes, or {@code null} if no valid icon was found
     */
    public byte[] fetch(String targetPath) {
        if (StringUtils.isBlank(targetPath)) {
            // nothing to fetch; avoids requesting "null/favicon.ico"
            return null;
        }
        byte[] icon = getIconAtRoot(targetPath);
        if (icon == null) {
            icon = getIconInPage(targetPath);
        }
        return icon;
    }

    /**
     * Downloads {@code <targetPath>/favicon.ico}.
     *
     * @return the icon bytes, or {@code null} if the download failed or the
     *         response is not a valid icon
     */
    private byte[] getIconAtRoot(String targetPath) {
        byte[] bytes;
        String contentType;
        try {
            String url = FeedUtils.removeTrailingSlash(targetPath)
                    + "/favicon.ico";
            log.debug("getting root icon at {}", url);
            HttpResult result = getter.getBinary(url);
            bytes = result.getContent();
            contentType = result.getContentType();
        } catch (Exception e) {
            log.info("Failed to retrieve iconAtRoot: " + e.getMessage(), e);
            // return here so the caller can fall back to the in-page lookup
            return null;
        }
        return isValidIconResponse(bytes, contentType) ? bytes : null;
    }

    /**
     * Checks whether a downloaded response looks like a usable icon.
     *
     * @param content
     *            response body, may be {@code null}
     * @param contentType
     *            value of the Content-Type header, may be {@code null} or
     *            carry parameters (e.g. {@code "text/html; charset=utf-8"})
     * @return {@code true} if the content is present, its MIME type is not
     *         blacklisted and its size is within
     *         [MIN_ICON_LENGTH, MAX_ICON_LENGTH]
     */
    boolean isValidIconResponse(byte[] content, String contentType) {
        if (content == null) {
            log.debug("No content to validate");
            return false;
        }

        // keep only the media type, dropping parameters such as "; charset=..."
        // (null-safe: a missing header yields an empty string)
        String mimeType = StringUtils.trimToEmpty(StringUtils.substringBefore(
                contentType, ";"));
        if (isBlacklisted(mimeType)) {
            log.info("Content-Type {} is blacklisted", mimeType);
            return false;
        }

        long length = content.length;
        if (length < MIN_ICON_LENGTH) {
            log.info("Length {} below MIN_ICON_LENGTH {}", length,
                    MIN_ICON_LENGTH);
            return false;
        }
        if (length > MAX_ICON_LENGTH) {
            log.info("Length {} greater than MAX_ICON_LENGTH {}", length,
                    MAX_ICON_LENGTH);
            return false;
        }
        return true;
    }

    /** Case-insensitive lookup in the MIME type blacklist. */
    private static boolean isBlacklisted(String mimeType) {
        for (String blacklisted : ICON_MIMETYPE_BLACKLIST) {
            if (blacklisted.equalsIgnoreCase(mimeType)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Downloads the page at {@code targetPath}, looks for an icon link element
     * and downloads the icon it points to.
     *
     * @return the icon bytes, or {@code null} if the page or icon could not be
     *         retrieved, no icon link exists, or the icon is not valid
     */
    private byte[] getIconInPage(String targetPath) {
        log.info("iconInPage, trying " + targetPath);

        Document doc;
        try {
            HttpResult result = getter.getBinary(targetPath);
            // NOTE: decodes the page with the platform default charset
            doc = Jsoup.parse(new String(result.getContent()), targetPath);
        } catch (Exception e) {
            log.info("Failed to retrieve page to find icon", e);
            return null;
        }

        Elements icons = doc
                .select("link[rel~=(?i)^(shortcut|icon|shortcut icon)$]");
        if (icons.isEmpty()) {
            log.info("No icon found in page");
            return null;
        }

        // "abs:" makes jsoup resolve the href against the document base URI
        String href = icons.get(0).attr("abs:href");
        if (StringUtils.isBlank(href)) {
            log.info("No icon found in page");
            return null;
        }
        log.info("Found unconfirmed iconInPage at {}", href);

        byte[] bytes;
        String contentType;
        try {
            HttpResult result = getter.getBinary(href);
            bytes = result.getContent();
            contentType = result.getContentType();
        } catch (Exception e) {
            log.info("Failed to retrieve icon found in page {}", href, e);
            return null;
        }

        if (!isValidIconResponse(bytes, contentType)) {
            log.info("Invalid icon found for {}", href);
            return null;
        }
        return bytes;
    }
}

View File

@@ -34,6 +34,7 @@ import com.commafeed.backend.dao.FeedSubscriptionDAO;
import com.commafeed.backend.dao.UserDAO;
import com.commafeed.backend.dao.UserRoleDAO;
import com.commafeed.backend.dao.UserSettingsDAO;
import com.commafeed.backend.feeds.FaviconFetcher;
import com.commafeed.backend.feeds.FeedFetcher;
import com.commafeed.backend.feeds.FeedRefreshTaskGiver;
import com.commafeed.backend.feeds.FeedRefreshUpdater;
@@ -120,6 +121,9 @@ public abstract class AbstractREST {
@Inject
FeedRefreshUpdater feedRefreshUpdater;
@Inject
FaviconFetcher faviconFetcher;
@PostConstruct
public void init() {
CommaFeedApplication app = CommaFeedApplication.get();

View File

@@ -1,9 +1,7 @@
package com.commafeed.frontend.rest.resources;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URLDecoder;
import java.util.Calendar;
import java.util.Collections;
import java.util.Comparator;
@@ -20,6 +18,7 @@ import javax.ws.rs.Produces;
import javax.ws.rs.QueryParam;
import javax.ws.rs.WebApplicationException;
import javax.ws.rs.core.CacheControl;
import javax.ws.rs.core.HttpHeaders;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.Response.ResponseBuilder;
@@ -32,10 +31,12 @@ import org.apache.commons.fileupload.servlet.ServletFileUpload;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.ObjectUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.http.impl.cookie.DateUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.commafeed.backend.StartupBean;
import com.commafeed.backend.feeds.FeedUtils;
import com.commafeed.backend.feeds.FetchedFeed;
import com.commafeed.backend.model.FeedCategory;
import com.commafeed.backend.model.FeedEntryStatus;
@@ -52,7 +53,6 @@ import com.commafeed.frontend.model.request.IDRequest;
import com.commafeed.frontend.model.request.MarkRequest;
import com.commafeed.frontend.model.request.SubscribeRequest;
import com.commafeed.frontend.rest.Enums.ReadType;
import com.commafeed.frontend.utils.FetchFavicon;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.sun.syndication.feed.opml.Opml;
@@ -252,26 +252,34 @@ public class FeedREST extends AbstractResourceREST {
@GET
@Path("/favicon")
@ApiOperation(value = "Fetch feed icon", notes = "Fetch icon of a feed")
public Response favicon(@QueryParam("url") String path) {
try {
path = URLDecoder.decode(path, "UTF-8");
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
@ApiOperation(value = "Fetch a feed's icon", notes = "Fetch icon of a feed")
public Response getFavicon(@QueryParam("url") String url) {
byte[] icon = faviconFetcher.fetch(url);
ResponseBuilder builder = null;
if (icon == null) {
String baseUrl = FeedUtils
.removeTrailingSlash(applicationSettingsService.get()
.getPublicUrl());
builder = Response.status(Status.MOVED_PERMANENTLY).location(
URI.create(baseUrl + "/images/default_favicon.gif"));
} else {
builder = Response.ok(icon, "image/x-icon");
}
byte[] icon = new FetchFavicon().get(path);
ResponseBuilder reponse = Response.ok(icon, "image/x-icon");
CacheControl cacheControl = new CacheControl();
cacheControl.setMaxAge(2592000);
cacheControl.setPrivate(false);
reponse.cacheControl(cacheControl); // trying to replicate "public, max-age=2592000"
// trying to replicate "public, max-age=2592000"
builder.cacheControl(cacheControl);
Calendar calendar = Calendar.getInstance();
calendar.add(Calendar.MONTH, 1);
reponse.expires(calendar.getTime());
builder.expires(calendar.getTime());
builder.lastModified(new Date(startupBean.getStartupTime()));
return reponse.build();
return builder.build();
}
@POST

View File

@@ -1,150 +0,0 @@
package com.commafeed.frontend.utils;
import java.net.MalformedURLException;
import java.net.URL;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
/**
 * Retrieves the favicon of a web site using jsoup connections: first
 * {@code /favicon.ico} resolved against the given URL, then the icon
 * referenced by a {@code <link rel="icon">} element in the page.
 * <p>
 * Inspired/Ported from https://github.com/potatolondon/getfavicon
 */
public class FetchFavicon {

    static long MIN_ICON_LENGTH = 100;
    static long MAX_ICON_LENGTH = 20000;

    static String[] ICON_MIMETYPES = new String[] { "image/x-icon",
            "image/vnd.microsoft.icon", "image/ico", "image/icon", "text/ico",
            "application/ico", "image/x-ms-bmp", "image/x-bmp", "image/gif",
            "image/png", "image/jpeg" };
    static String[] ICON_MIMETYPE_BLACKLIST = new String[] { "application/xml",
            "text/html" };

    /** Diagnostic hook; intentionally does nothing. */
    void inf(String message) {
        //
    }

    /**
     * @return {@code true} if {@code array} contains an element equal to
     *         {@code value}
     */
    boolean in(String[] array, String value) {
        for (String candidate : array) {
            if (candidate.equals(value)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks status code, MIME type blacklist and body size of a response.
     *
     * @param iconResponse
     *            executed jsoup response
     * @return {@code true} if the response is acceptable as an icon
     */
    boolean isValidIconResponse(Response iconResponse) {
        // cheapest check first: no need to read the body of a failed request
        if (iconResponse.statusCode() != 200) {
            inf("Status code isn't 200");
            return false;
        }

        // the header may be absent, in which case jsoup returns null
        String iconContentType = iconResponse.header("Content-Type");
        if (iconContentType != null && !iconContentType.isEmpty()) {
            // drop parameters such as "; charset=..."
            iconContentType = iconContentType.split(";")[0].trim();
        }
        if (in(ICON_MIMETYPE_BLACKLIST, iconContentType)) {
            inf("Content-Type in ICON_MIMETYPE_BLACKLIST");
            return false;
        }

        long iconLength = iconResponse.bodyAsBytes().length;
        if (iconLength < MIN_ICON_LENGTH) {
            inf("Length below MIN_ICON_LENGTH");
            return false;
        }
        if (iconLength > MAX_ICON_LENGTH) {
            inf("Length greater than MAX_ICON_LENGTH");
            return false;
        }
        return true;
    }

    /**
     * Downloads {@code /favicon.ico} resolved against {@code targetPath}.
     *
     * @return the icon bytes, or {@code null} on failure or invalid response
     */
    byte[] iconAtRoot(String targetPath) {
        Response rootIconPath;
        try {
            URL url = new URL(new URL(targetPath), "/favicon.ico");
            inf(url.toString());
            rootIconPath = Jsoup.connect(url.toString()).followRedirects(true)
                    .ignoreContentType(true).execute();
        } catch (Exception e) {
            inf("Failed to retrieve iconAtRoot");
            return null;
        }
        return isValidIconResponse(rootIconPath) ? rootIconPath.bodyAsBytes()
                : null;
    }

    /**
     * Downloads the page at {@code targetPath}, locates an icon link element
     * and downloads the icon it references.
     *
     * @return the icon bytes, or {@code null} if any step fails
     */
    byte[] iconInPage(String targetPath) {
        inf("iconInPage, trying " + targetPath);

        Document pageSoup;
        try {
            pageSoup = Jsoup.connect(targetPath).followRedirects(true).get();
        } catch (Exception e) {
            inf("Failed to retrieve page to find icon");
            return null;
        }

        Elements pageSoupIcon = pageSoup
                .select("link[rel~=(?i)^(shortcut|icon|shortcut icon)$]");
        if (pageSoupIcon.isEmpty()) {
            return null;
        }

        String pageIconHref = pageSoupIcon.get(0).attr("href");
        if (pageIconHref.isEmpty()) {
            inf("No icon found in page");
            return null;
        }

        // resolve a possibly relative href against the page URL
        String pageIconPath;
        try {
            pageIconPath = new URL(new URL(targetPath), pageIconHref)
                    .toString();
        } catch (MalformedURLException e1) {
            inf("URL concatination faild");
            return null;
        }
        inf("Found unconfirmed iconInPage at");

        Response pagePathFaviconResult;
        try {
            pagePathFaviconResult = Jsoup.connect(pageIconPath)
                    .followRedirects(true).ignoreContentType(true).execute();
        } catch (Exception e) {
            inf("Failed to retrieve icon found in page");
            return null;
        }

        if (isValidIconResponse(pagePathFaviconResult)) {
            return pagePathFaviconResult.bodyAsBytes();
        }
        inf("Invalid icon found");
        return null;
    }

    /**
     * Fetches the icon for the given URL, trying the root icon first and the
     * in-page icon second.
     *
     * @return the icon bytes, or {@code null} if neither lookup succeeded
     */
    public byte[] get(String targetPath) {
        byte[] icon = iconAtRoot(targetPath);
        if (icon == null) {
            icon = iconInPage(targetPath);
        }
        return icon; // may be null; callers decide on a default icon
    }
}