From 17a1783789f9275f702b1529546d38050c1354db Mon Sep 17 00:00:00 2001 From: Athou Date: Tue, 11 Jun 2013 07:18:04 +0200 Subject: [PATCH] refactoring favicon fetcher --- .../com/commafeed/backend/HttpGetter.java | 14 +- .../backend/feeds/FaviconFetcher.java | 144 +++++++++++++++++ .../frontend/rest/resources/AbstractREST.java | 4 + .../frontend/rest/resources/FeedREST.java | 36 +++-- .../frontend/utils/FetchFavicon.java | 150 ------------------ 5 files changed, 181 insertions(+), 167 deletions(-) create mode 100644 src/main/java/com/commafeed/backend/feeds/FaviconFetcher.java delete mode 100644 src/main/java/com/commafeed/frontend/utils/FetchFavicon.java diff --git a/src/main/java/com/commafeed/backend/HttpGetter.java b/src/main/java/com/commafeed/backend/HttpGetter.java index 1f3dbce8..113c0d64 100644 --- a/src/main/java/com/commafeed/backend/HttpGetter.java +++ b/src/main/java/com/commafeed/backend/HttpGetter.java @@ -141,7 +141,9 @@ public class HttpGetter { } long duration = System.currentTimeMillis() - start; - result = new HttpResult(content, lastModifiedHeader == null ? null + Header contentType = entity.getContentType(); + result = new HttpResult(content, contentType == null ? null + : contentType.getValue(), lastModifiedHeader == null ? null : lastModifiedHeader.getValue(), eTagHeader == null ? null : eTagHeader.getValue(), duration); } finally { @@ -153,13 +155,15 @@ public class HttpGetter { public static class HttpResult { private byte[] content; + private String contentType; private String lastModifiedSince; private String eTag; private long duration; - public HttpResult(byte[] content, String lastModifiedSince, - String eTag, long duration) { + public HttpResult(byte[] content, String contentType, + String lastModifiedSince, String eTag, long duration) { this.content = content; + this.contentType = contentType; this.lastModifiedSince = lastModifiedSince; this.eTag = eTag; this.duration = duration; @@ -169,6 +173,10 @@ public class HttpGetter { return content; } + public String getContentType() { + return contentType; + } + public String getLastModifiedSince() { return lastModifiedSince; } diff --git a/src/main/java/com/commafeed/backend/feeds/FaviconFetcher.java b/src/main/java/com/commafeed/backend/feeds/FaviconFetcher.java new file mode 100644 index 00000000..fc1d0843 --- /dev/null +++ b/src/main/java/com/commafeed/backend/feeds/FaviconFetcher.java @@ -0,0 +1,144 @@ +package com.commafeed.backend.feeds; + +import java.util.Arrays; +import java.util.List; + +import javax.inject.Inject; + +import org.apache.commons.lang3.StringUtils; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.select.Elements; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.commafeed.backend.HttpGetter; +import com.commafeed.backend.HttpGetter.HttpResult; + +/** + * Inspired/Ported from https://github.com/potatolondon/getfavicon + * + */ +public class FaviconFetcher { + + private static Logger log = LoggerFactory.getLogger(FeedFetcher.class); + + private static long MIN_ICON_LENGTH = 100; + private static long MAX_ICON_LENGTH = 20000; + + protected static List ICON_MIMETYPES = Arrays.asList( + "image/x-icon", "image/vnd.microsoft.icon", "image/ico", + "image/icon", "text/ico", "application/ico", "image/x-ms-bmp", + "image/x-bmp", "image/gif", "image/png", "image/jpeg"); + private static List ICON_MIMETYPE_BLACKLIST = Arrays.asList( + "application/xml", "text/html"); + + @Inject + HttpGetter getter; + + public byte[] fetch(String targetPath) { + byte[] icon = getIconAtRoot(targetPath); + + if (icon == null) { + icon = getIconInPage(targetPath); + } + + return icon; + } + + private byte[] getIconAtRoot(String targetPath) { + byte[] bytes = null; + String contentType = null; + + try { + String url = FeedUtils.removeTrailingSlash(targetPath) + + "/favicon.ico"; + log.debug("getting root icon at {}", url); + HttpResult result = getter.getBinary(url); + bytes = result.getContent(); + contentType = result.getContentType(); + } catch (Exception e) { + log.info("Failed to retrieve iconAtRoot: " + e.getMessage(), e); + } + + if (!isValidIconResponse(bytes, contentType)) { + bytes = null; + } + return bytes; + } + + boolean isValidIconResponse(byte[] content, String contentType) { + long length = content.length; + + if (!contentType.isEmpty()) { + contentType = contentType.split(";")[0]; + } + + if (ICON_MIMETYPE_BLACKLIST.contains(contentType)) { + log.info("Content-Type {} is blacklisted", contentType); + return false; + } + + if (length < MIN_ICON_LENGTH) { + log.info("Length {} below MIN_ICON_LENGTH {}", length, + MIN_ICON_LENGTH); + return false; + } + + if (length > MAX_ICON_LENGTH) { + log.info("Length {} greater than MAX_ICON_LENGTH {}", length, + MAX_ICON_LENGTH); + return false; + } + + return true; + } + + private byte[] getIconInPage(String targetPath) { + log.info("iconInPage, trying " + targetPath); + + Document doc; + try { + HttpResult result = getter.getBinary(targetPath); + doc = Jsoup.parse(new String(result.getContent()), targetPath); + } catch (Exception e) { + log.info("Failed to retrieve page to find icon"); + return null; + } + + Elements icons = doc + .select("link[rel~=(?i)^(shortcut|icon|shortcut icon)$]"); + + if (icons.isEmpty()) { + log.info("No icon found in page"); + return null; + } + + String href = icons.get(0).attr("abs:href"); + if (StringUtils.isBlank(href)) { + log.info("No icon found in page"); + return null; + } + + log.info("Found unconfirmed iconInPage at {}", href); + + byte[] bytes = null; + String contentType = null; + try { + HttpResult result = getter.getBinary(href); + bytes = result.getContent(); + contentType = result.getContentType(); + } catch (Exception e) { + log.info("Failed to retrieve icon found in page {}", href); + return null; + } + + if (!isValidIconResponse(bytes, contentType)) { + log.info("Invalid icon found for {}", href); + return null; + } + + return bytes; + } + +} diff --git a/src/main/java/com/commafeed/frontend/rest/resources/AbstractREST.java b/src/main/java/com/commafeed/frontend/rest/resources/AbstractREST.java index 7d975cda..18ca3eab 100644 --- a/src/main/java/com/commafeed/frontend/rest/resources/AbstractREST.java +++ b/src/main/java/com/commafeed/frontend/rest/resources/AbstractREST.java @@ -34,6 +34,7 @@ import com.commafeed.backend.dao.FeedSubscriptionDAO; import com.commafeed.backend.dao.UserDAO; import com.commafeed.backend.dao.UserRoleDAO; import com.commafeed.backend.dao.UserSettingsDAO; +import com.commafeed.backend.feeds.FaviconFetcher; import com.commafeed.backend.feeds.FeedFetcher; import com.commafeed.backend.feeds.FeedRefreshTaskGiver; import com.commafeed.backend.feeds.FeedRefreshUpdater; @@ -120,6 +121,9 @@ public abstract class AbstractREST { @Inject FeedRefreshUpdater feedRefreshUpdater; + @Inject + FaviconFetcher faviconFetcher; + @PostConstruct public void init() { CommaFeedApplication app = CommaFeedApplication.get(); diff --git a/src/main/java/com/commafeed/frontend/rest/resources/FeedREST.java b/src/main/java/com/commafeed/frontend/rest/resources/FeedREST.java index 9d542425..a6c1df0e 100644 --- a/src/main/java/com/commafeed/frontend/rest/resources/FeedREST.java +++ b/src/main/java/com/commafeed/frontend/rest/resources/FeedREST.java @@ -1,9 +1,7 @@ package com.commafeed.frontend.rest.resources; import java.io.StringWriter; -import java.io.UnsupportedEncodingException; import java.net.URI; -import java.net.URLDecoder; import java.util.Calendar; import java.util.Collections; import java.util.Comparator; @@ -20,6 +18,7 @@ import javax.ws.rs.Produces; import javax.ws.rs.QueryParam; import javax.ws.rs.WebApplicationException; import javax.ws.rs.core.CacheControl; +import javax.ws.rs.core.HttpHeaders; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; import javax.ws.rs.core.Response.ResponseBuilder; @@ -32,10 +31,12 @@ import org.apache.commons.fileupload.servlet.ServletFileUpload; import org.apache.commons.io.IOUtils; import org.apache.commons.lang.ObjectUtils; import org.apache.commons.lang.StringUtils; +import org.apache.http.impl.cookie.DateUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.commafeed.backend.StartupBean; +import com.commafeed.backend.feeds.FeedUtils; import com.commafeed.backend.feeds.FetchedFeed; import com.commafeed.backend.model.FeedCategory; import com.commafeed.backend.model.FeedEntryStatus; @@ -52,7 +53,6 @@ import com.commafeed.frontend.model.request.IDRequest; import com.commafeed.frontend.model.request.MarkRequest; import com.commafeed.frontend.model.request.SubscribeRequest; import com.commafeed.frontend.rest.Enums.ReadType; -import com.commafeed.frontend.utils.FetchFavicon; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.sun.syndication.feed.opml.Opml; @@ -252,26 +252,34 @@ public class FeedREST extends AbstractResourceREST { @GET @Path("/favicon") - @ApiOperation(value = "Fetch feed icon", notes = "Fetch icon of a feed") - public Response favicon(@QueryParam("url") String path) { - try { - path = URLDecoder.decode(path, "UTF-8"); - } catch (UnsupportedEncodingException e) { - e.printStackTrace(); + @ApiOperation(value = "Fetch a feed's icon", notes = "Fetch icon of a feed") + public Response getFavicon(@QueryParam("url") String url) { + + byte[] icon = faviconFetcher.fetch(url); + + ResponseBuilder builder = null; + if (icon == null) { + String baseUrl = FeedUtils + .removeTrailingSlash(applicationSettingsService.get() + .getPublicUrl()); + builder = Response.status(Status.MOVED_PERMANENTLY).location( + URI.create(baseUrl + "/images/default_favicon.gif")); + } else { + builder = Response.ok(icon, "image/x-icon"); } - byte[] icon = new FetchFavicon().get(path); - ResponseBuilder reponse = Response.ok(icon, "image/x-icon"); CacheControl cacheControl = new CacheControl(); cacheControl.setMaxAge(2592000); cacheControl.setPrivate(false); - reponse.cacheControl(cacheControl); // trying to replicate "public, max-age=2592000" + // trying to replicate "public, max-age=2592000" + builder.cacheControl(cacheControl); Calendar calendar = Calendar.getInstance(); calendar.add(Calendar.MONTH, 1); - reponse.expires(calendar.getTime()); + builder.expires(calendar.getTime()); + builder.lastModified(new Date(startupBean.getStartupTime())); - return reponse.build(); + return builder.build(); } @POST diff --git a/src/main/java/com/commafeed/frontend/utils/FetchFavicon.java b/src/main/java/com/commafeed/frontend/utils/FetchFavicon.java deleted file mode 100644 index c092c192..00000000 --- a/src/main/java/com/commafeed/frontend/utils/FetchFavicon.java +++ /dev/null @@ -1,150 +0,0 @@ -package com.commafeed.frontend.utils; - -import java.net.MalformedURLException; -import java.net.URL; - -import org.jsoup.Connection.Response; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.jsoup.select.Elements; - -//Inspired/Ported from https://github.com/potatolondon/getfavicon -public class FetchFavicon { - void inf(String message) { - // - } - - static long MIN_ICON_LENGTH = 100; - static long MAX_ICON_LENGTH = 20000; - static String[] ICON_MIMETYPES = new String[] { "image/x-icon", - "image/vnd.microsoft.icon", "image/ico", "image/icon", "text/ico", - "application/ico", "image/x-ms-bmp", "image/x-bmp", "image/gif", - "image/png", "image/jpeg" }; - - static String[] ICON_MIMETYPE_BLACKLIST = new String[] { "application/xml", - "text/html" }; - - boolean in(String[] array, String value) { - for (String i : array) { - if (i.equals(value)) { - return true; - } - } - return false; - } - - boolean isValidIconResponse(Response iconResponse) { - long iconLength = iconResponse.bodyAsBytes().length; - - String iconContentType = iconResponse.header("Content-Type"); - if (!iconContentType.isEmpty()) - iconContentType = iconContentType.split(";")[0]; - - if (iconResponse.statusCode() != 200) { - inf("Status code isn't 200"); - return false; - } - - if (in(ICON_MIMETYPE_BLACKLIST, iconContentType)) { - inf("Content-Type in ICON_MIMETYPE_BLACKLIST"); - return false; - } - - if (iconLength < MIN_ICON_LENGTH) { - inf("Length below MIN_ICON_LENGTH"); - return false; - } - - if (iconLength > MAX_ICON_LENGTH) { - inf("Length greater than MAX_ICON_LENGTH"); - return false; - } - return true; - } - - byte[] iconAtRoot(String targetPath) { - Response rootIconPath; - try { - URL url = new URL(new URL(targetPath), "/favicon.ico"); - inf(url.toString()); - rootIconPath = Jsoup - .connect(url.toString()) - .followRedirects(true) - .ignoreContentType(true).execute(); - } catch (Exception e) { - inf("Failed to retrieve iconAtRoot"); - return null; - } - - if (isValidIconResponse(rootIconPath)) { - return rootIconPath.bodyAsBytes(); - } - return null; - } - - byte[] iconInPage(String targetPath) { - inf("iconInPage, trying " + targetPath); - - Document pageSoup; - try { - pageSoup = Jsoup.connect(targetPath).followRedirects(true).get(); - } catch (Exception e) { - inf("Failed to retrieve page to find icon"); - return null; - } - - Elements pageSoupIcon = pageSoup - .select("link[rel~=(?i)^(shortcut|icon|shortcut icon)$]"); - - if (pageSoupIcon.size() == 0) { - return null; - } - String pageIconHref = pageSoupIcon.get(0).attr("href"); - String pageIconPath; - if (pageIconHref.isEmpty()) { - inf("No icon found in page"); - return null; - } - - try { - pageIconPath = new URL(new URL(targetPath), pageIconHref).toString(); - } catch (MalformedURLException e1) { - inf("URL concatination faild"); - return null; - } - - inf("Found unconfirmed iconInPage at"); - - Response pagePathFaviconResult; - try { - pagePathFaviconResult = Jsoup.connect(pageIconPath) - .followRedirects(true).ignoreContentType(true) - .execute(); - } catch (Exception e) { - inf("Failed to retrieve icon found in page"); - return null; - } - - if (isValidIconResponse(pagePathFaviconResult)) { - return pagePathFaviconResult.bodyAsBytes(); - } - inf("Invalid icon found"); - return null; - } - - public byte[] get(String targetPath) { - byte[] icon; - - icon = iconAtRoot(targetPath); - if (icon != null) { - return icon; - } - - icon = iconInPage(targetPath); - if (icon != null) { - return icon; - } - - return null; // or returning default feed - } -}