diff --git a/pom.xml b/pom.xml
index 7276eca5..e2c8314d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -180,6 +180,11 @@
guice
3.0
+		<dependency>
+			<groupId>com.google.inject.extensions</groupId>
+			<artifactId>guice-multibindings</artifactId>
+			<version>3.0</version>
+		</dependency>
io.dropwizard
diff --git a/src/main/java/com/commafeed/CommaFeedModule.java b/src/main/java/com/commafeed/CommaFeedModule.java
index 1524865b..a18b0be3 100644
--- a/src/main/java/com/commafeed/CommaFeedModule.java
+++ b/src/main/java/com/commafeed/CommaFeedModule.java
@@ -11,8 +11,12 @@ import com.commafeed.CommaFeedConfiguration.CacheType;
import com.commafeed.backend.cache.CacheService;
import com.commafeed.backend.cache.NoopCacheService;
import com.commafeed.backend.cache.RedisCacheService;
+import com.commafeed.backend.favicon.DefaultFaviconFetcher;
+import com.commafeed.backend.favicon.AbstractFaviconFetcher;
+import com.commafeed.backend.favicon.YoutubeFaviconFetcher;
import com.google.inject.AbstractModule;
import com.google.inject.Provides;
+import com.google.inject.multibindings.Multibinder;
@RequiredArgsConstructor
@Slf4j
@@ -33,5 +37,9 @@ public class CommaFeedModule extends AbstractModule {
: new RedisCacheService(config.getRedisPoolFactory().build());
log.info("using cache {}", cacheService.getClass());
bind(CacheService.class).toInstance(cacheService);
+
+ Multibinder multibinder = Multibinder.newSetBinder(binder(), AbstractFaviconFetcher.class);
+ multibinder.addBinding().to(YoutubeFaviconFetcher.class);
+ multibinder.addBinding().to(DefaultFaviconFetcher.class);
}
}
diff --git a/src/main/java/com/commafeed/backend/favicon/AbstractFaviconFetcher.java b/src/main/java/com/commafeed/backend/favicon/AbstractFaviconFetcher.java
new file mode 100644
index 00000000..3f989379
--- /dev/null
+++ b/src/main/java/com/commafeed/backend/favicon/AbstractFaviconFetcher.java
@@ -0,0 +1,75 @@
+package com.commafeed.backend.favicon;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Locale;
+
+import lombok.extern.slf4j.Slf4j;
+
+import org.apache.commons.lang.StringUtils;
+
+/**
+ * Base class for favicon fetchers. Holds the timeout shared by the implementations and the checks deciding whether
+ * a downloaded response is acceptable as an icon.
+ */
+@Slf4j
+public abstract class AbstractFaviconFetcher {
+
+	/** Media types that are never accepted as an icon. */
+	private static final List<String> ICON_MIMETYPE_BLACKLIST = Arrays.asList("application/xml", "text/html");
+
+	/** Smallest accepted icon size, in bytes. */
+	private static final long MIN_ICON_LENGTH = 100;
+
+	/** Largest accepted icon size, in bytes. */
+	private static final long MAX_ICON_LENGTH = 100000;
+
+	/** Timeout passed to the HTTP getter by the implementations (presumably milliseconds - confirm against HttpGetter). */
+	protected static final int TIMEOUT = 4000;
+
+	/**
+	 * Looks up an icon for the given url.
+	 *
+	 * @param url
+	 *            url to find an icon for
+	 * @return the icon content, or null when this fetcher has no icon for the url
+	 */
+	public abstract byte[] fetch(String url);
+
+	/**
+	 * Tells whether a downloaded response can be used as an icon.
+	 *
+	 * @param content
+	 *            response body, may be null
+	 * @param contentType
+	 *            value of the Content-Type header, may be null or blank
+	 * @return false when content is null, when the media type is blacklisted or when the size is outside
+	 *         [MIN_ICON_LENGTH, MAX_ICON_LENGTH]; true otherwise
+	 */
+	protected boolean isValidIconResponse(byte[] content, String contentType) {
+		if (content == null) {
+			return false;
+		}
+
+		// Keep only the media type: drop parameters such as "; charset=utf-8", surrounding whitespace and case
+		// differences. substringBefore is used because split(";")[0] throws on a value such as ";".
+		String mediaType = StringUtils.trimToEmpty(StringUtils.substringBefore(contentType, ";")).toLowerCase(Locale.ROOT);
+		if (ICON_MIMETYPE_BLACKLIST.contains(mediaType)) {
+			log.debug("Content-Type {} is blacklisted", mediaType);
+			return false;
+		}
+
+		long length = content.length;
+		if (length < MIN_ICON_LENGTH) {
+			log.debug("Length {} below MIN_ICON_LENGTH {}", length, MIN_ICON_LENGTH);
+			return false;
+		}
+
+		if (length > MAX_ICON_LENGTH) {
+			log.debug("Length {} greater than MAX_ICON_LENGTH {}", length, MAX_ICON_LENGTH);
+			return false;
+		}
+
+		return true;
+	}
+}
diff --git a/src/main/java/com/commafeed/backend/favicon/DefaultFaviconFetcher.java b/src/main/java/com/commafeed/backend/favicon/DefaultFaviconFetcher.java
new file mode 100644
index 00000000..778d2f42
--- /dev/null
+++ b/src/main/java/com/commafeed/backend/favicon/DefaultFaviconFetcher.java
@@ -0,0 +1,148 @@
+package com.commafeed.backend.favicon;
+
+import java.io.ByteArrayInputStream;
+
+import javax.inject.Inject;
+import javax.inject.Singleton;
+
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+
+import org.apache.commons.lang.StringUtils;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.select.Elements;
+
+import com.commafeed.backend.HttpGetter;
+import com.commafeed.backend.HttpGetter.HttpResult;
+import com.commafeed.backend.feed.FeedUtils;
+
+/**
+ * Fetches the favicon of the site a url belongs to: first /favicon.ico at the site root, then the icon declared in
+ * the root page.
+ *
+ * Inspired/Ported from https://github.com/potatolondon/getfavicon
+ *
+ */
+@Slf4j
+@RequiredArgsConstructor(onConstructor = @__({ @Inject }))
+@Singleton
+public class DefaultFaviconFetcher extends AbstractFaviconFetcher {
+
+	private final HttpGetter getter;
+
+	/**
+	 * Reduces the url to its root (everything before the first slash following the scheme) and looks for an icon
+	 * there.
+	 *
+	 * @param url
+	 *            any url of the site, may be null
+	 * @return the icon content, or null when url is null or no valid icon was found
+	 */
+	@Override
+	public byte[] fetch(String url) {
+		if (url == null) {
+			log.debug("url is null");
+			return null;
+		}
+
+		// position right after "scheme://", or 0 when the url has no "//"
+		int doubleSlash = url.indexOf("//");
+		if (doubleSlash == -1) {
+			doubleSlash = 0;
+		} else {
+			doubleSlash += 2;
+		}
+		// cut the path so that only the site root remains
+		int firstSlash = url.indexOf('/', doubleSlash);
+		if (firstSlash != -1) {
+			url = url.substring(0, firstSlash);
+		}
+
+		byte[] icon = getIconAtRoot(url);
+
+		if (icon == null) {
+			icon = getIconInPage(url);
+		}
+
+		return icon;
+	}
+
+	/**
+	 * Downloads /favicon.ico below the given root url.
+	 *
+	 * @return the icon content, or null when the download failed or the response is not a valid icon
+	 */
+	private byte[] getIconAtRoot(String url) {
+		byte[] bytes = null;
+		String contentType = null;
+
+		try {
+			url = FeedUtils.removeTrailingSlash(url) + "/favicon.ico";
+			log.debug("getting root icon at {}", url);
+			HttpResult result = getter.getBinary(url, TIMEOUT);
+			bytes = result.getContent();
+			contentType = result.getContentType();
+		} catch (Exception e) {
+			log.debug("Failed to retrieve iconAtRoot for url {}: ", url, e);
+		}
+
+		if (!isValidIconResponse(bytes, contentType)) {
+			bytes = null;
+		}
+		return bytes;
+	}
+
+	/**
+	 * Downloads the page at the given url and fetches the first icon it declares with a link tag.
+	 *
+	 * @return the icon content, or null when the page or the icon could not be retrieved or the icon is not valid
+	 */
+	private byte[] getIconInPage(String url) {
+
+		Document doc = null;
+		try {
+			HttpResult result = getter.getBinary(url, TIMEOUT);
+			// parse from the raw bytes with no forced charset: jsoup then uses the encoding declared by the page
+			// (falling back to UTF-8) instead of the JVM default charset used by new String(byte[])
+			doc = Jsoup.parse(new ByteArrayInputStream(result.getContent()), null, url);
+		} catch (Exception e) {
+			log.debug("Failed to retrieve page to find icon", e);
+			return null;
+		}
+
+		Elements icons = doc.select("link[rel~=(?i)^(shortcut|icon|shortcut icon)$]");
+
+		if (icons.isEmpty()) {
+			log.debug("No icon found in page {}", url);
+			return null;
+		}
+
+		// abs:href resolves the link against the page url
+		String href = icons.get(0).attr("abs:href");
+		if (StringUtils.isBlank(href)) {
+			log.debug("No icon found in page");
+			return null;
+		}
+
+		log.debug("Found unconfirmed iconInPage at {}", href);
+
+		byte[] bytes = null;
+		String contentType = null;
+		try {
+			HttpResult result = getter.getBinary(href, TIMEOUT);
+			bytes = result.getContent();
+			contentType = result.getContentType();
+		} catch (Exception e) {
+			log.debug("Failed to retrieve icon found in page {}", href, e);
+			return null;
+		}
+
+		if (!isValidIconResponse(bytes, contentType)) {
+			log.debug("Invalid icon found for {}", href);
+			return null;
+		}
+
+		return bytes;
+	}
+}
diff --git a/src/main/java/com/commafeed/backend/favicon/YoutubeFaviconFetcher.java b/src/main/java/com/commafeed/backend/favicon/YoutubeFaviconFetcher.java
new file mode 100644
index 00000000..f126857d
--- /dev/null
+++ b/src/main/java/com/commafeed/backend/favicon/YoutubeFaviconFetcher.java
@@ -0,0 +1,87 @@
+package com.commafeed.backend.favicon;
+
+import javax.inject.Inject;
+import javax.inject.Singleton;
+
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.select.Elements;
+
+import com.commafeed.backend.HttpGetter;
+import com.commafeed.backend.HttpGetter.HttpResult;
+
+@Slf4j
+@RequiredArgsConstructor(onConstructor = @__({ @Inject }))
+@Singleton
+public class YoutubeFaviconFetcher extends AbstractFaviconFetcher {
+
+ private final HttpGetter getter;
+
+ @Override
+ public byte[] fetch(String url) {
+ if (!url.toLowerCase().contains("://gdata.youtube.com/")) {
+ return null;
+ }
+
+ String userName = extractUserName(url);
+ if (userName == null) {
+ return null;
+ }
+
+ String profileUrl = "https://gdata.youtube.com/feeds/users/" + userName;
+
+ byte[] bytes = null;
+ String contentType = null;
+
+ try {
+ log.debug("Getting YouTube user's icon, {}", url);
+
+ // initial get to translate username to obscure user thumbnail URL
+ HttpResult profileResult = getter.getBinary(profileUrl, TIMEOUT);
+ Document doc = Jsoup.parse(new String(profileResult.getContent()), profileUrl);
+
+ Elements thumbnails = doc.select("media|thumbnail");
+ if (thumbnails.isEmpty()) {
+ return null;
+ }
+
+ String thumbnailUrl = thumbnails.get(0).attr("abs:url");
+
+ int thumbnailStart = thumbnailUrl.indexOf("", thumbnailStart);
+ if (thumbnailStart != -1) {
+ thumbnailUrl = thumbnailUrl.substring(thumbnailStart + " ICON_MIMETYPE_BLACKLIST = Arrays.asList("application/xml", "text/html");
- private static long MIN_ICON_LENGTH = 100;
- private static long MAX_ICON_LENGTH = 100000;
- private static int TIMEOUT = 4000;
-
- private final HttpGetter getter;
-
- public byte[] fetch(String url) {
-
- if (url == null) {
- log.debug("url is null");
- return null;
- }
-
- // Get YouTube Icon here
- if (url.toLowerCase().contains("://gdata.youtube.com/")) {
- byte[] icon = getYouTubeIcon(url);
- return icon;
- }
-
- int doubleSlash = url.indexOf("//");
- if (doubleSlash == -1) {
- doubleSlash = 0;
- } else {
- doubleSlash += 2;
- }
- int firstSlash = url.indexOf('/', doubleSlash);
- if (firstSlash != -1) {
- url = url.substring(0, firstSlash);
- }
-
- byte[] icon = getIconAtRoot(url);
-
- if (icon == null) {
- icon = getIconInPage(url);
- }
-
- return icon;
- }
-
- private byte[] getIconAtRoot(String url) {
- byte[] bytes = null;
- String contentType = null;
-
- try {
- url = FeedUtils.removeTrailingSlash(url) + "/favicon.ico";
- log.debug("getting root icon at {}", url);
- HttpResult result = getter.getBinary(url, TIMEOUT);
- bytes = result.getContent();
- contentType = result.getContentType();
- } catch (Exception e) {
- log.debug("Failed to retrieve iconAtRoot: " + e.getMessage(), e);
- }
-
- if (!isValidIconResponse(bytes, contentType)) {
- bytes = null;
- }
- return bytes;
- }
-
- private boolean isValidIconResponse(byte[] content, String contentType) {
- if (content == null) {
- return false;
- }
-
- long length = content.length;
-
- if (StringUtils.isNotBlank(contentType)) {
- contentType = contentType.split(";")[0];
- }
-
- if (ICON_MIMETYPE_BLACKLIST.contains(contentType)) {
- log.debug("Content-Type {} is blacklisted", contentType);
- return false;
- }
-
- if (length < MIN_ICON_LENGTH) {
- log.debug("Length {} below MIN_ICON_LENGTH {}", length, MIN_ICON_LENGTH);
- return false;
- }
-
- if (length > MAX_ICON_LENGTH) {
- log.debug("Length {} greater than MAX_ICON_LENGTH {}", length, MAX_ICON_LENGTH);
- return false;
- }
-
- return true;
- }
-
- private byte[] getIconInPage(String url) {
-
- Document doc = null;
- try {
- HttpResult result = getter.getBinary(url, TIMEOUT);
- doc = Jsoup.parse(new String(result.getContent()), url);
- } catch (Exception e) {
- log.debug("Failed to retrieve page to find icon");
- return null;
- }
-
- Elements icons = doc.select("link[rel~=(?i)^(shortcut|icon|shortcut icon)$]");
-
- if (icons.isEmpty()) {
- log.debug("No icon found in page {}", url);
- return null;
- }
-
- String href = icons.get(0).attr("abs:href");
- if (StringUtils.isBlank(href)) {
- log.debug("No icon found in page");
- return null;
- }
-
- log.debug("Found unconfirmed iconInPage at {}", href);
-
- byte[] bytes = null;
- String contentType = null;
- try {
- HttpResult result = getter.getBinary(href, TIMEOUT);
- bytes = result.getContent();
- contentType = result.getContentType();
- } catch (Exception e) {
- log.debug("Failed to retrieve icon found in page {}", href);
- return null;
- }
-
- if (!isValidIconResponse(bytes, contentType)) {
- log.debug("Invalid icon found for {}", href);
- return null;
- }
-
- return bytes;
- }
-
- /*
- * Instead of grabbing the actual favicon, grab the user's icon
- * This prevents a whole bunch of repeated YouTube icons, replacing
- * each with identifiable user icons.
- */
- private byte[] getYouTubeIcon(String url) {
- byte[] bytes = null;
- String contentType = null;
- String username = null;
- String imageUrl = null;
- String thumbnailUrl = null;
- try {
- int apiOrBase = url.indexOf("/users/");
- int userEndSlash = url.indexOf('/', apiOrBase + "/users/".length());
- if (userEndSlash != -1) {
- username = url.substring(apiOrBase + "/users/".length(), userEndSlash);
- }
- imageUrl = "https://gdata.youtube.com/feeds/users/" + username;
- log.debug("Getting YouTube user's icon, {}", url);
-
- //initial get to translate username to obscure user thumbnail URL
- HttpResult result = getter.getBinary(imageUrl, TIMEOUT);
- bytes = result.getContent();
- contentType = result.getContentType();
- thumbnailUrl = FeedUtils.parseForImageUrl(bytes);
-
- int thumbnailStart = thumbnailUrl.indexOf("", thumbnailStart);
- if (thumbnailStart != -1) {
- thumbnailUrl = thumbnailUrl.substring(thumbnailStart+"