diff --git a/pom.xml b/pom.xml index 7276eca5..e2c8314d 100644 --- a/pom.xml +++ b/pom.xml @@ -180,6 +180,11 @@ guice 3.0 + + com.google.inject.extensions + guice-multibindings + 3.0 + io.dropwizard diff --git a/src/main/java/com/commafeed/CommaFeedModule.java b/src/main/java/com/commafeed/CommaFeedModule.java index 1524865b..a18b0be3 100644 --- a/src/main/java/com/commafeed/CommaFeedModule.java +++ b/src/main/java/com/commafeed/CommaFeedModule.java @@ -11,8 +11,12 @@ import com.commafeed.CommaFeedConfiguration.CacheType; import com.commafeed.backend.cache.CacheService; import com.commafeed.backend.cache.NoopCacheService; import com.commafeed.backend.cache.RedisCacheService; +import com.commafeed.backend.favicon.DefaultFaviconFetcher; +import com.commafeed.backend.favicon.AbstractFaviconFetcher; +import com.commafeed.backend.favicon.YoutubeFaviconFetcher; import com.google.inject.AbstractModule; import com.google.inject.Provides; +import com.google.inject.multibindings.Multibinder; @RequiredArgsConstructor @Slf4j @@ -33,5 +37,9 @@ public class CommaFeedModule extends AbstractModule { : new RedisCacheService(config.getRedisPoolFactory().build()); log.info("using cache {}", cacheService.getClass()); bind(CacheService.class).toInstance(cacheService); + + Multibinder multibinder = Multibinder.newSetBinder(binder(), AbstractFaviconFetcher.class); + multibinder.addBinding().to(YoutubeFaviconFetcher.class); + multibinder.addBinding().to(DefaultFaviconFetcher.class); } } diff --git a/src/main/java/com/commafeed/backend/favicon/AbstractFaviconFetcher.java b/src/main/java/com/commafeed/backend/favicon/AbstractFaviconFetcher.java new file mode 100644 index 00000000..3f989379 --- /dev/null +++ b/src/main/java/com/commafeed/backend/favicon/AbstractFaviconFetcher.java @@ -0,0 +1,49 @@ +package com.commafeed.backend.favicon; + +import java.util.Arrays; +import java.util.List; + +import lombok.extern.slf4j.Slf4j; + +import org.apache.commons.lang.StringUtils; + +@Slf4j +public abstract class AbstractFaviconFetcher { + + private static List ICON_MIMETYPE_BLACKLIST = Arrays.asList("application/xml", "text/html"); + private static long MIN_ICON_LENGTH = 100; + private static long MAX_ICON_LENGTH = 100000; + + protected static int TIMEOUT = 4000; + + public abstract byte[] fetch(String url); + + protected boolean isValidIconResponse(byte[] content, String contentType) { + if (content == null) { + return false; + } + + long length = content.length; + + if (StringUtils.isNotBlank(contentType)) { + contentType = contentType.split(";")[0]; + } + + if (ICON_MIMETYPE_BLACKLIST.contains(contentType)) { + log.debug("Content-Type {} is blacklisted", contentType); + return false; + } + + if (length < MIN_ICON_LENGTH) { + log.debug("Length {} below MIN_ICON_LENGTH {}", length, MIN_ICON_LENGTH); + return false; + } + + if (length > MAX_ICON_LENGTH) { + log.debug("Length {} greater than MAX_ICON_LENGTH {}", length, MAX_ICON_LENGTH); + return false; + } + + return true; + } +} diff --git a/src/main/java/com/commafeed/backend/favicon/DefaultFaviconFetcher.java b/src/main/java/com/commafeed/backend/favicon/DefaultFaviconFetcher.java new file mode 100644 index 00000000..778d2f42 --- /dev/null +++ b/src/main/java/com/commafeed/backend/favicon/DefaultFaviconFetcher.java @@ -0,0 +1,120 @@ +package com.commafeed.backend.favicon; + +import javax.inject.Inject; +import javax.inject.Singleton; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import org.apache.commons.lang.StringUtils; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.select.Elements; + +import com.commafeed.backend.HttpGetter; +import com.commafeed.backend.HttpGetter.HttpResult; +import com.commafeed.backend.feed.FeedUtils; + +/** + * Inspired/Ported from https://github.com/potatolondon/getfavicon + * + */ +@Slf4j +@RequiredArgsConstructor(onConstructor = @__({ @Inject })) +@Singleton +public class DefaultFaviconFetcher extends AbstractFaviconFetcher { + + private final HttpGetter getter; + + @Override + public byte[] fetch(String url) { + if (url == null) { + log.debug("url is null"); + return null; + } + + int doubleSlash = url.indexOf("//"); + if (doubleSlash == -1) { + doubleSlash = 0; + } else { + doubleSlash += 2; + } + int firstSlash = url.indexOf('/', doubleSlash); + if (firstSlash != -1) { + url = url.substring(0, firstSlash); + } + + byte[] icon = getIconAtRoot(url); + + if (icon == null) { + icon = getIconInPage(url); + } + + return icon; + } + + private byte[] getIconAtRoot(String url) { + byte[] bytes = null; + String contentType = null; + + try { + url = FeedUtils.removeTrailingSlash(url) + "/favicon.ico"; + log.debug("getting root icon at {}", url); + HttpResult result = getter.getBinary(url, TIMEOUT); + bytes = result.getContent(); + contentType = result.getContentType(); + } catch (Exception e) { + log.debug("Failed to retrieve iconAtRoot for url {}: ", url, e); + } + + if (!isValidIconResponse(bytes, contentType)) { + bytes = null; + } + return bytes; + } + + private byte[] getIconInPage(String url) { + + Document doc = null; + try { + HttpResult result = getter.getBinary(url, TIMEOUT); + doc = Jsoup.parse(new String(result.getContent()), url); + } catch (Exception e) { + log.debug("Failed to retrieve page to find icon", e); + return null; + } + + Elements icons = doc.select("link[rel~=(?i)^(shortcut|icon|shortcut icon)$]"); + + if (icons.isEmpty()) { + log.debug("No icon found in page {}", url); + return null; + } + + String href = icons.get(0).attr("abs:href"); + if (StringUtils.isBlank(href)) { + log.debug("No icon found in page"); + return null; + } + + log.debug("Found unconfirmed iconInPage at {}", href); + + byte[] bytes = null; + String contentType = null; + try { + HttpResult result = getter.getBinary(href, TIMEOUT); + bytes = result.getContent(); + contentType = result.getContentType(); + } catch (Exception e) { + log.debug("Failed to retrieve icon found in page {}", href, e); + return null; + } + + if (!isValidIconResponse(bytes, contentType)) { + log.debug("Invalid icon found for {}", href); + return null; + } + + return bytes; + } +} diff --git a/src/main/java/com/commafeed/backend/favicon/YoutubeFaviconFetcher.java b/src/main/java/com/commafeed/backend/favicon/YoutubeFaviconFetcher.java new file mode 100644 index 00000000..f126857d --- /dev/null +++ b/src/main/java/com/commafeed/backend/favicon/YoutubeFaviconFetcher.java @@ -0,0 +1,87 @@ +package com.commafeed.backend.favicon; + +import javax.inject.Inject; +import javax.inject.Singleton; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.select.Elements; + +import com.commafeed.backend.HttpGetter; +import com.commafeed.backend.HttpGetter.HttpResult; + +@Slf4j +@RequiredArgsConstructor(onConstructor = @__({ @Inject })) +@Singleton +public class YoutubeFaviconFetcher extends AbstractFaviconFetcher { + + private final HttpGetter getter; + + @Override + public byte[] fetch(String url) { + if (!url.toLowerCase().contains("://gdata.youtube.com/")) { + return null; + } + + String userName = extractUserName(url); + if (userName == null) { + return null; + } + + String profileUrl = "https://gdata.youtube.com/feeds/users/" + userName; + + byte[] bytes = null; + String contentType = null; + + try { + log.debug("Getting YouTube user's icon, {}", url); + + // initial get to translate username to obscure user thumbnail URL + HttpResult profileResult = getter.getBinary(profileUrl, TIMEOUT); + Document doc = Jsoup.parse(new String(profileResult.getContent()), profileUrl); + + Elements thumbnails = doc.select("media|thumbnail"); + if (thumbnails.isEmpty()) { + return null; + } + + String thumbnailUrl = thumbnails.get(0).attr("abs:url"); + + int thumbnailStart = thumbnailUrl.indexOf("", thumbnailStart); + if (thumbnailStart != -1) { + thumbnailUrl = thumbnailUrl.substring(thumbnailStart + " ICON_MIMETYPE_BLACKLIST = Arrays.asList("application/xml", "text/html"); - private static long MIN_ICON_LENGTH = 100; - private static long MAX_ICON_LENGTH = 100000; - private static int TIMEOUT = 4000; - - private final HttpGetter getter; - - public byte[] fetch(String url) { - - if (url == null) { - log.debug("url is null"); - return null; - } - - // Get YouTube Icon here - if (url.toLowerCase().contains("://gdata.youtube.com/")) { - byte[] icon = getYouTubeIcon(url); - return icon; - } - - int doubleSlash = url.indexOf("//"); - if (doubleSlash == -1) { - doubleSlash = 0; - } else { - doubleSlash += 2; - } - int firstSlash = url.indexOf('/', doubleSlash); - if (firstSlash != -1) { - url = url.substring(0, firstSlash); - } - - byte[] icon = getIconAtRoot(url); - - if (icon == null) { - icon = getIconInPage(url); - } - - return icon; - } - - private byte[] getIconAtRoot(String url) { - byte[] bytes = null; - String contentType = null; - - try { - url = FeedUtils.removeTrailingSlash(url) + "/favicon.ico"; - log.debug("getting root icon at {}", url); - HttpResult result = getter.getBinary(url, TIMEOUT); - bytes = result.getContent(); - contentType = result.getContentType(); - } catch (Exception e) { - log.debug("Failed to retrieve iconAtRoot: " + e.getMessage(), e); - } - - if (!isValidIconResponse(bytes, contentType)) { - bytes = null; - } - return bytes; - } - - private boolean isValidIconResponse(byte[] content, String contentType) { - if (content == null) { - return false; - } - - long length = content.length; - - if (StringUtils.isNotBlank(contentType)) { - contentType = contentType.split(";")[0]; - } - - if (ICON_MIMETYPE_BLACKLIST.contains(contentType)) { - log.debug("Content-Type {} is blacklisted", contentType); - return false; - } - - if (length < MIN_ICON_LENGTH) { - log.debug("Length {} below MIN_ICON_LENGTH {}", length, MIN_ICON_LENGTH); - return false; - } - - if (length > MAX_ICON_LENGTH) { - log.debug("Length {} greater than MAX_ICON_LENGTH {}", length, MAX_ICON_LENGTH); - return false; - } - - return true; - } - - private byte[] getIconInPage(String url) { - - Document doc = null; - try { - HttpResult result = getter.getBinary(url, TIMEOUT); - doc = Jsoup.parse(new String(result.getContent()), url); - } catch (Exception e) { - log.debug("Failed to retrieve page to find icon"); - return null; - } - - Elements icons = doc.select("link[rel~=(?i)^(shortcut|icon|shortcut icon)$]"); - - if (icons.isEmpty()) { - log.debug("No icon found in page {}", url); - return null; - } - - String href = icons.get(0).attr("abs:href"); - if (StringUtils.isBlank(href)) { - log.debug("No icon found in page"); - return null; - } - - log.debug("Found unconfirmed iconInPage at {}", href); - - byte[] bytes = null; - String contentType = null; - try { - HttpResult result = getter.getBinary(href, TIMEOUT); - bytes = result.getContent(); - contentType = result.getContentType(); - } catch (Exception e) { - log.debug("Failed to retrieve icon found in page {}", href); - return null; - } - - if (!isValidIconResponse(bytes, contentType)) { - log.debug("Invalid icon found for {}", href); - return null; - } - - return bytes; - } - - /* - * Instead of grabbing the actual favicon, grab the user's icon - * This prevents a whole bunch of repeated YouTube icons, replacing - * each with identifiable user icons. - */ - private byte[] getYouTubeIcon(String url) { - byte[] bytes = null; - String contentType = null; - String username = null; - String imageUrl = null; - String thumbnailUrl = null; - try { - int apiOrBase = url.indexOf("/users/"); - int userEndSlash = url.indexOf('/', apiOrBase + "/users/".length()); - if (userEndSlash != -1) { - username = url.substring(apiOrBase + "/users/".length(), userEndSlash); - } - imageUrl = "https://gdata.youtube.com/feeds/users/" + username; - log.debug("Getting YouTube user's icon, {}", url); - - //initial get to translate username to obscure user thumbnail URL - HttpResult result = getter.getBinary(imageUrl, TIMEOUT); - bytes = result.getContent(); - contentType = result.getContentType(); - thumbnailUrl = FeedUtils.parseForImageUrl(bytes); - - int thumbnailStart = thumbnailUrl.indexOf("", thumbnailStart); - if (thumbnailStart != -1) { - thumbnailUrl = thumbnailUrl.substring(thumbnailStart+"