diff --git a/README.md b/README.md index 13016281..6715a593 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ You also need Maven 3.x (and a Java 1.7+ JDK) installed in order to build the ap To install maven and openjdk on Ubuntu, issue the following commands - sudo apt-get install openjdk-7-jdk maven + sudo apt-get install build-essential openjdk-7-jdk maven On Windows and other operating systems, just download maven 3.x from the [official site](http://maven.apache.org/), extract it somewhere and add the `bin` directory to your `PATH` environment variable. diff --git a/pom.xml b/pom.xml index 7276eca5..e2c8314d 100644 --- a/pom.xml +++ b/pom.xml @@ -180,6 +180,11 @@ guice 3.0 + + com.google.inject.extensions + guice-multibindings + 3.0 + io.dropwizard diff --git a/src/main/java/com/commafeed/CommaFeedModule.java b/src/main/java/com/commafeed/CommaFeedModule.java index 1524865b..a18b0be3 100644 --- a/src/main/java/com/commafeed/CommaFeedModule.java +++ b/src/main/java/com/commafeed/CommaFeedModule.java @@ -11,8 +11,12 @@ import com.commafeed.CommaFeedConfiguration.CacheType; import com.commafeed.backend.cache.CacheService; import com.commafeed.backend.cache.NoopCacheService; import com.commafeed.backend.cache.RedisCacheService; +import com.commafeed.backend.favicon.DefaultFaviconFetcher; +import com.commafeed.backend.favicon.AbstractFaviconFetcher; +import com.commafeed.backend.favicon.YoutubeFaviconFetcher; import com.google.inject.AbstractModule; import com.google.inject.Provides; +import com.google.inject.multibindings.Multibinder; @RequiredArgsConstructor @Slf4j @@ -33,5 +37,9 @@ public class CommaFeedModule extends AbstractModule { : new RedisCacheService(config.getRedisPoolFactory().build()); log.info("using cache {}", cacheService.getClass()); bind(CacheService.class).toInstance(cacheService); + + Multibinder multibinder = Multibinder.newSetBinder(binder(), AbstractFaviconFetcher.class); + multibinder.addBinding().to(YoutubeFaviconFetcher.class); + 
multibinder.addBinding().to(DefaultFaviconFetcher.class); } } diff --git a/src/main/java/com/commafeed/backend/favicon/AbstractFaviconFetcher.java b/src/main/java/com/commafeed/backend/favicon/AbstractFaviconFetcher.java new file mode 100644 index 00000000..3f989379 --- /dev/null +++ b/src/main/java/com/commafeed/backend/favicon/AbstractFaviconFetcher.java @@ -0,0 +1,69 @@ +package com.commafeed.backend.favicon; + +import java.util.Arrays; +import java.util.List; + +import lombok.extern.slf4j.Slf4j; + +import org.apache.commons.lang.StringUtils; + +/** + * Base class for favicon fetchers: subclasses implement {@link #fetch(String)} and may call + * {@link #isValidIconResponse(byte[], String)} to reject responses that do not look like an icon. + */ +@Slf4j +public abstract class AbstractFaviconFetcher { + + private static final List<String> ICON_MIMETYPE_BLACKLIST = Arrays.asList("application/xml", "text/html"); + private static final long MIN_ICON_LENGTH = 100; + private static final long MAX_ICON_LENGTH = 100000; + + /** Timeout value that subclasses pass to their HTTP calls. */ + protected static final int TIMEOUT = 4000; + + /** + * Fetches the icon for the given url. + * + * @param url the feed link or feed url to find an icon for + * @return the icon bytes, or null when this fetcher finds no icon for the url + */ + public abstract byte[] fetch(String url); + + /** + * Checks that a response has content, a content type that is not blacklisted and a length between + * MIN_ICON_LENGTH and MAX_ICON_LENGTH (inclusive). + * + * @param content the response body, may be null + * @param contentType the Content-Type value, may be null or carry parameters after a ';' + * @return true if the response can be used as an icon, false otherwise + */ + protected boolean isValidIconResponse(byte[] content, String contentType) { + if (content == null) { + return false; + } + + long length = content.length; + + if (StringUtils.isNotBlank(contentType)) { + // substringBefore never throws, whereas split(";")[0] fails on a value such as ";" + contentType = StringUtils.substringBefore(contentType, ";").trim(); + } + + if (ICON_MIMETYPE_BLACKLIST.contains(contentType)) { + log.debug("Content-Type {} is blacklisted", contentType); + return false; + } + + if (length < MIN_ICON_LENGTH) { + log.debug("Length {} below MIN_ICON_LENGTH {}", length, MIN_ICON_LENGTH); + return false; + } + + if (length > MAX_ICON_LENGTH) { + log.debug("Length {} greater than MAX_ICON_LENGTH {}", length, MAX_ICON_LENGTH); + return false; + } + + return true; + } +} diff --git a/src/main/java/com/commafeed/backend/feed/FaviconFetcher.java b/src/main/java/com/commafeed/backend/favicon/DefaultFaviconFetcher.java similarity index 67% rename from src/main/java/com/commafeed/backend/feed/FaviconFetcher.java rename to src/main/java/com/commafeed/backend/favicon/DefaultFaviconFetcher.java index a7349422..778d2f42 100644 --- a/src/main/java/com/commafeed/backend/feed/FaviconFetcher.java +++ 
b/src/main/java/com/commafeed/backend/favicon/DefaultFaviconFetcher.java @@ -1,157 +1,120 @@ -package com.commafeed.backend.feed; - -import java.util.Arrays; -import java.util.List; - -import javax.inject.Inject; -import javax.inject.Singleton; - -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; - -import org.apache.commons.lang.StringUtils; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.jsoup.select.Elements; - -import com.commafeed.backend.HttpGetter; -import com.commafeed.backend.HttpGetter.HttpResult; - -/** - * Inspired/Ported from https://github.com/potatolondon/getfavicon - * - */ -@Slf4j -@RequiredArgsConstructor(onConstructor = @__({ @Inject })) -@Singleton -public class FaviconFetcher { - - private static List ICON_MIMETYPE_BLACKLIST = Arrays.asList("application/xml", "text/html"); - private static long MIN_ICON_LENGTH = 100; - private static long MAX_ICON_LENGTH = 100000; - private static int TIMEOUT = 4000; - - private final HttpGetter getter; - - public byte[] fetch(String url) { - - if (url == null) { - log.debug("url is null"); - return null; - } - - int doubleSlash = url.indexOf("//"); - if (doubleSlash == -1) { - doubleSlash = 0; - } else { - doubleSlash += 2; - } - int firstSlash = url.indexOf('/', doubleSlash); - if (firstSlash != -1) { - url = url.substring(0, firstSlash); - } - - byte[] icon = getIconAtRoot(url); - - if (icon == null) { - icon = getIconInPage(url); - } - - return icon; - } - - private byte[] getIconAtRoot(String url) { - byte[] bytes = null; - String contentType = null; - - try { - url = FeedUtils.removeTrailingSlash(url) + "/favicon.ico"; - log.debug("getting root icon at {}", url); - HttpResult result = getter.getBinary(url, TIMEOUT); - bytes = result.getContent(); - contentType = result.getContentType(); - } catch (Exception e) { - log.debug("Failed to retrieve iconAtRoot: " + e.getMessage(), e); - } - - if (!isValidIconResponse(bytes, contentType)) { - bytes = null; - } - 
return bytes; - } - - private boolean isValidIconResponse(byte[] content, String contentType) { - if (content == null) { - return false; - } - - long length = content.length; - - if (StringUtils.isNotBlank(contentType)) { - contentType = contentType.split(";")[0]; - } - - if (ICON_MIMETYPE_BLACKLIST.contains(contentType)) { - log.debug("Content-Type {} is blacklisted", contentType); - return false; - } - - if (length < MIN_ICON_LENGTH) { - log.debug("Length {} below MIN_ICON_LENGTH {}", length, MIN_ICON_LENGTH); - return false; - } - - if (length > MAX_ICON_LENGTH) { - log.debug("Length {} greater than MAX_ICON_LENGTH {}", length, MAX_ICON_LENGTH); - return false; - } - - return true; - } - - private byte[] getIconInPage(String url) { - - Document doc = null; - try { - HttpResult result = getter.getBinary(url, TIMEOUT); - doc = Jsoup.parse(new String(result.getContent()), url); - } catch (Exception e) { - log.debug("Failed to retrieve page to find icon"); - return null; - } - - Elements icons = doc.select("link[rel~=(?i)^(shortcut|icon|shortcut icon)$]"); - - if (icons.isEmpty()) { - log.debug("No icon found in page {}", url); - return null; - } - - String href = icons.get(0).attr("abs:href"); - if (StringUtils.isBlank(href)) { - log.debug("No icon found in page"); - return null; - } - - log.debug("Found unconfirmed iconInPage at {}", href); - - byte[] bytes = null; - String contentType = null; - try { - HttpResult result = getter.getBinary(href, TIMEOUT); - bytes = result.getContent(); - contentType = result.getContentType(); - } catch (Exception e) { - log.debug("Failed to retrieve icon found in page {}", href); - return null; - } - - if (!isValidIconResponse(bytes, contentType)) { - log.debug("Invalid icon found for {}", href); - return null; - } - - return bytes; - } - -} +package com.commafeed.backend.favicon; + +import javax.inject.Inject; +import javax.inject.Singleton; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import 
org.apache.commons.lang.StringUtils; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.select.Elements; + +import com.commafeed.backend.HttpGetter; +import com.commafeed.backend.HttpGetter.HttpResult; +import com.commafeed.backend.feed.FeedUtils; + +/** + * Inspired/Ported from https://github.com/potatolondon/getfavicon + * + */ +@Slf4j +@RequiredArgsConstructor(onConstructor = @__({ @Inject })) +@Singleton +public class DefaultFaviconFetcher extends AbstractFaviconFetcher { + + private final HttpGetter getter; + + @Override + public byte[] fetch(String url) { + if (url == null) { + log.debug("url is null"); + return null; + } + + int doubleSlash = url.indexOf("//"); + if (doubleSlash == -1) { + doubleSlash = 0; + } else { + doubleSlash += 2; + } + int firstSlash = url.indexOf('/', doubleSlash); + if (firstSlash != -1) { + url = url.substring(0, firstSlash); + } + + byte[] icon = getIconAtRoot(url); + + if (icon == null) { + icon = getIconInPage(url); + } + + return icon; + } + + private byte[] getIconAtRoot(String url) { + byte[] bytes = null; + String contentType = null; + + try { + url = FeedUtils.removeTrailingSlash(url) + "/favicon.ico"; + log.debug("getting root icon at {}", url); + HttpResult result = getter.getBinary(url, TIMEOUT); + bytes = result.getContent(); + contentType = result.getContentType(); + } catch (Exception e) { + log.debug("Failed to retrieve iconAtRoot for url {}: ", url, e); + } + + if (!isValidIconResponse(bytes, contentType)) { + bytes = null; + } + return bytes; + } + + private byte[] getIconInPage(String url) { + + Document doc = null; + try { + HttpResult result = getter.getBinary(url, TIMEOUT); + doc = Jsoup.parse(new String(result.getContent()), url); + } catch (Exception e) { + log.debug("Failed to retrieve page to find icon", e); + return null; + } + + Elements icons = doc.select("link[rel~=(?i)^(shortcut|icon|shortcut icon)$]"); + + if (icons.isEmpty()) { + log.debug("No icon found in page {}", url); + 
return null; + } + + String href = icons.get(0).attr("abs:href"); + if (StringUtils.isBlank(href)) { + log.debug("No icon found in page"); + return null; + } + + log.debug("Found unconfirmed iconInPage at {}", href); + + byte[] bytes = null; + String contentType = null; + try { + HttpResult result = getter.getBinary(href, TIMEOUT); + bytes = result.getContent(); + contentType = result.getContentType(); + } catch (Exception e) { + log.debug("Failed to retrieve icon found in page {}", href, e); + return null; + } + + if (!isValidIconResponse(bytes, contentType)) { + log.debug("Invalid icon found for {}", href); + return null; + } + + return bytes; + } +} diff --git a/src/main/java/com/commafeed/backend/favicon/YoutubeFaviconFetcher.java b/src/main/java/com/commafeed/backend/favicon/YoutubeFaviconFetcher.java new file mode 100644 index 00000000..f126857d --- /dev/null +++ b/src/main/java/com/commafeed/backend/favicon/YoutubeFaviconFetcher.java @@ -0,0 +1,90 @@ +package com.commafeed.backend.favicon; + +import javax.inject.Inject; +import javax.inject.Singleton; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.select.Elements; + +import com.commafeed.backend.HttpGetter; +import com.commafeed.backend.HttpGetter.HttpResult; + +@Slf4j +@RequiredArgsConstructor(onConstructor = @__({ @Inject })) +@Singleton +public class YoutubeFaviconFetcher extends AbstractFaviconFetcher { + + private final HttpGetter getter; + + /** + * Only handles urls containing "://gdata.youtube.com/"; for any other url, including null, returns null so that the caller can try the next fetcher. + */ + @Override + public byte[] fetch(String url) { + if (url == null || !url.toLowerCase().contains("://gdata.youtube.com/")) { + return null; + } + + String userName = extractUserName(url); + if (userName == null) { + return null; + } + + String profileUrl = "https://gdata.youtube.com/feeds/users/" + userName; + + byte[] bytes = null; + String contentType = null; + + try { + log.debug("Getting YouTube user's icon, {}", url); + + // initial get to translate username to obscure 
user thumbnail URL + HttpResult profileResult = getter.getBinary(profileUrl, TIMEOUT); + Document doc = Jsoup.parse(new String(profileResult.getContent()), profileUrl); + + Elements thumbnails = doc.select("media|thumbnail"); + if (thumbnails.isEmpty()) { + return null; + } + + String thumbnailUrl = thumbnails.get(0).attr("abs:url"); + + int thumbnailStart = thumbnailUrl.indexOf("", thumbnailStart); + if (thumbnailStart != -1) { + thumbnailUrl = thumbnailUrl.substring(thumbnailStart + " faviconFetchers; public synchronized Feed findOrCreate(String url) { String normalized = FeedUtils.normalizeURL(url); @@ -33,4 +36,17 @@ public class FeedService { return feed; } + public byte[] fetchFavicon(Feed feed) { + String url = feed.getLink() != null ? feed.getLink() : feed.getUrl(); + + byte[] icon = null; + for (AbstractFaviconFetcher faviconFetcher : faviconFetchers) { + icon = faviconFetcher.fetch(url); + if (icon != null) { + break; + } + } + return icon; + } + } diff --git a/src/main/java/com/commafeed/frontend/resource/FeedREST.java b/src/main/java/com/commafeed/frontend/resource/FeedREST.java index ce0d72ea..e0723e5c 100644 --- a/src/main/java/com/commafeed/frontend/resource/FeedREST.java +++ b/src/main/java/com/commafeed/frontend/resource/FeedREST.java @@ -42,7 +42,6 @@ import com.commafeed.backend.cache.CacheService; import com.commafeed.backend.dao.FeedCategoryDAO; import com.commafeed.backend.dao.FeedEntryStatusDAO; import com.commafeed.backend.dao.FeedSubscriptionDAO; -import com.commafeed.backend.feed.FaviconFetcher; import com.commafeed.backend.feed.FeedFetcher; import com.commafeed.backend.feed.FeedQueues; import com.commafeed.backend.feed.FeedUtils; @@ -57,6 +56,7 @@ import com.commafeed.backend.model.UserSettings.ReadingOrder; import com.commafeed.backend.opml.OPMLExporter; import com.commafeed.backend.opml.OPMLImporter; import com.commafeed.backend.service.FeedEntryService; +import com.commafeed.backend.service.FeedService; import 
com.commafeed.backend.service.FeedSubscriptionService; import com.commafeed.frontend.auth.SecurityCheck; import com.commafeed.frontend.model.Entries; @@ -95,8 +95,8 @@ public class FeedREST { private final FeedSubscriptionDAO feedSubscriptionDAO; private final FeedCategoryDAO feedCategoryDAO; private final FeedEntryStatusDAO feedEntryStatusDAO; - private final FaviconFetcher faviconFetcher; private final FeedFetcher feedFetcher; + private final FeedService feedService; private final FeedEntryService feedEntryService; private final FeedSubscriptionService feedSubscriptionService; private final FeedQueues queues; @@ -322,8 +322,7 @@ public class FeedREST { return Response.status(Status.NOT_FOUND).build(); } Feed feed = subscription.getFeed(); - String url = feed.getLink() != null ? feed.getLink() : feed.getUrl(); - byte[] icon = faviconFetcher.fetch(url); + byte[] icon = feedService.fetchFavicon(feed); ResponseBuilder builder = null; if (icon == null) {