forked from Archives/Athou_commafeed
rewrite favicon fetcher
This commit is contained in:
5
pom.xml
5
pom.xml
@@ -180,6 +180,11 @@
|
||||
<artifactId>guice</artifactId>
|
||||
<version>3.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.inject.extensions</groupId>
|
||||
<artifactId>guice-multibindings</artifactId>
|
||||
<version>3.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.dropwizard</groupId>
|
||||
|
||||
@@ -11,8 +11,12 @@ import com.commafeed.CommaFeedConfiguration.CacheType;
|
||||
import com.commafeed.backend.cache.CacheService;
|
||||
import com.commafeed.backend.cache.NoopCacheService;
|
||||
import com.commafeed.backend.cache.RedisCacheService;
|
||||
import com.commafeed.backend.favicon.DefaultFaviconFetcher;
|
||||
import com.commafeed.backend.favicon.AbstractFaviconFetcher;
|
||||
import com.commafeed.backend.favicon.YoutubeFaviconFetcher;
|
||||
import com.google.inject.AbstractModule;
|
||||
import com.google.inject.Provides;
|
||||
import com.google.inject.multibindings.Multibinder;
|
||||
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
@@ -33,5 +37,9 @@ public class CommaFeedModule extends AbstractModule {
|
||||
: new RedisCacheService(config.getRedisPoolFactory().build());
|
||||
log.info("using cache {}", cacheService.getClass());
|
||||
bind(CacheService.class).toInstance(cacheService);
|
||||
|
||||
Multibinder<AbstractFaviconFetcher> multibinder = Multibinder.newSetBinder(binder(), AbstractFaviconFetcher.class);
|
||||
multibinder.addBinding().to(YoutubeFaviconFetcher.class);
|
||||
multibinder.addBinding().to(DefaultFaviconFetcher.class);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
package com.commafeed.backend.favicon;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
|
||||
@Slf4j
|
||||
public abstract class AbstractFaviconFetcher {
|
||||
|
||||
private static List<String> ICON_MIMETYPE_BLACKLIST = Arrays.asList("application/xml", "text/html");
|
||||
private static long MIN_ICON_LENGTH = 100;
|
||||
private static long MAX_ICON_LENGTH = 100000;
|
||||
|
||||
protected static int TIMEOUT = 4000;
|
||||
|
||||
public abstract byte[] fetch(String url);
|
||||
|
||||
protected boolean isValidIconResponse(byte[] content, String contentType) {
|
||||
if (content == null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
long length = content.length;
|
||||
|
||||
if (StringUtils.isNotBlank(contentType)) {
|
||||
contentType = contentType.split(";")[0];
|
||||
}
|
||||
|
||||
if (ICON_MIMETYPE_BLACKLIST.contains(contentType)) {
|
||||
log.debug("Content-Type {} is blacklisted", contentType);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (length < MIN_ICON_LENGTH) {
|
||||
log.debug("Length {} below MIN_ICON_LENGTH {}", length, MIN_ICON_LENGTH);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (length > MAX_ICON_LENGTH) {
|
||||
log.debug("Length {} greater than MAX_ICON_LENGTH {}", length, MAX_ICON_LENGTH);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,120 @@
|
||||
package com.commafeed.backend.favicon;
|
||||
|
||||
import javax.inject.Inject;
|
||||
import javax.inject.Singleton;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import com.commafeed.backend.HttpGetter;
|
||||
import com.commafeed.backend.HttpGetter.HttpResult;
|
||||
import com.commafeed.backend.feed.FeedUtils;
|
||||
|
||||
/**
|
||||
* Inspired/Ported from https://github.com/potatolondon/getfavicon
|
||||
*
|
||||
*/
|
||||
@Slf4j
|
||||
@RequiredArgsConstructor(onConstructor = @__({ @Inject }))
|
||||
@Singleton
|
||||
public class DefaultFaviconFetcher extends AbstractFaviconFetcher {
|
||||
|
||||
private final HttpGetter getter;
|
||||
|
||||
@Override
|
||||
public byte[] fetch(String url) {
|
||||
if (url == null) {
|
||||
log.debug("url is null");
|
||||
return null;
|
||||
}
|
||||
|
||||
int doubleSlash = url.indexOf("//");
|
||||
if (doubleSlash == -1) {
|
||||
doubleSlash = 0;
|
||||
} else {
|
||||
doubleSlash += 2;
|
||||
}
|
||||
int firstSlash = url.indexOf('/', doubleSlash);
|
||||
if (firstSlash != -1) {
|
||||
url = url.substring(0, firstSlash);
|
||||
}
|
||||
|
||||
byte[] icon = getIconAtRoot(url);
|
||||
|
||||
if (icon == null) {
|
||||
icon = getIconInPage(url);
|
||||
}
|
||||
|
||||
return icon;
|
||||
}
|
||||
|
||||
private byte[] getIconAtRoot(String url) {
|
||||
byte[] bytes = null;
|
||||
String contentType = null;
|
||||
|
||||
try {
|
||||
url = FeedUtils.removeTrailingSlash(url) + "/favicon.ico";
|
||||
log.debug("getting root icon at {}", url);
|
||||
HttpResult result = getter.getBinary(url, TIMEOUT);
|
||||
bytes = result.getContent();
|
||||
contentType = result.getContentType();
|
||||
} catch (Exception e) {
|
||||
log.debug("Failed to retrieve iconAtRoot for url {}: ", url, e);
|
||||
}
|
||||
|
||||
if (!isValidIconResponse(bytes, contentType)) {
|
||||
bytes = null;
|
||||
}
|
||||
return bytes;
|
||||
}
|
||||
|
||||
private byte[] getIconInPage(String url) {
|
||||
|
||||
Document doc = null;
|
||||
try {
|
||||
HttpResult result = getter.getBinary(url, TIMEOUT);
|
||||
doc = Jsoup.parse(new String(result.getContent()), url);
|
||||
} catch (Exception e) {
|
||||
log.debug("Failed to retrieve page to find icon", e);
|
||||
return null;
|
||||
}
|
||||
|
||||
Elements icons = doc.select("link[rel~=(?i)^(shortcut|icon|shortcut icon)$]");
|
||||
|
||||
if (icons.isEmpty()) {
|
||||
log.debug("No icon found in page {}", url);
|
||||
return null;
|
||||
}
|
||||
|
||||
String href = icons.get(0).attr("abs:href");
|
||||
if (StringUtils.isBlank(href)) {
|
||||
log.debug("No icon found in page");
|
||||
return null;
|
||||
}
|
||||
|
||||
log.debug("Found unconfirmed iconInPage at {}", href);
|
||||
|
||||
byte[] bytes = null;
|
||||
String contentType = null;
|
||||
try {
|
||||
HttpResult result = getter.getBinary(href, TIMEOUT);
|
||||
bytes = result.getContent();
|
||||
contentType = result.getContentType();
|
||||
} catch (Exception e) {
|
||||
log.debug("Failed to retrieve icon found in page {}", href, e);
|
||||
return null;
|
||||
}
|
||||
|
||||
if (!isValidIconResponse(bytes, contentType)) {
|
||||
log.debug("Invalid icon found for {}", href);
|
||||
return null;
|
||||
}
|
||||
|
||||
return bytes;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,87 @@
|
||||
package com.commafeed.backend.favicon;
|
||||
|
||||
import javax.inject.Inject;
|
||||
import javax.inject.Singleton;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import com.commafeed.backend.HttpGetter;
|
||||
import com.commafeed.backend.HttpGetter.HttpResult;
|
||||
|
||||
@Slf4j
|
||||
@RequiredArgsConstructor(onConstructor = @__({ @Inject }))
|
||||
@Singleton
|
||||
public class YoutubeFaviconFetcher extends AbstractFaviconFetcher {
|
||||
|
||||
private final HttpGetter getter;
|
||||
|
||||
@Override
|
||||
public byte[] fetch(String url) {
|
||||
if (!url.toLowerCase().contains("://gdata.youtube.com/")) {
|
||||
return null;
|
||||
}
|
||||
|
||||
String userName = extractUserName(url);
|
||||
if (userName == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
String profileUrl = "https://gdata.youtube.com/feeds/users/" + userName;
|
||||
|
||||
byte[] bytes = null;
|
||||
String contentType = null;
|
||||
|
||||
try {
|
||||
log.debug("Getting YouTube user's icon, {}", url);
|
||||
|
||||
// initial get to translate username to obscure user thumbnail URL
|
||||
HttpResult profileResult = getter.getBinary(profileUrl, TIMEOUT);
|
||||
Document doc = Jsoup.parse(new String(profileResult.getContent()), profileUrl);
|
||||
|
||||
Elements thumbnails = doc.select("media|thumbnail");
|
||||
if (thumbnails.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
String thumbnailUrl = thumbnails.get(0).attr("abs:url");
|
||||
|
||||
int thumbnailStart = thumbnailUrl.indexOf("<media:thumbnail url='");
|
||||
int thumbnailEnd = thumbnailUrl.indexOf("'/>", thumbnailStart);
|
||||
if (thumbnailStart != -1) {
|
||||
thumbnailUrl = thumbnailUrl.substring(thumbnailStart + "<media:thumbnail url='".length(), thumbnailEnd);
|
||||
}
|
||||
|
||||
// final get to actually retrieve the thumbnail
|
||||
HttpResult iconResult = getter.getBinary(thumbnailUrl, TIMEOUT);
|
||||
bytes = iconResult.getContent();
|
||||
contentType = iconResult.getContentType();
|
||||
} catch (Exception e) {
|
||||
log.debug("Failed to retrieve YouTube icon", e);
|
||||
}
|
||||
|
||||
if (!isValidIconResponse(bytes, contentType)) {
|
||||
bytes = null;
|
||||
}
|
||||
return bytes;
|
||||
}
|
||||
|
||||
private String extractUserName(String url) {
|
||||
int apiOrBase = url.indexOf("/users/");
|
||||
if (apiOrBase == -1) {
|
||||
return null;
|
||||
}
|
||||
|
||||
int userEndSlash = url.indexOf('/', apiOrBase + "/users/".length());
|
||||
if (userEndSlash == -1) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return url.substring(apiOrBase + "/users/".length(), userEndSlash);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,216 +0,0 @@
|
||||
package com.commafeed.backend.feed;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import javax.inject.Inject;
|
||||
import javax.inject.Singleton;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import com.commafeed.backend.HttpGetter;
|
||||
import com.commafeed.backend.HttpGetter.HttpResult;
|
||||
|
||||
/**
|
||||
* Inspired/Ported from https://github.com/potatolondon/getfavicon
|
||||
*
|
||||
*/
|
||||
@Slf4j
|
||||
@RequiredArgsConstructor(onConstructor = @__({ @Inject }))
|
||||
@Singleton
|
||||
public class FaviconFetcher {
|
||||
|
||||
private static List<String> ICON_MIMETYPE_BLACKLIST = Arrays.asList("application/xml", "text/html");
|
||||
private static long MIN_ICON_LENGTH = 100;
|
||||
private static long MAX_ICON_LENGTH = 100000;
|
||||
private static int TIMEOUT = 4000;
|
||||
|
||||
private final HttpGetter getter;
|
||||
|
||||
public byte[] fetch(String url) {
|
||||
|
||||
if (url == null) {
|
||||
log.debug("url is null");
|
||||
return null;
|
||||
}
|
||||
|
||||
// Get YouTube Icon here
|
||||
if (url.toLowerCase().contains("://gdata.youtube.com/")) {
|
||||
byte[] icon = getYouTubeIcon(url);
|
||||
return icon;
|
||||
}
|
||||
|
||||
int doubleSlash = url.indexOf("//");
|
||||
if (doubleSlash == -1) {
|
||||
doubleSlash = 0;
|
||||
} else {
|
||||
doubleSlash += 2;
|
||||
}
|
||||
int firstSlash = url.indexOf('/', doubleSlash);
|
||||
if (firstSlash != -1) {
|
||||
url = url.substring(0, firstSlash);
|
||||
}
|
||||
|
||||
byte[] icon = getIconAtRoot(url);
|
||||
|
||||
if (icon == null) {
|
||||
icon = getIconInPage(url);
|
||||
}
|
||||
|
||||
return icon;
|
||||
}
|
||||
|
||||
private byte[] getIconAtRoot(String url) {
|
||||
byte[] bytes = null;
|
||||
String contentType = null;
|
||||
|
||||
try {
|
||||
url = FeedUtils.removeTrailingSlash(url) + "/favicon.ico";
|
||||
log.debug("getting root icon at {}", url);
|
||||
HttpResult result = getter.getBinary(url, TIMEOUT);
|
||||
bytes = result.getContent();
|
||||
contentType = result.getContentType();
|
||||
} catch (Exception e) {
|
||||
log.debug("Failed to retrieve iconAtRoot: " + e.getMessage(), e);
|
||||
}
|
||||
|
||||
if (!isValidIconResponse(bytes, contentType)) {
|
||||
bytes = null;
|
||||
}
|
||||
return bytes;
|
||||
}
|
||||
|
||||
private boolean isValidIconResponse(byte[] content, String contentType) {
|
||||
if (content == null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
long length = content.length;
|
||||
|
||||
if (StringUtils.isNotBlank(contentType)) {
|
||||
contentType = contentType.split(";")[0];
|
||||
}
|
||||
|
||||
if (ICON_MIMETYPE_BLACKLIST.contains(contentType)) {
|
||||
log.debug("Content-Type {} is blacklisted", contentType);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (length < MIN_ICON_LENGTH) {
|
||||
log.debug("Length {} below MIN_ICON_LENGTH {}", length, MIN_ICON_LENGTH);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (length > MAX_ICON_LENGTH) {
|
||||
log.debug("Length {} greater than MAX_ICON_LENGTH {}", length, MAX_ICON_LENGTH);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private byte[] getIconInPage(String url) {
|
||||
|
||||
Document doc = null;
|
||||
try {
|
||||
HttpResult result = getter.getBinary(url, TIMEOUT);
|
||||
doc = Jsoup.parse(new String(result.getContent()), url);
|
||||
} catch (Exception e) {
|
||||
log.debug("Failed to retrieve page to find icon");
|
||||
return null;
|
||||
}
|
||||
|
||||
Elements icons = doc.select("link[rel~=(?i)^(shortcut|icon|shortcut icon)$]");
|
||||
|
||||
if (icons.isEmpty()) {
|
||||
log.debug("No icon found in page {}", url);
|
||||
return null;
|
||||
}
|
||||
|
||||
String href = icons.get(0).attr("abs:href");
|
||||
if (StringUtils.isBlank(href)) {
|
||||
log.debug("No icon found in page");
|
||||
return null;
|
||||
}
|
||||
|
||||
log.debug("Found unconfirmed iconInPage at {}", href);
|
||||
|
||||
byte[] bytes = null;
|
||||
String contentType = null;
|
||||
try {
|
||||
HttpResult result = getter.getBinary(href, TIMEOUT);
|
||||
bytes = result.getContent();
|
||||
contentType = result.getContentType();
|
||||
} catch (Exception e) {
|
||||
log.debug("Failed to retrieve icon found in page {}", href);
|
||||
return null;
|
||||
}
|
||||
|
||||
if (!isValidIconResponse(bytes, contentType)) {
|
||||
log.debug("Invalid icon found for {}", href);
|
||||
return null;
|
||||
}
|
||||
|
||||
return bytes;
|
||||
}
|
||||
|
||||
/*
|
||||
* Instead of grabbing the actual favicon, grab the user's icon
|
||||
* This prevents a whole bunch of repeated YouTube icons, replacing
|
||||
* each with identifiable user icons.
|
||||
*/
|
||||
private byte[] getYouTubeIcon(String url) {
|
||||
byte[] bytes = null;
|
||||
String contentType = null;
|
||||
String username = null;
|
||||
String imageUrl = null;
|
||||
String thumbnailUrl = null;
|
||||
try {
|
||||
int apiOrBase = url.indexOf("/users/");
|
||||
int userEndSlash = url.indexOf('/', apiOrBase + "/users/".length());
|
||||
if (userEndSlash != -1) {
|
||||
username = url.substring(apiOrBase + "/users/".length(), userEndSlash);
|
||||
}
|
||||
imageUrl = "https://gdata.youtube.com/feeds/users/" + username;
|
||||
log.debug("Getting YouTube user's icon, {}", url);
|
||||
|
||||
//initial get to translate username to obscure user thumbnail URL
|
||||
HttpResult result = getter.getBinary(imageUrl, TIMEOUT);
|
||||
bytes = result.getContent();
|
||||
contentType = result.getContentType();
|
||||
thumbnailUrl = FeedUtils.parseForImageUrl(bytes);
|
||||
|
||||
int thumbnailStart = thumbnailUrl.indexOf("<media:thumbnail url='");
|
||||
int thumbnailEnd = thumbnailUrl.indexOf("'/>", thumbnailStart);
|
||||
if (thumbnailStart != -1) {
|
||||
thumbnailUrl = thumbnailUrl.substring(thumbnailStart+"<media:thumbnail url='".length(), thumbnailEnd);
|
||||
}
|
||||
|
||||
//final get to actually retrieve the thumbnail
|
||||
result = getter.getBinary(thumbnailUrl, TIMEOUT);
|
||||
bytes = result.getContent();
|
||||
contentType = result.getContentType();
|
||||
} catch (Exception e) {
|
||||
log.debug("Failed to retrieve YouTubeIcon, instead retrieving default YouTube favicon: " + e.getMessage(), e);
|
||||
return fetch("http://www.youtube.com/");
|
||||
}
|
||||
|
||||
if (!isValidIconResponse(bytes, contentType)) {
|
||||
bytes = null;
|
||||
}
|
||||
return bytes;
|
||||
}
|
||||
|
||||
public boolean exceptionUrl(String url) {
|
||||
if (url.toLowerCase().contains("://gdata.youtube.com/")) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
package com.commafeed.backend.service;
|
||||
|
||||
import java.util.Date;
|
||||
import java.util.Set;
|
||||
|
||||
import javax.inject.Inject;
|
||||
import javax.inject.Singleton;
|
||||
@@ -10,6 +11,7 @@ import lombok.RequiredArgsConstructor;
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
|
||||
import com.commafeed.backend.dao.FeedDAO;
|
||||
import com.commafeed.backend.favicon.AbstractFaviconFetcher;
|
||||
import com.commafeed.backend.feed.FeedUtils;
|
||||
import com.commafeed.backend.model.Feed;
|
||||
|
||||
@@ -18,6 +20,7 @@ import com.commafeed.backend.model.Feed;
|
||||
public class FeedService {
|
||||
|
||||
private final FeedDAO feedDAO;
|
||||
private final Set<AbstractFaviconFetcher> faviconFetchers;
|
||||
|
||||
public synchronized Feed findOrCreate(String url) {
|
||||
String normalized = FeedUtils.normalizeURL(url);
|
||||
@@ -33,4 +36,17 @@ public class FeedService {
|
||||
return feed;
|
||||
}
|
||||
|
||||
public byte[] fetchFavicon(Feed feed) {
|
||||
String url = feed.getLink() != null ? feed.getLink() : feed.getUrl();
|
||||
|
||||
byte[] icon = null;
|
||||
for (AbstractFaviconFetcher faviconFetcher : faviconFetchers) {
|
||||
icon = faviconFetcher.fetch(url);
|
||||
if (icon != null) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return icon;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -42,7 +42,6 @@ import com.commafeed.backend.cache.CacheService;
|
||||
import com.commafeed.backend.dao.FeedCategoryDAO;
|
||||
import com.commafeed.backend.dao.FeedEntryStatusDAO;
|
||||
import com.commafeed.backend.dao.FeedSubscriptionDAO;
|
||||
import com.commafeed.backend.feed.FaviconFetcher;
|
||||
import com.commafeed.backend.feed.FeedFetcher;
|
||||
import com.commafeed.backend.feed.FeedQueues;
|
||||
import com.commafeed.backend.feed.FeedUtils;
|
||||
@@ -57,6 +56,7 @@ import com.commafeed.backend.model.UserSettings.ReadingOrder;
|
||||
import com.commafeed.backend.opml.OPMLExporter;
|
||||
import com.commafeed.backend.opml.OPMLImporter;
|
||||
import com.commafeed.backend.service.FeedEntryService;
|
||||
import com.commafeed.backend.service.FeedService;
|
||||
import com.commafeed.backend.service.FeedSubscriptionService;
|
||||
import com.commafeed.frontend.auth.SecurityCheck;
|
||||
import com.commafeed.frontend.model.Entries;
|
||||
@@ -95,8 +95,8 @@ public class FeedREST {
|
||||
private final FeedSubscriptionDAO feedSubscriptionDAO;
|
||||
private final FeedCategoryDAO feedCategoryDAO;
|
||||
private final FeedEntryStatusDAO feedEntryStatusDAO;
|
||||
private final FaviconFetcher faviconFetcher;
|
||||
private final FeedFetcher feedFetcher;
|
||||
private final FeedService feedService;
|
||||
private final FeedEntryService feedEntryService;
|
||||
private final FeedSubscriptionService feedSubscriptionService;
|
||||
private final FeedQueues queues;
|
||||
@@ -322,8 +322,7 @@ public class FeedREST {
|
||||
return Response.status(Status.NOT_FOUND).build();
|
||||
}
|
||||
Feed feed = subscription.getFeed();
|
||||
String url = faviconFetcher.exceptionUrl(feed.getUrl()) ? feed.getUrl() : (feed.getLink() != null ? feed.getLink() : feed.getUrl());
|
||||
byte[] icon = faviconFetcher.fetch(url);
|
||||
byte[] icon = feedService.fetchFavicon(feed);
|
||||
|
||||
ResponseBuilder builder = null;
|
||||
if (icon == null) {
|
||||
|
||||
Reference in New Issue
Block a user