Merge branch 'Hubcapp-master'

This commit is contained in:
Athou
2014-10-26 12:26:30 +01:00
9 changed files with 303 additions and 162 deletions

View File

@@ -21,7 +21,7 @@ You also need Maven 3.x (and a Java 1.7+ JDK) installed in order to build the ap
To install maven and openjdk on Ubuntu, issue the following commands To install maven and openjdk on Ubuntu, issue the following commands
sudo apt-get install openjdk-7-jdk maven sudo apt-get install build-essential openjdk-7-jdk maven
On Windows and other operating systems, just download maven 3.x from the [official site](http://maven.apache.org/), extract it somewhere and add the `bin` directory to your `PATH` environment variable. On Windows and other operating systems, just download maven 3.x from the [official site](http://maven.apache.org/), extract it somewhere and add the `bin` directory to your `PATH` environment variable.

View File

@@ -180,6 +180,11 @@
<artifactId>guice</artifactId> <artifactId>guice</artifactId>
<version>3.0</version> <version>3.0</version>
</dependency> </dependency>
<dependency>
<groupId>com.google.inject.extensions</groupId>
<artifactId>guice-multibindings</artifactId>
<version>3.0</version>
</dependency>
<dependency> <dependency>
<groupId>io.dropwizard</groupId> <groupId>io.dropwizard</groupId>

View File

@@ -11,8 +11,12 @@ import com.commafeed.CommaFeedConfiguration.CacheType;
import com.commafeed.backend.cache.CacheService; import com.commafeed.backend.cache.CacheService;
import com.commafeed.backend.cache.NoopCacheService; import com.commafeed.backend.cache.NoopCacheService;
import com.commafeed.backend.cache.RedisCacheService; import com.commafeed.backend.cache.RedisCacheService;
import com.commafeed.backend.favicon.DefaultFaviconFetcher;
import com.commafeed.backend.favicon.AbstractFaviconFetcher;
import com.commafeed.backend.favicon.YoutubeFaviconFetcher;
import com.google.inject.AbstractModule; import com.google.inject.AbstractModule;
import com.google.inject.Provides; import com.google.inject.Provides;
import com.google.inject.multibindings.Multibinder;
@RequiredArgsConstructor @RequiredArgsConstructor
@Slf4j @Slf4j
@@ -33,5 +37,9 @@ public class CommaFeedModule extends AbstractModule {
: new RedisCacheService(config.getRedisPoolFactory().build()); : new RedisCacheService(config.getRedisPoolFactory().build());
log.info("using cache {}", cacheService.getClass()); log.info("using cache {}", cacheService.getClass());
bind(CacheService.class).toInstance(cacheService); bind(CacheService.class).toInstance(cacheService);
Multibinder<AbstractFaviconFetcher> multibinder = Multibinder.newSetBinder(binder(), AbstractFaviconFetcher.class);
multibinder.addBinding().to(YoutubeFaviconFetcher.class);
multibinder.addBinding().to(DefaultFaviconFetcher.class);
} }
} }

View File

@@ -0,0 +1,49 @@
package com.commafeed.backend.favicon;
import java.util.Arrays;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang.StringUtils;
/**
 * Base class for favicon fetchers. Subclasses implement {@link #fetch(String)} and can use
 * {@link #isValidIconResponse(byte[], String)} to reject responses that cannot be an icon
 * (blacklisted content type, or a payload that is too small or too large).
 */
@Slf4j
public abstract class AbstractFaviconFetcher {

	/** Media types that are never accepted as an icon; matched case-insensitively. */
	private static final List<String> ICON_MIMETYPE_BLACKLIST = Arrays.asList("application/xml", "text/html");

	/** Smallest accepted payload size, in bytes. */
	private static final long MIN_ICON_LENGTH = 100;

	/** Largest accepted payload size, in bytes. */
	private static final long MAX_ICON_LENGTH = 100000;

	/** Timeout handed to the HTTP getter by subclasses (presumably milliseconds - confirm against HttpGetter). */
	protected static final int TIMEOUT = 4000;

	/**
	 * Tries to fetch an icon for the given url.
	 *
	 * @param url
	 *            the url to find an icon for
	 * @return the icon bytes, or {@code null} when this fetcher could not provide an icon
	 */
	public abstract byte[] fetch(String url);

	/**
	 * Checks whether an HTTP response plausibly contains an icon.
	 *
	 * @param content
	 *            the response body, may be {@code null}
	 * @param contentType
	 *            the raw Content-Type header value, may be {@code null} or blank
	 * @return {@code false} when the content is {@code null}, its media type is blacklisted, or its length is
	 *         outside [MIN_ICON_LENGTH, MAX_ICON_LENGTH]; {@code true} otherwise
	 */
	protected boolean isValidIconResponse(byte[] content, String contentType) {
		if (content == null) {
			return false;
		}

		String mimeType = extractMimeType(contentType);
		if (isBlacklisted(mimeType)) {
			log.debug("Content-Type {} is blacklisted", mimeType);
			return false;
		}

		long length = content.length;
		if (length < MIN_ICON_LENGTH) {
			log.debug("Length {} below MIN_ICON_LENGTH {}", length, MIN_ICON_LENGTH);
			return false;
		}

		if (length > MAX_ICON_LENGTH) {
			log.debug("Length {} greater than MAX_ICON_LENGTH {}", length, MAX_ICON_LENGTH);
			return false;
		}

		return true;
	}

	/**
	 * Strips parameters (everything from the first ';') and surrounding whitespace from a Content-Type value.
	 * Blank or null input is returned unchanged. substringBefore is used instead of split(";")[0] because
	 * split drops trailing empty strings and yields an empty array for a value such as ";".
	 */
	private static String extractMimeType(String contentType) {
		if (StringUtils.isBlank(contentType)) {
			return contentType;
		}
		return StringUtils.substringBefore(contentType, ";").trim();
	}

	/** Returns true when the media type matches a blacklist entry, ignoring case. */
	private static boolean isBlacklisted(String mimeType) {
		for (String blacklisted : ICON_MIMETYPE_BLACKLIST) {
			if (blacklisted.equalsIgnoreCase(mimeType)) {
				return true;
			}
		}
		return false;
	}
}

View File

@@ -1,157 +1,120 @@
package com.commafeed.backend.feed; package com.commafeed.backend.favicon;
import java.util.Arrays; import javax.inject.Inject;
import java.util.List; import javax.inject.Singleton;
import javax.inject.Inject; import lombok.RequiredArgsConstructor;
import javax.inject.Singleton; import lombok.extern.slf4j.Slf4j;
import lombok.RequiredArgsConstructor; import org.apache.commons.lang.StringUtils;
import lombok.extern.slf4j.Slf4j; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.apache.commons.lang.StringUtils; import org.jsoup.select.Elements;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import com.commafeed.backend.HttpGetter;
import org.jsoup.select.Elements; import com.commafeed.backend.HttpGetter.HttpResult;
import com.commafeed.backend.feed.FeedUtils;
import com.commafeed.backend.HttpGetter;
import com.commafeed.backend.HttpGetter.HttpResult; /**
* Inspired/Ported from https://github.com/potatolondon/getfavicon
/** *
* Inspired/Ported from https://github.com/potatolondon/getfavicon */
* @Slf4j
*/ @RequiredArgsConstructor(onConstructor = @__({ @Inject }))
@Slf4j @Singleton
@RequiredArgsConstructor(onConstructor = @__({ @Inject })) public class DefaultFaviconFetcher extends AbstractFaviconFetcher {
@Singleton
public class FaviconFetcher { private final HttpGetter getter;
private static List<String> ICON_MIMETYPE_BLACKLIST = Arrays.asList("application/xml", "text/html"); @Override
private static long MIN_ICON_LENGTH = 100; public byte[] fetch(String url) {
private static long MAX_ICON_LENGTH = 100000; if (url == null) {
private static int TIMEOUT = 4000; log.debug("url is null");
return null;
private final HttpGetter getter; }
public byte[] fetch(String url) { int doubleSlash = url.indexOf("//");
if (doubleSlash == -1) {
if (url == null) { doubleSlash = 0;
log.debug("url is null"); } else {
return null; doubleSlash += 2;
} }
int firstSlash = url.indexOf('/', doubleSlash);
int doubleSlash = url.indexOf("//"); if (firstSlash != -1) {
if (doubleSlash == -1) { url = url.substring(0, firstSlash);
doubleSlash = 0; }
} else {
doubleSlash += 2; byte[] icon = getIconAtRoot(url);
}
int firstSlash = url.indexOf('/', doubleSlash); if (icon == null) {
if (firstSlash != -1) { icon = getIconInPage(url);
url = url.substring(0, firstSlash); }
}
return icon;
byte[] icon = getIconAtRoot(url); }
if (icon == null) { private byte[] getIconAtRoot(String url) {
icon = getIconInPage(url); byte[] bytes = null;
} String contentType = null;
return icon; try {
} url = FeedUtils.removeTrailingSlash(url) + "/favicon.ico";
log.debug("getting root icon at {}", url);
private byte[] getIconAtRoot(String url) { HttpResult result = getter.getBinary(url, TIMEOUT);
byte[] bytes = null; bytes = result.getContent();
String contentType = null; contentType = result.getContentType();
} catch (Exception e) {
try { log.debug("Failed to retrieve iconAtRoot for url {}: ", url, e);
url = FeedUtils.removeTrailingSlash(url) + "/favicon.ico"; }
log.debug("getting root icon at {}", url);
HttpResult result = getter.getBinary(url, TIMEOUT); if (!isValidIconResponse(bytes, contentType)) {
bytes = result.getContent(); bytes = null;
contentType = result.getContentType(); }
} catch (Exception e) { return bytes;
log.debug("Failed to retrieve iconAtRoot: " + e.getMessage(), e); }
}
private byte[] getIconInPage(String url) {
if (!isValidIconResponse(bytes, contentType)) {
bytes = null; Document doc = null;
} try {
return bytes; HttpResult result = getter.getBinary(url, TIMEOUT);
} doc = Jsoup.parse(new String(result.getContent()), url);
} catch (Exception e) {
private boolean isValidIconResponse(byte[] content, String contentType) { log.debug("Failed to retrieve page to find icon", e);
if (content == null) { return null;
return false; }
}
Elements icons = doc.select("link[rel~=(?i)^(shortcut|icon|shortcut icon)$]");
long length = content.length;
if (icons.isEmpty()) {
if (StringUtils.isNotBlank(contentType)) { log.debug("No icon found in page {}", url);
contentType = contentType.split(";")[0]; return null;
} }
if (ICON_MIMETYPE_BLACKLIST.contains(contentType)) { String href = icons.get(0).attr("abs:href");
log.debug("Content-Type {} is blacklisted", contentType); if (StringUtils.isBlank(href)) {
return false; log.debug("No icon found in page");
} return null;
}
if (length < MIN_ICON_LENGTH) {
log.debug("Length {} below MIN_ICON_LENGTH {}", length, MIN_ICON_LENGTH); log.debug("Found unconfirmed iconInPage at {}", href);
return false;
} byte[] bytes = null;
String contentType = null;
if (length > MAX_ICON_LENGTH) { try {
log.debug("Length {} greater than MAX_ICON_LENGTH {}", length, MAX_ICON_LENGTH); HttpResult result = getter.getBinary(href, TIMEOUT);
return false; bytes = result.getContent();
} contentType = result.getContentType();
} catch (Exception e) {
return true; log.debug("Failed to retrieve icon found in page {}", href, e);
} return null;
}
private byte[] getIconInPage(String url) {
if (!isValidIconResponse(bytes, contentType)) {
Document doc = null; log.debug("Invalid icon found for {}", href);
try { return null;
HttpResult result = getter.getBinary(url, TIMEOUT); }
doc = Jsoup.parse(new String(result.getContent()), url);
} catch (Exception e) { return bytes;
log.debug("Failed to retrieve page to find icon"); }
return null; }
}
Elements icons = doc.select("link[rel~=(?i)^(shortcut|icon|shortcut icon)$]");
if (icons.isEmpty()) {
log.debug("No icon found in page {}", url);
return null;
}
String href = icons.get(0).attr("abs:href");
if (StringUtils.isBlank(href)) {
log.debug("No icon found in page");
return null;
}
log.debug("Found unconfirmed iconInPage at {}", href);
byte[] bytes = null;
String contentType = null;
try {
HttpResult result = getter.getBinary(href, TIMEOUT);
bytes = result.getContent();
contentType = result.getContentType();
} catch (Exception e) {
log.debug("Failed to retrieve icon found in page {}", href);
return null;
}
if (!isValidIconResponse(bytes, contentType)) {
log.debug("Invalid icon found for {}", href);
return null;
}
return bytes;
}
}

View File

@@ -0,0 +1,87 @@
package com.commafeed.backend.favicon;
import javax.inject.Inject;
import javax.inject.Singleton;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import com.commafeed.backend.HttpGetter;
import com.commafeed.backend.HttpGetter.HttpResult;
/**
 * Favicon fetcher for YouTube GData feed urls. It extracts the user name from the feed url, downloads the
 * user's profile feed, picks the first {@code media:thumbnail} element and downloads the image it points to.
 * Returns {@code null} for any url that does not contain {@code ://gdata.youtube.com/}.
 */
@Slf4j
@RequiredArgsConstructor(onConstructor = @__({ @Inject }))
@Singleton
public class YoutubeFaviconFetcher extends AbstractFaviconFetcher {

	private static final String USERS_SEGMENT = "/users/";
	private static final String THUMBNAIL_TAG_START = "<media:thumbnail url='";
	private static final String THUMBNAIL_TAG_END = "'/>";

	private final HttpGetter getter;

	/**
	 * Fetches the YouTube user's thumbnail as the feed icon.
	 *
	 * @param url
	 *            the feed url, may be {@code null}
	 * @return the thumbnail bytes, or {@code null} when the url is null, is not a YouTube GData url, contains no
	 *         user name, or the thumbnail could not be retrieved or validated
	 */
	@Override
	public byte[] fetch(String url) {
		// a null url must yield null (not a NullPointerException) so the next fetcher gets a chance to run
		if (url == null) {
			log.debug("url is null");
			return null;
		}

		if (!url.toLowerCase().contains("://gdata.youtube.com/")) {
			return null;
		}

		String userName = extractUserName(url);
		if (userName == null) {
			return null;
		}

		String profileUrl = "https://gdata.youtube.com/feeds/users/" + userName;

		byte[] bytes = null;
		String contentType = null;

		try {
			log.debug("Getting YouTube user's icon, {}", url);

			// initial get to translate username to obscure user thumbnail URL
			// NOTE(review): the body is decoded with the platform default charset - confirm this is intended
			HttpResult profileResult = getter.getBinary(profileUrl, TIMEOUT);
			Document doc = Jsoup.parse(new String(profileResult.getContent()), profileUrl);

			Elements thumbnails = doc.select("media|thumbnail");
			if (thumbnails.isEmpty()) {
				return null;
			}

			String thumbnailUrl = thumbnails.get(0).attr("abs:url");

			// if the attribute value itself still embeds a raw <media:thumbnail .../> tag, unwrap the url from it
			int thumbnailStart = thumbnailUrl.indexOf(THUMBNAIL_TAG_START);
			if (thumbnailStart != -1) {
				int thumbnailEnd = thumbnailUrl.indexOf(THUMBNAIL_TAG_END, thumbnailStart);
				if (thumbnailEnd == -1) {
					// unterminated tag: there is no usable url to extract
					log.debug("Malformed thumbnail element for {}", profileUrl);
					return null;
				}
				thumbnailUrl = thumbnailUrl.substring(thumbnailStart + THUMBNAIL_TAG_START.length(), thumbnailEnd);
			}

			if (thumbnailUrl.trim().isEmpty()) {
				log.debug("No thumbnail url found for {}", profileUrl);
				return null;
			}

			// final get to actually retrieve the thumbnail
			HttpResult iconResult = getter.getBinary(thumbnailUrl, TIMEOUT);
			bytes = iconResult.getContent();
			contentType = iconResult.getContentType();
		} catch (Exception e) {
			log.debug("Failed to retrieve YouTube icon", e);
		}

		if (!isValidIconResponse(bytes, contentType)) {
			bytes = null;
		}
		return bytes;
	}

	/**
	 * Extracts the path segment that follows {@code /users/} and is terminated by another '/'.
	 *
	 * @return the user name, or {@code null} when the url has no {@code /users/} segment, the segment is not
	 *         followed by a '/', or the segment is empty
	 */
	private String extractUserName(String url) {
		int apiOrBase = url.indexOf(USERS_SEGMENT);
		if (apiOrBase == -1) {
			return null;
		}

		int userStart = apiOrBase + USERS_SEGMENT.length();
		int userEndSlash = url.indexOf('/', userStart);
		if (userEndSlash == -1) {
			return null;
		}

		String userName = url.substring(userStart, userEndSlash);
		// an empty segment ("/users//...") carries no user to look up
		return userName.isEmpty() ? null : userName;
	}
}

View File

@@ -518,4 +518,18 @@ public class FeedUtils {
} }
} }
/**
 * Decodes raw feed bytes into a cleaned-up XML string, best effort.
 * <p>
 * The bytes are decoded with the encoding reported by {@link FeedUtils#guessEncoding}, then passed through
 * {@link FeedUtils#trimInvalidXmlCharacters} and {@link FeedUtils#replaceHtmlEntitiesWithNumericEntities}.
 * Despite its name, this method returns the processed XML text; it does not extract a url itself.
 *
 * @param xml
 *            the raw feed content
 * @return the processed XML string; the result of the last step that completed if a later step failed, or
 *         {@code null} if decoding failed before any step completed
 */
public static String parseForImageUrl(byte[] xml) {
	String cleaned = null;
	try {
		final String charsetName = FeedUtils.guessEncoding(xml);
		final String decoded = new String(xml, charsetName);
		cleaned = FeedUtils.trimInvalidXmlCharacters(decoded);
		cleaned = FeedUtils.replaceHtmlEntitiesWithNumericEntities(cleaned);
	} catch (Exception ignored) {
		// deliberately best effort: whatever was produced before the failure is returned
	}
	return cleaned;
}
} }

View File

@@ -1,6 +1,7 @@
package com.commafeed.backend.service; package com.commafeed.backend.service;
import java.util.Date; import java.util.Date;
import java.util.Set;
import javax.inject.Inject; import javax.inject.Inject;
import javax.inject.Singleton; import javax.inject.Singleton;
@@ -10,6 +11,7 @@ import lombok.RequiredArgsConstructor;
import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.codec.digest.DigestUtils;
import com.commafeed.backend.dao.FeedDAO; import com.commafeed.backend.dao.FeedDAO;
import com.commafeed.backend.favicon.AbstractFaviconFetcher;
import com.commafeed.backend.feed.FeedUtils; import com.commafeed.backend.feed.FeedUtils;
import com.commafeed.backend.model.Feed; import com.commafeed.backend.model.Feed;
@@ -18,6 +20,7 @@ import com.commafeed.backend.model.Feed;
public class FeedService { public class FeedService {
private final FeedDAO feedDAO; private final FeedDAO feedDAO;
private final Set<AbstractFaviconFetcher> faviconFetchers;
public synchronized Feed findOrCreate(String url) { public synchronized Feed findOrCreate(String url) {
String normalized = FeedUtils.normalizeURL(url); String normalized = FeedUtils.normalizeURL(url);
@@ -33,4 +36,17 @@ public class FeedService {
return feed; return feed;
} }
/**
 * Asks each registered favicon fetcher in turn for an icon and returns the first non-null result.
 * The feed's link is used as the lookup url, falling back to the feed url when the link is null.
 *
 * @param feed
 *            the feed to fetch an icon for
 * @return the icon bytes, or {@code null} when no fetcher returned an icon
 */
public byte[] fetchFavicon(Feed feed) {
	String target = feed.getLink();
	if (target == null) {
		target = feed.getUrl();
	}

	for (AbstractFaviconFetcher fetcher : faviconFetchers) {
		byte[] candidate = fetcher.fetch(target);
		if (candidate != null) {
			return candidate;
		}
	}
	return null;
}
} }

View File

@@ -42,7 +42,6 @@ import com.commafeed.backend.cache.CacheService;
import com.commafeed.backend.dao.FeedCategoryDAO; import com.commafeed.backend.dao.FeedCategoryDAO;
import com.commafeed.backend.dao.FeedEntryStatusDAO; import com.commafeed.backend.dao.FeedEntryStatusDAO;
import com.commafeed.backend.dao.FeedSubscriptionDAO; import com.commafeed.backend.dao.FeedSubscriptionDAO;
import com.commafeed.backend.feed.FaviconFetcher;
import com.commafeed.backend.feed.FeedFetcher; import com.commafeed.backend.feed.FeedFetcher;
import com.commafeed.backend.feed.FeedQueues; import com.commafeed.backend.feed.FeedQueues;
import com.commafeed.backend.feed.FeedUtils; import com.commafeed.backend.feed.FeedUtils;
@@ -57,6 +56,7 @@ import com.commafeed.backend.model.UserSettings.ReadingOrder;
import com.commafeed.backend.opml.OPMLExporter; import com.commafeed.backend.opml.OPMLExporter;
import com.commafeed.backend.opml.OPMLImporter; import com.commafeed.backend.opml.OPMLImporter;
import com.commafeed.backend.service.FeedEntryService; import com.commafeed.backend.service.FeedEntryService;
import com.commafeed.backend.service.FeedService;
import com.commafeed.backend.service.FeedSubscriptionService; import com.commafeed.backend.service.FeedSubscriptionService;
import com.commafeed.frontend.auth.SecurityCheck; import com.commafeed.frontend.auth.SecurityCheck;
import com.commafeed.frontend.model.Entries; import com.commafeed.frontend.model.Entries;
@@ -95,8 +95,8 @@ public class FeedREST {
private final FeedSubscriptionDAO feedSubscriptionDAO; private final FeedSubscriptionDAO feedSubscriptionDAO;
private final FeedCategoryDAO feedCategoryDAO; private final FeedCategoryDAO feedCategoryDAO;
private final FeedEntryStatusDAO feedEntryStatusDAO; private final FeedEntryStatusDAO feedEntryStatusDAO;
private final FaviconFetcher faviconFetcher;
private final FeedFetcher feedFetcher; private final FeedFetcher feedFetcher;
private final FeedService feedService;
private final FeedEntryService feedEntryService; private final FeedEntryService feedEntryService;
private final FeedSubscriptionService feedSubscriptionService; private final FeedSubscriptionService feedSubscriptionService;
private final FeedQueues queues; private final FeedQueues queues;
@@ -322,8 +322,7 @@ public class FeedREST {
return Response.status(Status.NOT_FOUND).build(); return Response.status(Status.NOT_FOUND).build();
} }
Feed feed = subscription.getFeed(); Feed feed = subscription.getFeed();
String url = feed.getLink() != null ? feed.getLink() : feed.getUrl(); byte[] icon = feedService.fetchFavicon(feed);
byte[] icon = faviconFetcher.fetch(url);
ResponseBuilder builder = null; ResponseBuilder builder = null;
if (icon == null) { if (icon == null) {