mirror of
https://github.com/Athou/commafeed.git
synced 2026-03-21 21:37:29 +00:00
refactoring favicon fetcher
This commit is contained in:
@@ -141,7 +141,9 @@ public class HttpGetter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
long duration = System.currentTimeMillis() - start;
|
long duration = System.currentTimeMillis() - start;
|
||||||
result = new HttpResult(content, lastModifiedHeader == null ? null
|
Header contentType = entity.getContentType();
|
||||||
|
result = new HttpResult(content, contentType == null ? null
|
||||||
|
: contentType.getValue(), lastModifiedHeader == null ? null
|
||||||
: lastModifiedHeader.getValue(), eTagHeader == null ? null
|
: lastModifiedHeader.getValue(), eTagHeader == null ? null
|
||||||
: eTagHeader.getValue(), duration);
|
: eTagHeader.getValue(), duration);
|
||||||
} finally {
|
} finally {
|
||||||
@@ -153,13 +155,15 @@ public class HttpGetter {
|
|||||||
public static class HttpResult {
|
public static class HttpResult {
|
||||||
|
|
||||||
private byte[] content;
|
private byte[] content;
|
||||||
|
private String contentType;
|
||||||
private String lastModifiedSince;
|
private String lastModifiedSince;
|
||||||
private String eTag;
|
private String eTag;
|
||||||
private long duration;
|
private long duration;
|
||||||
|
|
||||||
public HttpResult(byte[] content, String lastModifiedSince,
|
public HttpResult(byte[] content, String contentType,
|
||||||
String eTag, long duration) {
|
String lastModifiedSince, String eTag, long duration) {
|
||||||
this.content = content;
|
this.content = content;
|
||||||
|
this.contentType = contentType;
|
||||||
this.lastModifiedSince = lastModifiedSince;
|
this.lastModifiedSince = lastModifiedSince;
|
||||||
this.eTag = eTag;
|
this.eTag = eTag;
|
||||||
this.duration = duration;
|
this.duration = duration;
|
||||||
@@ -169,6 +173,10 @@ public class HttpGetter {
|
|||||||
return content;
|
return content;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getContentType() {
|
||||||
|
return contentType;
|
||||||
|
}
|
||||||
|
|
||||||
public String getLastModifiedSince() {
|
public String getLastModifiedSince() {
|
||||||
return lastModifiedSince;
|
return lastModifiedSince;
|
||||||
}
|
}
|
||||||
|
|||||||
144
src/main/java/com/commafeed/backend/feeds/FaviconFetcher.java
Normal file
144
src/main/java/com/commafeed/backend/feeds/FaviconFetcher.java
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
package com.commafeed.backend.feeds;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import javax.inject.Inject;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
|
import org.jsoup.select.Elements;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.commafeed.backend.HttpGetter;
|
||||||
|
import com.commafeed.backend.HttpGetter.HttpResult;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Inspired/Ported from https://github.com/potatolondon/getfavicon
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class FaviconFetcher {
|
||||||
|
|
||||||
|
private static Logger log = LoggerFactory.getLogger(FeedFetcher.class);
|
||||||
|
|
||||||
|
private static long MIN_ICON_LENGTH = 100;
|
||||||
|
private static long MAX_ICON_LENGTH = 20000;
|
||||||
|
|
||||||
|
protected static List<String> ICON_MIMETYPES = Arrays.asList(
|
||||||
|
"image/x-icon", "image/vnd.microsoft.icon", "image/ico",
|
||||||
|
"image/icon", "text/ico", "application/ico", "image/x-ms-bmp",
|
||||||
|
"image/x-bmp", "image/gif", "image/png", "image/jpeg");
|
||||||
|
private static List<String> ICON_MIMETYPE_BLACKLIST = Arrays.asList(
|
||||||
|
"application/xml", "text/html");
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
HttpGetter getter;
|
||||||
|
|
||||||
|
public byte[] fetch(String targetPath) {
|
||||||
|
byte[] icon = getIconAtRoot(targetPath);
|
||||||
|
|
||||||
|
if (icon == null) {
|
||||||
|
icon = getIconInPage(targetPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
return icon;
|
||||||
|
}
|
||||||
|
|
||||||
|
private byte[] getIconAtRoot(String targetPath) {
|
||||||
|
byte[] bytes = null;
|
||||||
|
String contentType = null;
|
||||||
|
|
||||||
|
try {
|
||||||
|
String url = FeedUtils.removeTrailingSlash(targetPath)
|
||||||
|
+ "/favicon.ico";
|
||||||
|
log.debug("getting root icon at {}", url);
|
||||||
|
HttpResult result = getter.getBinary(url);
|
||||||
|
bytes = result.getContent();
|
||||||
|
contentType = result.getContentType();
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.info("Failed to retrieve iconAtRoot: " + e.getMessage(), e);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!isValidIconResponse(bytes, contentType)) {
|
||||||
|
bytes = null;
|
||||||
|
}
|
||||||
|
return bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean isValidIconResponse(byte[] content, String contentType) {
|
||||||
|
long length = content.length;
|
||||||
|
|
||||||
|
if (!contentType.isEmpty()) {
|
||||||
|
contentType = contentType.split(";")[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ICON_MIMETYPE_BLACKLIST.contains(contentType)) {
|
||||||
|
log.info("Content-Type {} is blacklisted", contentType);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (length < MIN_ICON_LENGTH) {
|
||||||
|
log.info("Length {} below MIN_ICON_LENGTH {}", length,
|
||||||
|
MIN_ICON_LENGTH);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (length > MAX_ICON_LENGTH) {
|
||||||
|
log.info("Length {} greater than MAX_ICON_LENGTH {}", length,
|
||||||
|
MAX_ICON_LENGTH);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
private byte[] getIconInPage(String targetPath) {
|
||||||
|
log.info("iconInPage, trying " + targetPath);
|
||||||
|
|
||||||
|
Document doc;
|
||||||
|
try {
|
||||||
|
HttpResult result = getter.getBinary(targetPath);
|
||||||
|
doc = Jsoup.parse(new String(result.getContent()), targetPath);
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.info("Failed to retrieve page to find icon");
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
Elements icons = doc
|
||||||
|
.select("link[rel~=(?i)^(shortcut|icon|shortcut icon)$]");
|
||||||
|
|
||||||
|
if (icons.isEmpty()) {
|
||||||
|
log.info("No icon found in page");
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
String href = icons.get(0).attr("abs:href");
|
||||||
|
if (StringUtils.isBlank(href)) {
|
||||||
|
log.info("No icon found in page");
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
log.info("Found unconfirmed iconInPage at {}", href);
|
||||||
|
|
||||||
|
byte[] bytes = null;
|
||||||
|
String contentType = null;
|
||||||
|
try {
|
||||||
|
HttpResult result = getter.getBinary(href);
|
||||||
|
bytes = result.getContent();
|
||||||
|
contentType = result.getContentType();
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.info("Failed to retrieve icon found in page {}", href);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!isValidIconResponse(bytes, contentType)) {
|
||||||
|
log.info("Invalid icon found for {}", href);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -34,6 +34,7 @@ import com.commafeed.backend.dao.FeedSubscriptionDAO;
|
|||||||
import com.commafeed.backend.dao.UserDAO;
|
import com.commafeed.backend.dao.UserDAO;
|
||||||
import com.commafeed.backend.dao.UserRoleDAO;
|
import com.commafeed.backend.dao.UserRoleDAO;
|
||||||
import com.commafeed.backend.dao.UserSettingsDAO;
|
import com.commafeed.backend.dao.UserSettingsDAO;
|
||||||
|
import com.commafeed.backend.feeds.FaviconFetcher;
|
||||||
import com.commafeed.backend.feeds.FeedFetcher;
|
import com.commafeed.backend.feeds.FeedFetcher;
|
||||||
import com.commafeed.backend.feeds.FeedRefreshTaskGiver;
|
import com.commafeed.backend.feeds.FeedRefreshTaskGiver;
|
||||||
import com.commafeed.backend.feeds.FeedRefreshUpdater;
|
import com.commafeed.backend.feeds.FeedRefreshUpdater;
|
||||||
@@ -120,6 +121,9 @@ public abstract class AbstractREST {
|
|||||||
@Inject
|
@Inject
|
||||||
FeedRefreshUpdater feedRefreshUpdater;
|
FeedRefreshUpdater feedRefreshUpdater;
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
FaviconFetcher faviconFetcher;
|
||||||
|
|
||||||
@PostConstruct
|
@PostConstruct
|
||||||
public void init() {
|
public void init() {
|
||||||
CommaFeedApplication app = CommaFeedApplication.get();
|
CommaFeedApplication app = CommaFeedApplication.get();
|
||||||
|
|||||||
@@ -1,9 +1,7 @@
|
|||||||
package com.commafeed.frontend.rest.resources;
|
package com.commafeed.frontend.rest.resources;
|
||||||
|
|
||||||
import java.io.StringWriter;
|
import java.io.StringWriter;
|
||||||
import java.io.UnsupportedEncodingException;
|
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
import java.net.URLDecoder;
|
|
||||||
import java.util.Calendar;
|
import java.util.Calendar;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
@@ -20,6 +18,7 @@ import javax.ws.rs.Produces;
|
|||||||
import javax.ws.rs.QueryParam;
|
import javax.ws.rs.QueryParam;
|
||||||
import javax.ws.rs.WebApplicationException;
|
import javax.ws.rs.WebApplicationException;
|
||||||
import javax.ws.rs.core.CacheControl;
|
import javax.ws.rs.core.CacheControl;
|
||||||
|
import javax.ws.rs.core.HttpHeaders;
|
||||||
import javax.ws.rs.core.MediaType;
|
import javax.ws.rs.core.MediaType;
|
||||||
import javax.ws.rs.core.Response;
|
import javax.ws.rs.core.Response;
|
||||||
import javax.ws.rs.core.Response.ResponseBuilder;
|
import javax.ws.rs.core.Response.ResponseBuilder;
|
||||||
@@ -32,10 +31,12 @@ import org.apache.commons.fileupload.servlet.ServletFileUpload;
|
|||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang.ObjectUtils;
|
import org.apache.commons.lang.ObjectUtils;
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
import org.apache.http.impl.cookie.DateUtils;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.commafeed.backend.StartupBean;
|
import com.commafeed.backend.StartupBean;
|
||||||
|
import com.commafeed.backend.feeds.FeedUtils;
|
||||||
import com.commafeed.backend.feeds.FetchedFeed;
|
import com.commafeed.backend.feeds.FetchedFeed;
|
||||||
import com.commafeed.backend.model.FeedCategory;
|
import com.commafeed.backend.model.FeedCategory;
|
||||||
import com.commafeed.backend.model.FeedEntryStatus;
|
import com.commafeed.backend.model.FeedEntryStatus;
|
||||||
@@ -52,7 +53,6 @@ import com.commafeed.frontend.model.request.IDRequest;
|
|||||||
import com.commafeed.frontend.model.request.MarkRequest;
|
import com.commafeed.frontend.model.request.MarkRequest;
|
||||||
import com.commafeed.frontend.model.request.SubscribeRequest;
|
import com.commafeed.frontend.model.request.SubscribeRequest;
|
||||||
import com.commafeed.frontend.rest.Enums.ReadType;
|
import com.commafeed.frontend.rest.Enums.ReadType;
|
||||||
import com.commafeed.frontend.utils.FetchFavicon;
|
|
||||||
import com.google.common.base.Preconditions;
|
import com.google.common.base.Preconditions;
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
import com.sun.syndication.feed.opml.Opml;
|
import com.sun.syndication.feed.opml.Opml;
|
||||||
@@ -252,26 +252,34 @@ public class FeedREST extends AbstractResourceREST {
|
|||||||
|
|
||||||
@GET
|
@GET
|
||||||
@Path("/favicon")
|
@Path("/favicon")
|
||||||
@ApiOperation(value = "Fetch feed icon", notes = "Fetch icon of a feed")
|
@ApiOperation(value = "Fetch a feed's icon", notes = "Fetch icon of a feed")
|
||||||
public Response favicon(@QueryParam("url") String path) {
|
public Response getFavicon(@QueryParam("url") String url) {
|
||||||
try {
|
|
||||||
path = URLDecoder.decode(path, "UTF-8");
|
byte[] icon = faviconFetcher.fetch(url);
|
||||||
} catch (UnsupportedEncodingException e) {
|
|
||||||
e.printStackTrace();
|
ResponseBuilder builder = null;
|
||||||
|
if (icon == null) {
|
||||||
|
String baseUrl = FeedUtils
|
||||||
|
.removeTrailingSlash(applicationSettingsService.get()
|
||||||
|
.getPublicUrl());
|
||||||
|
builder = Response.status(Status.MOVED_PERMANENTLY).location(
|
||||||
|
URI.create(baseUrl + "/images/default_favicon.gif"));
|
||||||
|
} else {
|
||||||
|
builder = Response.ok(icon, "image/x-icon");
|
||||||
}
|
}
|
||||||
byte[] icon = new FetchFavicon().get(path);
|
|
||||||
ResponseBuilder reponse = Response.ok(icon, "image/x-icon");
|
|
||||||
|
|
||||||
CacheControl cacheControl = new CacheControl();
|
CacheControl cacheControl = new CacheControl();
|
||||||
cacheControl.setMaxAge(2592000);
|
cacheControl.setMaxAge(2592000);
|
||||||
cacheControl.setPrivate(false);
|
cacheControl.setPrivate(false);
|
||||||
reponse.cacheControl(cacheControl); // trying to replicate "public, max-age=2592000"
|
// trying to replicate "public, max-age=2592000"
|
||||||
|
builder.cacheControl(cacheControl);
|
||||||
|
|
||||||
Calendar calendar = Calendar.getInstance();
|
Calendar calendar = Calendar.getInstance();
|
||||||
calendar.add(Calendar.MONTH, 1);
|
calendar.add(Calendar.MONTH, 1);
|
||||||
reponse.expires(calendar.getTime());
|
builder.expires(calendar.getTime());
|
||||||
|
builder.lastModified(new Date(startupBean.getStartupTime()));
|
||||||
|
|
||||||
return reponse.build();
|
return builder.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
@POST
|
@POST
|
||||||
|
|||||||
@@ -1,150 +0,0 @@
|
|||||||
package com.commafeed.frontend.utils;
|
|
||||||
|
|
||||||
import java.net.MalformedURLException;
|
|
||||||
import java.net.URL;
|
|
||||||
|
|
||||||
import org.jsoup.Connection.Response;
|
|
||||||
import org.jsoup.Jsoup;
|
|
||||||
import org.jsoup.nodes.Document;
|
|
||||||
import org.jsoup.select.Elements;
|
|
||||||
|
|
||||||
//Inspired/Ported from https://github.com/potatolondon/getfavicon
|
|
||||||
public class FetchFavicon {
|
|
||||||
void inf(String message) {
|
|
||||||
//
|
|
||||||
}
|
|
||||||
|
|
||||||
static long MIN_ICON_LENGTH = 100;
|
|
||||||
static long MAX_ICON_LENGTH = 20000;
|
|
||||||
static String[] ICON_MIMETYPES = new String[] { "image/x-icon",
|
|
||||||
"image/vnd.microsoft.icon", "image/ico", "image/icon", "text/ico",
|
|
||||||
"application/ico", "image/x-ms-bmp", "image/x-bmp", "image/gif",
|
|
||||||
"image/png", "image/jpeg" };
|
|
||||||
|
|
||||||
static String[] ICON_MIMETYPE_BLACKLIST = new String[] { "application/xml",
|
|
||||||
"text/html" };
|
|
||||||
|
|
||||||
boolean in(String[] array, String value) {
|
|
||||||
for (String i : array) {
|
|
||||||
if (i.equals(value)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
boolean isValidIconResponse(Response iconResponse) {
|
|
||||||
long iconLength = iconResponse.bodyAsBytes().length;
|
|
||||||
|
|
||||||
String iconContentType = iconResponse.header("Content-Type");
|
|
||||||
if (!iconContentType.isEmpty())
|
|
||||||
iconContentType = iconContentType.split(";")[0];
|
|
||||||
|
|
||||||
if (iconResponse.statusCode() != 200) {
|
|
||||||
inf("Status code isn't 200");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (in(ICON_MIMETYPE_BLACKLIST, iconContentType)) {
|
|
||||||
inf("Content-Type in ICON_MIMETYPE_BLACKLIST");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (iconLength < MIN_ICON_LENGTH) {
|
|
||||||
inf("Length below MIN_ICON_LENGTH");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (iconLength > MAX_ICON_LENGTH) {
|
|
||||||
inf("Length greater than MAX_ICON_LENGTH");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
byte[] iconAtRoot(String targetPath) {
|
|
||||||
Response rootIconPath;
|
|
||||||
try {
|
|
||||||
URL url = new URL(new URL(targetPath), "/favicon.ico");
|
|
||||||
inf(url.toString());
|
|
||||||
rootIconPath = Jsoup
|
|
||||||
.connect(url.toString())
|
|
||||||
.followRedirects(true)
|
|
||||||
.ignoreContentType(true).execute();
|
|
||||||
} catch (Exception e) {
|
|
||||||
inf("Failed to retrieve iconAtRoot");
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isValidIconResponse(rootIconPath)) {
|
|
||||||
return rootIconPath.bodyAsBytes();
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
byte[] iconInPage(String targetPath) {
|
|
||||||
inf("iconInPage, trying " + targetPath);
|
|
||||||
|
|
||||||
Document pageSoup;
|
|
||||||
try {
|
|
||||||
pageSoup = Jsoup.connect(targetPath).followRedirects(true).get();
|
|
||||||
} catch (Exception e) {
|
|
||||||
inf("Failed to retrieve page to find icon");
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
Elements pageSoupIcon = pageSoup
|
|
||||||
.select("link[rel~=(?i)^(shortcut|icon|shortcut icon)$]");
|
|
||||||
|
|
||||||
if (pageSoupIcon.size() == 0) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
String pageIconHref = pageSoupIcon.get(0).attr("href");
|
|
||||||
String pageIconPath;
|
|
||||||
if (pageIconHref.isEmpty()) {
|
|
||||||
inf("No icon found in page");
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
pageIconPath = new URL(new URL(targetPath), pageIconHref).toString();
|
|
||||||
} catch (MalformedURLException e1) {
|
|
||||||
inf("URL concatination faild");
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
inf("Found unconfirmed iconInPage at");
|
|
||||||
|
|
||||||
Response pagePathFaviconResult;
|
|
||||||
try {
|
|
||||||
pagePathFaviconResult = Jsoup.connect(pageIconPath)
|
|
||||||
.followRedirects(true).ignoreContentType(true)
|
|
||||||
.execute();
|
|
||||||
} catch (Exception e) {
|
|
||||||
inf("Failed to retrieve icon found in page");
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isValidIconResponse(pagePathFaviconResult)) {
|
|
||||||
return pagePathFaviconResult.bodyAsBytes();
|
|
||||||
}
|
|
||||||
inf("Invalid icon found");
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
public byte[] get(String targetPath) {
|
|
||||||
byte[] icon;
|
|
||||||
|
|
||||||
icon = iconAtRoot(targetPath);
|
|
||||||
if (icon != null) {
|
|
||||||
return icon;
|
|
||||||
}
|
|
||||||
|
|
||||||
icon = iconInPage(targetPath);
|
|
||||||
if (icon != null) {
|
|
||||||
return icon;
|
|
||||||
}
|
|
||||||
|
|
||||||
return null; // or returning default feed
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Reference in New Issue
Block a user