mirror of
https://github.com/Athou/commafeed.git
synced 2026-03-21 21:37:29 +00:00
split client and server into maven modules
This commit is contained in:
@@ -0,0 +1,39 @@
|
||||
package com.commafeed.backend.feed;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
/**
|
||||
* A keyword used in a search query
|
||||
*/
|
||||
@Getter
|
||||
@RequiredArgsConstructor
|
||||
public class FeedEntryKeyword {
|
||||
|
||||
public enum Mode {
|
||||
INCLUDE, EXCLUDE;
|
||||
}
|
||||
|
||||
private final String keyword;
|
||||
private final Mode mode;
|
||||
|
||||
public static List<FeedEntryKeyword> fromQueryString(String keywords) {
|
||||
List<FeedEntryKeyword> list = new ArrayList<>();
|
||||
if (keywords != null) {
|
||||
for (String keyword : StringUtils.split(keywords)) {
|
||||
boolean not = false;
|
||||
if (keyword.startsWith("-") || keyword.startsWith("!")) {
|
||||
not = true;
|
||||
keyword = keyword.substring(1);
|
||||
}
|
||||
list.add(new FeedEntryKeyword(keyword, not ? Mode.EXCLUDE : Mode.INCLUDE));
|
||||
}
|
||||
}
|
||||
return list;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,96 @@
|
||||
package com.commafeed.backend.feed;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Date;
|
||||
import java.util.Set;
|
||||
|
||||
import javax.inject.Inject;
|
||||
import javax.inject.Singleton;
|
||||
|
||||
import org.apache.commons.codec.binary.StringUtils;
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
|
||||
import com.commafeed.backend.HttpGetter;
|
||||
import com.commafeed.backend.HttpGetter.HttpResult;
|
||||
import com.commafeed.backend.HttpGetter.NotModifiedException;
|
||||
import com.commafeed.backend.model.Feed;
|
||||
import com.commafeed.backend.urlprovider.FeedURLProvider;
|
||||
import com.rometools.rome.io.FeedException;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@RequiredArgsConstructor(onConstructor = @__({ @Inject }))
|
||||
@Singleton
|
||||
public class FeedFetcher {
|
||||
|
||||
private final FeedParser parser;
|
||||
private final HttpGetter getter;
|
||||
private final Set<FeedURLProvider> urlProviders;
|
||||
|
||||
public FetchedFeed fetch(String feedUrl, boolean extractFeedUrlFromHtml, String lastModified, String eTag, Date lastPublishedDate,
|
||||
String lastContentHash) throws FeedException, IOException, NotModifiedException {
|
||||
log.debug("Fetching feed {}", feedUrl);
|
||||
FetchedFeed fetchedFeed = null;
|
||||
|
||||
int timeout = 20000;
|
||||
|
||||
HttpResult result = getter.getBinary(feedUrl, lastModified, eTag, timeout);
|
||||
byte[] content = result.getContent();
|
||||
|
||||
try {
|
||||
fetchedFeed = parser.parse(result.getUrlAfterRedirect(), content);
|
||||
} catch (FeedException e) {
|
||||
if (extractFeedUrlFromHtml) {
|
||||
String extractedUrl = extractFeedUrl(urlProviders, feedUrl, StringUtils.newStringUtf8(result.getContent()));
|
||||
if (org.apache.commons.lang3.StringUtils.isNotBlank(extractedUrl)) {
|
||||
feedUrl = extractedUrl;
|
||||
|
||||
result = getter.getBinary(extractedUrl, lastModified, eTag, timeout);
|
||||
content = result.getContent();
|
||||
fetchedFeed = parser.parse(result.getUrlAfterRedirect(), content);
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
if (content == null) {
|
||||
throw new IOException("Feed content is empty.");
|
||||
}
|
||||
|
||||
String hash = DigestUtils.sha1Hex(content);
|
||||
if (lastContentHash != null && hash != null && lastContentHash.equals(hash)) {
|
||||
log.debug("content hash not modified: {}", feedUrl);
|
||||
throw new NotModifiedException("content hash not modified");
|
||||
}
|
||||
|
||||
if (lastPublishedDate != null && fetchedFeed.getFeed().getLastPublishedDate() != null
|
||||
&& lastPublishedDate.getTime() == fetchedFeed.getFeed().getLastPublishedDate().getTime()) {
|
||||
log.debug("publishedDate not modified: {}", feedUrl);
|
||||
throw new NotModifiedException("publishedDate not modified");
|
||||
}
|
||||
|
||||
Feed feed = fetchedFeed.getFeed();
|
||||
feed.setLastModifiedHeader(result.getLastModifiedSince());
|
||||
feed.setEtagHeader(FeedUtils.truncate(result.getETag(), 255));
|
||||
feed.setLastContentHash(hash);
|
||||
fetchedFeed.setFetchDuration(result.getDuration());
|
||||
fetchedFeed.setUrlAfterRedirect(result.getUrlAfterRedirect());
|
||||
return fetchedFeed;
|
||||
}
|
||||
|
||||
private static String extractFeedUrl(Set<FeedURLProvider> urlProviders, String url, String urlContent) {
|
||||
for (FeedURLProvider urlProvider : urlProviders) {
|
||||
String feedUrl = urlProvider.get(url, urlContent);
|
||||
if (feedUrl != null) {
|
||||
return feedUrl;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,276 @@
|
||||
package com.commafeed.backend.feed;
|
||||
|
||||
import java.io.StringReader;
|
||||
import java.nio.charset.Charset;
|
||||
import java.text.DateFormat;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import javax.inject.Inject;
|
||||
import javax.inject.Singleton;
|
||||
|
||||
import org.apache.commons.lang3.ArrayUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.jdom2.Element;
|
||||
import org.jdom2.Namespace;
|
||||
import org.xml.sax.InputSource;
|
||||
|
||||
import com.commafeed.backend.model.Feed;
|
||||
import com.commafeed.backend.model.FeedEntry;
|
||||
import com.commafeed.backend.model.FeedEntryContent;
|
||||
import com.google.common.collect.Iterables;
|
||||
import com.rometools.modules.mediarss.MediaEntryModule;
|
||||
import com.rometools.modules.mediarss.MediaModule;
|
||||
import com.rometools.modules.mediarss.types.MediaGroup;
|
||||
import com.rometools.modules.mediarss.types.Metadata;
|
||||
import com.rometools.modules.mediarss.types.Thumbnail;
|
||||
import com.rometools.rome.feed.synd.SyndCategory;
|
||||
import com.rometools.rome.feed.synd.SyndContent;
|
||||
import com.rometools.rome.feed.synd.SyndEnclosure;
|
||||
import com.rometools.rome.feed.synd.SyndEntry;
|
||||
import com.rometools.rome.feed.synd.SyndFeed;
|
||||
import com.rometools.rome.feed.synd.SyndLink;
|
||||
import com.rometools.rome.feed.synd.SyndLinkImpl;
|
||||
import com.rometools.rome.io.FeedException;
|
||||
import com.rometools.rome.io.SyndFeedInput;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@RequiredArgsConstructor(onConstructor = @__({ @Inject }))
|
||||
@Singleton
|
||||
public class FeedParser {
|
||||
|
||||
private static final String ATOM_10_URI = "http://www.w3.org/2005/Atom";
|
||||
private static final Namespace ATOM_10_NS = Namespace.getNamespace(ATOM_10_URI);
|
||||
|
||||
private static final Date START = new Date(86400000);
|
||||
private static final Date END = new Date(1000L * Integer.MAX_VALUE - 86400000);
|
||||
|
||||
public FetchedFeed parse(String feedUrl, byte[] xml) throws FeedException {
|
||||
FetchedFeed fetchedFeed = new FetchedFeed();
|
||||
Feed feed = fetchedFeed.getFeed();
|
||||
List<FeedEntry> entries = fetchedFeed.getEntries();
|
||||
|
||||
try {
|
||||
Charset encoding = FeedUtils.guessEncoding(xml);
|
||||
String xmlString = FeedUtils.trimInvalidXmlCharacters(new String(xml, encoding));
|
||||
if (xmlString == null) {
|
||||
throw new FeedException("Input string is null for url " + feedUrl);
|
||||
}
|
||||
xmlString = FeedUtils.replaceHtmlEntitiesWithNumericEntities(xmlString);
|
||||
InputSource source = new InputSource(new StringReader(xmlString));
|
||||
SyndFeed rss = new SyndFeedInput().build(source);
|
||||
handleForeignMarkup(rss);
|
||||
|
||||
fetchedFeed.setTitle(rss.getTitle());
|
||||
feed.setPushHub(findHub(rss));
|
||||
feed.setPushTopic(findSelf(rss));
|
||||
feed.setUrl(feedUrl);
|
||||
feed.setLink(rss.getLink());
|
||||
List<SyndEntry> items = rss.getEntries();
|
||||
|
||||
for (SyndEntry item : items) {
|
||||
FeedEntry entry = new FeedEntry();
|
||||
|
||||
String guid = item.getUri();
|
||||
if (StringUtils.isBlank(guid)) {
|
||||
guid = item.getLink();
|
||||
}
|
||||
if (StringUtils.isBlank(guid)) {
|
||||
// no guid and no link, skip entry
|
||||
continue;
|
||||
}
|
||||
entry.setGuid(FeedUtils.truncate(guid, 2048));
|
||||
entry.setUpdated(validateDate(getEntryUpdateDate(item), true));
|
||||
entry.setUrl(FeedUtils.truncate(FeedUtils.toAbsoluteUrl(item.getLink(), feed.getLink(), feedUrl), 2048));
|
||||
|
||||
// if link is empty but guid is used as url
|
||||
if (StringUtils.isBlank(entry.getUrl()) && StringUtils.startsWith(entry.getGuid(), "http")) {
|
||||
entry.setUrl(entry.getGuid());
|
||||
}
|
||||
|
||||
FeedEntryContent content = new FeedEntryContent();
|
||||
content.setContent(getContent(item));
|
||||
content.setCategories(FeedUtils
|
||||
.truncate(item.getCategories().stream().map(SyndCategory::getName).collect(Collectors.joining(", ")), 4096));
|
||||
content.setTitle(getTitle(item));
|
||||
content.setAuthor(StringUtils.trimToNull(item.getAuthor()));
|
||||
|
||||
SyndEnclosure enclosure = Iterables.getFirst(item.getEnclosures(), null);
|
||||
if (enclosure != null) {
|
||||
content.setEnclosureUrl(FeedUtils.truncate(enclosure.getUrl(), 2048));
|
||||
content.setEnclosureType(enclosure.getType());
|
||||
}
|
||||
|
||||
MediaEntryModule module = (MediaEntryModule) item.getModule(MediaModule.URI);
|
||||
if (module != null) {
|
||||
Media media = getMedia(module);
|
||||
if (media != null) {
|
||||
content.setMediaDescription(media.getDescription());
|
||||
content.setMediaThumbnailUrl(FeedUtils.truncate(media.getThumbnailUrl(), 2048));
|
||||
content.setMediaThumbnailWidth(media.getThumbnailWidth());
|
||||
content.setMediaThumbnailHeight(media.getThumbnailHeight());
|
||||
}
|
||||
}
|
||||
|
||||
entry.setContent(content);
|
||||
|
||||
entries.add(entry);
|
||||
}
|
||||
Date lastEntryDate = null;
|
||||
Date publishedDate = validateDate(rss.getPublishedDate(), false);
|
||||
if (!entries.isEmpty()) {
|
||||
List<Long> sortedTimestamps = FeedUtils.getSortedTimestamps(entries);
|
||||
Long timestamp = sortedTimestamps.get(0);
|
||||
lastEntryDate = new Date(timestamp);
|
||||
publishedDate = (publishedDate == null || publishedDate.before(lastEntryDate)) ? lastEntryDate : publishedDate;
|
||||
}
|
||||
feed.setLastPublishedDate(publishedDate);
|
||||
feed.setAverageEntryInterval(FeedUtils.averageTimeBetweenEntries(entries));
|
||||
feed.setLastEntryDate(lastEntryDate);
|
||||
|
||||
} catch (Exception e) {
|
||||
throw new FeedException(String.format("Could not parse feed from %s : %s", feedUrl, e.getMessage()), e);
|
||||
}
|
||||
return fetchedFeed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds atom links for rss feeds
|
||||
*/
|
||||
private void handleForeignMarkup(SyndFeed feed) {
|
||||
List<Element> foreignMarkup = feed.getForeignMarkup();
|
||||
if (foreignMarkup == null) {
|
||||
return;
|
||||
}
|
||||
for (Element element : foreignMarkup) {
|
||||
if ("link".equals(element.getName()) && ATOM_10_NS.equals(element.getNamespace())) {
|
||||
SyndLink link = new SyndLinkImpl();
|
||||
link.setRel(element.getAttributeValue("rel"));
|
||||
link.setHref(element.getAttributeValue("href"));
|
||||
feed.getLinks().add(link);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private Date getEntryUpdateDate(SyndEntry item) {
|
||||
Date date = item.getUpdatedDate();
|
||||
if (date == null) {
|
||||
date = item.getPublishedDate();
|
||||
}
|
||||
if (date == null) {
|
||||
date = new Date();
|
||||
}
|
||||
return date;
|
||||
}
|
||||
|
||||
private Date validateDate(Date date, boolean nullToNow) {
|
||||
Date now = new Date();
|
||||
if (date == null) {
|
||||
return nullToNow ? now : null;
|
||||
}
|
||||
if (date.before(START) || date.after(END)) {
|
||||
return now;
|
||||
}
|
||||
|
||||
if (date.after(now)) {
|
||||
return now;
|
||||
}
|
||||
return date;
|
||||
}
|
||||
|
||||
private String getContent(SyndEntry item) {
|
||||
String content = null;
|
||||
if (item.getContents().isEmpty()) {
|
||||
content = item.getDescription() == null ? null : item.getDescription().getValue();
|
||||
} else {
|
||||
content = item.getContents().stream().map(SyndContent::getValue).collect(Collectors.joining(System.lineSeparator()));
|
||||
}
|
||||
return StringUtils.trimToNull(content);
|
||||
}
|
||||
|
||||
private String getTitle(SyndEntry item) {
|
||||
String title = item.getTitle();
|
||||
if (StringUtils.isBlank(title)) {
|
||||
Date date = item.getPublishedDate();
|
||||
if (date != null) {
|
||||
title = DateFormat.getInstance().format(date);
|
||||
} else {
|
||||
title = "(no title)";
|
||||
}
|
||||
}
|
||||
return StringUtils.trimToNull(title);
|
||||
}
|
||||
|
||||
private Media getMedia(MediaEntryModule module) {
|
||||
Media media = getMedia(module.getMetadata());
|
||||
if (media == null && ArrayUtils.isNotEmpty(module.getMediaGroups())) {
|
||||
MediaGroup group = module.getMediaGroups()[0];
|
||||
media = getMedia(group.getMetadata());
|
||||
}
|
||||
|
||||
return media;
|
||||
}
|
||||
|
||||
private Media getMedia(Metadata metadata) {
|
||||
if (metadata == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
Media media = new Media();
|
||||
media.setDescription(metadata.getDescription());
|
||||
|
||||
if (ArrayUtils.isNotEmpty(metadata.getThumbnail())) {
|
||||
Thumbnail thumbnail = metadata.getThumbnail()[0];
|
||||
media.setThumbnailWidth(thumbnail.getWidth());
|
||||
media.setThumbnailHeight(thumbnail.getHeight());
|
||||
|
||||
if (thumbnail.getUrl() != null) {
|
||||
media.setThumbnailUrl(thumbnail.getUrl().toString());
|
||||
}
|
||||
}
|
||||
|
||||
if (media.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return media;
|
||||
}
|
||||
|
||||
private String findHub(SyndFeed feed) {
|
||||
for (SyndLink l : feed.getLinks()) {
|
||||
if ("hub".equalsIgnoreCase(l.getRel())) {
|
||||
log.debug("found hub {} for feed {}", l.getHref(), feed.getLink());
|
||||
return l.getHref();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private String findSelf(SyndFeed feed) {
|
||||
for (SyndLink l : feed.getLinks()) {
|
||||
if ("self".equalsIgnoreCase(l.getRel())) {
|
||||
log.debug("found self {} for feed {}", l.getHref(), feed.getLink());
|
||||
return l.getHref();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Data
|
||||
private static class Media {
|
||||
private String description;
|
||||
private String thumbnailUrl;
|
||||
private Integer thumbnailWidth;
|
||||
private Integer thumbnailHeight;
|
||||
|
||||
public boolean isEmpty() {
|
||||
return description == null && thumbnailUrl == null;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,160 @@
|
||||
package com.commafeed.backend.feed;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Queue;
|
||||
import java.util.concurrent.ConcurrentLinkedQueue;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import javax.inject.Inject;
|
||||
import javax.inject.Singleton;
|
||||
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
import org.apache.commons.lang3.time.DateUtils;
|
||||
import org.hibernate.SessionFactory;
|
||||
|
||||
import com.codahale.metrics.Gauge;
|
||||
import com.codahale.metrics.Meter;
|
||||
import com.codahale.metrics.MetricRegistry;
|
||||
import com.commafeed.CommaFeedConfiguration;
|
||||
import com.commafeed.backend.dao.FeedDAO;
|
||||
import com.commafeed.backend.dao.UnitOfWork;
|
||||
import com.commafeed.backend.model.Feed;
|
||||
|
||||
@Singleton
|
||||
public class FeedQueues {
|
||||
|
||||
private SessionFactory sessionFactory;
|
||||
private final FeedDAO feedDAO;
|
||||
private final CommaFeedConfiguration config;
|
||||
|
||||
private Queue<FeedRefreshContext> addQueue = new ConcurrentLinkedQueue<>();
|
||||
private Queue<FeedRefreshContext> takeQueue = new ConcurrentLinkedQueue<>();
|
||||
private Queue<Feed> giveBackQueue = new ConcurrentLinkedQueue<>();
|
||||
|
||||
private Meter refill;
|
||||
|
||||
@Inject
|
||||
public FeedQueues(SessionFactory sessionFactory, FeedDAO feedDAO, CommaFeedConfiguration config, MetricRegistry metrics) {
|
||||
this.sessionFactory = sessionFactory;
|
||||
this.config = config;
|
||||
this.feedDAO = feedDAO;
|
||||
|
||||
refill = metrics.meter(MetricRegistry.name(getClass(), "refill"));
|
||||
metrics.register(MetricRegistry.name(getClass(), "addQueue"), new Gauge<Integer>() {
|
||||
@Override
|
||||
public Integer getValue() {
|
||||
return addQueue.size();
|
||||
}
|
||||
});
|
||||
metrics.register(MetricRegistry.name(getClass(), "takeQueue"), new Gauge<Integer>() {
|
||||
@Override
|
||||
public Integer getValue() {
|
||||
return takeQueue.size();
|
||||
}
|
||||
});
|
||||
metrics.register(MetricRegistry.name(getClass(), "giveBackQueue"), new Gauge<Integer>() {
|
||||
@Override
|
||||
public Integer getValue() {
|
||||
return giveBackQueue.size();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* take a feed from the refresh queue
|
||||
*/
|
||||
public synchronized FeedRefreshContext take() {
|
||||
FeedRefreshContext context = takeQueue.poll();
|
||||
|
||||
if (context == null) {
|
||||
refill();
|
||||
context = takeQueue.poll();
|
||||
}
|
||||
return context;
|
||||
}
|
||||
|
||||
/**
|
||||
* add a feed to the refresh queue
|
||||
*/
|
||||
public void add(Feed feed, boolean urgent) {
|
||||
int refreshInterval = config.getApplicationSettings().getRefreshIntervalMinutes();
|
||||
if (feed.getLastUpdated() == null || feed.getLastUpdated().before(DateUtils.addMinutes(new Date(), -1 * refreshInterval))) {
|
||||
boolean alreadyQueued = addQueue.stream().anyMatch(c -> c.getFeed().getId().equals(feed.getId()));
|
||||
if (!alreadyQueued) {
|
||||
addQueue.add(new FeedRefreshContext(feed, urgent));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* refills the refresh queue and empties the giveBack queue while at it
|
||||
*/
|
||||
private void refill() {
|
||||
refill.mark();
|
||||
|
||||
List<FeedRefreshContext> contexts = new ArrayList<>();
|
||||
int batchSize = Math.min(100, 3 * config.getApplicationSettings().getBackgroundThreads());
|
||||
|
||||
// add feeds we got from the add() method
|
||||
int addQueueSize = addQueue.size();
|
||||
for (int i = 0; i < Math.min(batchSize, addQueueSize); i++) {
|
||||
contexts.add(addQueue.poll());
|
||||
}
|
||||
|
||||
// add feeds that are up to refresh from the database
|
||||
int count = batchSize - contexts.size();
|
||||
if (count > 0) {
|
||||
List<Feed> feeds = UnitOfWork.call(sessionFactory, () -> feedDAO.findNextUpdatable(count, getLastLoginThreshold()));
|
||||
for (Feed feed : feeds) {
|
||||
contexts.add(new FeedRefreshContext(feed, false));
|
||||
}
|
||||
}
|
||||
|
||||
// set the disabledDate as we use it in feedDAO to decide what to refresh next. We also use a map to remove
|
||||
// duplicates.
|
||||
Map<Long, FeedRefreshContext> map = new LinkedHashMap<>();
|
||||
for (FeedRefreshContext context : contexts) {
|
||||
Feed feed = context.getFeed();
|
||||
feed.setDisabledUntil(DateUtils.addMinutes(new Date(), config.getApplicationSettings().getRefreshIntervalMinutes()));
|
||||
map.put(feed.getId(), context);
|
||||
}
|
||||
|
||||
// refill the queue
|
||||
takeQueue.addAll(map.values());
|
||||
|
||||
// add feeds from the giveBack queue to the map, overriding duplicates
|
||||
int giveBackQueueSize = giveBackQueue.size();
|
||||
for (int i = 0; i < giveBackQueueSize; i++) {
|
||||
Feed feed = giveBackQueue.poll();
|
||||
map.put(feed.getId(), new FeedRefreshContext(feed, false));
|
||||
}
|
||||
|
||||
// update all feeds in the database
|
||||
List<Feed> feeds = map.values().stream().map(c -> c.getFeed()).collect(Collectors.toList());
|
||||
UnitOfWork.run(sessionFactory, () -> feedDAO.saveOrUpdate(feeds));
|
||||
}
|
||||
|
||||
/**
|
||||
* give a feed back, updating it to the database during the next refill()
|
||||
*/
|
||||
public void giveBack(Feed feed) {
|
||||
String normalized = FeedUtils.normalizeURL(feed.getUrl());
|
||||
feed.setNormalizedUrl(normalized);
|
||||
feed.setNormalizedUrlHash(DigestUtils.sha1Hex(normalized));
|
||||
feed.setLastUpdated(new Date());
|
||||
giveBackQueue.add(feed);
|
||||
}
|
||||
|
||||
private Date getLastLoginThreshold() {
|
||||
if (config.getApplicationSettings().getHeavyLoad()) {
|
||||
return DateUtils.addDays(new Date(), -30);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
package com.commafeed.backend.feed;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import com.commafeed.backend.model.Feed;
|
||||
import com.commafeed.backend.model.FeedEntry;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
public class FeedRefreshContext {
|
||||
private Feed feed;
|
||||
private List<FeedEntry> entries;
|
||||
private boolean urgent;
|
||||
|
||||
public FeedRefreshContext(Feed feed, boolean isUrgent) {
|
||||
this.feed = feed;
|
||||
this.urgent = isUrgent;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,99 @@
|
||||
package com.commafeed.backend.feed;
|
||||
|
||||
import java.util.concurrent.LinkedBlockingDeque;
|
||||
import java.util.concurrent.RejectedExecutionHandler;
|
||||
import java.util.concurrent.ThreadPoolExecutor;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import com.codahale.metrics.Gauge;
|
||||
import com.codahale.metrics.MetricRegistry;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
/**
|
||||
* Wraps a {@link ThreadPoolExecutor} instance. Blocks when queue is full instead of rejecting the task. Allow priority queueing by using
|
||||
* {@link Task} instead of {@link Runnable}
|
||||
*
|
||||
*/
|
||||
@Slf4j
|
||||
public class FeedRefreshExecutor {
|
||||
|
||||
private String poolName;
|
||||
private ThreadPoolExecutor pool;
|
||||
private LinkedBlockingDeque<Runnable> queue;
|
||||
|
||||
public FeedRefreshExecutor(final String poolName, int threads, int queueCapacity, MetricRegistry metrics) {
|
||||
log.info("Creating pool {} with {} threads", poolName, threads);
|
||||
this.poolName = poolName;
|
||||
pool = new ThreadPoolExecutor(threads, threads, 0, TimeUnit.MILLISECONDS, queue = new LinkedBlockingDeque<Runnable>(queueCapacity) {
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
@Override
|
||||
public boolean offer(Runnable r) {
|
||||
Task task = (Task) r;
|
||||
if (task.isUrgent()) {
|
||||
return offerFirst(r);
|
||||
} else {
|
||||
return offerLast(r);
|
||||
}
|
||||
}
|
||||
}) {
|
||||
@Override
|
||||
protected void afterExecute(Runnable r, Throwable t) {
|
||||
if (t != null) {
|
||||
log.error("thread from pool {} threw a runtime exception", poolName, t);
|
||||
}
|
||||
}
|
||||
};
|
||||
pool.setRejectedExecutionHandler(new RejectedExecutionHandler() {
|
||||
@Override
|
||||
public void rejectedExecution(Runnable r, ThreadPoolExecutor e) {
|
||||
log.debug("{} thread queue full, waiting...", poolName);
|
||||
try {
|
||||
Task task = (Task) r;
|
||||
if (task.isUrgent()) {
|
||||
queue.putFirst(r);
|
||||
} else {
|
||||
queue.put(r);
|
||||
}
|
||||
} catch (InterruptedException e1) {
|
||||
log.error(poolName + " interrupted while waiting for queue.", e1);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
metrics.register(MetricRegistry.name(getClass(), poolName, "active"), new Gauge<Integer>() {
|
||||
@Override
|
||||
public Integer getValue() {
|
||||
return pool.getActiveCount();
|
||||
}
|
||||
});
|
||||
|
||||
metrics.register(MetricRegistry.name(getClass(), poolName, "pending"), new Gauge<Integer>() {
|
||||
@Override
|
||||
public Integer getValue() {
|
||||
return queue.size();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public void execute(Task task) {
|
||||
pool.execute(task);
|
||||
}
|
||||
|
||||
public void shutdown() {
|
||||
pool.shutdownNow();
|
||||
while (!pool.isTerminated()) {
|
||||
try {
|
||||
Thread.sleep(100);
|
||||
} catch (InterruptedException e) {
|
||||
log.error("{} interrupted while waiting for threads to finish.", poolName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public interface Task extends Runnable {
|
||||
boolean isUrgent();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,85 @@
|
||||
package com.commafeed.backend.feed;
|
||||
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
|
||||
import javax.inject.Inject;
|
||||
import javax.inject.Singleton;
|
||||
|
||||
import com.codahale.metrics.Meter;
|
||||
import com.codahale.metrics.MetricRegistry;
|
||||
import com.commafeed.CommaFeedConfiguration;
|
||||
import com.commafeed.backend.dao.FeedDAO;
|
||||
|
||||
import io.dropwizard.lifecycle.Managed;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
/**
|
||||
* Infinite loop fetching feeds from @FeedQueues and queuing them to the {@link FeedRefreshWorker} pool.
|
||||
*
|
||||
*/
|
||||
@Slf4j
|
||||
@Singleton
|
||||
public class FeedRefreshTaskGiver implements Managed {
|
||||
|
||||
private final FeedQueues queues;
|
||||
private final FeedRefreshWorker worker;
|
||||
|
||||
private final ExecutorService executor;
|
||||
|
||||
private final Meter feedRefreshed;
|
||||
private final Meter threadWaited;
|
||||
|
||||
@Inject
|
||||
public FeedRefreshTaskGiver(FeedQueues queues, FeedDAO feedDAO, FeedRefreshWorker worker, CommaFeedConfiguration config,
|
||||
MetricRegistry metrics) {
|
||||
this.queues = queues;
|
||||
this.worker = worker;
|
||||
|
||||
executor = Executors.newFixedThreadPool(1);
|
||||
feedRefreshed = metrics.meter(MetricRegistry.name(getClass(), "feedRefreshed"));
|
||||
threadWaited = metrics.meter(MetricRegistry.name(getClass(), "threadWaited"));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void stop() {
|
||||
log.info("shutting down feed refresh task giver");
|
||||
executor.shutdownNow();
|
||||
while (!executor.isTerminated()) {
|
||||
try {
|
||||
Thread.sleep(100);
|
||||
} catch (InterruptedException e) {
|
||||
log.error("interrupted while waiting for threads to finish.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void start() {
|
||||
log.info("starting feed refresh task giver");
|
||||
executor.execute(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
while (!executor.isShutdown()) {
|
||||
try {
|
||||
FeedRefreshContext context = queues.take();
|
||||
if (context != null) {
|
||||
feedRefreshed.mark();
|
||||
worker.updateFeed(context);
|
||||
} else {
|
||||
log.debug("nothing to do, sleeping for 15s");
|
||||
threadWaited.mark();
|
||||
try {
|
||||
Thread.sleep(15000);
|
||||
} catch (InterruptedException e) {
|
||||
log.debug("interrupted while sleeping");
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,226 @@
|
||||
package com.commafeed.backend.feed;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Date;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.locks.Lock;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import javax.inject.Inject;
|
||||
import javax.inject.Singleton;
|
||||
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.lang3.time.DateUtils;
|
||||
import org.hibernate.SessionFactory;
|
||||
|
||||
import com.codahale.metrics.Meter;
|
||||
import com.codahale.metrics.MetricRegistry;
|
||||
import com.commafeed.CommaFeedConfiguration;
|
||||
import com.commafeed.CommaFeedConfiguration.ApplicationSettings;
|
||||
import com.commafeed.backend.cache.CacheService;
|
||||
import com.commafeed.backend.dao.FeedSubscriptionDAO;
|
||||
import com.commafeed.backend.dao.UnitOfWork;
|
||||
import com.commafeed.backend.feed.FeedRefreshExecutor.Task;
|
||||
import com.commafeed.backend.model.Feed;
|
||||
import com.commafeed.backend.model.FeedEntry;
|
||||
import com.commafeed.backend.model.FeedEntryContent;
|
||||
import com.commafeed.backend.model.FeedSubscription;
|
||||
import com.commafeed.backend.model.User;
|
||||
import com.commafeed.backend.service.FeedUpdateService;
|
||||
import com.commafeed.backend.service.PubSubService;
|
||||
import com.google.common.util.concurrent.Striped;
|
||||
|
||||
import io.dropwizard.lifecycle.Managed;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Singleton
|
||||
public class FeedRefreshUpdater implements Managed {
|
||||
|
||||
private final SessionFactory sessionFactory;
|
||||
private final FeedUpdateService feedUpdateService;
|
||||
private final PubSubService pubSubService;
|
||||
private final FeedQueues queues;
|
||||
private final CommaFeedConfiguration config;
|
||||
private final FeedSubscriptionDAO feedSubscriptionDAO;
|
||||
private final CacheService cache;
|
||||
|
||||
private final FeedRefreshExecutor pool;
|
||||
private final Striped<Lock> locks;
|
||||
|
||||
private final Meter entryCacheMiss;
|
||||
private final Meter entryCacheHit;
|
||||
private final Meter feedUpdated;
|
||||
private final Meter entryInserted;
|
||||
|
||||
@Inject
|
||||
public FeedRefreshUpdater(SessionFactory sessionFactory, FeedUpdateService feedUpdateService, PubSubService pubSubService,
|
||||
FeedQueues queues, CommaFeedConfiguration config, MetricRegistry metrics, FeedSubscriptionDAO feedSubscriptionDAO,
|
||||
CacheService cache) {
|
||||
this.sessionFactory = sessionFactory;
|
||||
this.feedUpdateService = feedUpdateService;
|
||||
this.pubSubService = pubSubService;
|
||||
this.queues = queues;
|
||||
this.config = config;
|
||||
this.feedSubscriptionDAO = feedSubscriptionDAO;
|
||||
this.cache = cache;
|
||||
|
||||
ApplicationSettings settings = config.getApplicationSettings();
|
||||
int threads = Math.max(settings.getDatabaseUpdateThreads(), 1);
|
||||
pool = new FeedRefreshExecutor("feed-refresh-updater", threads, Math.min(50 * threads, 1000), metrics);
|
||||
locks = Striped.lazyWeakLock(threads * 100000);
|
||||
|
||||
entryCacheMiss = metrics.meter(MetricRegistry.name(getClass(), "entryCacheMiss"));
|
||||
entryCacheHit = metrics.meter(MetricRegistry.name(getClass(), "entryCacheHit"));
|
||||
feedUpdated = metrics.meter(MetricRegistry.name(getClass(), "feedUpdated"));
|
||||
entryInserted = metrics.meter(MetricRegistry.name(getClass(), "entryInserted"));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void start() throws Exception {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void stop() throws Exception {
|
||||
log.info("shutting down feed refresh updater");
|
||||
pool.shutdown();
|
||||
}
|
||||
|
||||
public void updateFeed(FeedRefreshContext context) {
|
||||
pool.execute(new EntryTask(context));
|
||||
}
|
||||
|
||||
private boolean addEntry(final Feed feed, final FeedEntry entry, final List<FeedSubscription> subscriptions) {
|
||||
boolean success = false;
|
||||
|
||||
// lock on feed, make sure we are not updating the same feed twice at
|
||||
// the same time
|
||||
String key1 = StringUtils.trimToEmpty("" + feed.getId());
|
||||
|
||||
// lock on content, make sure we are not updating the same entry
|
||||
// twice at the same time
|
||||
FeedEntryContent content = entry.getContent();
|
||||
String key2 = DigestUtils.sha1Hex(StringUtils.trimToEmpty(content.getContent() + content.getTitle()));
|
||||
|
||||
Iterator<Lock> iterator = locks.bulkGet(Arrays.asList(key1, key2)).iterator();
|
||||
Lock lock1 = iterator.next();
|
||||
Lock lock2 = iterator.next();
|
||||
boolean locked1 = false;
|
||||
boolean locked2 = false;
|
||||
try {
|
||||
locked1 = lock1.tryLock(1, TimeUnit.MINUTES);
|
||||
locked2 = lock2.tryLock(1, TimeUnit.MINUTES);
|
||||
if (locked1 && locked2) {
|
||||
boolean inserted = UnitOfWork.call(sessionFactory, () -> feedUpdateService.addEntry(feed, entry, subscriptions));
|
||||
if (inserted) {
|
||||
entryInserted.mark();
|
||||
}
|
||||
success = true;
|
||||
} else {
|
||||
log.error("lock timeout for " + feed.getUrl() + " - " + key1);
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
log.error("interrupted while waiting for lock for " + feed.getUrl() + " : " + e.getMessage(), e);
|
||||
} finally {
|
||||
if (locked1) {
|
||||
lock1.unlock();
|
||||
}
|
||||
if (locked2) {
|
||||
lock2.unlock();
|
||||
}
|
||||
}
|
||||
return success;
|
||||
}
|
||||
|
||||
private void handlePubSub(final Feed feed) {
|
||||
if (feed.getPushHub() != null && feed.getPushTopic() != null) {
|
||||
Date lastPing = feed.getPushLastPing();
|
||||
Date now = new Date();
|
||||
if (lastPing == null || lastPing.before(DateUtils.addDays(now, -3))) {
|
||||
new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
// make sure the feed has been updated in the database so that the
|
||||
// callback works
|
||||
Thread.sleep(30000);
|
||||
} catch (InterruptedException e1) {
|
||||
// do nothing
|
||||
}
|
||||
|
||||
pubSubService.subscribe(feed);
|
||||
}
|
||||
}.start();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private class EntryTask implements Task {
|
||||
|
||||
private final FeedRefreshContext context;
|
||||
|
||||
public EntryTask(FeedRefreshContext context) {
|
||||
this.context = context;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
boolean ok = true;
|
||||
final Feed feed = context.getFeed();
|
||||
List<FeedEntry> entries = context.getEntries();
|
||||
if (entries.isEmpty()) {
|
||||
feed.setMessage("Feed has no entries");
|
||||
} else {
|
||||
List<String> lastEntries = cache.getLastEntries(feed);
|
||||
List<String> currentEntries = new ArrayList<>();
|
||||
|
||||
List<FeedSubscription> subscriptions = null;
|
||||
for (FeedEntry entry : entries) {
|
||||
String cacheKey = cache.buildUniqueEntryKey(feed, entry);
|
||||
if (!lastEntries.contains(cacheKey)) {
|
||||
log.debug("cache miss for {}", entry.getUrl());
|
||||
if (subscriptions == null) {
|
||||
subscriptions = UnitOfWork.call(sessionFactory, () -> feedSubscriptionDAO.findByFeed(feed));
|
||||
}
|
||||
ok &= addEntry(feed, entry, subscriptions);
|
||||
entryCacheMiss.mark();
|
||||
} else {
|
||||
log.debug("cache hit for {}", entry.getUrl());
|
||||
entryCacheHit.mark();
|
||||
}
|
||||
|
||||
currentEntries.add(cacheKey);
|
||||
}
|
||||
cache.setLastEntries(feed, currentEntries);
|
||||
|
||||
if (subscriptions == null) {
|
||||
feed.setMessage("No new entries found");
|
||||
} else if (!subscriptions.isEmpty()) {
|
||||
List<User> users = subscriptions.stream().map(FeedSubscription::getUser).collect(Collectors.toList());
|
||||
cache.invalidateUnreadCount(subscriptions.toArray(new FeedSubscription[0]));
|
||||
cache.invalidateUserRootCategory(users.toArray(new User[0]));
|
||||
}
|
||||
}
|
||||
|
||||
if (config.getApplicationSettings().getPubsubhubbub()) {
|
||||
handlePubSub(feed);
|
||||
}
|
||||
if (!ok) {
|
||||
// requeue asap
|
||||
feed.setDisabledUntil(new Date(0));
|
||||
}
|
||||
feedUpdated.mark();
|
||||
queues.giveBack(feed);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isUrgent() {
|
||||
return context.isUrgent();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,167 @@
|
||||
package com.commafeed.backend.feed;
|
||||
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import javax.inject.Inject;
|
||||
import javax.inject.Singleton;
|
||||
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.lang3.time.DateUtils;
|
||||
|
||||
import com.codahale.metrics.MetricRegistry;
|
||||
import com.commafeed.CommaFeedConfiguration;
|
||||
import com.commafeed.backend.HttpGetter.NotModifiedException;
|
||||
import com.commafeed.backend.feed.FeedRefreshExecutor.Task;
|
||||
import com.commafeed.backend.model.Feed;
|
||||
import com.commafeed.backend.model.FeedEntry;
|
||||
|
||||
import io.dropwizard.lifecycle.Managed;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
/**
|
||||
* Calls {@link FeedFetcher} and handles its outcome
|
||||
*
|
||||
*/
|
||||
@Slf4j
|
||||
@Singleton
|
||||
public class FeedRefreshWorker implements Managed {
|
||||
|
||||
private final FeedRefreshUpdater feedRefreshUpdater;
|
||||
private final FeedFetcher fetcher;
|
||||
private final FeedQueues queues;
|
||||
private final CommaFeedConfiguration config;
|
||||
private final FeedRefreshExecutor pool;
|
||||
|
||||
@Inject
|
||||
public FeedRefreshWorker(FeedRefreshUpdater feedRefreshUpdater, FeedFetcher fetcher, FeedQueues queues, CommaFeedConfiguration config,
|
||||
MetricRegistry metrics) {
|
||||
this.feedRefreshUpdater = feedRefreshUpdater;
|
||||
this.fetcher = fetcher;
|
||||
this.config = config;
|
||||
this.queues = queues;
|
||||
int threads = config.getApplicationSettings().getBackgroundThreads();
|
||||
pool = new FeedRefreshExecutor("feed-refresh-worker", threads, Math.min(20 * threads, 1000), metrics);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void start() throws Exception {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void stop() throws Exception {
|
||||
pool.shutdown();
|
||||
}
|
||||
|
||||
public void updateFeed(FeedRefreshContext context) {
|
||||
pool.execute(new FeedTask(context));
|
||||
}
|
||||
|
||||
private void update(FeedRefreshContext context) {
|
||||
Feed feed = context.getFeed();
|
||||
int refreshInterval = config.getApplicationSettings().getRefreshIntervalMinutes();
|
||||
Date disabledUntil = DateUtils.addMinutes(new Date(), refreshInterval);
|
||||
try {
|
||||
String url = Optional.ofNullable(feed.getUrlAfterRedirect()).orElse(feed.getUrl());
|
||||
FetchedFeed fetchedFeed = fetcher.fetch(url, false, feed.getLastModifiedHeader(), feed.getEtagHeader(),
|
||||
feed.getLastPublishedDate(), feed.getLastContentHash());
|
||||
// stops here if NotModifiedException or any other exception is thrown
|
||||
List<FeedEntry> entries = fetchedFeed.getEntries();
|
||||
|
||||
Integer maxFeedCapacity = config.getApplicationSettings().getMaxFeedCapacity();
|
||||
if (maxFeedCapacity > 0) {
|
||||
entries = entries.stream().limit(maxFeedCapacity).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
if (config.getApplicationSettings().getHeavyLoad()) {
|
||||
disabledUntil = FeedUtils.buildDisabledUntil(fetchedFeed.getFeed().getLastEntryDate(),
|
||||
fetchedFeed.getFeed().getAverageEntryInterval(), disabledUntil);
|
||||
}
|
||||
String urlAfterRedirect = fetchedFeed.getUrlAfterRedirect();
|
||||
if (StringUtils.equals(url, urlAfterRedirect)) {
|
||||
urlAfterRedirect = null;
|
||||
}
|
||||
feed.setUrlAfterRedirect(urlAfterRedirect);
|
||||
feed.setLink(fetchedFeed.getFeed().getLink());
|
||||
feed.setLastModifiedHeader(fetchedFeed.getFeed().getLastModifiedHeader());
|
||||
feed.setEtagHeader(fetchedFeed.getFeed().getEtagHeader());
|
||||
feed.setLastContentHash(fetchedFeed.getFeed().getLastContentHash());
|
||||
feed.setLastPublishedDate(fetchedFeed.getFeed().getLastPublishedDate());
|
||||
feed.setAverageEntryInterval(fetchedFeed.getFeed().getAverageEntryInterval());
|
||||
feed.setLastEntryDate(fetchedFeed.getFeed().getLastEntryDate());
|
||||
|
||||
feed.setErrorCount(0);
|
||||
feed.setMessage(null);
|
||||
feed.setDisabledUntil(disabledUntil);
|
||||
|
||||
handlePubSub(feed, fetchedFeed.getFeed());
|
||||
context.setEntries(entries);
|
||||
feedRefreshUpdater.updateFeed(context);
|
||||
|
||||
} catch (NotModifiedException e) {
|
||||
log.debug("Feed not modified : {} - {}", feed.getUrl(), e.getMessage());
|
||||
|
||||
if (config.getApplicationSettings().getHeavyLoad()) {
|
||||
disabledUntil = FeedUtils.buildDisabledUntil(feed.getLastEntryDate(), feed.getAverageEntryInterval(), disabledUntil);
|
||||
}
|
||||
feed.setErrorCount(0);
|
||||
feed.setMessage(e.getMessage());
|
||||
feed.setDisabledUntil(disabledUntil);
|
||||
|
||||
queues.giveBack(feed);
|
||||
} catch (Exception e) {
|
||||
String message = "Unable to refresh feed " + feed.getUrl() + " : " + e.getMessage();
|
||||
log.debug(e.getClass().getName() + " " + message, e);
|
||||
|
||||
feed.setErrorCount(feed.getErrorCount() + 1);
|
||||
feed.setMessage(message);
|
||||
feed.setDisabledUntil(FeedUtils.buildDisabledUntil(feed.getErrorCount()));
|
||||
|
||||
queues.giveBack(feed);
|
||||
}
|
||||
}
|
||||
|
||||
private void handlePubSub(Feed feed, Feed fetchedFeed) {
|
||||
String hub = fetchedFeed.getPushHub();
|
||||
String topic = fetchedFeed.getPushTopic();
|
||||
if (hub != null && topic != null) {
|
||||
if (hub.contains("hubbub.api.typepad.com")) {
|
||||
// that hub does not exist anymore
|
||||
return;
|
||||
}
|
||||
if (topic.startsWith("www.")) {
|
||||
topic = "http://" + topic;
|
||||
} else if (topic.startsWith("feed://")) {
|
||||
topic = "http://" + topic.substring(7);
|
||||
} else if (!topic.startsWith("http")) {
|
||||
topic = "http://" + topic;
|
||||
}
|
||||
log.debug("feed {} has pubsub info: {}", feed.getUrl(), topic);
|
||||
feed.setPushHub(hub);
|
||||
feed.setPushTopic(topic);
|
||||
feed.setPushTopicHash(DigestUtils.sha1Hex(topic));
|
||||
}
|
||||
}
|
||||
|
||||
private class FeedTask implements Task {
|
||||
|
||||
private final FeedRefreshContext context;
|
||||
|
||||
public FeedTask(FeedRefreshContext context) {
|
||||
this.context = context;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
update(context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isUrgent() {
|
||||
return context.isUrgent();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,556 @@
|
||||
package com.commafeed.backend.feed;
|
||||
|
||||
import java.io.StringReader;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Date;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.ahocorasick.trie.Emit;
|
||||
import org.ahocorasick.trie.Trie;
|
||||
import org.ahocorasick.trie.Trie.TrieBuilder;
|
||||
import org.apache.commons.codec.binary.Base64;
|
||||
import org.apache.commons.lang3.ArrayUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.lang3.time.DateUtils;
|
||||
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Document.OutputSettings;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.nodes.Entities.EscapeMode;
|
||||
import org.jsoup.safety.Cleaner;
|
||||
import org.jsoup.safety.Safelist;
|
||||
import org.jsoup.select.Elements;
|
||||
import org.w3c.css.sac.InputSource;
|
||||
import org.w3c.dom.css.CSSStyleDeclaration;
|
||||
|
||||
import com.commafeed.backend.feed.FeedEntryKeyword.Mode;
|
||||
import com.commafeed.backend.model.FeedEntry;
|
||||
import com.commafeed.backend.model.FeedSubscription;
|
||||
import com.commafeed.frontend.model.Entry;
|
||||
import com.google.gwt.i18n.client.HasDirection.Direction;
|
||||
import com.google.gwt.i18n.shared.BidiUtils;
|
||||
import com.ibm.icu.text.CharsetDetector;
|
||||
import com.ibm.icu.text.CharsetMatch;
|
||||
import com.steadystate.css.parser.CSSOMParser;
|
||||
|
||||
import edu.uci.ics.crawler4j.url.URLCanonicalizer;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
/**
|
||||
* Utility methods related to feed handling
|
||||
*
|
||||
*/
|
||||
@Slf4j
|
||||
public class FeedUtils {
|
||||
|
||||
private static final String ESCAPED_QUESTION_MARK = Pattern.quote("?");
|
||||
|
||||
private static final List<String> ALLOWED_IFRAME_CSS_RULES = Arrays.asList("height", "width", "border");
|
||||
private static final List<String> ALLOWED_IMG_CSS_RULES = Arrays.asList("display", "width", "height");
|
||||
private static final char[] FORBIDDEN_CSS_RULE_CHARACTERS = new char[] { '(', ')' };
|
||||
|
||||
private static final Safelist WHITELIST = buildWhiteList();
|
||||
|
||||
public static String truncate(String string, int length) {
|
||||
if (string != null) {
|
||||
string = string.substring(0, Math.min(length, string.length()));
|
||||
}
|
||||
return string;
|
||||
}
|
||||
|
||||
private static synchronized Safelist buildWhiteList() {
|
||||
Safelist whitelist = new Safelist();
|
||||
whitelist.addTags("a", "b", "blockquote", "br", "caption", "cite", "code", "col", "colgroup", "dd", "div", "dl", "dt", "em", "h1",
|
||||
"h2", "h3", "h4", "h5", "h6", "i", "iframe", "img", "li", "ol", "p", "pre", "q", "small", "strike", "strong", "sub", "sup",
|
||||
"table", "tbody", "td", "tfoot", "th", "thead", "tr", "u", "ul");
|
||||
|
||||
whitelist.addAttributes("div", "dir");
|
||||
whitelist.addAttributes("pre", "dir");
|
||||
whitelist.addAttributes("code", "dir");
|
||||
whitelist.addAttributes("table", "dir");
|
||||
whitelist.addAttributes("p", "dir");
|
||||
whitelist.addAttributes("a", "href", "title");
|
||||
whitelist.addAttributes("blockquote", "cite");
|
||||
whitelist.addAttributes("col", "span", "width");
|
||||
whitelist.addAttributes("colgroup", "span", "width");
|
||||
whitelist.addAttributes("iframe", "src", "height", "width", "allowfullscreen", "frameborder", "style");
|
||||
whitelist.addAttributes("img", "align", "alt", "height", "src", "title", "width", "style");
|
||||
whitelist.addAttributes("ol", "start", "type");
|
||||
whitelist.addAttributes("q", "cite");
|
||||
whitelist.addAttributes("table", "border", "bordercolor", "summary", "width");
|
||||
whitelist.addAttributes("td", "border", "bordercolor", "abbr", "axis", "colspan", "rowspan", "width");
|
||||
whitelist.addAttributes("th", "border", "bordercolor", "abbr", "axis", "colspan", "rowspan", "scope", "width");
|
||||
whitelist.addAttributes("ul", "type");
|
||||
|
||||
whitelist.addProtocols("a", "href", "ftp", "http", "https", "magnet", "mailto");
|
||||
whitelist.addProtocols("blockquote", "cite", "http", "https");
|
||||
whitelist.addProtocols("img", "src", "http", "https");
|
||||
whitelist.addProtocols("q", "cite", "http", "https");
|
||||
|
||||
whitelist.addEnforcedAttribute("a", "target", "_blank");
|
||||
whitelist.addEnforcedAttribute("a", "rel", "noreferrer");
|
||||
return whitelist;
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect feed encoding by using the declared encoding in the xml processing instruction and by detecting the characters used in the
|
||||
* feed
|
||||
*
|
||||
*/
|
||||
public static Charset guessEncoding(byte[] bytes) {
|
||||
String extracted = extractDeclaredEncoding(bytes);
|
||||
if (StringUtils.startsWithIgnoreCase(extracted, "iso-8859-")) {
|
||||
if (!StringUtils.endsWith(extracted, "1")) {
|
||||
return Charset.forName(extracted);
|
||||
}
|
||||
} else if (StringUtils.startsWithIgnoreCase(extracted, "windows-")) {
|
||||
return Charset.forName(extracted);
|
||||
}
|
||||
return detectEncoding(bytes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect encoding by analyzing characters in the array
|
||||
*/
|
||||
public static Charset detectEncoding(byte[] bytes) {
|
||||
String encoding = "UTF-8";
|
||||
|
||||
CharsetDetector detector = new CharsetDetector();
|
||||
detector.setText(bytes);
|
||||
CharsetMatch match = detector.detect();
|
||||
if (match != null) {
|
||||
encoding = match.getName();
|
||||
}
|
||||
if (encoding.equalsIgnoreCase("ISO-8859-1")) {
|
||||
encoding = "windows-1252";
|
||||
}
|
||||
return Charset.forName(encoding);
|
||||
}
|
||||
|
||||
public static String replaceHtmlEntitiesWithNumericEntities(String source) {
|
||||
// Create a buffer sufficiently large that re-allocations are minimized.
|
||||
StringBuilder sb = new StringBuilder(source.length() << 1);
|
||||
|
||||
TrieBuilder builder = Trie.builder();
|
||||
builder.ignoreOverlaps();
|
||||
|
||||
for (String key : HtmlEntities.HTML_ENTITIES) {
|
||||
builder.addKeyword(key);
|
||||
}
|
||||
|
||||
Trie trie = builder.build();
|
||||
Collection<Emit> emits = trie.parseText(source);
|
||||
|
||||
int prevIndex = 0;
|
||||
for (Emit emit : emits) {
|
||||
int matchIndex = emit.getStart();
|
||||
|
||||
sb.append(source.substring(prevIndex, matchIndex));
|
||||
sb.append(HtmlEntities.HTML_TO_NUMERIC_MAP.get(emit.getKeyword()));
|
||||
prevIndex = emit.getEnd() + 1;
|
||||
}
|
||||
|
||||
// Add the remainder of the string (contains no more matches).
|
||||
sb.append(source.substring(prevIndex));
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public static boolean isHttp(String url) {
|
||||
return url.startsWith("http://");
|
||||
}
|
||||
|
||||
public static boolean isHttps(String url) {
|
||||
return url.startsWith("https://");
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalize the url. The resulting url is not meant to be fetched but rather used as a mean to identify a feed and avoid duplicates
|
||||
*/
|
||||
public static String normalizeURL(String url) {
|
||||
if (url == null) {
|
||||
return null;
|
||||
}
|
||||
String normalized = URLCanonicalizer.getCanonicalURL(url);
|
||||
if (normalized == null) {
|
||||
normalized = url;
|
||||
}
|
||||
|
||||
// convert to lower case, the url probably won't work in some cases
|
||||
// after that but we don't care we just want to compare urls to avoid
|
||||
// duplicates
|
||||
normalized = normalized.toLowerCase();
|
||||
|
||||
// store all urls as http
|
||||
if (normalized.startsWith("https")) {
|
||||
normalized = "http" + normalized.substring(5);
|
||||
}
|
||||
|
||||
// remove the www. part
|
||||
normalized = normalized.replace("//www.", "//");
|
||||
|
||||
// feedproxy redirects to feedburner
|
||||
normalized = normalized.replace("feedproxy.google.com", "feeds.feedburner.com");
|
||||
|
||||
// feedburner feeds have a special treatment
|
||||
if (normalized.split(ESCAPED_QUESTION_MARK)[0].contains("feedburner.com")) {
|
||||
normalized = normalized.replace("feeds2.feedburner.com", "feeds.feedburner.com");
|
||||
normalized = normalized.split(ESCAPED_QUESTION_MARK)[0];
|
||||
normalized = StringUtils.removeEnd(normalized, "/");
|
||||
}
|
||||
|
||||
return normalized;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract the declared encoding from the xml
|
||||
*/
|
||||
public static String extractDeclaredEncoding(byte[] bytes) {
|
||||
int index = ArrayUtils.indexOf(bytes, (byte) '>');
|
||||
if (index == -1) {
|
||||
return null;
|
||||
}
|
||||
|
||||
String pi = new String(ArrayUtils.subarray(bytes, 0, index + 1)).replace('\'', '"');
|
||||
index = StringUtils.indexOf(pi, "encoding=\"");
|
||||
if (index == -1) {
|
||||
return null;
|
||||
}
|
||||
String encoding = pi.substring(index + 10, pi.length());
|
||||
encoding = encoding.substring(0, encoding.indexOf('"'));
|
||||
return encoding;
|
||||
}
|
||||
|
||||
public static String handleContent(String content, String baseUri, boolean keepTextOnly) {
|
||||
if (StringUtils.isNotBlank(content)) {
|
||||
baseUri = StringUtils.trimToEmpty(baseUri);
|
||||
|
||||
Document dirty = Jsoup.parseBodyFragment(content, baseUri);
|
||||
Cleaner cleaner = new Cleaner(WHITELIST);
|
||||
Document clean = cleaner.clean(dirty);
|
||||
|
||||
for (Element e : clean.select("iframe[style]")) {
|
||||
String style = e.attr("style");
|
||||
String escaped = escapeIFrameCss(style);
|
||||
e.attr("style", escaped);
|
||||
}
|
||||
|
||||
for (Element e : clean.select("img[style]")) {
|
||||
String style = e.attr("style");
|
||||
String escaped = escapeImgCss(style);
|
||||
e.attr("style", escaped);
|
||||
}
|
||||
|
||||
clean.outputSettings(new OutputSettings().escapeMode(EscapeMode.base).prettyPrint(false));
|
||||
Element body = clean.body();
|
||||
if (keepTextOnly) {
|
||||
content = body.text();
|
||||
} else {
|
||||
content = body.html();
|
||||
}
|
||||
}
|
||||
return content;
|
||||
}
|
||||
|
||||
public static String escapeIFrameCss(String orig) {
|
||||
String rule = "";
|
||||
CSSOMParser parser = new CSSOMParser();
|
||||
try {
|
||||
List<String> rules = new ArrayList<>();
|
||||
CSSStyleDeclaration decl = parser.parseStyleDeclaration(new InputSource(new StringReader(orig)));
|
||||
|
||||
for (int i = 0; i < decl.getLength(); i++) {
|
||||
String property = decl.item(i);
|
||||
String value = decl.getPropertyValue(property);
|
||||
if (StringUtils.isBlank(property) || StringUtils.isBlank(value)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ALLOWED_IFRAME_CSS_RULES.contains(property) && StringUtils.containsNone(value, FORBIDDEN_CSS_RULE_CHARACTERS)) {
|
||||
rules.add(property + ":" + decl.getPropertyValue(property) + ";");
|
||||
}
|
||||
}
|
||||
rule = StringUtils.join(rules, "");
|
||||
} catch (Exception e) {
|
||||
log.error(e.getMessage(), e);
|
||||
}
|
||||
return rule;
|
||||
}
|
||||
|
||||
public static String escapeImgCss(String orig) {
|
||||
String rule = "";
|
||||
CSSOMParser parser = new CSSOMParser();
|
||||
try {
|
||||
List<String> rules = new ArrayList<>();
|
||||
CSSStyleDeclaration decl = parser.parseStyleDeclaration(new InputSource(new StringReader(orig)));
|
||||
|
||||
for (int i = 0; i < decl.getLength(); i++) {
|
||||
String property = decl.item(i);
|
||||
String value = decl.getPropertyValue(property);
|
||||
if (StringUtils.isBlank(property) || StringUtils.isBlank(value)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ALLOWED_IMG_CSS_RULES.contains(property) && StringUtils.containsNone(value, FORBIDDEN_CSS_RULE_CHARACTERS)) {
|
||||
rules.add(property + ":" + decl.getPropertyValue(property) + ";");
|
||||
}
|
||||
}
|
||||
rule = StringUtils.join(rules, "");
|
||||
} catch (Exception e) {
|
||||
log.error(e.getMessage(), e);
|
||||
}
|
||||
return rule;
|
||||
}
|
||||
|
||||
public static boolean isRTL(FeedEntry entry) {
|
||||
String text = entry.getContent().getContent();
|
||||
|
||||
if (StringUtils.isBlank(text)) {
|
||||
text = entry.getContent().getTitle();
|
||||
}
|
||||
|
||||
if (StringUtils.isBlank(text)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
text = Jsoup.parse(text).text();
|
||||
if (StringUtils.isBlank(text)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Direction direction = BidiUtils.get().estimateDirection(text);
|
||||
return direction == Direction.RTL;
|
||||
}
|
||||
|
||||
public static String trimInvalidXmlCharacters(String xml) {
|
||||
if (StringUtils.isBlank(xml)) {
|
||||
return null;
|
||||
}
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
boolean firstTagFound = false;
|
||||
for (int i = 0; i < xml.length(); i++) {
|
||||
char c = xml.charAt(i);
|
||||
|
||||
if (!firstTagFound) {
|
||||
if (c == '<') {
|
||||
firstTagFound = true;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (c >= 32 || c == 9 || c == 10 || c == 13) {
|
||||
if (!Character.isHighSurrogate(c) && !Character.isLowSurrogate(c)) {
|
||||
sb.append(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* When there was an error fetching the feed
|
||||
*
|
||||
*/
|
||||
public static Date buildDisabledUntil(int errorCount) {
|
||||
Date now = new Date();
|
||||
int retriesBeforeDisable = 3;
|
||||
|
||||
if (errorCount >= retriesBeforeDisable) {
|
||||
int disabledHours = errorCount - retriesBeforeDisable + 1;
|
||||
disabledHours = Math.min(24 * 7, disabledHours);
|
||||
return DateUtils.addHours(now, disabledHours);
|
||||
}
|
||||
return now;
|
||||
}
|
||||
|
||||
/**
|
||||
* When the feed was refreshed successfully
|
||||
*/
|
||||
public static Date buildDisabledUntil(Date publishedDate, Long averageEntryInterval, Date defaultRefreshInterval) {
|
||||
Date now = new Date();
|
||||
|
||||
if (publishedDate == null) {
|
||||
// feed with no entries, recheck in 24 hours
|
||||
return DateUtils.addHours(now, 24);
|
||||
} else if (publishedDate.before(DateUtils.addMonths(now, -1))) {
|
||||
// older than a month, recheck in 24 hours
|
||||
return DateUtils.addHours(now, 24);
|
||||
} else if (publishedDate.before(DateUtils.addDays(now, -14))) {
|
||||
// older than two weeks, recheck in 12 hours
|
||||
return DateUtils.addHours(now, 12);
|
||||
} else if (publishedDate.before(DateUtils.addDays(now, -7))) {
|
||||
// older than a week, recheck in 6 hours
|
||||
return DateUtils.addHours(now, 6);
|
||||
} else if (averageEntryInterval != null) {
|
||||
// use average time between entries to decide when to refresh next, divided by factor
|
||||
int factor = 2;
|
||||
|
||||
// not more than 6 hours
|
||||
long date = Math.min(DateUtils.addHours(now, 6).getTime(), now.getTime() + averageEntryInterval / factor);
|
||||
|
||||
// not less than default refresh interval
|
||||
date = Math.max(defaultRefreshInterval.getTime(), date);
|
||||
|
||||
return new Date(date);
|
||||
} else {
|
||||
// unknown case, recheck in 24 hours
|
||||
return DateUtils.addHours(now, 24);
|
||||
}
|
||||
}
|
||||
|
||||
public static Long averageTimeBetweenEntries(List<FeedEntry> entries) {
|
||||
if (entries.isEmpty() || entries.size() == 1) {
|
||||
return null;
|
||||
}
|
||||
|
||||
List<Long> timestamps = getSortedTimestamps(entries);
|
||||
|
||||
SummaryStatistics stats = new SummaryStatistics();
|
||||
for (int i = 0; i < timestamps.size() - 1; i++) {
|
||||
long diff = Math.abs(timestamps.get(i) - timestamps.get(i + 1));
|
||||
stats.addValue(diff);
|
||||
}
|
||||
return (long) stats.getMean();
|
||||
}
|
||||
|
||||
public static List<Long> getSortedTimestamps(List<FeedEntry> entries) {
|
||||
return entries.stream().map(t -> t.getUpdated().getTime()).sorted(Collections.reverseOrder()).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public static String removeTrailingSlash(String url) {
|
||||
if (url.endsWith("/")) {
|
||||
url = url.substring(0, url.length() - 1);
|
||||
}
|
||||
return url;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param url
|
||||
* the url of the entry
|
||||
* @param feedLink
|
||||
* the url of the feed as described in the feed
|
||||
* @param feedUrl
|
||||
* the url of the feed that we used to fetch the feed
|
||||
* @return an absolute url pointing to the entry
|
||||
*/
|
||||
public static String toAbsoluteUrl(String url, String feedLink, String feedUrl) {
|
||||
url = StringUtils.trimToNull(StringUtils.normalizeSpace(url));
|
||||
if (url == null || url.startsWith("http")) {
|
||||
return url;
|
||||
}
|
||||
|
||||
String baseUrl = (feedLink == null || isRelative(feedLink)) ? feedUrl : feedLink;
|
||||
|
||||
if (baseUrl == null) {
|
||||
return url;
|
||||
}
|
||||
|
||||
String result = null;
|
||||
try {
|
||||
result = new URL(new URL(baseUrl), url).toString();
|
||||
} catch (MalformedURLException e) {
|
||||
log.debug("could not parse url : " + e.getMessage(), e);
|
||||
result = url;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public static boolean isRelative(final String url) {
|
||||
// the regex means "start with 'scheme://'"
|
||||
return url.startsWith("/") || url.startsWith("#") || !url.matches("^\\w+\\:\\/\\/.*");
|
||||
}
|
||||
|
||||
public static String getFaviconUrl(FeedSubscription subscription, String publicUrl) {
|
||||
return removeTrailingSlash(publicUrl) + "/rest/feed/favicon/" + subscription.getId();
|
||||
}
|
||||
|
||||
public static String proxyImages(String content, String publicUrl) {
|
||||
if (StringUtils.isBlank(content)) {
|
||||
return content;
|
||||
}
|
||||
|
||||
Document doc = Jsoup.parse(content);
|
||||
Elements elements = doc.select("img");
|
||||
for (Element element : elements) {
|
||||
String href = element.attr("src");
|
||||
if (href != null) {
|
||||
String proxy = proxyImage(href, publicUrl);
|
||||
element.attr("src", proxy);
|
||||
}
|
||||
}
|
||||
|
||||
return doc.body().html();
|
||||
}
|
||||
|
||||
public static String proxyImage(String url, String publicUrl) {
|
||||
if (StringUtils.isBlank(url)) {
|
||||
return url;
|
||||
}
|
||||
return removeTrailingSlash(publicUrl) + "/rest/server/proxy?u=" + imageProxyEncoder(url);
|
||||
}
|
||||
|
||||
public static String rot13(String msg) {
|
||||
StringBuilder message = new StringBuilder();
|
||||
|
||||
for (char c : msg.toCharArray()) {
|
||||
if (c >= 'a' && c <= 'm') {
|
||||
c += 13;
|
||||
} else if (c >= 'n' && c <= 'z') {
|
||||
c -= 13;
|
||||
} else if (c >= 'A' && c <= 'M') {
|
||||
c += 13;
|
||||
} else if (c >= 'N' && c <= 'Z') {
|
||||
c -= 13;
|
||||
}
|
||||
message.append(c);
|
||||
}
|
||||
|
||||
return message.toString();
|
||||
}
|
||||
|
||||
public static String imageProxyEncoder(String url) {
|
||||
return Base64.encodeBase64String(rot13(url).getBytes());
|
||||
}
|
||||
|
||||
public static String imageProxyDecoder(String code) {
|
||||
return rot13(new String(Base64.decodeBase64(code)));
|
||||
}
|
||||
|
||||
public static void removeUnwantedFromSearch(List<Entry> entries, List<FeedEntryKeyword> keywords) {
|
||||
Iterator<Entry> it = entries.iterator();
|
||||
while (it.hasNext()) {
|
||||
Entry entry = it.next();
|
||||
boolean keep = true;
|
||||
for (FeedEntryKeyword keyword : keywords) {
|
||||
String title = entry.getTitle() == null ? null : Jsoup.parse(entry.getTitle()).text();
|
||||
String content = entry.getContent() == null ? null : Jsoup.parse(entry.getContent()).text();
|
||||
boolean condition = !StringUtils.containsIgnoreCase(content, keyword.getKeyword())
|
||||
&& !StringUtils.containsIgnoreCase(title, keyword.getKeyword());
|
||||
if (keyword.getMode() == Mode.EXCLUDE) {
|
||||
condition = !condition;
|
||||
}
|
||||
if (condition) {
|
||||
keep = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!keep) {
|
||||
it.remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
package com.commafeed.backend.feed;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.commafeed.backend.model.Feed;
|
||||
import com.commafeed.backend.model.FeedEntry;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
public class FetchedFeed {
|
||||
|
||||
private Feed feed = new Feed();
|
||||
private List<FeedEntry> entries = new ArrayList<>();
|
||||
|
||||
private String title;
|
||||
private String urlAfterRedirect;
|
||||
private long fetchDuration;
|
||||
|
||||
}
|
||||
@@ -0,0 +1,269 @@
|
||||
package com.commafeed.backend.feed;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
||||
public class HtmlEntities {
|
||||
public static final Map<String, String> HTML_TO_NUMERIC_MAP;
|
||||
public static final String[] HTML_ENTITIES;
|
||||
public static final String[] NUMERIC_ENTITIES;
|
||||
|
||||
static {
|
||||
Map<String, String> map = new LinkedHashMap<>();
|
||||
map.put("Á", "Á");
|
||||
map.put("á", "á");
|
||||
map.put("Â", "Â");
|
||||
map.put("â", "â");
|
||||
map.put("´", "´");
|
||||
map.put("Æ", "Æ");
|
||||
map.put("æ", "æ");
|
||||
map.put("À", "À");
|
||||
map.put("à", "à");
|
||||
map.put("ℵ", "ℵ");
|
||||
map.put("Α", "Α");
|
||||
map.put("α", "α");
|
||||
map.put("&", "&");
|
||||
map.put("∧", "∧");
|
||||
map.put("∠", "∠");
|
||||
map.put("Å", "Å");
|
||||
map.put("å", "å");
|
||||
map.put("≈", "≈");
|
||||
map.put("Ã", "Ã");
|
||||
map.put("ã", "ã");
|
||||
map.put("Ä", "Ä");
|
||||
map.put("ä", "ä");
|
||||
map.put("„", "„");
|
||||
map.put("Β", "Β");
|
||||
map.put("β", "β");
|
||||
map.put("¦", "¦");
|
||||
map.put("•", "•");
|
||||
map.put("∩", "∩");
|
||||
map.put("Ç", "Ç");
|
||||
map.put("ç", "ç");
|
||||
map.put("¸", "¸");
|
||||
map.put("¢", "¢");
|
||||
map.put("Χ", "Χ");
|
||||
map.put("χ", "χ");
|
||||
map.put("ˆ", "ˆ");
|
||||
map.put("♣", "♣");
|
||||
map.put("≅", "≅");
|
||||
map.put("©", "©");
|
||||
map.put("↵", "↵");
|
||||
map.put("∪", "∪");
|
||||
map.put("¤", "¤");
|
||||
map.put("†", "†");
|
||||
map.put("‡", "‡");
|
||||
map.put("↓", "↓");
|
||||
map.put("⇓", "⇓");
|
||||
map.put("°", "°");
|
||||
map.put("Δ", "Δ");
|
||||
map.put("δ", "δ");
|
||||
map.put("♦", "♦");
|
||||
map.put("÷", "÷");
|
||||
map.put("É", "É");
|
||||
map.put("é", "é");
|
||||
map.put("Ê", "Ê");
|
||||
map.put("ê", "ê");
|
||||
map.put("È", "È");
|
||||
map.put("è", "è");
|
||||
map.put("∅", "∅");
|
||||
map.put(" ", " ");
|
||||
map.put(" ", " ");
|
||||
map.put("Ε", "Ε");
|
||||
map.put("ε", "ε");
|
||||
map.put("≡", "≡");
|
||||
map.put("Η", "Η");
|
||||
map.put("η", "η");
|
||||
map.put("Ð", "Ð");
|
||||
map.put("ð", "ð");
|
||||
map.put("Ë", "Ë");
|
||||
map.put("ë", "ë");
|
||||
map.put("€", "€");
|
||||
map.put("∃", "∃");
|
||||
map.put("ƒ", "ƒ");
|
||||
map.put("∀", "∀");
|
||||
map.put("½", "½");
|
||||
map.put("¼", "¼");
|
||||
map.put("¾", "¾");
|
||||
map.put("⁄", "⁄");
|
||||
map.put("Γ", "Γ");
|
||||
map.put("γ", "γ");
|
||||
map.put("≥", "≥");
|
||||
map.put("↔", "↔");
|
||||
map.put("⇔", "⇔");
|
||||
map.put("♥", "♥");
|
||||
map.put("…", "…");
|
||||
map.put("Í", "Í");
|
||||
map.put("í", "í");
|
||||
map.put("Î", "Î");
|
||||
map.put("î", "î");
|
||||
map.put("¡", "¡");
|
||||
map.put("Ì", "Ì");
|
||||
map.put("ì", "ì");
|
||||
map.put("ℑ", "ℑ");
|
||||
map.put("∞", "∞");
|
||||
map.put("∫", "∫");
|
||||
map.put("Ι", "Ι");
|
||||
map.put("ι", "ι");
|
||||
map.put("¿", "¿");
|
||||
map.put("∈", "∈");
|
||||
map.put("Ï", "Ï");
|
||||
map.put("ï", "ï");
|
||||
map.put("Κ", "Κ");
|
||||
map.put("κ", "κ");
|
||||
map.put("Λ", "Λ");
|
||||
map.put("λ", "λ");
|
||||
map.put("⟨", "〈");
|
||||
map.put("«", "«");
|
||||
map.put("←", "←");
|
||||
map.put("⇐", "⇐");
|
||||
map.put("⌈", "⌈");
|
||||
map.put("“", "“");
|
||||
map.put("≤", "≤");
|
||||
map.put("⌊", "⌊");
|
||||
map.put("∗", "∗");
|
||||
map.put("◊", "◊");
|
||||
map.put("‎", "‎");
|
||||
map.put("‹", "‹");
|
||||
map.put("‘", "‘");
|
||||
map.put("¯", "¯");
|
||||
map.put("—", "—");
|
||||
map.put("µ", "µ");
|
||||
map.put("·", "·");
|
||||
map.put("−", "−");
|
||||
map.put("Μ", "Μ");
|
||||
map.put("μ", "μ");
|
||||
map.put("∇", "∇");
|
||||
map.put(" ", " ");
|
||||
map.put("–", "–");
|
||||
map.put("≠", "≠");
|
||||
map.put("∋", "∋");
|
||||
map.put("¬", "¬");
|
||||
map.put("∉", "∉");
|
||||
map.put("⊄", "⊄");
|
||||
map.put("Ñ", "Ñ");
|
||||
map.put("ñ", "ñ");
|
||||
map.put("Ν", "Ν");
|
||||
map.put("ν", "ν");
|
||||
map.put("Ó", "Ó");
|
||||
map.put("ó", "ó");
|
||||
map.put("Ô", "Ô");
|
||||
map.put("ô", "ô");
|
||||
map.put("Œ", "Œ");
|
||||
map.put("œ", "œ");
|
||||
map.put("Ò", "Ò");
|
||||
map.put("ò", "ò");
|
||||
map.put("‾", "‾");
|
||||
map.put("Ω", "Ω");
|
||||
map.put("ω", "ω");
|
||||
map.put("Ο", "Ο");
|
||||
map.put("ο", "ο");
|
||||
map.put("⊕", "⊕");
|
||||
map.put("∨", "∨");
|
||||
map.put("ª", "ª");
|
||||
map.put("º", "º");
|
||||
map.put("Ø", "Ø");
|
||||
map.put("ø", "ø");
|
||||
map.put("Õ", "Õ");
|
||||
map.put("õ", "õ");
|
||||
map.put("⊗", "⊗");
|
||||
map.put("Ö", "Ö");
|
||||
map.put("ö", "ö");
|
||||
map.put("¶", "¶");
|
||||
map.put("∂", "∂");
|
||||
map.put("‰", "‰");
|
||||
map.put("⊥", "⊥");
|
||||
map.put("Φ", "Φ");
|
||||
map.put("φ", "φ");
|
||||
map.put("Π", "Π");
|
||||
map.put("π", "π");
|
||||
map.put("ϖ", "ϖ");
|
||||
map.put("±", "±");
|
||||
map.put("£", "£");
|
||||
map.put("′", "′");
|
||||
map.put("″", "″");
|
||||
map.put("∏", "∏");
|
||||
map.put("∝", "∝");
|
||||
map.put("Ψ", "Ψ");
|
||||
map.put("ψ", "ψ");
|
||||
map.put(""", """);
|
||||
map.put("√", "√");
|
||||
map.put("⟩", "〉");
|
||||
map.put("»", "»");
|
||||
map.put("→", "→");
|
||||
map.put("⇒", "⇒");
|
||||
map.put("⌉", "⌉");
|
||||
map.put("”", "”");
|
||||
map.put("ℜ", "ℜ");
|
||||
map.put("®", "®");
|
||||
map.put("⌋", "⌋");
|
||||
map.put("Ρ", "Ρ");
|
||||
map.put("ρ", "ρ");
|
||||
map.put("‏", "‏");
|
||||
map.put("›", "›");
|
||||
map.put("’", "’");
|
||||
map.put("‚", "‚");
|
||||
map.put("Š", "Š");
|
||||
map.put("š", "š");
|
||||
map.put("⋅", "⋅");
|
||||
map.put("§", "§");
|
||||
map.put("­", "­");
|
||||
map.put("Σ", "Σ");
|
||||
map.put("σ", "σ");
|
||||
map.put("ς", "ς");
|
||||
map.put("∼", "∼");
|
||||
map.put("♠", "♠");
|
||||
map.put("⊂", "⊂");
|
||||
map.put("⊆", "⊆");
|
||||
map.put("∑", "∑");
|
||||
map.put("¹", "¹");
|
||||
map.put("²", "²");
|
||||
map.put("³", "³");
|
||||
map.put("⊃", "⊃");
|
||||
map.put("⊇", "⊇");
|
||||
map.put("ß", "ß");
|
||||
map.put("Τ", "Τ");
|
||||
map.put("τ", "τ");
|
||||
map.put("∴", "∴");
|
||||
map.put("Θ", "Θ");
|
||||
map.put("θ", "θ");
|
||||
map.put("ϑ", "ϑ");
|
||||
map.put(" ", " ");
|
||||
map.put("Þ", "Þ");
|
||||
map.put("þ", "þ");
|
||||
map.put("˜", "˜");
|
||||
map.put("×", "×");
|
||||
map.put("™", "™");
|
||||
map.put("Ú", "Ú");
|
||||
map.put("ú", "ú");
|
||||
map.put("↑", "↑");
|
||||
map.put("⇑", "⇑");
|
||||
map.put("Û", "Û");
|
||||
map.put("û", "û");
|
||||
map.put("Ù", "Ù");
|
||||
map.put("ù", "ù");
|
||||
map.put("¨", "¨");
|
||||
map.put("ϒ", "ϒ");
|
||||
map.put("Υ", "Υ");
|
||||
map.put("υ", "υ");
|
||||
map.put("Ü", "Ü");
|
||||
map.put("ü", "ü");
|
||||
map.put("℘", "℘");
|
||||
map.put("Ξ", "Ξ");
|
||||
map.put("ξ", "ξ");
|
||||
map.put("Ý", "Ý");
|
||||
map.put("ý", "ý");
|
||||
map.put("¥", "¥");
|
||||
map.put("ÿ", "ÿ");
|
||||
map.put("Ÿ", "Ÿ");
|
||||
map.put("Ζ", "Ζ");
|
||||
map.put("ζ", "ζ");
|
||||
map.put("‍", "‍");
|
||||
map.put("‌", "‌");
|
||||
|
||||
HTML_TO_NUMERIC_MAP = Collections.unmodifiableMap(map);
|
||||
HTML_ENTITIES = map.keySet().toArray(new String[map.size()]);
|
||||
NUMERIC_ENTITIES = map.values().toArray(new String[map.size()]);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user