forked from Archives/Athou_commafeed
make each step of feed fetching return its own model
This commit is contained in:
@@ -14,7 +14,6 @@ import org.apache.http.HttpHeaders;
|
||||
import org.apache.http.HttpHost;
|
||||
import org.apache.http.HttpResponseInterceptor;
|
||||
import org.apache.http.HttpStatus;
|
||||
import org.apache.http.client.ClientProtocolException;
|
||||
import org.apache.http.client.HttpResponseException;
|
||||
import org.apache.http.client.config.CookieSpecs;
|
||||
import org.apache.http.client.config.RequestConfig;
|
||||
@@ -59,7 +58,7 @@ public class HttpGetter {
|
||||
}
|
||||
}
|
||||
|
||||
public HttpResult getBinary(String url, int timeout) throws ClientProtocolException, IOException, NotModifiedException {
|
||||
public HttpResult getBinary(String url, int timeout) throws IOException, NotModifiedException {
|
||||
return getBinary(url, null, null, timeout);
|
||||
}
|
||||
|
||||
@@ -71,14 +70,10 @@ public class HttpGetter {
|
||||
* header we got last time we queried that url, or null
|
||||
* @param eTag
|
||||
* header we got last time we queried that url, or null
|
||||
* @return
|
||||
* @throws ClientProtocolException
|
||||
* @throws IOException
|
||||
* @throws NotModifiedException
|
||||
* if the url hasn't changed since we asked for it last time
|
||||
*/
|
||||
public HttpResult getBinary(String url, String lastModified, String eTag, int timeout)
|
||||
throws ClientProtocolException, IOException, NotModifiedException {
|
||||
public HttpResult getBinary(String url, String lastModified, String eTag, int timeout) throws IOException, NotModifiedException {
|
||||
HttpResult result = null;
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
@@ -175,13 +170,6 @@ public class HttpGetter {
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
CommaFeedConfiguration config = new CommaFeedConfiguration();
|
||||
HttpGetter getter = new HttpGetter(config);
|
||||
HttpResult result = getter.getBinary("https://sourceforge.net/projects/mpv-player-windows/rss", 30000);
|
||||
System.out.println(new String(result.content));
|
||||
}
|
||||
|
||||
@Getter
|
||||
public static class NotModifiedException extends Exception {
|
||||
private static final long serialVersionUID = 1L;
|
||||
@@ -189,12 +177,12 @@ public class HttpGetter {
|
||||
/**
|
||||
* if the value of this header changed, this is its new value
|
||||
*/
|
||||
private String newLastModifiedHeader;
|
||||
private final String newLastModifiedHeader;
|
||||
|
||||
/**
|
||||
* if the value of this header changed, this is its new value
|
||||
*/
|
||||
private String newEtagHeader;
|
||||
private final String newEtagHeader;
|
||||
|
||||
public NotModifiedException(String message) {
|
||||
this(message, null, null);
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
package com.commafeed.backend.feed;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import com.commafeed.backend.model.Feed;
|
||||
import com.commafeed.backend.model.FeedEntry;
|
||||
|
||||
import lombok.Value;
|
||||
|
||||
/**
 * Value object pairing a {@link Feed} with a list of {@link FeedEntry} objects.
 * Accessors and the all-args constructor are generated by Lombok's {@code @Value}.
 */
@Value
|
||||
public class FeedAndEntries {
|
||||
// the feed this pair refers to
Feed feed;
|
||||
// the entries carried alongside the feed
List<FeedEntry> entries;
|
||||
}
|
||||
@@ -2,6 +2,7 @@ package com.commafeed.backend.feed;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import javax.inject.Inject;
|
||||
@@ -13,13 +14,19 @@ import org.apache.commons.codec.digest.DigestUtils;
|
||||
import com.commafeed.backend.HttpGetter;
|
||||
import com.commafeed.backend.HttpGetter.HttpResult;
|
||||
import com.commafeed.backend.HttpGetter.NotModifiedException;
|
||||
import com.commafeed.backend.feed.FeedParser.FeedParserResult;
|
||||
import com.commafeed.backend.model.Feed;
|
||||
import com.commafeed.backend.model.FeedEntry;
|
||||
import com.commafeed.backend.urlprovider.FeedURLProvider;
|
||||
import com.rometools.rome.io.FeedException;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.Value;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
/**
|
||||
* Fetches a feed then parses it
|
||||
*/
|
||||
@Slf4j
|
||||
@RequiredArgsConstructor(onConstructor = @__({ @Inject }))
|
||||
@Singleton
|
||||
@@ -29,18 +36,18 @@ public class FeedFetcher {
|
||||
private final HttpGetter getter;
|
||||
private final Set<FeedURLProvider> urlProviders;
|
||||
|
||||
public FetchedFeed fetch(String feedUrl, boolean extractFeedUrlFromHtml, String lastModified, String eTag, Date lastPublishedDate,
|
||||
public FeedFetcherResult fetch(String feedUrl, boolean extractFeedUrlFromHtml, String lastModified, String eTag, Date lastPublishedDate,
|
||||
String lastContentHash) throws FeedException, IOException, NotModifiedException {
|
||||
log.debug("Fetching feed {}", feedUrl);
|
||||
FetchedFeed fetchedFeed = null;
|
||||
|
||||
int timeout = 20000;
|
||||
|
||||
HttpResult result = getter.getBinary(feedUrl, lastModified, eTag, timeout);
|
||||
byte[] content = result.getContent();
|
||||
|
||||
FeedParserResult parserResult;
|
||||
try {
|
||||
fetchedFeed = parser.parse(result.getUrlAfterRedirect(), content);
|
||||
parserResult = parser.parse(result.getUrlAfterRedirect(), content);
|
||||
} catch (FeedException e) {
|
||||
if (extractFeedUrlFromHtml) {
|
||||
String extractedUrl = extractFeedUrl(urlProviders, feedUrl, StringUtils.newStringUtf8(result.getContent()));
|
||||
@@ -49,7 +56,7 @@ public class FeedFetcher {
|
||||
|
||||
result = getter.getBinary(extractedUrl, lastModified, eTag, timeout);
|
||||
content = result.getContent();
|
||||
fetchedFeed = parser.parse(result.getUrlAfterRedirect(), content);
|
||||
parserResult = parser.parse(result.getUrlAfterRedirect(), content);
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
@@ -73,21 +80,20 @@ public class FeedFetcher {
|
||||
etagHeaderValueChanged ? result.getETag() : null);
|
||||
}
|
||||
|
||||
if (lastPublishedDate != null && fetchedFeed.getFeed().getLastPublishedDate() != null
|
||||
&& lastPublishedDate.getTime() == fetchedFeed.getFeed().getLastPublishedDate().getTime()) {
|
||||
if (lastPublishedDate != null && parserResult.getFeed().getLastPublishedDate() != null
|
||||
&& lastPublishedDate.getTime() == parserResult.getFeed().getLastPublishedDate().getTime()) {
|
||||
log.debug("publishedDate not modified: {}", feedUrl);
|
||||
throw new NotModifiedException("publishedDate not modified",
|
||||
lastModifiedHeaderValueChanged ? result.getLastModifiedSince() : null,
|
||||
etagHeaderValueChanged ? result.getETag() : null);
|
||||
}
|
||||
|
||||
Feed feed = fetchedFeed.getFeed();
|
||||
Feed feed = parserResult.getFeed();
|
||||
feed.setLastModifiedHeader(result.getLastModifiedSince());
|
||||
feed.setEtagHeader(FeedUtils.truncate(result.getETag(), 255));
|
||||
feed.setLastContentHash(hash);
|
||||
fetchedFeed.setFetchDuration(result.getDuration());
|
||||
fetchedFeed.setUrlAfterRedirect(result.getUrlAfterRedirect());
|
||||
return fetchedFeed;
|
||||
return new FeedFetcherResult(parserResult.getFeed(), parserResult.getEntries(), parserResult.getTitle(),
|
||||
result.getUrlAfterRedirect(), result.getDuration());
|
||||
}
|
||||
|
||||
private static String extractFeedUrl(Set<FeedURLProvider> urlProviders, String url, String urlContent) {
|
||||
@@ -100,4 +106,14 @@ public class FeedFetcher {
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Result of {@code FeedFetcher.fetch}: the parsed feed data combined with details of the HTTP request that produced it.
 * Accessors and the all-args constructor are generated by Lombok's {@code @Value}.
 */
@Value
|
||||
public static class FeedFetcherResult {
|
||||
// feed taken from the parser result; fetch() also sets its last-modified header, etag header and content hash
Feed feed;
|
||||
// entries taken from the parser result
List<FeedEntry> entries;
|
||||
// title taken from the parser result
String title;
|
||||
// url reported by the http result after following redirects
String urlAfterRedirect;
|
||||
// duration reported by the http result (presumably milliseconds - TODO confirm against HttpResult)
long fetchDuration;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ package com.commafeed.backend.feed;
|
||||
import java.io.StringReader;
|
||||
import java.nio.charset.Charset;
|
||||
import java.text.DateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
@@ -37,8 +38,12 @@ import com.rometools.rome.io.SyndFeedInput;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.Value;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
/**
|
||||
* Parses raw xml as a Feed object
|
||||
*/
|
||||
@Slf4j
|
||||
@RequiredArgsConstructor(onConstructor = @__({ @Inject }))
|
||||
@Singleton
|
||||
@@ -50,10 +55,7 @@ public class FeedParser {
|
||||
private static final Date START = new Date(86400000);
|
||||
private static final Date END = new Date(1000L * Integer.MAX_VALUE - 86400000);
|
||||
|
||||
public FetchedFeed parse(String feedUrl, byte[] xml) throws FeedException {
|
||||
FetchedFeed fetchedFeed = new FetchedFeed();
|
||||
Feed feed = fetchedFeed.getFeed();
|
||||
List<FeedEntry> entries = fetchedFeed.getEntries();
|
||||
public FeedParserResult parse(String feedUrl, byte[] xml) throws FeedException {
|
||||
|
||||
try {
|
||||
Charset encoding = FeedUtils.guessEncoding(xml);
|
||||
@@ -63,17 +65,19 @@ public class FeedParser {
|
||||
}
|
||||
xmlString = FeedUtils.replaceHtmlEntitiesWithNumericEntities(xmlString);
|
||||
InputSource source = new InputSource(new StringReader(xmlString));
|
||||
|
||||
SyndFeed rss = new SyndFeedInput().build(source);
|
||||
handleForeignMarkup(rss);
|
||||
|
||||
fetchedFeed.setTitle(rss.getTitle());
|
||||
String title = rss.getTitle();
|
||||
Feed feed = new Feed();
|
||||
feed.setPushHub(findHub(rss));
|
||||
feed.setPushTopic(findSelf(rss));
|
||||
feed.setUrl(feedUrl);
|
||||
feed.setLink(rss.getLink());
|
||||
List<SyndEntry> items = rss.getEntries();
|
||||
|
||||
for (SyndEntry item : items) {
|
||||
List<FeedEntry> entries = new ArrayList<>();
|
||||
for (SyndEntry item : rss.getEntries()) {
|
||||
FeedEntry entry = new FeedEntry();
|
||||
|
||||
String guid = item.getUri();
|
||||
@@ -121,6 +125,7 @@ public class FeedParser {
|
||||
|
||||
entries.add(entry);
|
||||
}
|
||||
|
||||
Date lastEntryDate = null;
|
||||
Date publishedDate = validateDate(rss.getPublishedDate(), false);
|
||||
if (!entries.isEmpty()) {
|
||||
@@ -133,10 +138,10 @@ public class FeedParser {
|
||||
feed.setAverageEntryInterval(FeedUtils.averageTimeBetweenEntries(entries));
|
||||
feed.setLastEntryDate(lastEntryDate);
|
||||
|
||||
return new FeedParserResult(feed, entries, title);
|
||||
} catch (Exception e) {
|
||||
throw new FeedException(String.format("Could not parse feed from %s : %s", feedUrl, e.getMessage()), e);
|
||||
}
|
||||
return fetchedFeed;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -273,4 +278,11 @@ public class FeedParser {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Result of {@code FeedParser.parse}: what was extracted from the raw feed document.
 * Accessors and the all-args constructor are generated by Lombok's {@code @Value}.
 */
@Value
|
||||
public static class FeedParserResult {
|
||||
// feed object populated by the parser (url, link, push hub/topic, entry dates...)
Feed feed;
|
||||
// one FeedEntry per item found in the parsed document
List<FeedEntry> entries;
|
||||
// title of the parsed feed, as returned by the underlying SyndFeed
String title;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -22,9 +22,9 @@ public class FeedRefreshIntervalCalculator {
|
||||
this.refreshIntervalMinutes = config.getApplicationSettings().getRefreshIntervalMinutes();
|
||||
}
|
||||
|
||||
public Date onFetchSuccess(FetchedFeed fetchedFeed) {
|
||||
public Date onFetchSuccess(Feed feed) {
|
||||
Date defaultRefreshInterval = getDefaultRefreshInterval();
|
||||
return heavyLoad ? computeRefreshIntervalForHeavyLoad(fetchedFeed.getFeed(), defaultRefreshInterval) : defaultRefreshInterval;
|
||||
return heavyLoad ? computeRefreshIntervalForHeavyLoad(feed, defaultRefreshInterval) : defaultRefreshInterval;
|
||||
}
|
||||
|
||||
public Date onFeedNotModified(Feed feed) {
|
||||
|
||||
@@ -39,6 +39,9 @@ import io.dropwizard.lifecycle.Managed;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
/**
|
||||
* Updates the feed in the database and inserts new entries
|
||||
*/
|
||||
@Slf4j
|
||||
@Singleton
|
||||
public class FeedRefreshUpdater implements Managed {
|
||||
|
||||
@@ -15,13 +15,15 @@ import com.codahale.metrics.Meter;
|
||||
import com.codahale.metrics.MetricRegistry;
|
||||
import com.commafeed.CommaFeedConfiguration;
|
||||
import com.commafeed.backend.HttpGetter.NotModifiedException;
|
||||
import com.commafeed.backend.feed.FeedFetcher.FeedFetcherResult;
|
||||
import com.commafeed.backend.model.Feed;
|
||||
import com.commafeed.backend.model.FeedEntry;
|
||||
|
||||
import lombok.Value;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
/**
|
||||
* Calls {@link FeedFetcher} and handles its outcome
|
||||
* Calls {@link FeedFetcher} and updates the Feed object, but does not update the database ({@link FeedRefreshUpdater} does that)
|
||||
*/
|
||||
@Slf4j
|
||||
@Singleton
|
||||
@@ -42,39 +44,39 @@ public class FeedRefreshWorker {
|
||||
|
||||
}
|
||||
|
||||
public FeedAndEntries update(Feed feed) {
|
||||
public FeedRefreshWorkerResult update(Feed feed) {
|
||||
try {
|
||||
String url = Optional.ofNullable(feed.getUrlAfterRedirect()).orElse(feed.getUrl());
|
||||
FetchedFeed fetchedFeed = fetcher.fetch(url, false, feed.getLastModifiedHeader(), feed.getEtagHeader(),
|
||||
FeedFetcherResult feedFetcherResult = fetcher.fetch(url, false, feed.getLastModifiedHeader(), feed.getEtagHeader(),
|
||||
feed.getLastPublishedDate(), feed.getLastContentHash());
|
||||
// stops here if NotModifiedException or any other exception is thrown
|
||||
List<FeedEntry> entries = fetchedFeed.getEntries();
|
||||
List<FeedEntry> entries = feedFetcherResult.getEntries();
|
||||
|
||||
Integer maxFeedCapacity = config.getApplicationSettings().getMaxFeedCapacity();
|
||||
if (maxFeedCapacity > 0) {
|
||||
entries = entries.stream().limit(maxFeedCapacity).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
String urlAfterRedirect = fetchedFeed.getUrlAfterRedirect();
|
||||
String urlAfterRedirect = feedFetcherResult.getUrlAfterRedirect();
|
||||
if (StringUtils.equals(url, urlAfterRedirect)) {
|
||||
urlAfterRedirect = null;
|
||||
}
|
||||
feed.setUrlAfterRedirect(urlAfterRedirect);
|
||||
feed.setLink(fetchedFeed.getFeed().getLink());
|
||||
feed.setLastModifiedHeader(fetchedFeed.getFeed().getLastModifiedHeader());
|
||||
feed.setEtagHeader(fetchedFeed.getFeed().getEtagHeader());
|
||||
feed.setLastContentHash(fetchedFeed.getFeed().getLastContentHash());
|
||||
feed.setLastPublishedDate(fetchedFeed.getFeed().getLastPublishedDate());
|
||||
feed.setAverageEntryInterval(fetchedFeed.getFeed().getAverageEntryInterval());
|
||||
feed.setLastEntryDate(fetchedFeed.getFeed().getLastEntryDate());
|
||||
feed.setLink(feedFetcherResult.getFeed().getLink());
|
||||
feed.setLastModifiedHeader(feedFetcherResult.getFeed().getLastModifiedHeader());
|
||||
feed.setEtagHeader(feedFetcherResult.getFeed().getEtagHeader());
|
||||
feed.setLastContentHash(feedFetcherResult.getFeed().getLastContentHash());
|
||||
feed.setLastPublishedDate(feedFetcherResult.getFeed().getLastPublishedDate());
|
||||
feed.setAverageEntryInterval(feedFetcherResult.getFeed().getAverageEntryInterval());
|
||||
feed.setLastEntryDate(feedFetcherResult.getFeed().getLastEntryDate());
|
||||
|
||||
feed.setErrorCount(0);
|
||||
feed.setMessage(null);
|
||||
feed.setDisabledUntil(refreshIntervalCalculator.onFetchSuccess(fetchedFeed));
|
||||
feed.setDisabledUntil(refreshIntervalCalculator.onFetchSuccess(feedFetcherResult.getFeed()));
|
||||
|
||||
handlePubSub(feed, fetchedFeed.getFeed());
|
||||
handlePubSub(feed, feedFetcherResult.getFeed());
|
||||
|
||||
return new FeedAndEntries(feed, entries);
|
||||
return new FeedRefreshWorkerResult(feed, entries);
|
||||
} catch (NotModifiedException e) {
|
||||
log.debug("Feed not modified : {} - {}", feed.getUrl(), e.getMessage());
|
||||
|
||||
@@ -90,7 +92,7 @@ public class FeedRefreshWorker {
|
||||
feed.setEtagHeader(e.getNewEtagHeader());
|
||||
}
|
||||
|
||||
return new FeedAndEntries(feed, Collections.emptyList());
|
||||
return new FeedRefreshWorkerResult(feed, Collections.emptyList());
|
||||
} catch (Exception e) {
|
||||
String message = "Unable to refresh feed " + feed.getUrl() + " : " + e.getMessage();
|
||||
log.debug(e.getClass().getName() + " " + message, e);
|
||||
@@ -99,7 +101,7 @@ public class FeedRefreshWorker {
|
||||
feed.setMessage(message);
|
||||
feed.setDisabledUntil(refreshIntervalCalculator.onFetchError(feed));
|
||||
|
||||
return new FeedAndEntries(feed, Collections.emptyList());
|
||||
return new FeedRefreshWorkerResult(feed, Collections.emptyList());
|
||||
} finally {
|
||||
feedFetched.mark();
|
||||
}
|
||||
@@ -127,4 +129,10 @@ public class FeedRefreshWorker {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Result of {@code FeedRefreshWorker.update}: the refreshed feed and the entries that came with it.
 * Accessors and the all-args constructor are generated by Lombok's {@code @Value}.
 */
@Value
|
||||
public static class FeedRefreshWorkerResult {
|
||||
// the feed instance that was passed to update(), with its fields updated by the worker
Feed feed;
|
||||
// fetched entries (limited to maxFeedCapacity when that setting is positive); empty when the feed was not modified or the refresh
// failed
List<FeedEntry> entries;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
package com.commafeed.backend.feed;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.commafeed.backend.model.Feed;
|
||||
import com.commafeed.backend.model.FeedEntry;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
/**
 * Mutable holder for a feed, its entries and metadata about the fetch.
 * Getters and setters for every field are generated by Lombok.
 */
@Getter
|
||||
@Setter
|
||||
public class FetchedFeed {
|
||||
|
||||
// starts out as an empty Feed so callers can populate it in place
private Feed feed = new Feed();
|
||||
// starts out as an empty, mutable list so callers can add entries in place
private List<FeedEntry> entries = new ArrayList<>();
|
||||
|
||||
// title of the fetched feed
private String title;
|
||||
// url after redirects were followed
private String urlAfterRedirect;
|
||||
// how long the fetch took (unit not visible here - TODO confirm)
private long fetchDuration;
|
||||
|
||||
}
|
||||
@@ -53,7 +53,7 @@ public class FeedRefreshEngine implements Managed {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void start() throws Exception {
|
||||
public void start() {
|
||||
Flowable<Feed> database = Flowable.fromCallable(() -> findNextUpdatableFeeds(getBatchSize(), getLastLoginThreshold()))
|
||||
.onErrorResumeNext(e -> {
|
||||
log.error("error while fetching next updatable feeds", e);
|
||||
@@ -114,7 +114,7 @@ public class FeedRefreshEngine implements Managed {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void stop() throws Exception {
|
||||
public void stop() {
|
||||
flow.dispose();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -45,8 +45,8 @@ import com.commafeed.backend.dao.FeedSubscriptionDAO;
|
||||
import com.commafeed.backend.favicon.AbstractFaviconFetcher.Favicon;
|
||||
import com.commafeed.backend.feed.FeedEntryKeyword;
|
||||
import com.commafeed.backend.feed.FeedFetcher;
|
||||
import com.commafeed.backend.feed.FeedFetcher.FeedFetcherResult;
|
||||
import com.commafeed.backend.feed.FeedUtils;
|
||||
import com.commafeed.backend.feed.FetchedFeed;
|
||||
import com.commafeed.backend.model.Feed;
|
||||
import com.commafeed.backend.model.FeedCategory;
|
||||
import com.commafeed.backend.model.FeedEntry;
|
||||
@@ -244,10 +244,10 @@ public class FeedREST {
|
||||
url = StringUtils.trimToEmpty(url);
|
||||
url = prependHttp(url);
|
||||
try {
|
||||
FetchedFeed feed = feedFetcher.fetch(url, true, null, null, null, null);
|
||||
FeedFetcherResult feedFetcherResult = feedFetcher.fetch(url, true, null, null, null, null);
|
||||
info = new FeedInfo();
|
||||
info.setUrl(feed.getUrlAfterRedirect());
|
||||
info.setTitle(feed.getTitle());
|
||||
info.setUrl(feedFetcherResult.getUrlAfterRedirect());
|
||||
info.setTitle(feedFetcherResult.getTitle());
|
||||
|
||||
} catch (Exception e) {
|
||||
log.debug(e.getMessage(), e);
|
||||
|
||||
@@ -26,7 +26,7 @@ import com.codahale.metrics.annotation.Timed;
|
||||
import com.commafeed.CommaFeedConfiguration;
|
||||
import com.commafeed.backend.dao.FeedDAO;
|
||||
import com.commafeed.backend.feed.FeedParser;
|
||||
import com.commafeed.backend.feed.FetchedFeed;
|
||||
import com.commafeed.backend.feed.FeedParser.FeedParserResult;
|
||||
import com.commafeed.backend.model.Feed;
|
||||
import com.commafeed.backend.service.FeedRefreshEngine;
|
||||
import com.google.common.base.Preconditions;
|
||||
@@ -100,8 +100,8 @@ public class PubSubHubbubCallbackREST {
|
||||
return Response.status(Status.BAD_REQUEST).entity("empty body received").build();
|
||||
}
|
||||
|
||||
FetchedFeed fetchedFeed = parser.parse(null, bytes);
|
||||
String topic = fetchedFeed.getFeed().getPushTopic();
|
||||
FeedParserResult feedParserResult = parser.parse(null, bytes);
|
||||
String topic = feedParserResult.getFeed().getPushTopic();
|
||||
if (StringUtils.isBlank(topic)) {
|
||||
return Response.status(Status.BAD_REQUEST).entity("empty topic received").build();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user