From 9c628a8f537fcebe5616237cc55a4d5577ba64b0 Mon Sep 17 00:00:00 2001 From: Athou Date: Mon, 1 May 2023 09:25:44 +0200 Subject: [PATCH] make each step of feed fetching return its own model --- .../com/commafeed/backend/HttpGetter.java | 20 ++------- .../backend/feed/FeedAndEntries.java | 14 ------- .../commafeed/backend/feed/FeedFetcher.java | 36 +++++++++++----- .../commafeed/backend/feed/FeedParser.java | 28 +++++++++---- .../feed/FeedRefreshIntervalCalculator.java | 4 +- .../backend/feed/FeedRefreshUpdater.java | 3 ++ .../backend/feed/FeedRefreshWorker.java | 42 +++++++++++-------- .../commafeed/backend/feed/FetchedFeed.java | 23 ---------- .../backend/service/FeedRefreshEngine.java | 4 +- .../commafeed/frontend/resource/FeedREST.java | 8 ++-- .../resource/PubSubHubbubCallbackREST.java | 6 +-- 11 files changed, 89 insertions(+), 99 deletions(-) delete mode 100644 commafeed-server/src/main/java/com/commafeed/backend/feed/FeedAndEntries.java delete mode 100644 commafeed-server/src/main/java/com/commafeed/backend/feed/FetchedFeed.java diff --git a/commafeed-server/src/main/java/com/commafeed/backend/HttpGetter.java b/commafeed-server/src/main/java/com/commafeed/backend/HttpGetter.java index eae5093a..b453bd81 100644 --- a/commafeed-server/src/main/java/com/commafeed/backend/HttpGetter.java +++ b/commafeed-server/src/main/java/com/commafeed/backend/HttpGetter.java @@ -14,7 +14,6 @@ import org.apache.http.HttpHeaders; import org.apache.http.HttpHost; import org.apache.http.HttpResponseInterceptor; import org.apache.http.HttpStatus; -import org.apache.http.client.ClientProtocolException; import org.apache.http.client.HttpResponseException; import org.apache.http.client.config.CookieSpecs; import org.apache.http.client.config.RequestConfig; @@ -59,7 +58,7 @@ public class HttpGetter { } } - public HttpResult getBinary(String url, int timeout) throws ClientProtocolException, IOException, NotModifiedException { + public HttpResult getBinary(String url, int timeout) throws IOException, NotModifiedException { return getBinary(url, null, null, timeout); } @@ -71,14 +70,10 @@ public class HttpGetter { * header we got last time we queried that url, or null * @param eTag * header we got last time we queried that url, or null - * @return - * @throws ClientProtocolException - * @throws IOException * @throws NotModifiedException * if the url hasn't changed since we asked for it last time */ - public HttpResult getBinary(String url, String lastModified, String eTag, int timeout) - throws ClientProtocolException, IOException, NotModifiedException { + public HttpResult getBinary(String url, String lastModified, String eTag, int timeout) throws IOException, NotModifiedException { HttpResult result = null; long start = System.currentTimeMillis(); @@ -175,13 +170,6 @@ public class HttpGetter { return builder.build(); } - public static void main(String[] args) throws Exception { - CommaFeedConfiguration config = new CommaFeedConfiguration(); - HttpGetter getter = new HttpGetter(config); - HttpResult result = getter.getBinary("https://sourceforge.net/projects/mpv-player-windows/rss", 30000); - System.out.println(new String(result.content)); - } - @Getter public static class NotModifiedException extends Exception { private static final long serialVersionUID = 1L; @@ -189,12 +177,12 @@ public class HttpGetter { /** * if the value of this header changed, this is its new value */ - private String newLastModifiedHeader; + private final String newLastModifiedHeader; /** * if the value of this header changed, this is its new value */ - private String newEtagHeader; + private final String newEtagHeader; public NotModifiedException(String message) { this(message, null, null); diff --git a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedAndEntries.java b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedAndEntries.java deleted file mode 100644 index 20d77df0..00000000 --- a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedAndEntries.java +++ /dev/null @@ -1,14 +0,0 @@ -package com.commafeed.backend.feed; - -import java.util.List; - -import com.commafeed.backend.model.Feed; -import com.commafeed.backend.model.FeedEntry; - -import lombok.Value; - -@Value -public class FeedAndEntries { - Feed feed; - List entries; -} diff --git a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedFetcher.java b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedFetcher.java index 8c9052e5..450f6963 100644 --- a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedFetcher.java +++ b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedFetcher.java @@ -2,6 +2,7 @@ package com.commafeed.backend.feed; import java.io.IOException; import java.util.Date; +import java.util.List; import java.util.Set; import javax.inject.Inject; @@ -13,13 +14,19 @@ import org.apache.commons.codec.digest.DigestUtils; import com.commafeed.backend.HttpGetter; import com.commafeed.backend.HttpGetter.HttpResult; import com.commafeed.backend.HttpGetter.NotModifiedException; +import com.commafeed.backend.feed.FeedParser.FeedParserResult; import com.commafeed.backend.model.Feed; +import com.commafeed.backend.model.FeedEntry; import com.commafeed.backend.urlprovider.FeedURLProvider; import com.rometools.rome.io.FeedException; import lombok.RequiredArgsConstructor; +import lombok.Value; import lombok.extern.slf4j.Slf4j; +/** + * Fetches a feed then parses it + */ @Slf4j @RequiredArgsConstructor(onConstructor = @__({ @Inject })) @Singleton @@ -29,18 +36,18 @@ public class FeedFetcher { private final HttpGetter getter; private final Set urlProviders; - public FetchedFeed fetch(String feedUrl, boolean extractFeedUrlFromHtml, String lastModified, String eTag, Date lastPublishedDate, + public FeedFetcherResult fetch(String feedUrl, boolean extractFeedUrlFromHtml, String lastModified, String eTag, Date lastPublishedDate, String lastContentHash) throws FeedException, IOException, NotModifiedException { log.debug("Fetching feed {}", feedUrl); - FetchedFeed fetchedFeed = null; int timeout = 20000; HttpResult result = getter.getBinary(feedUrl, lastModified, eTag, timeout); byte[] content = result.getContent(); + FeedParserResult parserResult; try { - fetchedFeed = parser.parse(result.getUrlAfterRedirect(), content); + parserResult = parser.parse(result.getUrlAfterRedirect(), content); } catch (FeedException e) { if (extractFeedUrlFromHtml) { String extractedUrl = extractFeedUrl(urlProviders, feedUrl, StringUtils.newStringUtf8(result.getContent())); @@ -49,7 +56,7 @@ public class FeedFetcher { result = getter.getBinary(extractedUrl, lastModified, eTag, timeout); content = result.getContent(); - fetchedFeed = parser.parse(result.getUrlAfterRedirect(), content); + parserResult = parser.parse(result.getUrlAfterRedirect(), content); } else { throw e; } @@ -73,21 +80,20 @@ public class FeedFetcher { etagHeaderValueChanged ? result.getETag() : null); } - if (lastPublishedDate != null && fetchedFeed.getFeed().getLastPublishedDate() != null - && lastPublishedDate.getTime() == fetchedFeed.getFeed().getLastPublishedDate().getTime()) { + if (lastPublishedDate != null && parserResult.getFeed().getLastPublishedDate() != null + && lastPublishedDate.getTime() == parserResult.getFeed().getLastPublishedDate().getTime()) { log.debug("publishedDate not modified: {}", feedUrl); throw new NotModifiedException("publishedDate not modified", lastModifiedHeaderValueChanged ? result.getLastModifiedSince() : null, etagHeaderValueChanged ? result.getETag() : null); } - Feed feed = fetchedFeed.getFeed(); + Feed feed = parserResult.getFeed(); feed.setLastModifiedHeader(result.getLastModifiedSince()); feed.setEtagHeader(FeedUtils.truncate(result.getETag(), 255)); feed.setLastContentHash(hash); - fetchedFeed.setFetchDuration(result.getDuration()); - fetchedFeed.setUrlAfterRedirect(result.getUrlAfterRedirect()); - return fetchedFeed; + return new FeedFetcherResult(parserResult.getFeed(), parserResult.getEntries(), parserResult.getTitle(), + result.getUrlAfterRedirect(), result.getDuration()); } private static String extractFeedUrl(Set urlProviders, String url, String urlContent) { @@ -100,4 +106,14 @@ public class FeedFetcher { return null; } + + @Value + public static class FeedFetcherResult { + Feed feed; + List entries; + String title; + String urlAfterRedirect; + long fetchDuration; + } + } diff --git a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedParser.java b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedParser.java index 54ea77a8..4b2b0456 100644 --- a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedParser.java +++ b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedParser.java @@ -3,6 +3,7 @@ package com.commafeed.backend.feed; import java.io.StringReader; import java.nio.charset.Charset; import java.text.DateFormat; +import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.stream.Collectors; @@ -37,8 +38,12 @@ import com.rometools.rome.io.SyndFeedInput; import lombok.Data; import lombok.RequiredArgsConstructor; +import lombok.Value; import lombok.extern.slf4j.Slf4j; +/** + * Parses raw xml as a Feed object + */ @Slf4j @RequiredArgsConstructor(onConstructor = @__({ @Inject })) @Singleton @@ -50,10 +55,7 @@ public class FeedParser { private static final Date START = new Date(86400000); private static final Date END = new Date(1000L * Integer.MAX_VALUE - 86400000); - public FetchedFeed parse(String feedUrl, byte[] xml) throws FeedException { - FetchedFeed fetchedFeed = new FetchedFeed(); - Feed feed = fetchedFeed.getFeed(); - List entries = fetchedFeed.getEntries(); + public FeedParserResult parse(String feedUrl, byte[] xml) throws FeedException { try { Charset encoding = FeedUtils.guessEncoding(xml); @@ -63,17 +65,19 @@ public class FeedParser { } xmlString = FeedUtils.replaceHtmlEntitiesWithNumericEntities(xmlString); InputSource source = new InputSource(new StringReader(xmlString)); + SyndFeed rss = new SyndFeedInput().build(source); handleForeignMarkup(rss); - fetchedFeed.setTitle(rss.getTitle()); + String title = rss.getTitle(); + Feed feed = new Feed(); feed.setPushHub(findHub(rss)); feed.setPushTopic(findSelf(rss)); feed.setUrl(feedUrl); feed.setLink(rss.getLink()); - List items = rss.getEntries(); - for (SyndEntry item : items) { + List entries = new ArrayList<>(); + for (SyndEntry item : rss.getEntries()) { FeedEntry entry = new FeedEntry(); String guid = item.getUri(); @@ -121,6 +125,7 @@ public class FeedParser { entries.add(entry); } + Date lastEntryDate = null; Date publishedDate = validateDate(rss.getPublishedDate(), false); if (!entries.isEmpty()) { @@ -133,10 +138,10 @@ public class FeedParser { feed.setAverageEntryInterval(FeedUtils.averageTimeBetweenEntries(entries)); feed.setLastEntryDate(lastEntryDate); + return new FeedParserResult(feed, entries, title); } catch (Exception e) { throw new FeedException(String.format("Could not parse feed from %s : %s", feedUrl, e.getMessage()), e); } - return fetchedFeed; } /** @@ -273,4 +278,11 @@ public class FeedParser { } } + @Value + public static class FeedParserResult { + Feed feed; + List entries; + String title; + } + } diff --git a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedRefreshIntervalCalculator.java b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedRefreshIntervalCalculator.java index f630722e..300ceaa7 100644 --- a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedRefreshIntervalCalculator.java +++ b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedRefreshIntervalCalculator.java @@ -22,9 +22,9 @@ public class FeedRefreshIntervalCalculator { this.refreshIntervalMinutes = config.getApplicationSettings().getRefreshIntervalMinutes(); } - public Date onFetchSuccess(FetchedFeed fetchedFeed) { + public Date onFetchSuccess(Feed feed) { Date defaultRefreshInterval = getDefaultRefreshInterval(); - return heavyLoad ? computeRefreshIntervalForHeavyLoad(fetchedFeed.getFeed(), defaultRefreshInterval) : defaultRefreshInterval; + return heavyLoad ? computeRefreshIntervalForHeavyLoad(feed, defaultRefreshInterval) : defaultRefreshInterval; } public Date onFeedNotModified(Feed feed) { diff --git a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedRefreshUpdater.java b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedRefreshUpdater.java index 6c67e815..174e1d29 100644 --- a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedRefreshUpdater.java +++ b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedRefreshUpdater.java @@ -39,6 +39,9 @@ import io.dropwizard.lifecycle.Managed; import lombok.AllArgsConstructor; import lombok.extern.slf4j.Slf4j; +/** + * Updates the feed in the database and inserts new entries + */ @Slf4j @Singleton public class FeedRefreshUpdater implements Managed { diff --git a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedRefreshWorker.java b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedRefreshWorker.java index 4a208b05..85ccbde4 100644 --- a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedRefreshWorker.java +++ b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedRefreshWorker.java @@ -15,13 +15,15 @@ import com.codahale.metrics.Meter; import com.codahale.metrics.MetricRegistry; import com.commafeed.CommaFeedConfiguration; import com.commafeed.backend.HttpGetter.NotModifiedException; +import com.commafeed.backend.feed.FeedFetcher.FeedFetcherResult; import com.commafeed.backend.model.Feed; import com.commafeed.backend.model.FeedEntry; +import lombok.Value; import lombok.extern.slf4j.Slf4j; /** - * Calls {@link FeedFetcher} and handles its outcome + * Calls {@link FeedFetcher} and updates the Feed object, but does not update the database ({@link FeedRefreshUpdater} does that) */ @Slf4j @Singleton @@ -42,39 +44,39 @@ public class FeedRefreshWorker { } - public FeedAndEntries update(Feed feed) { + public FeedRefreshWorkerResult update(Feed feed) { try { String url = Optional.ofNullable(feed.getUrlAfterRedirect()).orElse(feed.getUrl()); - FetchedFeed fetchedFeed = fetcher.fetch(url, false, feed.getLastModifiedHeader(), feed.getEtagHeader(), + FeedFetcherResult feedFetcherResult = fetcher.fetch(url, false, feed.getLastModifiedHeader(), feed.getEtagHeader(), feed.getLastPublishedDate(), feed.getLastContentHash()); // stops here if NotModifiedException or any other exception is thrown - List entries = fetchedFeed.getEntries(); + List entries = feedFetcherResult.getEntries(); Integer maxFeedCapacity = config.getApplicationSettings().getMaxFeedCapacity(); if (maxFeedCapacity > 0) { entries = entries.stream().limit(maxFeedCapacity).collect(Collectors.toList()); } - String urlAfterRedirect = fetchedFeed.getUrlAfterRedirect(); + String urlAfterRedirect = feedFetcherResult.getUrlAfterRedirect(); if (StringUtils.equals(url, urlAfterRedirect)) { urlAfterRedirect = null; } feed.setUrlAfterRedirect(urlAfterRedirect); - feed.setLink(fetchedFeed.getFeed().getLink()); - feed.setLastModifiedHeader(fetchedFeed.getFeed().getLastModifiedHeader()); - feed.setEtagHeader(fetchedFeed.getFeed().getEtagHeader()); - feed.setLastContentHash(fetchedFeed.getFeed().getLastContentHash()); - feed.setLastPublishedDate(fetchedFeed.getFeed().getLastPublishedDate()); - feed.setAverageEntryInterval(fetchedFeed.getFeed().getAverageEntryInterval()); - feed.setLastEntryDate(fetchedFeed.getFeed().getLastEntryDate()); + feed.setLink(feedFetcherResult.getFeed().getLink()); + feed.setLastModifiedHeader(feedFetcherResult.getFeed().getLastModifiedHeader()); + feed.setEtagHeader(feedFetcherResult.getFeed().getEtagHeader()); + feed.setLastContentHash(feedFetcherResult.getFeed().getLastContentHash()); + feed.setLastPublishedDate(feedFetcherResult.getFeed().getLastPublishedDate()); + feed.setAverageEntryInterval(feedFetcherResult.getFeed().getAverageEntryInterval()); + feed.setLastEntryDate(feedFetcherResult.getFeed().getLastEntryDate()); feed.setErrorCount(0); feed.setMessage(null); - feed.setDisabledUntil(refreshIntervalCalculator.onFetchSuccess(fetchedFeed)); + feed.setDisabledUntil(refreshIntervalCalculator.onFetchSuccess(feedFetcherResult.getFeed())); - handlePubSub(feed, fetchedFeed.getFeed()); + handlePubSub(feed, feedFetcherResult.getFeed()); - return new FeedAndEntries(feed, entries); + return new FeedRefreshWorkerResult(feed, entries); } catch (NotModifiedException e) { log.debug("Feed not modified : {} - {}", feed.getUrl(), e.getMessage()); @@ -90,7 +92,7 @@ public class FeedRefreshWorker { feed.setEtagHeader(e.getNewEtagHeader()); } - return new FeedAndEntries(feed, Collections.emptyList()); + return new FeedRefreshWorkerResult(feed, Collections.emptyList()); } catch (Exception e) { String message = "Unable to refresh feed " + feed.getUrl() + " : " + e.getMessage(); log.debug(e.getClass().getName() + " " + message, e); @@ -99,7 +101,7 @@ public class FeedRefreshWorker { feed.setMessage(message); feed.setDisabledUntil(refreshIntervalCalculator.onFetchError(feed)); - return new FeedAndEntries(feed, Collections.emptyList()); + return new FeedRefreshWorkerResult(feed, Collections.emptyList()); } finally { feedFetched.mark(); } @@ -127,4 +129,10 @@ public class FeedRefreshWorker { } } + @Value + public static class FeedRefreshWorkerResult { + Feed feed; + List entries; + } + } diff --git a/commafeed-server/src/main/java/com/commafeed/backend/feed/FetchedFeed.java b/commafeed-server/src/main/java/com/commafeed/backend/feed/FetchedFeed.java deleted file mode 100644 index 6de190d3..00000000 --- a/commafeed-server/src/main/java/com/commafeed/backend/feed/FetchedFeed.java +++ /dev/null @@ -1,23 +0,0 @@ -package com.commafeed.backend.feed; - -import java.util.ArrayList; -import java.util.List; - -import com.commafeed.backend.model.Feed; -import com.commafeed.backend.model.FeedEntry; - -import lombok.Getter; -import lombok.Setter; - -@Getter -@Setter -public class FetchedFeed { - - private Feed feed = new Feed(); - private List entries = new ArrayList<>(); - - private String title; - private String urlAfterRedirect; - private long fetchDuration; - -} diff --git a/commafeed-server/src/main/java/com/commafeed/backend/service/FeedRefreshEngine.java b/commafeed-server/src/main/java/com/commafeed/backend/service/FeedRefreshEngine.java index d92a7484..c570fcfc 100644 --- a/commafeed-server/src/main/java/com/commafeed/backend/service/FeedRefreshEngine.java +++ b/commafeed-server/src/main/java/com/commafeed/backend/service/FeedRefreshEngine.java @@ -53,7 +53,7 @@ public class FeedRefreshEngine implements Managed { } @Override - public void start() throws Exception { + public void start() { Flowable database = Flowable.fromCallable(() -> findNextUpdatableFeeds(getBatchSize(), getLastLoginThreshold())) .onErrorResumeNext(e -> { log.error("error while fetching next updatable feeds", e); @@ -114,7 +114,7 @@ public class FeedRefreshEngine implements Managed { } @Override - public void stop() throws Exception { + public void stop() { flow.dispose(); } } diff --git a/commafeed-server/src/main/java/com/commafeed/frontend/resource/FeedREST.java b/commafeed-server/src/main/java/com/commafeed/frontend/resource/FeedREST.java index dfc48f54..ba1eac95 100644 --- a/commafeed-server/src/main/java/com/commafeed/frontend/resource/FeedREST.java +++ b/commafeed-server/src/main/java/com/commafeed/frontend/resource/FeedREST.java @@ -45,8 +45,8 @@ import com.commafeed.backend.dao.FeedSubscriptionDAO; import com.commafeed.backend.favicon.AbstractFaviconFetcher.Favicon; import com.commafeed.backend.feed.FeedEntryKeyword; import com.commafeed.backend.feed.FeedFetcher; +import com.commafeed.backend.feed.FeedFetcher.FeedFetcherResult; import com.commafeed.backend.feed.FeedUtils; -import com.commafeed.backend.feed.FetchedFeed; import com.commafeed.backend.model.Feed; import com.commafeed.backend.model.FeedCategory; import com.commafeed.backend.model.FeedEntry; @@ -244,10 +244,10 @@ public class FeedREST { url = StringUtils.trimToEmpty(url); url = prependHttp(url); try { - FetchedFeed feed = feedFetcher.fetch(url, true, null, null, null, null); + FeedFetcherResult feedFetcherResult = feedFetcher.fetch(url, true, null, null, null, null); info = new FeedInfo(); - info.setUrl(feed.getUrlAfterRedirect()); - info.setTitle(feed.getTitle()); + info.setUrl(feedFetcherResult.getUrlAfterRedirect()); + info.setTitle(feedFetcherResult.getTitle()); } catch (Exception e) { log.debug(e.getMessage(), e); diff --git a/commafeed-server/src/main/java/com/commafeed/frontend/resource/PubSubHubbubCallbackREST.java b/commafeed-server/src/main/java/com/commafeed/frontend/resource/PubSubHubbubCallbackREST.java index 34d2845a..e40f9e35 100644 --- a/commafeed-server/src/main/java/com/commafeed/frontend/resource/PubSubHubbubCallbackREST.java +++ b/commafeed-server/src/main/java/com/commafeed/frontend/resource/PubSubHubbubCallbackREST.java @@ -26,7 +26,7 @@ import com.codahale.metrics.annotation.Timed; import com.commafeed.CommaFeedConfiguration; import com.commafeed.backend.dao.FeedDAO; import com.commafeed.backend.feed.FeedParser; -import com.commafeed.backend.feed.FetchedFeed; +import com.commafeed.backend.feed.FeedParser.FeedParserResult; import com.commafeed.backend.model.Feed; import com.commafeed.backend.service.FeedRefreshEngine; import com.google.common.base.Preconditions; @@ -100,8 +100,8 @@ public class PubSubHubbubCallbackREST { return Response.status(Status.BAD_REQUEST).entity("empty body received").build(); } - FetchedFeed fetchedFeed = parser.parse(null, bytes); - String topic = fetchedFeed.getFeed().getPushTopic(); + FeedParserResult feedParserResult = parser.parse(null, bytes); + String topic = feedParserResult.getFeed().getPushTopic(); if (StringUtils.isBlank(topic)) { return Response.status(Status.BAD_REQUEST).entity("empty topic received").build(); }