From 23a91aab124cdef64ffc936c675a51ec783cb674 Mon Sep 17 00:00:00 2001 From: Athou Date: Mon, 21 Jul 2025 16:46:40 +0200 Subject: [PATCH] providers can now return multiple urls --- .../commafeed/backend/feed/FeedFetcher.java | 13 ++--- .../backend/urlprovider/FeedURLProvider.java | 4 +- .../InPageReferenceFeedURLProvider.java | 24 +++----- .../urlprovider/YoutubeFeedURLProvider.java | 8 ++- .../InPageReferenceFeedURLProviderTest.java | 56 +++---------------- .../YoutubeFeedURLProviderTest.java | 8 ++- 6 files changed, 36 insertions(+), 77 deletions(-) diff --git a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedFetcher.java b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedFetcher.java index 3b98d4f5..010e7818 100644 --- a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedFetcher.java +++ b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedFetcher.java @@ -98,14 +98,11 @@ public class FeedFetcher { } private static String extractFeedUrl(List urlProviders, String url, String urlContent) { - for (FeedURLProvider urlProvider : urlProviders) { - String feedUrl = urlProvider.get(url, urlContent); - if (feedUrl != null) { - return feedUrl; - } - } - - return null; + return urlProviders.stream() + .flatMap(provider -> provider.get(url, urlContent).stream()) + .filter(StringUtils::isNotBlank) + .findFirst() + .orElse(null); } public record FeedFetcherResult(FeedParserResult feed, String urlAfterRedirect, String lastModifiedHeader, String lastETagHeader, diff --git a/commafeed-server/src/main/java/com/commafeed/backend/urlprovider/FeedURLProvider.java b/commafeed-server/src/main/java/com/commafeed/backend/urlprovider/FeedURLProvider.java index 91cf9c4c..c3983f52 100644 --- a/commafeed-server/src/main/java/com/commafeed/backend/urlprovider/FeedURLProvider.java +++ b/commafeed-server/src/main/java/com/commafeed/backend/urlprovider/FeedURLProvider.java @@ -1,10 +1,12 @@ package com.commafeed.backend.urlprovider; +import java.util.List; + /** * Tries to find a feed url given the url and page content */ public interface FeedURLProvider { - String get(String url, String urlContent); + List get(String url, String urlContent); } diff --git a/commafeed-server/src/main/java/com/commafeed/backend/urlprovider/InPageReferenceFeedURLProvider.java b/commafeed-server/src/main/java/com/commafeed/backend/urlprovider/InPageReferenceFeedURLProvider.java index b7102f87..4a5a239b 100644 --- a/commafeed-server/src/main/java/com/commafeed/backend/urlprovider/InPageReferenceFeedURLProvider.java +++ b/commafeed-server/src/main/java/com/commafeed/backend/urlprovider/InPageReferenceFeedURLProvider.java @@ -1,31 +1,25 @@ package com.commafeed.backend.urlprovider; +import java.util.List; +import java.util.stream.Stream; + import jakarta.inject.Singleton; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; -import org.jsoup.select.Elements; @Singleton public class InPageReferenceFeedURLProvider implements FeedURLProvider { @Override - public String get(String url, String urlContent) { - String foundUrl = null; - + public List get(String url, String urlContent) { Document doc = Jsoup.parse(urlContent, url); - String root = doc.children().get(0).tagName(); - if ("html".equals(root)) { - Elements atom = doc.select("link[type=application/atom+xml]"); - Elements rss = doc.select("link[type=application/rss+xml]"); - if (!atom.isEmpty()) { - foundUrl = atom.get(0).attr("abs:href"); - } else if (!rss.isEmpty()) { - foundUrl = rss.get(0).attr("abs:href"); - } + if (!"html".equals(doc.children().get(0).tagName())) { + return List.of(); } - - return foundUrl; + return Stream.concat(doc.select("link[type=application/atom+xml]").stream(), doc.select("link[type=application/rss+xml]").stream()) + .map(node -> node.attr("abs:href")) + .toList(); } } diff --git a/commafeed-server/src/main/java/com/commafeed/backend/urlprovider/YoutubeFeedURLProvider.java b/commafeed-server/src/main/java/com/commafeed/backend/urlprovider/YoutubeFeedURLProvider.java index cc7c5a4e..85e8a866 100644 --- a/commafeed-server/src/main/java/com/commafeed/backend/urlprovider/YoutubeFeedURLProvider.java +++ b/commafeed-server/src/main/java/com/commafeed/backend/urlprovider/YoutubeFeedURLProvider.java @@ -1,5 +1,7 @@ package com.commafeed.backend.urlprovider; +import java.util.List; + import jakarta.inject.Singleton; import org.apache.commons.lang3.StringUtils; @@ -17,12 +19,12 @@ public class YoutubeFeedURLProvider implements FeedURLProvider { private static final String REPLACEMENT_PREFIX = "https://www.youtube.com/feeds/videos.xml?channel_id="; @Override - public String get(String url, String urlContent) { + public List get(String url, String urlContent) { if (!StringUtils.startsWithIgnoreCase(url, PREFIX)) { - return null; + return List.of(); } - return REPLACEMENT_PREFIX + url.substring(PREFIX.length()); + return List.of(REPLACEMENT_PREFIX + url.substring(PREFIX.length())); } } diff --git a/commafeed-server/src/test/java/com/commafeed/backend/urlprovider/InPageReferenceFeedURLProviderTest.java b/commafeed-server/src/test/java/com/commafeed/backend/urlprovider/InPageReferenceFeedURLProviderTest.java index f6169c30..6286afe2 100644 --- a/commafeed-server/src/test/java/com/commafeed/backend/urlprovider/InPageReferenceFeedURLProviderTest.java +++ b/commafeed-server/src/test/java/com/commafeed/backend/urlprovider/InPageReferenceFeedURLProviderTest.java @@ -1,5 +1,7 @@ package com.commafeed.backend.urlprovider; +import java.util.List; + import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -8,41 +10,7 @@ class InPageReferenceFeedURLProviderTest { private final InPageReferenceFeedURLProvider provider = new InPageReferenceFeedURLProvider(); @Test - void extractsAtomFeedURL() { - String url = "http://example.com"; - String html = """ - - - - - - - """; - - String result = provider.get(url, html); - - Assertions.assertEquals("http://example.com/feed.atom", result); - } - - @Test - void extractsRSSFeedURL() { - String url = "http://example.com"; - String html = """ - - - - - - - """; - - String result = provider.get(url, html); - - Assertions.assertEquals("http://example.com/feed.rss", result); - } - - @Test - void prefersAtomOverRSS() { + void extractUrls() { String url = "http://example.com"; String html = """ @@ -54,26 +22,22 @@ class InPageReferenceFeedURLProviderTest { """; - String result = provider.get(url, html); - - Assertions.assertEquals("http://example.com/feed.atom", result); + Assertions.assertIterableEquals(List.of("http://example.com/feed.atom", "http://example.com/feed.rss"), provider.get(url, html)); } @Test - void returnsNullForNonHtmlContent() { + void returnsEmptyListForNonHtmlContent() { String url = "http://example.com"; - String content = """ + String html = """ """; - String result = provider.get(url, content); - - Assertions.assertNull(result); + Assertions.assertTrue(provider.get(url, html).isEmpty()); } @Test - void returnsNullForHtmlWithoutFeedLinks() { + void returnsEmptyListForHtmlWithoutFeedLinks() { String url = "http://example.com"; String html = """ @@ -84,8 +48,6 @@ class InPageReferenceFeedURLProviderTest { """; - String result = provider.get(url, html); - - Assertions.assertNull(result); + Assertions.assertTrue(provider.get(url, html).isEmpty()); } } \ No newline at end of file diff --git a/commafeed-server/src/test/java/com/commafeed/backend/urlprovider/YoutubeFeedURLProviderTest.java b/commafeed-server/src/test/java/com/commafeed/backend/urlprovider/YoutubeFeedURLProviderTest.java index 0788e7dc..fec3778a 100644 --- a/commafeed-server/src/test/java/com/commafeed/backend/urlprovider/YoutubeFeedURLProviderTest.java +++ b/commafeed-server/src/test/java/com/commafeed/backend/urlprovider/YoutubeFeedURLProviderTest.java @@ -1,5 +1,7 @@ package com.commafeed.backend.urlprovider; +import java.util.List; + import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -9,14 +11,14 @@ class YoutubeFeedURLProviderTest { @Test void matchesYoutubeChannelURL() { - Assertions.assertEquals("https://www.youtube.com/feeds/videos.xml?channel_id=abc", + Assertions.assertIterableEquals(List.of("https://www.youtube.com/feeds/videos.xml?channel_id=abc"), provider.get("https://www.youtube.com/channel/abc", null)); } @Test void doesNotmatchYoutubeChannelURL() { - Assertions.assertNull(provider.get("https://www.anothersite.com/channel/abc", null)); - Assertions.assertNull(provider.get("https://www.youtube.com/user/abc", null)); + Assertions.assertTrue(provider.get("https://www.anothersite.com/channel/abc", null).isEmpty()); + Assertions.assertTrue(provider.get("https://www.youtube.com/user/abc", null).isEmpty()); } } \ No newline at end of file