providers can now return multiple urls

This commit is contained in:
Athou
2025-07-21 16:46:40 +02:00
parent 085a3cbb50
commit 23a91aab12
6 changed files with 36 additions and 77 deletions

View File

@@ -98,14 +98,11 @@ public class FeedFetcher {
} }
private static String extractFeedUrl(List<FeedURLProvider> urlProviders, String url, String urlContent) { private static String extractFeedUrl(List<FeedURLProvider> urlProviders, String url, String urlContent) {
for (FeedURLProvider urlProvider : urlProviders) { return urlProviders.stream()
String feedUrl = urlProvider.get(url, urlContent); .flatMap(provider -> provider.get(url, urlContent).stream())
if (feedUrl != null) { .filter(StringUtils::isNotBlank)
return feedUrl; .findFirst()
} .orElse(null);
}
return null;
} }
public record FeedFetcherResult(FeedParserResult feed, String urlAfterRedirect, String lastModifiedHeader, String lastETagHeader, public record FeedFetcherResult(FeedParserResult feed, String urlAfterRedirect, String lastModifiedHeader, String lastETagHeader,

View File

@@ -1,10 +1,12 @@
package com.commafeed.backend.urlprovider; package com.commafeed.backend.urlprovider;
import java.util.List;
/** /**
* Tries to find a feed url given the url and page content * Tries to find a feed url given the url and page content
*/ */
public interface FeedURLProvider { public interface FeedURLProvider {
String get(String url, String urlContent); List<String> get(String url, String urlContent);
} }

View File

@@ -1,31 +1,25 @@
package com.commafeed.backend.urlprovider; package com.commafeed.backend.urlprovider;
import java.util.List;
import java.util.stream.Stream;
import jakarta.inject.Singleton; import jakarta.inject.Singleton;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
@Singleton @Singleton
public class InPageReferenceFeedURLProvider implements FeedURLProvider { public class InPageReferenceFeedURLProvider implements FeedURLProvider {
@Override @Override
public String get(String url, String urlContent) { public List<String> get(String url, String urlContent) {
String foundUrl = null;
Document doc = Jsoup.parse(urlContent, url); Document doc = Jsoup.parse(urlContent, url);
String root = doc.children().get(0).tagName(); if (!"html".equals(doc.children().get(0).tagName())) {
if ("html".equals(root)) { return List.of();
Elements atom = doc.select("link[type=application/atom+xml]");
Elements rss = doc.select("link[type=application/rss+xml]");
if (!atom.isEmpty()) {
foundUrl = atom.get(0).attr("abs:href");
} else if (!rss.isEmpty()) {
foundUrl = rss.get(0).attr("abs:href");
}
} }
return Stream.concat(doc.select("link[type=application/atom+xml]").stream(), doc.select("link[type=application/rss+xml]").stream())
return foundUrl; .map(node -> node.attr("abs:href"))
.toList();
} }
} }

View File

@@ -1,5 +1,7 @@
package com.commafeed.backend.urlprovider; package com.commafeed.backend.urlprovider;
import java.util.List;
import jakarta.inject.Singleton; import jakarta.inject.Singleton;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
@@ -17,12 +19,12 @@ public class YoutubeFeedURLProvider implements FeedURLProvider {
private static final String REPLACEMENT_PREFIX = "https://www.youtube.com/feeds/videos.xml?channel_id="; private static final String REPLACEMENT_PREFIX = "https://www.youtube.com/feeds/videos.xml?channel_id=";
@Override @Override
public String get(String url, String urlContent) { public List<String> get(String url, String urlContent) {
if (!StringUtils.startsWithIgnoreCase(url, PREFIX)) { if (!StringUtils.startsWithIgnoreCase(url, PREFIX)) {
return null; return List.of();
} }
return REPLACEMENT_PREFIX + url.substring(PREFIX.length()); return List.of(REPLACEMENT_PREFIX + url.substring(PREFIX.length()));
} }
} }

View File

@@ -1,5 +1,7 @@
package com.commafeed.backend.urlprovider; package com.commafeed.backend.urlprovider;
import java.util.List;
import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
@@ -8,41 +10,7 @@ class InPageReferenceFeedURLProviderTest {
private final InPageReferenceFeedURLProvider provider = new InPageReferenceFeedURLProvider(); private final InPageReferenceFeedURLProvider provider = new InPageReferenceFeedURLProvider();
@Test @Test
void extractsAtomFeedURL() { void extractUrls() {
String url = "http://example.com";
String html = """
<html>
<head>
<link type="application/atom+xml" href="/feed.atom">
</head>
<body>
</body>
</html>""";
String result = provider.get(url, html);
Assertions.assertEquals("http://example.com/feed.atom", result);
}
@Test
void extractsRSSFeedURL() {
String url = "http://example.com";
String html = """
<html>
<head>
<link type="application/rss+xml" href="/feed.rss">
</head>
<body>
</body>
</html>""";
String result = provider.get(url, html);
Assertions.assertEquals("http://example.com/feed.rss", result);
}
@Test
void prefersAtomOverRSS() {
String url = "http://example.com"; String url = "http://example.com";
String html = """ String html = """
<html> <html>
@@ -54,26 +22,22 @@ class InPageReferenceFeedURLProviderTest {
</body> </body>
</html>"""; </html>""";
String result = provider.get(url, html); Assertions.assertIterableEquals(List.of("http://example.com/feed.atom", "http://example.com/feed.rss"), provider.get(url, html));
Assertions.assertEquals("http://example.com/feed.atom", result);
} }
@Test @Test
void returnsNullForNonHtmlContent() { void returnsEmptyListForNonHtmlContent() {
String url = "http://example.com"; String url = "http://example.com";
String content = """ String html = """
<?xml version="1.0"?> <?xml version="1.0"?>
<feed></feed> <feed></feed>
</xml>"""; </xml>""";
String result = provider.get(url, content); Assertions.assertTrue(provider.get(url, html).isEmpty());
Assertions.assertNull(result);
} }
@Test @Test
void returnsNullForHtmlWithoutFeedLinks() { void returnsEmptyListForHtmlWithoutFeedLinks() {
String url = "http://example.com"; String url = "http://example.com";
String html = """ String html = """
<html> <html>
@@ -84,8 +48,6 @@ class InPageReferenceFeedURLProviderTest {
</body> </body>
</html>"""; </html>""";
String result = provider.get(url, html); Assertions.assertTrue(provider.get(url, html).isEmpty());
Assertions.assertNull(result);
} }
} }

View File

@@ -1,5 +1,7 @@
package com.commafeed.backend.urlprovider; package com.commafeed.backend.urlprovider;
import java.util.List;
import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
@@ -9,14 +11,14 @@ class YoutubeFeedURLProviderTest {
@Test @Test
void matchesYoutubeChannelURL() { void matchesYoutubeChannelURL() {
Assertions.assertEquals("https://www.youtube.com/feeds/videos.xml?channel_id=abc", Assertions.assertIterableEquals(List.of("https://www.youtube.com/feeds/videos.xml?channel_id=abc"),
provider.get("https://www.youtube.com/channel/abc", null)); provider.get("https://www.youtube.com/channel/abc", null));
} }
@Test @Test
void doesNotmatchYoutubeChannelURL() { void doesNotmatchYoutubeChannelURL() {
Assertions.assertNull(provider.get("https://www.anothersite.com/channel/abc", null)); Assertions.assertTrue(provider.get("https://www.anothersite.com/channel/abc", null).isEmpty());
Assertions.assertNull(provider.get("https://www.youtube.com/user/abc", null)); Assertions.assertTrue(provider.get("https://www.youtube.com/user/abc", null).isEmpty());
} }
} }