forked from Archives/Athou_commafeed
Feed URL providers can now return multiple URLs
This commit is contained in:
@@ -98,14 +98,11 @@ public class FeedFetcher {
|
||||
}
|
||||
|
||||
private static String extractFeedUrl(List<FeedURLProvider> urlProviders, String url, String urlContent) {
|
||||
for (FeedURLProvider urlProvider : urlProviders) {
|
||||
String feedUrl = urlProvider.get(url, urlContent);
|
||||
if (feedUrl != null) {
|
||||
return feedUrl;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
return urlProviders.stream()
|
||||
.flatMap(provider -> provider.get(url, urlContent).stream())
|
||||
.filter(StringUtils::isNotBlank)
|
||||
.findFirst()
|
||||
.orElse(null);
|
||||
}
|
||||
|
||||
public record FeedFetcherResult(FeedParserResult feed, String urlAfterRedirect, String lastModifiedHeader, String lastETagHeader,
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
package com.commafeed.backend.urlprovider;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Tries to find feed urls given the url and page content; returns an empty list when none are found
|
||||
*/
|
||||
public interface FeedURLProvider {
|
||||
|
||||
String get(String url, String urlContent);
|
||||
List<String> get(String url, String urlContent);
|
||||
|
||||
}
|
||||
|
||||
@@ -1,31 +1,25 @@
|
||||
package com.commafeed.backend.urlprovider;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import jakarta.inject.Singleton;
|
||||
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
@Singleton
|
||||
public class InPageReferenceFeedURLProvider implements FeedURLProvider {
|
||||
|
||||
@Override
|
||||
public String get(String url, String urlContent) {
|
||||
String foundUrl = null;
|
||||
|
||||
public List<String> get(String url, String urlContent) {
|
||||
Document doc = Jsoup.parse(urlContent, url);
|
||||
String root = doc.children().get(0).tagName();
|
||||
if ("html".equals(root)) {
|
||||
Elements atom = doc.select("link[type=application/atom+xml]");
|
||||
Elements rss = doc.select("link[type=application/rss+xml]");
|
||||
if (!atom.isEmpty()) {
|
||||
foundUrl = atom.get(0).attr("abs:href");
|
||||
} else if (!rss.isEmpty()) {
|
||||
foundUrl = rss.get(0).attr("abs:href");
|
||||
}
|
||||
if (!"html".equals(doc.children().get(0).tagName())) {
|
||||
return List.of();
|
||||
}
|
||||
|
||||
return foundUrl;
|
||||
return Stream.concat(doc.select("link[type=application/atom+xml]").stream(), doc.select("link[type=application/rss+xml]").stream())
|
||||
.map(node -> node.attr("abs:href"))
|
||||
.toList();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
package com.commafeed.backend.urlprovider;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import jakarta.inject.Singleton;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
@@ -17,12 +19,12 @@ public class YoutubeFeedURLProvider implements FeedURLProvider {
|
||||
private static final String REPLACEMENT_PREFIX = "https://www.youtube.com/feeds/videos.xml?channel_id=";
|
||||
|
||||
@Override
|
||||
public String get(String url, String urlContent) {
|
||||
public List<String> get(String url, String urlContent) {
|
||||
if (!StringUtils.startsWithIgnoreCase(url, PREFIX)) {
|
||||
return null;
|
||||
return List.of();
|
||||
}
|
||||
|
||||
return REPLACEMENT_PREFIX + url.substring(PREFIX.length());
|
||||
return List.of(REPLACEMENT_PREFIX + url.substring(PREFIX.length()));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
package com.commafeed.backend.urlprovider;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
@@ -8,41 +10,7 @@ class InPageReferenceFeedURLProviderTest {
|
||||
private final InPageReferenceFeedURLProvider provider = new InPageReferenceFeedURLProvider();
|
||||
|
||||
@Test
|
||||
void extractsAtomFeedURL() {
|
||||
String url = "http://example.com";
|
||||
String html = """
|
||||
<html>
|
||||
<head>
|
||||
<link type="application/atom+xml" href="/feed.atom">
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>""";
|
||||
|
||||
String result = provider.get(url, html);
|
||||
|
||||
Assertions.assertEquals("http://example.com/feed.atom", result);
|
||||
}
|
||||
|
||||
@Test
|
||||
void extractsRSSFeedURL() {
|
||||
String url = "http://example.com";
|
||||
String html = """
|
||||
<html>
|
||||
<head>
|
||||
<link type="application/rss+xml" href="/feed.rss">
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>""";
|
||||
|
||||
String result = provider.get(url, html);
|
||||
|
||||
Assertions.assertEquals("http://example.com/feed.rss", result);
|
||||
}
|
||||
|
||||
@Test
|
||||
void prefersAtomOverRSS() {
|
||||
void extractUrls() {
|
||||
String url = "http://example.com";
|
||||
String html = """
|
||||
<html>
|
||||
@@ -54,26 +22,22 @@ class InPageReferenceFeedURLProviderTest {
|
||||
</body>
|
||||
</html>""";
|
||||
|
||||
String result = provider.get(url, html);
|
||||
|
||||
Assertions.assertEquals("http://example.com/feed.atom", result);
|
||||
Assertions.assertIterableEquals(List.of("http://example.com/feed.atom", "http://example.com/feed.rss"), provider.get(url, html));
|
||||
}
|
||||
|
||||
@Test
|
||||
void returnsNullForNonHtmlContent() {
|
||||
void returnsEmptyListForNonHtmlContent() {
|
||||
String url = "http://example.com";
|
||||
String content = """
|
||||
String html = """
|
||||
<?xml version="1.0"?>
|
||||
<feed></feed>
|
||||
</xml>""";
|
||||
|
||||
String result = provider.get(url, content);
|
||||
|
||||
Assertions.assertNull(result);
|
||||
Assertions.assertTrue(provider.get(url, html).isEmpty());
|
||||
}
|
||||
|
||||
@Test
|
||||
void returnsNullForHtmlWithoutFeedLinks() {
|
||||
void returnsEmptyListForHtmlWithoutFeedLinks() {
|
||||
String url = "http://example.com";
|
||||
String html = """
|
||||
<html>
|
||||
@@ -84,8 +48,6 @@ class InPageReferenceFeedURLProviderTest {
|
||||
</body>
|
||||
</html>""";
|
||||
|
||||
String result = provider.get(url, html);
|
||||
|
||||
Assertions.assertNull(result);
|
||||
Assertions.assertTrue(provider.get(url, html).isEmpty());
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,7 @@
|
||||
package com.commafeed.backend.urlprovider;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
@@ -9,14 +11,14 @@ class YoutubeFeedURLProviderTest {
|
||||
|
||||
@Test
|
||||
void matchesYoutubeChannelURL() {
|
||||
Assertions.assertEquals("https://www.youtube.com/feeds/videos.xml?channel_id=abc",
|
||||
Assertions.assertIterableEquals(List.of("https://www.youtube.com/feeds/videos.xml?channel_id=abc"),
|
||||
provider.get("https://www.youtube.com/channel/abc", null));
|
||||
}
|
||||
|
||||
@Test
|
||||
void doesNotmatchYoutubeChannelURL() {
|
||||
Assertions.assertNull(provider.get("https://www.anothersite.com/channel/abc", null));
|
||||
Assertions.assertNull(provider.get("https://www.youtube.com/user/abc", null));
|
||||
Assertions.assertTrue(provider.get("https://www.anothersite.com/channel/abc", null).isEmpty());
|
||||
Assertions.assertTrue(provider.get("https://www.youtube.com/user/abc", null).isEmpty());
|
||||
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user