forked from Archives/Athou_commafeed
providers can now return multiple urls
This commit is contained in:
@@ -98,14 +98,11 @@ public class FeedFetcher {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private static String extractFeedUrl(List<FeedURLProvider> urlProviders, String url, String urlContent) {
|
private static String extractFeedUrl(List<FeedURLProvider> urlProviders, String url, String urlContent) {
|
||||||
for (FeedURLProvider urlProvider : urlProviders) {
|
return urlProviders.stream()
|
||||||
String feedUrl = urlProvider.get(url, urlContent);
|
.flatMap(provider -> provider.get(url, urlContent).stream())
|
||||||
if (feedUrl != null) {
|
.filter(StringUtils::isNotBlank)
|
||||||
return feedUrl;
|
.findFirst()
|
||||||
}
|
.orElse(null);
|
||||||
}
|
|
||||||
|
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public record FeedFetcherResult(FeedParserResult feed, String urlAfterRedirect, String lastModifiedHeader, String lastETagHeader,
|
public record FeedFetcherResult(FeedParserResult feed, String urlAfterRedirect, String lastModifiedHeader, String lastETagHeader,
|
||||||
|
|||||||
@@ -1,10 +1,12 @@
|
|||||||
package com.commafeed.backend.urlprovider;
|
package com.commafeed.backend.urlprovider;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tries to find a feed url given the url and page content
|
* Tries to find a feed url given the url and page content
|
||||||
*/
|
*/
|
||||||
public interface FeedURLProvider {
|
public interface FeedURLProvider {
|
||||||
|
|
||||||
String get(String url, String urlContent);
|
List<String> get(String url, String urlContent);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,31 +1,25 @@
|
|||||||
package com.commafeed.backend.urlprovider;
|
package com.commafeed.backend.urlprovider;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import jakarta.inject.Singleton;
|
import jakarta.inject.Singleton;
|
||||||
|
|
||||||
import org.jsoup.Jsoup;
|
import org.jsoup.Jsoup;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.select.Elements;
|
|
||||||
|
|
||||||
@Singleton
|
@Singleton
|
||||||
public class InPageReferenceFeedURLProvider implements FeedURLProvider {
|
public class InPageReferenceFeedURLProvider implements FeedURLProvider {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String get(String url, String urlContent) {
|
public List<String> get(String url, String urlContent) {
|
||||||
String foundUrl = null;
|
|
||||||
|
|
||||||
Document doc = Jsoup.parse(urlContent, url);
|
Document doc = Jsoup.parse(urlContent, url);
|
||||||
String root = doc.children().get(0).tagName();
|
if (!"html".equals(doc.children().get(0).tagName())) {
|
||||||
if ("html".equals(root)) {
|
return List.of();
|
||||||
Elements atom = doc.select("link[type=application/atom+xml]");
|
|
||||||
Elements rss = doc.select("link[type=application/rss+xml]");
|
|
||||||
if (!atom.isEmpty()) {
|
|
||||||
foundUrl = atom.get(0).attr("abs:href");
|
|
||||||
} else if (!rss.isEmpty()) {
|
|
||||||
foundUrl = rss.get(0).attr("abs:href");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
return Stream.concat(doc.select("link[type=application/atom+xml]").stream(), doc.select("link[type=application/rss+xml]").stream())
|
||||||
return foundUrl;
|
.map(node -> node.attr("abs:href"))
|
||||||
|
.toList();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
package com.commafeed.backend.urlprovider;
|
package com.commafeed.backend.urlprovider;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import jakarta.inject.Singleton;
|
import jakarta.inject.Singleton;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
@@ -17,12 +19,12 @@ public class YoutubeFeedURLProvider implements FeedURLProvider {
|
|||||||
private static final String REPLACEMENT_PREFIX = "https://www.youtube.com/feeds/videos.xml?channel_id=";
|
private static final String REPLACEMENT_PREFIX = "https://www.youtube.com/feeds/videos.xml?channel_id=";
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String get(String url, String urlContent) {
|
public List<String> get(String url, String urlContent) {
|
||||||
if (!StringUtils.startsWithIgnoreCase(url, PREFIX)) {
|
if (!StringUtils.startsWithIgnoreCase(url, PREFIX)) {
|
||||||
return null;
|
return List.of();
|
||||||
}
|
}
|
||||||
|
|
||||||
return REPLACEMENT_PREFIX + url.substring(PREFIX.length());
|
return List.of(REPLACEMENT_PREFIX + url.substring(PREFIX.length()));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
package com.commafeed.backend.urlprovider;
|
package com.commafeed.backend.urlprovider;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import org.junit.jupiter.api.Assertions;
|
import org.junit.jupiter.api.Assertions;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
@@ -8,41 +10,7 @@ class InPageReferenceFeedURLProviderTest {
|
|||||||
private final InPageReferenceFeedURLProvider provider = new InPageReferenceFeedURLProvider();
|
private final InPageReferenceFeedURLProvider provider = new InPageReferenceFeedURLProvider();
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void extractsAtomFeedURL() {
|
void extractUrls() {
|
||||||
String url = "http://example.com";
|
|
||||||
String html = """
|
|
||||||
<html>
|
|
||||||
<head>
|
|
||||||
<link type="application/atom+xml" href="/feed.atom">
|
|
||||||
</head>
|
|
||||||
<body>
|
|
||||||
</body>
|
|
||||||
</html>""";
|
|
||||||
|
|
||||||
String result = provider.get(url, html);
|
|
||||||
|
|
||||||
Assertions.assertEquals("http://example.com/feed.atom", result);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void extractsRSSFeedURL() {
|
|
||||||
String url = "http://example.com";
|
|
||||||
String html = """
|
|
||||||
<html>
|
|
||||||
<head>
|
|
||||||
<link type="application/rss+xml" href="/feed.rss">
|
|
||||||
</head>
|
|
||||||
<body>
|
|
||||||
</body>
|
|
||||||
</html>""";
|
|
||||||
|
|
||||||
String result = provider.get(url, html);
|
|
||||||
|
|
||||||
Assertions.assertEquals("http://example.com/feed.rss", result);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void prefersAtomOverRSS() {
|
|
||||||
String url = "http://example.com";
|
String url = "http://example.com";
|
||||||
String html = """
|
String html = """
|
||||||
<html>
|
<html>
|
||||||
@@ -54,26 +22,22 @@ class InPageReferenceFeedURLProviderTest {
|
|||||||
</body>
|
</body>
|
||||||
</html>""";
|
</html>""";
|
||||||
|
|
||||||
String result = provider.get(url, html);
|
Assertions.assertIterableEquals(List.of("http://example.com/feed.atom", "http://example.com/feed.rss"), provider.get(url, html));
|
||||||
|
|
||||||
Assertions.assertEquals("http://example.com/feed.atom", result);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void returnsNullForNonHtmlContent() {
|
void returnsEmptyListForNonHtmlContent() {
|
||||||
String url = "http://example.com";
|
String url = "http://example.com";
|
||||||
String content = """
|
String html = """
|
||||||
<?xml version="1.0"?>
|
<?xml version="1.0"?>
|
||||||
<feed></feed>
|
<feed></feed>
|
||||||
</xml>""";
|
</xml>""";
|
||||||
|
|
||||||
String result = provider.get(url, content);
|
Assertions.assertTrue(provider.get(url, html).isEmpty());
|
||||||
|
|
||||||
Assertions.assertNull(result);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void returnsNullForHtmlWithoutFeedLinks() {
|
void returnsEmptyListForHtmlWithoutFeedLinks() {
|
||||||
String url = "http://example.com";
|
String url = "http://example.com";
|
||||||
String html = """
|
String html = """
|
||||||
<html>
|
<html>
|
||||||
@@ -84,8 +48,6 @@ class InPageReferenceFeedURLProviderTest {
|
|||||||
</body>
|
</body>
|
||||||
</html>""";
|
</html>""";
|
||||||
|
|
||||||
String result = provider.get(url, html);
|
Assertions.assertTrue(provider.get(url, html).isEmpty());
|
||||||
|
|
||||||
Assertions.assertNull(result);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1,5 +1,7 @@
|
|||||||
package com.commafeed.backend.urlprovider;
|
package com.commafeed.backend.urlprovider;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import org.junit.jupiter.api.Assertions;
|
import org.junit.jupiter.api.Assertions;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
@@ -9,14 +11,14 @@ class YoutubeFeedURLProviderTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
void matchesYoutubeChannelURL() {
|
void matchesYoutubeChannelURL() {
|
||||||
Assertions.assertEquals("https://www.youtube.com/feeds/videos.xml?channel_id=abc",
|
Assertions.assertIterableEquals(List.of("https://www.youtube.com/feeds/videos.xml?channel_id=abc"),
|
||||||
provider.get("https://www.youtube.com/channel/abc", null));
|
provider.get("https://www.youtube.com/channel/abc", null));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void doesNotmatchYoutubeChannelURL() {
|
void doesNotmatchYoutubeChannelURL() {
|
||||||
Assertions.assertNull(provider.get("https://www.anothersite.com/channel/abc", null));
|
Assertions.assertTrue(provider.get("https://www.anothersite.com/channel/abc", null).isEmpty());
|
||||||
Assertions.assertNull(provider.get("https://www.youtube.com/user/abc", null));
|
Assertions.assertTrue(provider.get("https://www.youtube.com/user/abc", null).isEmpty());
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user