mirror of
https://github.com/Athou/commafeed.git
synced 2026-03-21 21:37:29 +00:00
Merge branch 'ildar-shaimordanov-master'
This commit is contained in:
@@ -26,6 +26,9 @@ import com.commafeed.backend.task.OldStatusesCleanupTask;
|
|||||||
import com.commafeed.backend.task.OrphanedContentsCleanupTask;
|
import com.commafeed.backend.task.OrphanedContentsCleanupTask;
|
||||||
import com.commafeed.backend.task.OrphanedFeedsCleanupTask;
|
import com.commafeed.backend.task.OrphanedFeedsCleanupTask;
|
||||||
import com.commafeed.backend.task.ScheduledTask;
|
import com.commafeed.backend.task.ScheduledTask;
|
||||||
|
import com.commafeed.backend.urlprovider.FeedURLProvider;
|
||||||
|
import com.commafeed.backend.urlprovider.InPageReferenceFeedURLProvider;
|
||||||
|
import com.commafeed.backend.urlprovider.YoutubeFeedURLProvider;
|
||||||
import com.google.inject.AbstractModule;
|
import com.google.inject.AbstractModule;
|
||||||
import com.google.inject.Provides;
|
import com.google.inject.Provides;
|
||||||
import com.google.inject.multibindings.Multibinder;
|
import com.google.inject.multibindings.Multibinder;
|
||||||
@@ -55,6 +58,10 @@ public class CommaFeedModule extends AbstractModule {
|
|||||||
faviconMultibinder.addBinding().to(FacebookFaviconFetcher.class);
|
faviconMultibinder.addBinding().to(FacebookFaviconFetcher.class);
|
||||||
faviconMultibinder.addBinding().to(DefaultFaviconFetcher.class);
|
faviconMultibinder.addBinding().to(DefaultFaviconFetcher.class);
|
||||||
|
|
||||||
|
Multibinder<FeedURLProvider> urlProviderMultibinder = Multibinder.newSetBinder(binder(), FeedURLProvider.class);
|
||||||
|
urlProviderMultibinder.addBinding().to(InPageReferenceFeedURLProvider.class);
|
||||||
|
urlProviderMultibinder.addBinding().to(YoutubeFeedURLProvider.class);
|
||||||
|
|
||||||
Multibinder<ScheduledTask> taskMultibinder = Multibinder.newSetBinder(binder(), ScheduledTask.class);
|
Multibinder<ScheduledTask> taskMultibinder = Multibinder.newSetBinder(binder(), ScheduledTask.class);
|
||||||
taskMultibinder.addBinding().to(OldStatusesCleanupTask.class);
|
taskMultibinder.addBinding().to(OldStatusesCleanupTask.class);
|
||||||
taskMultibinder.addBinding().to(OldEntriesCleanupTask.class);
|
taskMultibinder.addBinding().to(OldEntriesCleanupTask.class);
|
||||||
|
|||||||
@@ -2,26 +2,24 @@ package com.commafeed.backend.feed;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import javax.inject.Inject;
|
import javax.inject.Inject;
|
||||||
import javax.inject.Singleton;
|
import javax.inject.Singleton;
|
||||||
|
|
||||||
import lombok.RequiredArgsConstructor;
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
|
||||||
|
|
||||||
import org.apache.commons.codec.binary.StringUtils;
|
import org.apache.commons.codec.binary.StringUtils;
|
||||||
import org.apache.commons.codec.digest.DigestUtils;
|
import org.apache.commons.codec.digest.DigestUtils;
|
||||||
import org.apache.http.client.ClientProtocolException;
|
|
||||||
import org.jsoup.Jsoup;
|
|
||||||
import org.jsoup.nodes.Document;
|
|
||||||
import org.jsoup.select.Elements;
|
|
||||||
|
|
||||||
import com.commafeed.backend.HttpGetter;
|
import com.commafeed.backend.HttpGetter;
|
||||||
import com.commafeed.backend.HttpGetter.HttpResult;
|
import com.commafeed.backend.HttpGetter.HttpResult;
|
||||||
import com.commafeed.backend.HttpGetter.NotModifiedException;
|
import com.commafeed.backend.HttpGetter.NotModifiedException;
|
||||||
import com.commafeed.backend.model.Feed;
|
import com.commafeed.backend.model.Feed;
|
||||||
|
import com.commafeed.backend.urlprovider.FeedURLProvider;
|
||||||
import com.rometools.rome.io.FeedException;
|
import com.rometools.rome.io.FeedException;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@RequiredArgsConstructor(onConstructor = @__({ @Inject }))
|
@RequiredArgsConstructor(onConstructor = @__({ @Inject }))
|
||||||
@Singleton
|
@Singleton
|
||||||
@@ -29,9 +27,10 @@ public class FeedFetcher {
|
|||||||
|
|
||||||
private final FeedParser parser;
|
private final FeedParser parser;
|
||||||
private final HttpGetter getter;
|
private final HttpGetter getter;
|
||||||
|
private final Set<FeedURLProvider> urlProviders;
|
||||||
|
|
||||||
public FetchedFeed fetch(String feedUrl, boolean extractFeedUrlFromHtml, String lastModified, String eTag, Date lastPublishedDate,
|
public FetchedFeed fetch(String feedUrl, boolean extractFeedUrlFromHtml, String lastModified, String eTag, Date lastPublishedDate,
|
||||||
String lastContentHash) throws FeedException, ClientProtocolException, IOException, NotModifiedException {
|
String lastContentHash) throws FeedException, IOException, NotModifiedException {
|
||||||
log.debug("Fetching feed {}", feedUrl);
|
log.debug("Fetching feed {}", feedUrl);
|
||||||
FetchedFeed fetchedFeed = null;
|
FetchedFeed fetchedFeed = null;
|
||||||
|
|
||||||
@@ -44,7 +43,7 @@ public class FeedFetcher {
|
|||||||
fetchedFeed = parser.parse(result.getUrlAfterRedirect(), content);
|
fetchedFeed = parser.parse(result.getUrlAfterRedirect(), content);
|
||||||
} catch (FeedException e) {
|
} catch (FeedException e) {
|
||||||
if (extractFeedUrlFromHtml) {
|
if (extractFeedUrlFromHtml) {
|
||||||
String extractedUrl = extractFeedUrl(StringUtils.newStringUtf8(result.getContent()), feedUrl);
|
String extractedUrl = extractFeedUrl(urlProviders, StringUtils.newStringUtf8(result.getContent()), feedUrl);
|
||||||
if (org.apache.commons.lang3.StringUtils.isNotBlank(extractedUrl)) {
|
if (org.apache.commons.lang3.StringUtils.isNotBlank(extractedUrl)) {
|
||||||
feedUrl = extractedUrl;
|
feedUrl = extractedUrl;
|
||||||
|
|
||||||
@@ -84,20 +83,13 @@ public class FeedFetcher {
|
|||||||
return fetchedFeed;
|
return fetchedFeed;
|
||||||
}
|
}
|
||||||
|
|
||||||
private String extractFeedUrl(String html, String baseUri) {
|
private static String extractFeedUrl(Set<FeedURLProvider> urlProviders, String html, String baseUri) {
|
||||||
String foundUrl = null;
|
for (FeedURLProvider urlProvider : urlProviders) {
|
||||||
|
String url = urlProvider.get(html, baseUri);
|
||||||
Document doc = Jsoup.parse(html, baseUri);
|
if (url != null)
|
||||||
String root = doc.children().get(0).tagName();
|
return url;
|
||||||
if ("html".equals(root)) {
|
|
||||||
Elements atom = doc.select("link[type=application/atom+xml]");
|
|
||||||
Elements rss = doc.select("link[type=application/rss+xml]");
|
|
||||||
if (!atom.isEmpty()) {
|
|
||||||
foundUrl = atom.get(0).attr("abs:href");
|
|
||||||
} else if (!rss.isEmpty()) {
|
|
||||||
foundUrl = rss.get(0).attr("abs:href");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return foundUrl;
|
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,7 @@
|
|||||||
|
package com.commafeed.backend.urlprovider;
|
||||||
|
|
||||||
|
/**
 * Strategy for deriving the URL of a feed from a page that is not itself a feed.
 *
 * <p>
 * Implementations are consulted with the content of a page and the URL it was requested from, and either
 * return a feed URL or report that they have nothing to offer.
 */
public interface FeedURLProvider {

	/**
	 * Tries to determine a feed URL for the given page.
	 *
	 * @param html
	 *            the content of the page
	 * @param url
	 *            the URL of the page
	 * @return the feed URL, or {@code null} when this provider cannot determine one
	 */
	String get(String html, String url);

}
|
||||||
@@ -0,0 +1,28 @@
|
|||||||
|
package com.commafeed.backend.urlprovider;
|
||||||
|
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
|
import org.jsoup.select.Elements;
|
||||||
|
|
||||||
|
public class InPageReferenceFeedURLProvider implements FeedURLProvider {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String get(String html, String url) {
|
||||||
|
String foundUrl = null;
|
||||||
|
|
||||||
|
Document doc = Jsoup.parse(html, url);
|
||||||
|
String root = doc.children().get(0).tagName();
|
||||||
|
if ("html".equals(root)) {
|
||||||
|
Elements atom = doc.select("link[type=application/atom+xml]");
|
||||||
|
Elements rss = doc.select("link[type=application/rss+xml]");
|
||||||
|
if (!atom.isEmpty()) {
|
||||||
|
foundUrl = atom.get(0).attr("abs:href");
|
||||||
|
} else if (!rss.isEmpty()) {
|
||||||
|
foundUrl = rss.get(0).attr("abs:href");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return foundUrl;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
package com.commafeed.backend.urlprovider;
|
||||||
|
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Workaround for Youtube channels
|
||||||
|
*
|
||||||
|
* converts the channel URL https://www.youtube.com/channel/CHANNEL_ID to the valid feed URL
|
||||||
|
* https://www.youtube.com/feeds/videos.xml?channel_id=CHANNEL_ID
|
||||||
|
*/
|
||||||
|
public class YoutubeFeedURLProvider implements FeedURLProvider {
|
||||||
|
|
||||||
|
private static final Pattern REGEXP = Pattern.compile("(.*\\byoutube\\.com)\\/channel\\/([^\\/]+)", Pattern.CASE_INSENSITIVE);
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String get(String html, String url) {
|
||||||
|
Matcher matcher = REGEXP.matcher(url);
|
||||||
|
return matcher.find() ? matcher.group(1) + "/feeds/videos.xml?channel_id=" + matcher.group(2) : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user