forked from Archives/Athou_commafeed
Merge branch 'ildar-shaimordanov-master'
This commit is contained in:
@@ -26,6 +26,9 @@ import com.commafeed.backend.task.OldStatusesCleanupTask;
|
||||
import com.commafeed.backend.task.OrphanedContentsCleanupTask;
|
||||
import com.commafeed.backend.task.OrphanedFeedsCleanupTask;
|
||||
import com.commafeed.backend.task.ScheduledTask;
|
||||
import com.commafeed.backend.urlprovider.FeedURLProvider;
|
||||
import com.commafeed.backend.urlprovider.InPageReferenceFeedURLProvider;
|
||||
import com.commafeed.backend.urlprovider.YoutubeFeedURLProvider;
|
||||
import com.google.inject.AbstractModule;
|
||||
import com.google.inject.Provides;
|
||||
import com.google.inject.multibindings.Multibinder;
|
||||
@@ -55,6 +58,10 @@ public class CommaFeedModule extends AbstractModule {
|
||||
faviconMultibinder.addBinding().to(FacebookFaviconFetcher.class);
|
||||
faviconMultibinder.addBinding().to(DefaultFaviconFetcher.class);
|
||||
|
||||
Multibinder<FeedURLProvider> urlProviderMultibinder = Multibinder.newSetBinder(binder(), FeedURLProvider.class);
|
||||
urlProviderMultibinder.addBinding().to(InPageReferenceFeedURLProvider.class);
|
||||
urlProviderMultibinder.addBinding().to(YoutubeFeedURLProvider.class);
|
||||
|
||||
Multibinder<ScheduledTask> taskMultibinder = Multibinder.newSetBinder(binder(), ScheduledTask.class);
|
||||
taskMultibinder.addBinding().to(OldStatusesCleanupTask.class);
|
||||
taskMultibinder.addBinding().to(OldEntriesCleanupTask.class);
|
||||
|
||||
@@ -2,26 +2,24 @@ package com.commafeed.backend.feed;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Date;
|
||||
import java.util.Set;
|
||||
|
||||
import javax.inject.Inject;
|
||||
import javax.inject.Singleton;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import org.apache.commons.codec.binary.StringUtils;
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
import org.apache.http.client.ClientProtocolException;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import com.commafeed.backend.HttpGetter;
|
||||
import com.commafeed.backend.HttpGetter.HttpResult;
|
||||
import com.commafeed.backend.HttpGetter.NotModifiedException;
|
||||
import com.commafeed.backend.model.Feed;
|
||||
import com.commafeed.backend.urlprovider.FeedURLProvider;
|
||||
import com.rometools.rome.io.FeedException;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@RequiredArgsConstructor(onConstructor = @__({ @Inject }))
|
||||
@Singleton
|
||||
@@ -29,9 +27,10 @@ public class FeedFetcher {
|
||||
|
||||
private final FeedParser parser;
|
||||
private final HttpGetter getter;
|
||||
private final Set<FeedURLProvider> urlProviders;
|
||||
|
||||
public FetchedFeed fetch(String feedUrl, boolean extractFeedUrlFromHtml, String lastModified, String eTag, Date lastPublishedDate,
|
||||
String lastContentHash) throws FeedException, ClientProtocolException, IOException, NotModifiedException {
|
||||
String lastContentHash) throws FeedException, IOException, NotModifiedException {
|
||||
log.debug("Fetching feed {}", feedUrl);
|
||||
FetchedFeed fetchedFeed = null;
|
||||
|
||||
@@ -44,7 +43,7 @@ public class FeedFetcher {
|
||||
fetchedFeed = parser.parse(result.getUrlAfterRedirect(), content);
|
||||
} catch (FeedException e) {
|
||||
if (extractFeedUrlFromHtml) {
|
||||
String extractedUrl = extractFeedUrl(StringUtils.newStringUtf8(result.getContent()), feedUrl);
|
||||
String extractedUrl = extractFeedUrl(urlProviders, StringUtils.newStringUtf8(result.getContent()), feedUrl);
|
||||
if (org.apache.commons.lang3.StringUtils.isNotBlank(extractedUrl)) {
|
||||
feedUrl = extractedUrl;
|
||||
|
||||
@@ -84,20 +83,13 @@ public class FeedFetcher {
|
||||
return fetchedFeed;
|
||||
}
|
||||
|
||||
private String extractFeedUrl(String html, String baseUri) {
|
||||
String foundUrl = null;
|
||||
|
||||
Document doc = Jsoup.parse(html, baseUri);
|
||||
String root = doc.children().get(0).tagName();
|
||||
if ("html".equals(root)) {
|
||||
Elements atom = doc.select("link[type=application/atom+xml]");
|
||||
Elements rss = doc.select("link[type=application/rss+xml]");
|
||||
if (!atom.isEmpty()) {
|
||||
foundUrl = atom.get(0).attr("abs:href");
|
||||
} else if (!rss.isEmpty()) {
|
||||
foundUrl = rss.get(0).attr("abs:href");
|
||||
}
|
||||
private static String extractFeedUrl(Set<FeedURLProvider> urlProviders, String html, String baseUri) {
|
||||
for (FeedURLProvider urlProvider : urlProviders) {
|
||||
String url = urlProvider.get(html, baseUri);
|
||||
if (url != null)
|
||||
return url;
|
||||
}
|
||||
return foundUrl;
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
package com.commafeed.backend.urlprovider;
|
||||
|
||||
/**
 * Strategy for deriving the URL of a feed from a page that is not itself a feed.
 *
 * <p>
 * FeedFetcher asks each registered provider in turn and uses the first non-null answer, so an implementation must
 * return {@code null} when it has nothing to offer.
 */
public interface FeedURLProvider {

	/**
	 * Tries to derive a feed URL for the given page.
	 *
	 * @param html
	 *            the content that was fetched and could not be parsed as a feed
	 * @param url
	 *            the URL the content was requested from
	 * @return the feed URL, or {@code null} if this provider cannot derive one
	 */
	String get(String html, String url);

}
|
||||
@@ -0,0 +1,28 @@
|
||||
package com.commafeed.backend.urlprovider;
|
||||
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
public class InPageReferenceFeedURLProvider implements FeedURLProvider {
|
||||
|
||||
@Override
|
||||
public String get(String html, String url) {
|
||||
String foundUrl = null;
|
||||
|
||||
Document doc = Jsoup.parse(html, url);
|
||||
String root = doc.children().get(0).tagName();
|
||||
if ("html".equals(root)) {
|
||||
Elements atom = doc.select("link[type=application/atom+xml]");
|
||||
Elements rss = doc.select("link[type=application/rss+xml]");
|
||||
if (!atom.isEmpty()) {
|
||||
foundUrl = atom.get(0).attr("abs:href");
|
||||
} else if (!rss.isEmpty()) {
|
||||
foundUrl = rss.get(0).attr("abs:href");
|
||||
}
|
||||
}
|
||||
|
||||
return foundUrl;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
package com.commafeed.backend.urlprovider;
|
||||
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Workaround for Youtube channels
|
||||
*
|
||||
* converts the channel URL https://www.youtube.com/channel/CHANNEL_ID to the valid feed URL
|
||||
* https://www.youtube.com/feeds/videos.xml?channel_id=CHANNEL_ID
|
||||
*/
|
||||
public class YoutubeFeedURLProvider implements FeedURLProvider {
|
||||
|
||||
private static final Pattern REGEXP = Pattern.compile("(.*\\byoutube\\.com)\\/channel\\/([^\\/]+)", Pattern.CASE_INSENSITIVE);
|
||||
|
||||
@Override
|
||||
public String get(String html, String url) {
|
||||
Matcher matcher = REGEXP.matcher(url);
|
||||
return matcher.find() ? matcher.group(1) + "/feeds/videos.xml?channel_id=" + matcher.group(2) : null;
|
||||
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user