forked from Archives/Athou_commafeed
Try to parse the given URL as a feed before falling back to feed links embedded in the HTML
This commit is contained in:
@@ -36,15 +36,26 @@ public class FeedFetcher {
|
|||||||
FetchedFeed fetchedFeed = null;
|
FetchedFeed fetchedFeed = null;
|
||||||
|
|
||||||
int timeout = 20000;
|
int timeout = 20000;
|
||||||
|
|
||||||
HttpResult result = getter.getBinary(feedUrl, lastModified, eTag, timeout);
|
HttpResult result = getter.getBinary(feedUrl, lastModified, eTag, timeout);
|
||||||
if (extractFeedUrlFromHtml) {
|
byte[] content = result.getContent();
|
||||||
String extractedUrl = extractFeedUrl(StringUtils.newStringUtf8(result.getContent()), feedUrl);
|
|
||||||
if (org.apache.commons.lang.StringUtils.isNotBlank(extractedUrl)) {
|
try {
|
||||||
result = getter.getBinary(extractedUrl, lastModified, eTag, timeout);
|
fetchedFeed = parser.parse(feedUrl, content);
|
||||||
feedUrl = extractedUrl;
|
} catch (FeedException e) {
|
||||||
|
if (extractFeedUrlFromHtml) {
|
||||||
|
String extractedUrl = extractFeedUrl(StringUtils.newStringUtf8(result.getContent()), feedUrl);
|
||||||
|
if (org.apache.commons.lang.StringUtils.isNotBlank(extractedUrl)) {
|
||||||
|
feedUrl = extractedUrl;
|
||||||
|
|
||||||
|
result = getter.getBinary(extractedUrl, lastModified, eTag, timeout);
|
||||||
|
content = result.getContent();
|
||||||
|
fetchedFeed = parser.parse(feedUrl, content);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw e;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
byte[] content = result.getContent();
|
|
||||||
|
|
||||||
if (content == null) {
|
if (content == null) {
|
||||||
throw new IOException("Feed content is empty.");
|
throw new IOException("Feed content is empty.");
|
||||||
@@ -56,8 +67,6 @@ public class FeedFetcher {
|
|||||||
throw new NotModifiedException("content hash not modified");
|
throw new NotModifiedException("content hash not modified");
|
||||||
}
|
}
|
||||||
|
|
||||||
fetchedFeed = parser.parse(feedUrl, content);
|
|
||||||
|
|
||||||
if (lastPublishedDate != null && fetchedFeed.getFeed().getLastPublishedDate() != null
|
if (lastPublishedDate != null && fetchedFeed.getFeed().getLastPublishedDate() != null
|
||||||
&& lastPublishedDate.getTime() == fetchedFeed.getFeed().getLastPublishedDate().getTime()) {
|
&& lastPublishedDate.getTime() == fetchedFeed.getFeed().getLastPublishedDate().getTime()) {
|
||||||
log.debug("publishedDate not modified: {}", feedUrl);
|
log.debug("publishedDate not modified: {}", feedUrl);
|
||||||
|
|||||||
Reference in New Issue
Block a user