Parse the fetched result for feed links only if it is HTML

This commit is contained in:
Athou
2013-03-31 14:30:44 +02:00
parent c507b583c0
commit a9a59f58bd

View File

@@ -59,17 +59,19 @@ public class FeedFetcher {
}
private String extractFeedUrl(String html) {
String foundUrl = null;
Document doc = Jsoup.parse(html);
Elements rss = doc.select("link[type=application/rss+xml]");
Elements atom = doc.select("link[type=application/atom+xml]");
if (rss.size() > 0) {
return rss.get(0).attr("abs:href").toString();
} else if (atom.size() > 0) {
return atom.get(0).attr("abs:href").toString();
} else {
return null;
String root = doc.children().get(0).tagName();
if ("html".equals(root)) {
Elements rss = doc.select("link[type=application/rss+xml]");
Elements atom = doc.select("link[type=application/atom+xml]");
if (!rss.isEmpty()) {
foundUrl = rss.get(0).attr("abs:href").toString();
} else if (!atom.isEmpty()) {
foundUrl = atom.get(0).attr("abs:href").toString();
}
}
return foundUrl;
}
}