fix NPE when content is empty and fix character encoding issues

This commit is contained in:
Athou
2013-04-05 22:38:35 +02:00
parent 3db578a2d6
commit 648b4017d7
2 changed files with 18 additions and 13 deletions

View File

@@ -27,10 +27,10 @@ public class FeedFetcher {
Feed feed = null; Feed feed = null;
try { try {
String content = getter.get(feedUrl); byte[] content = getter.getBinary(feedUrl);
String extractedUrl = extractFeedUrl(content); String extractedUrl = extractFeedUrl(new String(content, "UTF-8"));
if (extractedUrl != null) { if (extractedUrl != null) {
content = getter.get(extractedUrl); content = getter.getBinary(extractedUrl);
feedUrl = extractedUrl; feedUrl = extractedUrl;
} }
feed = parser.parse(feedUrl, content); feed = parser.parse(feedUrl, content);

View File

@@ -1,6 +1,6 @@
package com.commafeed.backend.feeds; package com.commafeed.backend.feeds;
import java.io.StringReader; import java.io.ByteArrayInputStream;
import java.util.Calendar; import java.util.Calendar;
import java.util.List; import java.util.List;
@@ -8,6 +8,7 @@ import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.SystemUtils; import org.apache.commons.lang.SystemUtils;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.safety.Whitelist; import org.jsoup.safety.Whitelist;
import org.xml.sax.InputSource;
import com.commafeed.backend.model.Feed; import com.commafeed.backend.model.Feed;
import com.commafeed.backend.model.FeedEntry; import com.commafeed.backend.model.FeedEntry;
@@ -22,12 +23,13 @@ import com.sun.syndication.io.SyndFeedInput;
public class FeedParser { public class FeedParser {
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public Feed parse(String feedUrl, String xml) throws FeedException { public Feed parse(String feedUrl, byte[] xml) throws FeedException {
Feed feed = new Feed(); Feed feed = new Feed();
feed.setLastUpdated(Calendar.getInstance().getTime()); feed.setLastUpdated(Calendar.getInstance().getTime());
try { try {
SyndFeed rss = new SyndFeedInput().build(new StringReader(xml)); SyndFeed rss = new SyndFeedInput().build(new InputSource(
new ByteArrayInputStream(xml)));
feed.setUrl(feedUrl); feed.setUrl(feedUrl);
feed.setTitle(rss.getTitle()); feed.setTitle(rss.getTitle());
feed.setLink(rss.getLink()); feed.setLink(rss.getLink());
@@ -70,14 +72,17 @@ public class FeedParser {
} }
private String handleContent(String content) { private String handleContent(String content) {
Whitelist whitelist = Whitelist.relaxed(); if (StringUtils.isNotBlank(content)) {
whitelist.addEnforcedAttribute("a", "target", "_blank"); Whitelist whitelist = Whitelist.relaxed();
whitelist.addEnforcedAttribute("a", "target", "_blank");
// TODO evaluate potential security issues // TODO evaluate potential security issues
whitelist.addTags("iframe"); whitelist.addTags("iframe");
whitelist.addAttributes("iframe", "src", "height", "width", whitelist.addAttributes("iframe", "src", "height", "width",
"allowfullscreen", "frameborder"); "allowfullscreen", "frameborder");
return Jsoup.clean(content, whitelist); content = Jsoup.clean(content, whitelist);
}
return content;
} }
} }