From 648b4017d761886cfa39ac9289bc329e7f6bafc9 Mon Sep 17 00:00:00 2001 From: Athou Date: Fri, 5 Apr 2013 22:38:35 +0200 Subject: [PATCH] fix NPE when content is empty and fix character encoding issues --- .../commafeed/backend/feeds/FeedFetcher.java | 6 ++--- .../commafeed/backend/feeds/FeedParser.java | 25 +++++++++++-------- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/main/java/com/commafeed/backend/feeds/FeedFetcher.java b/src/main/java/com/commafeed/backend/feeds/FeedFetcher.java index 10cc9aee..99535ff2 100644 --- a/src/main/java/com/commafeed/backend/feeds/FeedFetcher.java +++ b/src/main/java/com/commafeed/backend/feeds/FeedFetcher.java @@ -27,10 +27,10 @@ public class FeedFetcher { Feed feed = null; try { - String content = getter.get(feedUrl); - String extractedUrl = extractFeedUrl(content); + byte[] content = getter.getBinary(feedUrl); + String extractedUrl = extractFeedUrl(new String(content, "UTF-8")); if (extractedUrl != null) { - content = getter.get(extractedUrl); + content = getter.getBinary(extractedUrl); feedUrl = extractedUrl; } feed = parser.parse(feedUrl, content); diff --git a/src/main/java/com/commafeed/backend/feeds/FeedParser.java b/src/main/java/com/commafeed/backend/feeds/FeedParser.java index 630747bd..bc395fde 100644 --- a/src/main/java/com/commafeed/backend/feeds/FeedParser.java +++ b/src/main/java/com/commafeed/backend/feeds/FeedParser.java @@ -1,6 +1,6 @@ package com.commafeed.backend.feeds; -import java.io.StringReader; +import java.io.ByteArrayInputStream; import java.util.Calendar; import java.util.List; @@ -8,6 +8,7 @@ import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.SystemUtils; import org.jsoup.Jsoup; import org.jsoup.safety.Whitelist; +import org.xml.sax.InputSource; import com.commafeed.backend.model.Feed; import com.commafeed.backend.model.FeedEntry; @@ -22,12 +23,13 @@ import com.sun.syndication.io.SyndFeedInput; public class FeedParser { @SuppressWarnings("unchecked") - public Feed parse(String feedUrl, String xml) throws FeedException { + public Feed parse(String feedUrl, byte[] xml) throws FeedException { Feed feed = new Feed(); feed.setLastUpdated(Calendar.getInstance().getTime()); try { - SyndFeed rss = new SyndFeedInput().build(new StringReader(xml)); + SyndFeed rss = new SyndFeedInput().build(new InputSource( + new ByteArrayInputStream(xml))); feed.setUrl(feedUrl); feed.setTitle(rss.getTitle()); feed.setLink(rss.getLink()); @@ -70,14 +72,17 @@ public class FeedParser { } private String handleContent(String content) { - Whitelist whitelist = Whitelist.relaxed(); - whitelist.addEnforcedAttribute("a", "target", "_blank"); + if (StringUtils.isNotBlank(content)) { + Whitelist whitelist = Whitelist.relaxed(); + whitelist.addEnforcedAttribute("a", "target", "_blank"); - // TODO evaluate potential security issues - whitelist.addTags("iframe"); - whitelist.addAttributes("iframe", "src", "height", "width", - "allowfullscreen", "frameborder"); + // TODO evaluate potential security issues + whitelist.addTags("iframe"); + whitelist.addAttributes("iframe", "src", "height", "width", + "allowfullscreen", "frameborder"); - return Jsoup.clean(content, whitelist); + content = Jsoup.clean(content, whitelist); + } + return content; } }