mirror of
https://github.com/Athou/commafeed.git
synced 2026-03-21 21:37:29 +00:00
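Fix encoding issues for feeds whose content was written in Microsoft Word: feeds declaring ISO-8859-1 are now decoded as windows-1252 so that curly quotes and other Word special characters are handled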
This commit is contained in:
@@ -7,6 +7,8 @@ import java.util.List;
|
|||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
import org.apache.commons.lang.SystemUtils;
|
import org.apache.commons.lang.SystemUtils;
|
||||||
import org.jsoup.Jsoup;
|
import org.jsoup.Jsoup;
|
||||||
|
import org.jsoup.nodes.Document.OutputSettings;
|
||||||
|
import org.jsoup.nodes.Entities.EscapeMode;
|
||||||
import org.jsoup.safety.Whitelist;
|
import org.jsoup.safety.Whitelist;
|
||||||
import org.xml.sax.InputSource;
|
import org.xml.sax.InputSource;
|
||||||
|
|
||||||
@@ -28,8 +30,14 @@ public class FeedParser {
|
|||||||
feed.setLastUpdated(Calendar.getInstance().getTime());
|
feed.setLastUpdated(Calendar.getInstance().getTime());
|
||||||
|
|
||||||
try {
|
try {
|
||||||
SyndFeed rss = new SyndFeedInput().build(new InputSource(
|
InputSource source = new InputSource(new ByteArrayInputStream(xml));
|
||||||
new ByteArrayInputStream(xml)));
|
if (new String(xml).split(SystemUtils.LINE_SEPARATOR)[0]
|
||||||
|
.toUpperCase().contains("ISO-8859-1")) {
|
||||||
|
// they probably use word, we need to handle curly quotes and
|
||||||
|
// other word special characters
|
||||||
|
source.setEncoding("windows-1252");
|
||||||
|
}
|
||||||
|
SyndFeed rss = new SyndFeedInput().build(source);
|
||||||
feed.setUrl(feedUrl);
|
feed.setUrl(feedUrl);
|
||||||
feed.setTitle(rss.getTitle());
|
feed.setTitle(rss.getTitle());
|
||||||
feed.setLink(rss.getLink());
|
feed.setLink(rss.getLink());
|
||||||
@@ -38,7 +46,7 @@ public class FeedParser {
|
|||||||
FeedEntry entry = new FeedEntry();
|
FeedEntry entry = new FeedEntry();
|
||||||
entry.setGuid(item.getUri());
|
entry.setGuid(item.getUri());
|
||||||
entry.setTitle(handleContent(item.getTitle()));
|
entry.setTitle(handleContent(item.getTitle()));
|
||||||
entry.setContent(getContent(item));
|
entry.setContent(handleContent(getContent(item)));
|
||||||
entry.setUrl(item.getLink());
|
entry.setUrl(item.getLink());
|
||||||
entry.setUpdated(item.getUpdatedDate() != null ? item
|
entry.setUpdated(item.getUpdatedDate() != null ? item
|
||||||
.getUpdatedDate() : item.getPublishedDate());
|
.getUpdatedDate() : item.getPublishedDate());
|
||||||
@@ -67,7 +75,6 @@ public class FeedParser {
|
|||||||
}
|
}
|
||||||
}), SystemUtils.LINE_SEPARATOR);
|
}), SystemUtils.LINE_SEPARATOR);
|
||||||
}
|
}
|
||||||
content = handleContent(content);
|
|
||||||
return content;
|
return content;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -81,7 +88,8 @@ public class FeedParser {
|
|||||||
whitelist.addAttributes("iframe", "src", "height", "width",
|
whitelist.addAttributes("iframe", "src", "height", "width",
|
||||||
"allowfullscreen", "frameborder");
|
"allowfullscreen", "frameborder");
|
||||||
|
|
||||||
content = Jsoup.clean(content, whitelist);
|
content = Jsoup.clean(content, "", whitelist,
|
||||||
|
new OutputSettings().escapeMode(EscapeMode.extended));
|
||||||
}
|
}
|
||||||
return content;
|
return content;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user