From 6c67e6363a28db0b7dcd954aab7be47b886b0152 Mon Sep 17 00:00:00 2001 From: Athou Date: Fri, 12 Dec 2014 10:12:56 +0100 Subject: [PATCH] return charset instead of string --- .../java/com/commafeed/backend/feed/FeedParser.java | 3 ++- .../java/com/commafeed/backend/feed/FeedUtils.java | 11 ++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/main/java/com/commafeed/backend/feed/FeedParser.java b/src/main/java/com/commafeed/backend/feed/FeedParser.java index 29ebd60c..d74ab8aa 100644 --- a/src/main/java/com/commafeed/backend/feed/FeedParser.java +++ b/src/main/java/com/commafeed/backend/feed/FeedParser.java @@ -1,6 +1,7 @@ package com.commafeed.backend.feed; import java.io.StringReader; +import java.nio.charset.Charset; import java.text.DateFormat; import java.util.Date; import java.util.List; @@ -46,7 +47,7 @@ public class FeedParser { List entries = fetchedFeed.getEntries(); try { - String encoding = FeedUtils.guessEncoding(xml); + Charset encoding = FeedUtils.guessEncoding(xml); String xmlString = FeedUtils.trimInvalidXmlCharacters(new String(xml, encoding)); if (xmlString == null) { throw new FeedException("Input string is null for url " + feedUrl); diff --git a/src/main/java/com/commafeed/backend/feed/FeedUtils.java b/src/main/java/com/commafeed/backend/feed/FeedUtils.java index c3e617ee..45846136 100644 --- a/src/main/java/com/commafeed/backend/feed/FeedUtils.java +++ b/src/main/java/com/commafeed/backend/feed/FeedUtils.java @@ -3,6 +3,7 @@ package com.commafeed.backend.feed; import java.io.StringReader; import java.net.MalformedURLException; import java.net.URL; +import java.nio.charset.Charset; import java.util.Arrays; import java.util.Collections; import java.util.Date; @@ -100,14 +101,14 @@ public class FeedUtils { * feed * */ - public static String guessEncoding(byte[] bytes) { + public static Charset guessEncoding(byte[] bytes) { String extracted = extractDeclaredEncoding(bytes); if (StringUtils.startsWithIgnoreCase(extracted, 
"iso-8859-")) { if (StringUtils.endsWith(extracted, "1") == false) { - return extracted; + return Charset.forName(extracted); } } else if (StringUtils.startsWithIgnoreCase(extracted, "windows-")) { - return extracted; + return Charset.forName(extracted); } return detectEncoding(bytes); } @@ -115,7 +116,7 @@ public class FeedUtils { /** * Detect encoding by analyzing characters in the array */ - public static String detectEncoding(byte[] bytes) { + public static Charset detectEncoding(byte[] bytes) { String encoding = "UTF-8"; CharsetDetector detector = new CharsetDetector(); @@ -127,7 +128,7 @@ public class FeedUtils { if (encoding.equalsIgnoreCase("ISO-8859-1")) { encoding = "windows-1252"; } - return encoding; + return Charset.forName(encoding); } public static String replaceHtmlEntitiesWithNumericEntities(String source) {