return charset instead of string

This commit is contained in:
Athou
2014-12-12 10:12:56 +01:00
parent e2888beb4c
commit 6c67e6363a
2 changed files with 8 additions and 6 deletions

View File

@@ -1,6 +1,7 @@
package com.commafeed.backend.feed; package com.commafeed.backend.feed;
import java.io.StringReader; import java.io.StringReader;
import java.nio.charset.Charset;
import java.text.DateFormat; import java.text.DateFormat;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
@@ -46,7 +47,7 @@ public class FeedParser {
List<FeedEntry> entries = fetchedFeed.getEntries(); List<FeedEntry> entries = fetchedFeed.getEntries();
try { try {
String encoding = FeedUtils.guessEncoding(xml); Charset encoding = FeedUtils.guessEncoding(xml);
String xmlString = FeedUtils.trimInvalidXmlCharacters(new String(xml, encoding)); String xmlString = FeedUtils.trimInvalidXmlCharacters(new String(xml, encoding));
if (xmlString == null) { if (xmlString == null) {
throw new FeedException("Input string is null for url " + feedUrl); throw new FeedException("Input string is null for url " + feedUrl);

View File

@@ -3,6 +3,7 @@ package com.commafeed.backend.feed;
import java.io.StringReader; import java.io.StringReader;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.nio.charset.Charset;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.Date; import java.util.Date;
@@ -100,14 +101,14 @@ public class FeedUtils {
* feed * feed
* *
*/ */
public static String guessEncoding(byte[] bytes) { public static Charset guessEncoding(byte[] bytes) {
String extracted = extractDeclaredEncoding(bytes); String extracted = extractDeclaredEncoding(bytes);
if (StringUtils.startsWithIgnoreCase(extracted, "iso-8859-")) { if (StringUtils.startsWithIgnoreCase(extracted, "iso-8859-")) {
if (StringUtils.endsWith(extracted, "1") == false) { if (StringUtils.endsWith(extracted, "1") == false) {
return extracted; return Charset.forName(extracted);
} }
} else if (StringUtils.startsWithIgnoreCase(extracted, "windows-")) { } else if (StringUtils.startsWithIgnoreCase(extracted, "windows-")) {
return extracted; return Charset.forName(extracted);
} }
return detectEncoding(bytes); return detectEncoding(bytes);
} }
@@ -115,7 +116,7 @@ public class FeedUtils {
/** /**
* Detect encoding by analyzing characters in the array * Detect encoding by analyzing characters in the array
*/ */
public static String detectEncoding(byte[] bytes) { public static Charset detectEncoding(byte[] bytes) {
String encoding = "UTF-8"; String encoding = "UTF-8";
CharsetDetector detector = new CharsetDetector(); CharsetDetector detector = new CharsetDetector();
@@ -127,7 +128,7 @@ public class FeedUtils {
if (encoding.equalsIgnoreCase("ISO-8859-1")) { if (encoding.equalsIgnoreCase("ISO-8859-1")) {
encoding = "windows-1252"; encoding = "windows-1252";
} }
return encoding; return Charset.forName(encoding);
} }
public static String replaceHtmlEntitiesWithNumericEntities(String source) { public static String replaceHtmlEntitiesWithNumericEntities(String source) {