mirror of
https://github.com/Athou/commafeed.git
synced 2026-03-21 21:37:29 +00:00
return charset instead of string
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
package com.commafeed.backend.feed;
|
package com.commafeed.backend.feed;
|
||||||
|
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
|
import java.nio.charset.Charset;
|
||||||
import java.text.DateFormat;
|
import java.text.DateFormat;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@@ -46,7 +47,7 @@ public class FeedParser {
|
|||||||
List<FeedEntry> entries = fetchedFeed.getEntries();
|
List<FeedEntry> entries = fetchedFeed.getEntries();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
String encoding = FeedUtils.guessEncoding(xml);
|
Charset encoding = FeedUtils.guessEncoding(xml);
|
||||||
String xmlString = FeedUtils.trimInvalidXmlCharacters(new String(xml, encoding));
|
String xmlString = FeedUtils.trimInvalidXmlCharacters(new String(xml, encoding));
|
||||||
if (xmlString == null) {
|
if (xmlString == null) {
|
||||||
throw new FeedException("Input string is null for url " + feedUrl);
|
throw new FeedException("Input string is null for url " + feedUrl);
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package com.commafeed.backend.feed;
|
|||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
import java.nio.charset.Charset;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
@@ -100,14 +101,14 @@ public class FeedUtils {
|
|||||||
* feed
|
* feed
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public static String guessEncoding(byte[] bytes) {
|
public static Charset guessEncoding(byte[] bytes) {
|
||||||
String extracted = extractDeclaredEncoding(bytes);
|
String extracted = extractDeclaredEncoding(bytes);
|
||||||
if (StringUtils.startsWithIgnoreCase(extracted, "iso-8859-")) {
|
if (StringUtils.startsWithIgnoreCase(extracted, "iso-8859-")) {
|
||||||
if (StringUtils.endsWith(extracted, "1") == false) {
|
if (StringUtils.endsWith(extracted, "1") == false) {
|
||||||
return extracted;
|
return Charset.forName(extracted);
|
||||||
}
|
}
|
||||||
} else if (StringUtils.startsWithIgnoreCase(extracted, "windows-")) {
|
} else if (StringUtils.startsWithIgnoreCase(extracted, "windows-")) {
|
||||||
return extracted;
|
return Charset.forName(extracted);
|
||||||
}
|
}
|
||||||
return detectEncoding(bytes);
|
return detectEncoding(bytes);
|
||||||
}
|
}
|
||||||
@@ -115,7 +116,7 @@ public class FeedUtils {
|
|||||||
/**
|
/**
|
||||||
* Detect encoding by analyzing characters in the array
|
* Detect encoding by analyzing characters in the array
|
||||||
*/
|
*/
|
||||||
public static String detectEncoding(byte[] bytes) {
|
public static Charset detectEncoding(byte[] bytes) {
|
||||||
String encoding = "UTF-8";
|
String encoding = "UTF-8";
|
||||||
|
|
||||||
CharsetDetector detector = new CharsetDetector();
|
CharsetDetector detector = new CharsetDetector();
|
||||||
@@ -127,7 +128,7 @@ public class FeedUtils {
|
|||||||
if (encoding.equalsIgnoreCase("ISO-8859-1")) {
|
if (encoding.equalsIgnoreCase("ISO-8859-1")) {
|
||||||
encoding = "windows-1252";
|
encoding = "windows-1252";
|
||||||
}
|
}
|
||||||
return encoding;
|
return Charset.forName(encoding);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String replaceHtmlEntitiesWithNumericEntities(String source) {
|
public static String replaceHtmlEntitiesWithNumericEntities(String source) {
|
||||||
|
|||||||
Reference in New Issue
Block a user