diff --git a/pom.xml b/pom.xml
index 02845c26..eb6cf3e8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -304,9 +304,9 @@
1.8.1
- com.googlecode.juniversalchardet
- juniversalchardet
- 1.0.3
+ com.ibm.icu
+ icu4j
+ 54.1.1
net.sourceforge.cssparser
diff --git a/src/main/java/com/commafeed/backend/feed/FeedUtils.java b/src/main/java/com/commafeed/backend/feed/FeedUtils.java
index fca78591..4ce026bc 100644
--- a/src/main/java/com/commafeed/backend/feed/FeedUtils.java
+++ b/src/main/java/com/commafeed/backend/feed/FeedUtils.java
@@ -25,7 +25,6 @@ import org.jsoup.nodes.Entities.EscapeMode;
import org.jsoup.safety.Cleaner;
import org.jsoup.safety.Whitelist;
import org.jsoup.select.Elements;
-import org.mozilla.universalchardet.UniversalDetector;
import org.w3c.css.sac.InputSource;
import org.w3c.dom.css.CSSStyleDeclaration;
@@ -34,6 +33,8 @@ import com.commafeed.backend.model.FeedEntry;
import com.commafeed.backend.model.FeedSubscription;
import com.commafeed.frontend.model.Entry;
import com.google.common.collect.Lists;
+import com.ibm.icu.text.CharsetDetector;
+import com.ibm.icu.text.CharsetMatch;
import com.steadystate.css.parser.CSSOMParser;
import edu.uci.ics.crawler4j.url.URLCanonicalizer;
@@ -114,15 +115,15 @@ public class FeedUtils {
* Detect encoding by analyzing characters in the array
*/
public static String detectEncoding(byte[] bytes) {
- String DEFAULT_ENCODING = "UTF-8";
- UniversalDetector detector = new UniversalDetector(null);
- detector.handleData(bytes, 0, bytes.length);
- detector.dataEnd();
- String encoding = detector.getDetectedCharset();
- detector.reset();
- if (encoding == null) {
- encoding = DEFAULT_ENCODING;
- } else if (encoding.equalsIgnoreCase("ISO-8859-1")) {
+ String encoding = "UTF-8";
+
+ CharsetDetector detector = new CharsetDetector();
+ detector.setText(bytes);
+ CharsetMatch match = detector.detect();
+ if (match != null) {
+ encoding = match.getName();
+ }
+ if (encoding.equalsIgnoreCase("ISO-8859-1")) {
encoding = "windows-1252";
}
return encoding;