diff --git a/pom.xml b/pom.xml
index eddbf3c4..35d075e8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -264,11 +264,6 @@
juniversalchardet
1.0.3
-
- com.google.gwt
- gwt-servlet
- 2.6.1
-
net.sourceforge.cssparser
cssparser
diff --git a/src/main/java/com/commafeed/backend/feed/EstimateDirection.java b/src/main/java/com/commafeed/backend/feed/EstimateDirection.java
new file mode 100644
index 00000000..58bf74d7
--- /dev/null
+++ b/src/main/java/com/commafeed/backend/feed/EstimateDirection.java
@@ -0,0 +1,55 @@
+package com.commafeed.backend.feed;
+
+import java.util.regex.Pattern;
+
+/**
+ * This code is copied and simplified from GWT
+ * https://github.com/google-web-toolkit/gwt/blob/master/user/src/com/google/gwt/i18n/shared/BidiUtils.java
+ * Released under the Apache 2.0 license; credit goes to Google. Please use GWT itself wherever possible instead of this copy.
+ */
+class EstimateDirection {
+    /** Share of directional words that has to be exceeded by RTL words for a text to be RTL. */
+    private static final float RTL_DETECTION_THRESHOLD = 0.40f;
+
+    // Regex character-class bodies (without brackets) for strongly LTR and strongly RTL characters.
+    private static final String LTR_CHARS =
+        "A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02B8\u0300-\u0590\u0800-\u1FFF" +
+        "\u2C00-\uFB1C\uFDFE-\uFE6F\uFEFD-\uFFFF";
+    private static final String RTL_CHARS =
+        "\u0591-\u07FF\uFB1D-\uFDFD\uFE70-\uFEFC";
+
+    private static final Pattern WORD_SEPARATOR_RE = Pattern.compile("\\s+");
+    // Matches at the start of a word whose first strongly-directional character is RTL.
+    private static final Pattern FIRST_STRONG_IS_RTL_RE =
+        Pattern.compile("^[^" + LTR_CHARS + "]*[" + RTL_CHARS + ']');
+    // Words starting with http:// are left out of the LTR/RTL word counts.
+    private static final Pattern IS_REQUIRED_LTR_RE = Pattern.compile("^http://.*");
+    private static final Pattern HAS_ANY_LTR_RE = Pattern.compile("[" + LTR_CHARS + ']');
+
+    /**
+     * Estimates whether a text reads right-to-left. The text is split on whitespace; words
+     * starting with {@code http://} are skipped, and a word counts as directional when its first
+     * strong character is RTL or when it contains any strong LTR character. Words made only of
+     * digits and punctuation are neutral and do not influence the result.
+     * @param str text to inspect; {@code null} is treated as having no direction
+     * @return true if the RTL share of directional words exceeds RTL_DETECTION_THRESHOLD
+     */
+    static boolean isRTL(String str) {
+        if (str == null) {
+            return false;
+        }
+        int rtlWords = 0;
+        int directionalWords = 0;
+        for (String word : WORD_SEPARATOR_RE.split(str)) {
+            // find(), not matches(): the patterns describe a prefix or a single character of the word.
+            if (FIRST_STRONG_IS_RTL_RE.matcher(word).find()) {
+                rtlWords++;
+                directionalWords++;
+            } else if (!IS_REQUIRED_LTR_RE.matcher(word).find()
+                    && HAS_ANY_LTR_RE.matcher(word).find()) {
+                directionalWords++;
+            }
+        }
+        return directionalWords > 0 && (float) rtlWords / directionalWords > RTL_DETECTION_THRESHOLD;
+    }
+}
diff --git a/src/main/java/com/commafeed/backend/feed/FeedUtils.java b/src/main/java/com/commafeed/backend/feed/FeedUtils.java
index daa1b986..e271f85d 100644
--- a/src/main/java/com/commafeed/backend/feed/FeedUtils.java
+++ b/src/main/java/com/commafeed/backend/feed/FeedUtils.java
@@ -33,8 +33,6 @@ import com.commafeed.backend.model.FeedEntry;
import com.commafeed.backend.model.FeedSubscription;
import com.commafeed.frontend.model.Entry;
import com.google.common.collect.Lists;
-import com.google.gwt.i18n.client.HasDirection.Direction;
-import com.google.gwt.i18n.shared.BidiUtils;
import com.steadystate.css.parser.CSSOMParser;
import edu.uci.ics.crawler4j.url.URLCanonicalizer;
@@ -291,8 +289,7 @@ public class FeedUtils {
return false;
}
- Direction direction = BidiUtils.get().estimateDirection(text);
- return direction == Direction.RTL;
+ return EstimateDirection.isRTL(text);
}
public static String trimInvalidXmlCharacters(String xml) {
diff --git a/src/test/java/com/commafeed/backend/feed/EstimateDirectionTest.java b/src/test/java/com/commafeed/backend/feed/EstimateDirectionTest.java
new file mode 100644
index 00000000..1fe77b04
--- /dev/null
+++ b/src/test/java/com/commafeed/backend/feed/EstimateDirectionTest.java
@@ -0,0 +1,65 @@
+package com.commafeed.backend.feed;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import static com.commafeed.backend.feed.EstimateDirection.isRTL;
+
+/**
+ * These tests are copied and simplified from GWT
+ * https://github.com/google-web-toolkit/gwt/blob/master/user/test/com/google/gwt/i18n/shared/BidiUtilsTest.java
+ * Released under the Apache 2.0 license; credit goes to Google. Please use GWT itself wherever possible instead of this copy.
+ */
+public class EstimateDirectionTest {
+    /** Runs isRTL over the direction-estimation samples; the non-RTL inputs come first. */
+    @Test
+    public void testEstimateDirection() {
+        Assert.assertFalse(isRTL(""));
+        Assert.assertFalse(isRTL(" "));
+        Assert.assertFalse(isRTL("! (...)"));
+        Assert.assertFalse(isRTL("Pure Ascii content"));
+        Assert.assertFalse(isRTL("-17.0%"));
+        Assert.assertFalse(isRTL("http://foo/bar/"));
+        Assert.assertFalse(isRTL("http://foo/bar/?s=\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0"
+                + "\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0"
+                + "\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0"));
+        Assert.assertTrue(isRTL("\u05d0"));
+        Assert.assertTrue(isRTL("\u05d0"));
+        Assert.assertTrue(isRTL("9 \u05d0 -> 17.5, 23, 45, 19"));
+        Assert.assertTrue(isRTL("http://foo/bar/ \u05d0 http://foo2/bar2/ http://foo3/bar3/"));
+        Assert.assertTrue(isRTL("\u05d0\u05d9\u05df \u05de\u05de\u05e9 "
+                + "\u05de\u05d4 \u05dc\u05e8\u05d0\u05d5\u05ea: "
+                + "\u05dc\u05d0 \u05e6\u05d9\u05dc\u05de\u05ea\u05d9 "
+                + "\u05d4\u05e8\u05d1\u05d4 \u05d5\u05d2\u05dd \u05d0"
+                + "\u05dd \u05d4\u05d9\u05d9\u05ea\u05d9 \u05de\u05e6\u05dc"
+                + "\u05dd, \u05d4\u05d9\u05d4 \u05e9\u05dd"));
+        Assert.assertTrue(isRTL("\u05db\u05d0\u05df - http://geek.co.il/gallery/v/2007-06"
+                + " - \u05d0\u05d9\u05df \u05de\u05de\u05e9 \u05de\u05d4 "
+                + "\u05dc\u05e8\u05d0\u05d5\u05ea: \u05dc\u05d0 \u05e6"
+                + "\u05d9\u05dc\u05de\u05ea\u05d9 \u05d4\u05e8\u05d1\u05d4 "
+                + "\u05d5\u05d2\u05dd \u05d0\u05dd \u05d4\u05d9\u05d9\u05ea"
+                + "\u05d9 \u05de\u05e6\u05dc\u05dd, \u05d4\u05d9\u05d4 "
+                + "\u05e9\u05dd \u05d1\u05e2\u05d9\u05e7\u05e8 \u05d4\u05e8"
+                + "\u05d1\u05d4 \u05d0\u05e0\u05e9\u05d9\u05dd. \u05de"
+                + "\u05d4 \u05e9\u05db\u05df - \u05d0\u05e4\u05e9\u05e8 "
+                + "\u05dc\u05e0\u05e6\u05dc \u05d0\u05ea \u05d4\u05d4 "
+                + "\u05d3\u05d6\u05de\u05e0\u05d5\u05ea \u05dc\u05d4\u05e1"
+                + "\u05ea\u05db\u05dc \u05e2\u05dc \u05db\u05de\u05d4 "
+                + "\u05ea\u05de\u05d5\u05e0\u05d5\u05ea \u05de\u05e9\u05e2"
+                + "\u05e9\u05e2\u05d5\u05ea \u05d9\u05e9\u05e0\u05d5\u05ea "
+                + "\u05d9\u05d5\u05ea\u05e8 \u05e9\u05d9\u05e9 \u05dc"
+                + "\u05d9 \u05d1\u05d0\u05ea\u05e8"));
+        Assert.assertTrue(isRTL("CAPTCHA \u05de\u05e9\u05d5\u05db\u05dc\u05dc "
+                + "\u05de\u05d3\u05d9?"));
+        Assert.assertTrue(isRTL("Yes Prime Minister \u05e2\u05d3\u05db\u05d5\u05df. "
+                + "\u05e9\u05d0\u05dc\u05d5 \u05d0\u05d5\u05ea\u05d9 "
+                + "\u05de\u05d4 \u05d0\u05e0\u05d9 \u05e8\u05d5\u05e6"
+                + "\u05d4 \u05de\u05ea\u05e0\u05d4 \u05dc\u05d7\u05d2"));
+        Assert.assertTrue(isRTL("17.4.02 \u05e9\u05e2\u05d4:13-20 .15-00 .\u05dc\u05d0 "
+                + "\u05d4\u05d9\u05d9\u05ea\u05d9 \u05db\u05d0\u05df."));
+        Assert.assertTrue(isRTL("5710 5720 5730. \u05d4\u05d3\u05dc\u05ea. "
+                + "\u05d4\u05e0\u05e9\u05d9\u05e7\u05d4"));
+        Assert.assertTrue(isRTL("\u05d4\u05d3\u05dc\u05ea http://www.google.com "
+                + "http://www.gmail.com"));
+    }
+}
\ No newline at end of file