diff --git a/commafeed-server/pom.xml b/commafeed-server/pom.xml
index ab861694..5dae894e 100644
--- a/commafeed-server/pom.xml
+++ b/commafeed-server/pom.xml
@@ -447,11 +447,6 @@
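             <artifactId>urlcanon</artifactId>
             <version>0.4.0</version>
         </dependency>
-        <dependency>
-            <groupId>org.gwtproject</groupId>
-            <artifactId>gwt-servlet</artifactId>
-            <version>2.11.0</version>
-        </dependency>
         <dependency>
             <groupId>org.apache.httpcomponents.client5</groupId>
             <artifactId>httpclient5</artifactId>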
diff --git a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedUtils.java b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedUtils.java
index 3a59eff3..7c23e734 100644
--- a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedUtils.java
+++ b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedUtils.java
@@ -16,11 +16,10 @@ import org.netpreserve.urlcanon.Canonicalizer;
import org.netpreserve.urlcanon.ParsedUrl;
import com.commafeed.backend.feed.FeedEntryKeyword.Mode;
+import com.commafeed.backend.feed.parser.TextDirectionDetector;
import com.commafeed.backend.model.FeedEntry;
import com.commafeed.backend.model.FeedSubscription;
import com.commafeed.frontend.model.Entry;
-import com.google.gwt.i18n.client.HasDirection.Direction;
-import com.google.gwt.i18n.shared.BidiUtils;
import lombok.extern.slf4j.Slf4j;
@@ -109,8 +108,8 @@ public class FeedUtils {
return false;
}
- Direction direction = BidiUtils.get().estimateDirection(text);
- return direction == Direction.RTL;
+ TextDirectionDetector.Direction direction = TextDirectionDetector.detect(text);
+ return direction == TextDirectionDetector.Direction.RIGHT_TO_LEFT;
}
public static String removeTrailingSlash(String url) {
diff --git a/commafeed-server/src/main/java/com/commafeed/backend/feed/parser/TextDirectionDetector.java b/commafeed-server/src/main/java/com/commafeed/backend/feed/parser/TextDirectionDetector.java
new file mode 100644
index 00000000..b2a482a9
--- /dev/null
+++ b/commafeed-server/src/main/java/com/commafeed/backend/feed/parser/TextDirectionDetector.java
@@ -0,0 +1,57 @@
+package com.commafeed.backend.feed.parser;
+
+import java.text.Bidi;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.regex.Pattern;
+
+import org.apache.commons.lang3.math.NumberUtils;
+
+/** Estimates the reading direction of a text from the direction of its whitespace-separated tokens. */
+public class TextDirectionDetector {
+
+    private static final Pattern WORDS_PATTERN = Pattern.compile("\\s+");
+    private static final Pattern URL_PATTERN = Pattern.compile("^https?://.*");
+
+    /** Share of the counted tokens that right-to-left tokens must exceed for a RIGHT_TO_LEFT verdict. */
+    private static final double RTL_THRESHOLD = 0.4D;
+
+    public enum Direction {
+        LEFT_TO_RIGHT, RIGHT_TO_LEFT
+    }
+
+    /**
+     * Detects the dominant direction of {@code input}. Urls and numbers are left out of the count.
+     *
+     * @param input text to inspect; {@code null} or blank text is reported as left-to-right
+     * @return RIGHT_TO_LEFT when the share of right-to-left tokens exceeds the threshold, LEFT_TO_RIGHT otherwise
+     */
+    public static Direction detect(String input) {
+        if (input == null || input.isBlank()) {
+            return Direction.LEFT_TO_RIGHT;
+        }
+
+        // the counters never leave this method, plain primitives are enough
+        long rtl = 0;
+        long total = 0;
+        for (String token : WORDS_PATTERN.split(input)) {
+            // skip urls, numbers and the empty token that split() emits when the input starts with whitespace
+            if (token.isEmpty() || URL_PATTERN.matcher(token).matches() || NumberUtils.isCreatable(token)) {
+                continue;
+            }
+            total++;
+            if (isRightToLeft(token)) {
+                rtl++;
+            }
+        }
+
+        // when every token was skipped there is nothing to measure, fall back to left-to-right
+        boolean mostlyRtl = total > 0 && (double) rtl / total > RTL_THRESHOLD;
+        return mostlyRtl ? Direction.RIGHT_TO_LEFT : Direction.LEFT_TO_RIGHT;
+    }
+
+    /** Tells whether the token may need bidi handling and, if so, whether its base level is right-to-left. */
+    private static boolean isRightToLeft(String token) {
+        return Bidi.requiresBidi(token.toCharArray(), 0, token.length())
+                && new Bidi(token, Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT).getBaseLevel() == 1;
+    }
+}
diff --git a/commafeed-server/src/test/java/com/commafeed/backend/feed/FeedFetcherTest.java b/commafeed-server/src/test/java/com/commafeed/backend/feed/FeedFetcherTest.java
index 089f6e3c..69f4f395 100644
--- a/commafeed-server/src/test/java/com/commafeed/backend/feed/FeedFetcherTest.java
+++ b/commafeed-server/src/test/java/com/commafeed/backend/feed/FeedFetcherTest.java
@@ -11,12 +11,12 @@ import org.mockito.Mock;
import org.mockito.Mockito;
import org.mockito.junit.jupiter.MockitoExtension;
+import com.commafeed.backend.Digests;
import com.commafeed.backend.HttpGetter;
import com.commafeed.backend.HttpGetter.HttpResult;
import com.commafeed.backend.HttpGetter.NotModifiedException;
import com.commafeed.backend.feed.parser.FeedParser;
import com.commafeed.backend.urlprovider.FeedURLProvider;
-import com.google.gwt.thirdparty.guava.common.hash.Hashing;
@ExtendWith(MockitoExtension.class)
class FeedFetcherTest {
@@ -43,7 +43,7 @@ class FeedFetcherTest {
String lastModified = "last-modified-1";
String etag = "etag-1";
byte[] content = "content".getBytes();
- String lastContentHash = Hashing.sha1().hashBytes(content).toString();
+ String lastContentHash = Digests.sha1Hex(content);
Mockito.when(getter.getBinary(url, lastModified, etag))
.thenReturn(new HttpResult(content, "content-type", "last-modified-2", "etag-2", null));
diff --git a/commafeed-server/src/test/java/com/commafeed/backend/feed/parser/TextDirectionDetectorTest.java b/commafeed-server/src/test/java/com/commafeed/backend/feed/parser/TextDirectionDetectorTest.java
new file mode 100644
index 00000000..8e27a06f
--- /dev/null
+++ b/commafeed-server/src/test/java/com/commafeed/backend/feed/parser/TextDirectionDetectorTest.java
@@ -0,0 +1,53 @@
+package com.commafeed.backend.feed.parser;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+/** Checks the direction reported by TextDirectionDetector for neutral, Latin, Hebrew and mixed inputs. */
+class TextDirectionDetectorTest {
+    @Test
+    void testEstimateDirection() {
+        // no right-to-left word to count: null, blank, punctuation, ascii, numbers and urls
+        assertLeftToRight(null);
+        assertLeftToRight("");
+        assertLeftToRight(" ");
+        assertLeftToRight("! (...)");
+        assertLeftToRight("Pure Ascii content");
+        assertLeftToRight("-17.0%");
+        assertLeftToRight("http://foo/bar/");
+        assertLeftToRight("http://foo/bar/?s=\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0"
+                + "\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0" + "\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0");
+        // Hebrew words, alone or mixed with urls, numbers and a few Latin words
+        assertRightToLeft("\u05d0");
+        assertRightToLeft("http://foo/bar/ \u05d0 http://foo2/bar2/ http://foo3/bar3/");
+        assertRightToLeft("\u05d0\u05d9\u05df \u05de\u05de\u05e9 " + "\u05de\u05d4 \u05dc\u05e8\u05d0\u05d5\u05ea: "
+                + "\u05dc\u05d0 \u05e6\u05d9\u05dc\u05de\u05ea\u05d9 " + "\u05d4\u05e8\u05d1\u05d4 \u05d5\u05d2\u05dd \u05d0"
+                + "\u05dd \u05d4\u05d9\u05d9\u05ea\u05d9 \u05de\u05e6\u05dc" + "\u05dd, \u05d4\u05d9\u05d4 \u05e9\u05dd");
+        assertRightToLeft("\u05db\u05d0\u05df - http://geek.co.il/gallery/v/2007-06"
+                + " - \u05d0\u05d9\u05df \u05de\u05de\u05e9 \u05de\u05d4 " + "\u05dc\u05e8\u05d0\u05d5\u05ea: \u05dc\u05d0 \u05e6"
+                + "\u05d9\u05dc\u05de\u05ea\u05d9 \u05d4\u05e8\u05d1\u05d4 "
+                + "\u05d5\u05d2\u05dd \u05d0\u05dd \u05d4\u05d9\u05d9\u05ea"
+                + "\u05d9 \u05de\u05e6\u05dc\u05dd, \u05d4\u05d9\u05d4 "
+                + "\u05e9\u05dd \u05d1\u05e2\u05d9\u05e7\u05e8 \u05d4\u05e8" + "\u05d1\u05d4 \u05d0\u05e0\u05e9\u05d9\u05dd. \u05de"
+                + "\u05d4 \u05e9\u05db\u05df - \u05d0\u05e4\u05e9\u05e8 " + "\u05dc\u05e0\u05e6\u05dc \u05d0\u05ea \u05d4\u05d4 "
+                + "\u05d3\u05d6\u05de\u05e0\u05d5\u05ea \u05dc\u05d4\u05e1" + "\u05ea\u05db\u05dc \u05e2\u05dc \u05db\u05de\u05d4 "
+                + "\u05ea\u05de\u05d5\u05e0\u05d5\u05ea \u05de\u05e9\u05e2"
+                + "\u05e9\u05e2\u05d5\u05ea \u05d9\u05e9\u05e0\u05d5\u05ea " + "\u05d9\u05d5\u05ea\u05e8 \u05e9\u05d9\u05e9 \u05dc"
+                + "\u05d9 \u05d1\u05d0\u05ea\u05e8");
+        assertRightToLeft("CAPTCHA \u05de\u05e9\u05d5\u05db\u05dc\u05dc " + "\u05de\u05d3\u05d9?");
+        assertRightToLeft("Yes Prime Minister \u05e2\u05d3\u05db\u05d5\u05df. "
+                + "\u05e9\u05d0\u05dc\u05d5 \u05d0\u05d5\u05ea\u05d9 " + "\u05de\u05d4 \u05d0\u05e0\u05d9 \u05e8\u05d5\u05e6"
+                + "\u05d4 \u05de\u05ea\u05e0\u05d4 \u05dc\u05d7\u05d2");
+        assertRightToLeft("17.4.02 \u05e9\u05e2\u05d4:13-20 .15-00 .\u05dc\u05d0 " + "\u05d4\u05d9\u05d9\u05ea\u05d9 \u05db\u05d0\u05df.");
+        assertRightToLeft("5710 5720 5730. \u05d4\u05d3\u05dc\u05ea. " + "\u05d4\u05e0\u05e9\u05d9\u05e7\u05d4");
+        assertRightToLeft("\u05d4\u05d3\u05dc\u05ea http://www.google.com " + "http://www.gmail.com");
+    }
+
+    private static void assertLeftToRight(String text) {
+        Assertions.assertEquals(TextDirectionDetector.Direction.LEFT_TO_RIGHT, TextDirectionDetector.detect(text));
+    }
+
+    private static void assertRightToLeft(String text) {
+        Assertions.assertEquals(TextDirectionDetector.Direction.RIGHT_TO_LEFT, TextDirectionDetector.detect(text));
+    }
+}
\ No newline at end of file