mirror of
https://github.com/Athou/commafeed.git
synced 2026-03-21 21:37:29 +00:00
Avoid GWT dependency by bringing in simplified direction-estimation logic
This commit is contained in:
@@ -0,0 +1,55 @@
|
||||
package com.commafeed.backend.feed;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
 * Word-based heuristic that estimates whether a piece of text is predominantly right-to-left.
 * <p>
 * This code is copied and simplified from GWT
 * https://github.com/google-web-toolkit/gwt/blob/master/user/src/com/google/gwt/i18n/shared/BidiUtils.java
 * Released under Apache 2.0 license, credit of it goes to Google and please use GWT wherever possible instead of this
 */
class EstimateDirection {

	/** Fraction of counted words that must start with an RTL character for the text to be considered RTL. */
	private static final float RTL_DETECTION_THRESHOLD = 0.40f;

	/** Regex character-class body (no surrounding brackets) listing the characters treated as strongly LTR. */
	private static final String LTR_CHARS =
			"A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02B8\u0300-\u0590\u0800-\u1FFF" +
			"\u2C00-\uFB1C\uFDFE-\uFE6F\uFEFD-\uFFFF";

	/** Regex character-class body (no surrounding brackets) listing the characters treated as strongly RTL. */
	private static final String RTL_CHARS =
			"\u0591-\u07FF\uFB1D-\uFDFD\uFE70-\uFEFC";

	/** Splits the input into words on runs of whitespace. */
	private static final Pattern WORD_SEPARATOR_RE = Pattern.compile("\\s+");

	/** Matches a prefix made of non-LTR characters followed by an RTL character. */
	private static final Pattern FIRST_STRONG_IS_RTL_RE =
			Pattern.compile("^[^" + LTR_CHARS + "]*[" + RTL_CHARS + ']');

	/** Tokens starting with this scheme are left out of the word count altogether. */
	private static final Pattern IS_REQUIRED_LTR_RE = Pattern.compile("^http://.*");

	/** Matches any single strongly LTR character. */
	private static final Pattern HAS_ANY_LTR_RE =
			Pattern.compile("[" + LTR_CHARS + ']');

	/** Static utility holder, not meant to be instantiated. */
	private EstimateDirection() {
	}

	/**
	 * Tells whether the first strongly directional character of {@code str} is RTL.
	 * <p>
	 * The pattern only describes a prefix, so it is applied with {@code lookingAt()}; {@code matches()} would
	 * wrongly reject words that contain anything after the first RTL character run (e.g. trailing punctuation).
	 */
	private static boolean startsWithRtl(String str) {
		return FIRST_STRONG_IS_RTL_RE.matcher(str).lookingAt();
	}

	/**
	 * Tells whether {@code str} contains at least one strongly LTR character.
	 * <p>
	 * The pattern is a single character class, so it is applied with {@code find()}; {@code matches()} would
	 * only succeed for one-character strings.
	 */
	private static boolean hasAnyLtr(String str) {
		return HAS_ANY_LTR_RE.matcher(str).find();
	}

	/**
	 * Estimates whether the given text should be rendered right-to-left.
	 * <p>
	 * The text is split on whitespace. Words whose first strong character is RTL count as RTL words, other
	 * words containing an LTR character count as LTR words, and {@code http://} tokens as well as tokens
	 * without any strong character (numbers, punctuation) are ignored. The text is RTL when the share of
	 * RTL words among the counted words is strictly greater than {@link #RTL_DETECTION_THRESHOLD}.
	 *
	 * @param str the text to inspect, may be {@code null}
	 * @return {@code true} if the text is estimated to be RTL; {@code false} otherwise, including for
	 *         {@code null}, empty text, or text without any counted word
	 */
	static boolean isRTL(String str) {
		if (str == null) {
			return false;
		}

		int rtlCount = 0;
		int total = 0;
		for (String token : WORD_SEPARATOR_RE.split(str)) {
			if (startsWithRtl(token)) {
				rtlCount++;
				total++;
			} else if (IS_REQUIRED_LTR_RE.matcher(token).matches()) {
				// URLs must not tip the balance towards LTR, so they are skipped before the LTR check.
				continue;
			} else if (hasAnyLtr(token)) {
				total++;
			}
		}

		return total != 0 && (float) rtlCount / total > RTL_DETECTION_THRESHOLD;
	}
}
|
||||
@@ -33,8 +33,6 @@ import com.commafeed.backend.model.FeedEntry;
|
||||
import com.commafeed.backend.model.FeedSubscription;
|
||||
import com.commafeed.frontend.model.Entry;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.gwt.i18n.client.HasDirection.Direction;
|
||||
import com.google.gwt.i18n.shared.BidiUtils;
|
||||
import com.steadystate.css.parser.CSSOMParser;
|
||||
|
||||
import edu.uci.ics.crawler4j.url.URLCanonicalizer;
|
||||
@@ -291,8 +289,7 @@ public class FeedUtils {
|
||||
return false;
|
||||
}
|
||||
|
||||
Direction direction = BidiUtils.get().estimateDirection(text);
|
||||
return direction == Direction.RTL;
|
||||
return EstimateDirection.isRTL(text);
|
||||
}
|
||||
|
||||
public static String trimInvalidXmlCharacters(String xml) {
|
||||
|
||||
Reference in New Issue
Block a user