Avoid GWT dependency by bringing in simplified direction-estimation logic

This commit is contained in:
Ebrahim Byagowi
2014-09-11 17:47:33 +04:30
parent ffa51406b6
commit a52b5fd711
4 changed files with 121 additions and 9 deletions

View File

@@ -0,0 +1,55 @@
package com.commafeed.backend.feed;
import java.util.regex.Pattern;
/**
 * Word-based estimate of whether a piece of text should be rendered right-to-left.
 *
 * This code is copied and simplified from GWT
 * https://github.com/google-web-toolkit/gwt/blob/master/user/src/com/google/gwt/i18n/shared/BidiUtils.java
 * Released under the Apache 2.0 license; credit for it goes to Google. Please use GWT wherever
 * possible instead of this class.
 */
class EstimateDirection {

    /** Share of direction-bearing words that must be RTL for the text to be reported as RTL. */
    private static final float RTL_DETECTION_THRESHOLD = 0.40f;

    /** Regex character-class body (no surrounding brackets) listing the LTR ranges. */
    private static final String LTR_CHARS =
            "A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02B8\u0300-\u0590\u0800-\u1FFF"
            + "\u2C00-\uFB1C\uFDFE-\uFE6F\uFEFD-\uFFFF";

    /** Regex character-class body (no surrounding brackets) listing the RTL ranges. */
    private static final String RTL_CHARS =
            "\u0591-\u07FF\uFB1D-\uFDFD\uFE70-\uFEFC";

    /** Splits text into words on runs of whitespace. */
    private static final Pattern WORD_SEPARATOR_RE = Pattern.compile("\\s+");

    /** Finds an RTL character that is not preceded by any LTR character. */
    private static final Pattern FIRST_STRONG_IS_RTL_RE =
            Pattern.compile("^[^" + LTR_CHARS + "]*[" + RTL_CHARS + ']');

    /** Words starting with {@code http://} are left out of the count entirely. */
    private static final Pattern IS_REQUIRED_LTR_RE = Pattern.compile("^http://.*");

    /** Finds any LTR character. */
    private static final Pattern HAS_ANY_LTR_RE =
            Pattern.compile("[" + LTR_CHARS + ']');

    /**
     * Tells whether the first LTR-or-RTL character of {@code str} is an RTL one.
     *
     * @param str the word to inspect
     * @return {@code true} if an RTL character appears before any LTR character
     */
    private static boolean startsWithRtl(String str) {
        // find(), not matches(): the pattern describes a prefix, not the whole word.
        return FIRST_STRONG_IS_RTL_RE.matcher(str).find();
    }

    /**
     * Tells whether {@code str} contains at least one LTR character.
     *
     * @param str the word to inspect
     * @return {@code true} if any character of {@code str} falls in the LTR ranges
     */
    private static boolean hasAnyLtr(String str) {
        // find(), not matches(): a single LTR character anywhere in the word is enough.
        return HAS_ANY_LTR_RE.matcher(str).find();
    }

    /**
     * Estimates whether {@code str} is right-to-left text.
     *
     * The text is split on whitespace. A word whose first LTR-or-RTL character is RTL counts as
     * an RTL word; a word starting with {@code http://} is ignored; any other word containing an
     * LTR character counts as an LTR word. Words with neither kind of character are ignored.
     *
     * @param str the text to examine; {@code null} is treated as not RTL
     * @return {@code true} if more than 40% of the counted words are RTL, {@code false}
     *         otherwise (including when no word could be counted)
     */
    static boolean isRTL(String str) {
        if (str == null) {
            return false;
        }

        int rtlCount = 0;
        int total = 0;
        for (String token : WORD_SEPARATOR_RE.split(str)) {
            if (startsWithRtl(token)) {
                rtlCount++;
                total++;
            } else if (!IS_REQUIRED_LTR_RE.matcher(token).matches() && hasAnyLtr(token)) {
                total++;
            }
        }

        return total != 0 && (float) rtlCount / total > RTL_DETECTION_THRESHOLD;
    }
}

View File

@@ -33,8 +33,6 @@ import com.commafeed.backend.model.FeedEntry;
import com.commafeed.backend.model.FeedSubscription;
import com.commafeed.frontend.model.Entry;
import com.google.common.collect.Lists;
import com.google.gwt.i18n.client.HasDirection.Direction;
import com.google.gwt.i18n.shared.BidiUtils;
import com.steadystate.css.parser.CSSOMParser;
import edu.uci.ics.crawler4j.url.URLCanonicalizer;
@@ -291,8 +289,7 @@ public class FeedUtils {
return false;
}
Direction direction = BidiUtils.get().estimateDirection(text);
return direction == Direction.RTL;
return EstimateDirection.isRTL(text);
}
public static String trimInvalidXmlCharacters(String xml) {