diff --git a/pom.xml b/pom.xml index e02a915d..adbef93b 100644 --- a/pom.xml +++ b/pom.xml @@ -466,6 +466,16 @@ cssparser 0.9.29 + + edu.uci.ics + crawler4j + 3.5 + + + com.google.gwt + gwt-servlet + 2.9.0 + com.google.apis diff --git a/src/main/java/com/commafeed/backend/feed/EstimateDirection.java b/src/main/java/com/commafeed/backend/feed/EstimateDirection.java deleted file mode 100644 index 701f6068..00000000 --- a/src/main/java/com/commafeed/backend/feed/EstimateDirection.java +++ /dev/null @@ -1,48 +0,0 @@ -package com.commafeed.backend.feed; - -import java.util.regex.Pattern; - -/** - * This code is copied and simplified from GWT - * https://github.com/google-web-toolkit/gwt/blob/master/user/src/com/google/gwt/i18n/shared/BidiUtils.java Released under Apache 2.0 - * license, credit of it goes to Google and please use GWT wherever possible instead of this - */ -class EstimateDirection { - private static final float RTL_DETECTION_THRESHOLD = 0.40f; - - private static final String LTR_CHARS = "A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02B8\u0300-\u0590\u0800-\u1FFF" - + "\u2C00-\uFB1C\uFDFE-\uFE6F\uFEFD-\uFFFF"; - private static final String RTL_CHARS = "\u0591-\u07FF\uFB1D-\uFDFD\uFE70-\uFEFC"; - - private static final Pattern WORD_SEPARATOR_RE = Pattern.compile("\\s+"); - private static final Pattern FIRST_STRONG_IS_RTL_RE = Pattern.compile("^[^" + LTR_CHARS + "]*[" + RTL_CHARS + ']'); - private static final Pattern IS_REQUIRED_LTR_RE = Pattern.compile("^http://.*"); - private static final Pattern HAS_ANY_LTR_RE = Pattern.compile("[" + LTR_CHARS + ']'); - - private static boolean startsWithRtl(String str) { - return FIRST_STRONG_IS_RTL_RE.matcher(str).matches(); - } - - private static boolean hasAnyLtr(String str) { - return HAS_ANY_LTR_RE.matcher(str).matches(); - } - - static boolean isRTL(String str) { - int rtlCount = 0; - int total = 0; - String[] tokens = WORD_SEPARATOR_RE.split(str, 20); // limit splits to 20, usually enough - for (int i = 0; i < tokens.length; i++) { - String token = tokens[i]; - if (startsWithRtl(token)) { - rtlCount++; - total++; - } else if (IS_REQUIRED_LTR_RE.matcher(token).matches()) { - // do nothing - } else if (hasAnyLtr(token)) { - total++; - } - } - - return total == 0 ? false : ((float) rtlCount / total > RTL_DETECTION_THRESHOLD ? true : false); - } -} diff --git a/src/main/java/com/commafeed/backend/feed/FeedUtils.java b/src/main/java/com/commafeed/backend/feed/FeedUtils.java index 5916b52c..f73a0826 100644 --- a/src/main/java/com/commafeed/backend/feed/FeedUtils.java +++ b/src/main/java/com/commafeed/backend/feed/FeedUtils.java @@ -37,6 +37,8 @@ import com.commafeed.backend.feed.FeedEntryKeyword.Mode; import com.commafeed.backend.model.FeedEntry; import com.commafeed.backend.model.FeedSubscription; import com.commafeed.frontend.model.Entry; +import com.google.gwt.i18n.client.HasDirection.Direction; +import com.google.gwt.i18n.shared.BidiUtils; import com.ibm.icu.text.CharsetDetector; import com.ibm.icu.text.CharsetMatch; import com.steadystate.css.parser.CSSOMParser; @@ -326,7 +328,8 @@ public class FeedUtils { return false; } - return EstimateDirection.isRTL(text); + Direction direction = BidiUtils.get().estimateDirection(text); + return direction == Direction.RTL; } public static String trimInvalidXmlCharacters(String xml) { diff --git a/src/main/java/edu/uci/ics/crawler4j/url/URLCanonicalizer.java b/src/main/java/edu/uci/ics/crawler4j/url/URLCanonicalizer.java deleted file mode 100644 index aaa57daf..00000000 --- a/src/main/java/edu/uci/ics/crawler4j/url/URLCanonicalizer.java +++ /dev/null @@ -1,211 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package edu.uci.ics.crawler4j.url; - -import java.net.MalformedURLException; -import java.net.URI; -import java.net.URISyntaxException; -import java.net.URL; -import java.net.URLDecoder; -import java.net.URLEncoder; -import java.util.HashMap; -import java.util.Map; -import java.util.SortedMap; -import java.util.TreeMap; - -import org.apache.commons.lang3.StringUtils; - -/** - * See http://en.wikipedia.org/wiki/URL_normalization for a reference Note: some parts of the code are adapted from: - * http://stackoverflow.com/a/4057470/405418 - * - * @author Yasser Ganjisaffar - */ -public class URLCanonicalizer { - - public static String getCanonicalURL(String url) { - return getCanonicalURL(url, null); - } - - public static String getCanonicalURL(String href, String context) { - - try { - URL canonicalURL = new URL(UrlResolver.resolveUrl(context == null ? "" : context, href)); - - String host = canonicalURL.getHost().toLowerCase(); - if (StringUtils.isBlank(host)) { - // This is an invalid Url. - return null; - } - - String path = canonicalURL.getPath(); - - /* - * Normalize: no empty segments (i.e., "//"), no segments equal to - * ".", and no segments equal to ".." that are preceded by a segment - * not equal to "..". - */ - path = new URI(path).normalize().toString(); - - /* - * Convert '//' -> '/' - */ - int idx = path.indexOf("//"); - while (idx >= 0) { - path = path.replace("//", "/"); - idx = path.indexOf("//"); - } - - /* - * Drop starting '/../' - */ - while (path.startsWith("/../")) { - path = path.substring(3); - } - - /* - * Trim - */ - path = path.trim(); - - final SortedMap params = createParameterMap(canonicalURL.getQuery()); - final String queryString; - - if (params != null && params.size() > 0) { - String canonicalParams = canonicalize(params); - queryString = (canonicalParams.isEmpty() ? "" : "?" + canonicalParams); - } else { - queryString = ""; - } - - /* - * Add starting slash if needed - */ - if (path.length() == 0) { - path = "/" + path; - } - - /* - * Drop default port: example.com:80 -> example.com - */ - int port = canonicalURL.getPort(); - if (port == canonicalURL.getDefaultPort()) { - port = -1; - } - - String protocol = canonicalURL.getProtocol().toLowerCase(); - String pathAndQueryString = normalizePath(path) + queryString; - - URL result = new URL(protocol, host, port, pathAndQueryString); - return result.toExternalForm(); - - } catch (MalformedURLException ex) { - return null; - } catch (URISyntaxException ex) { - return null; - } - } - - /** - * Takes a query string, separates the constituent name-value pairs, and stores them in a SortedMap ordered by lexicographical order. - * - * @return Null if there is no query string. - */ - private static SortedMap createParameterMap(final String queryString) { - if (queryString == null || queryString.isEmpty()) { - return null; - } - - final String[] pairs = queryString.split("&"); - final Map params = new HashMap(pairs.length); - - for (final String pair : pairs) { - if (pair.length() == 0) { - continue; - } - - String[] tokens = pair.split("=", 2); - switch (tokens.length) { - case 1: - if (pair.charAt(0) == '=') { - params.put("", tokens[0]); - } else { - params.put(tokens[0], ""); - } - break; - case 2: - params.put(tokens[0], tokens[1]); - break; - } - } - return new TreeMap(params); - } - - /** - * Canonicalize the query string. - * - * @param sortedParamMap - * Parameter name-value pairs in lexicographical order. - * @return Canonical form of query string. - */ - private static String canonicalize(final SortedMap sortedParamMap) { - if (sortedParamMap == null || sortedParamMap.isEmpty()) { - return ""; - } - - final StringBuilder sb = new StringBuilder(100); - for (Map.Entry pair : sortedParamMap.entrySet()) { - final String key = pair.getKey().toLowerCase(); - if (key.equals("jsessionid") || key.equals("phpsessid") || key.equals("aspsessionid")) { - continue; - } - if (sb.length() > 0) { - sb.append('&'); - } - sb.append(percentEncodeRfc3986(pair.getKey())); - if (!pair.getValue().isEmpty()) { - sb.append('='); - sb.append(percentEncodeRfc3986(pair.getValue())); - } - } - return sb.toString(); - } - - /** - * Percent-encode values according the RFC 3986. The built-in Java URLEncoder does not encode according to the RFC, so we make the extra - * replacements. - * - * @param string - * Decoded string. - * @return Encoded string per RFC 3986. - */ - private static String percentEncodeRfc3986(String string) { - try { - string = string.replace("+", "%2B"); - string = URLDecoder.decode(string, "UTF-8"); - string = URLEncoder.encode(string, "UTF-8"); - return string.replace("+", "%20").replace("*", "%2A").replace("%7E", "~"); - } catch (Exception e) { - return string; - } - } - - private static String normalizePath(final String path) { - return path.replace("%7E", "~").replace(" ", "%20"); - } -} diff --git a/src/main/java/edu/uci/ics/crawler4j/url/UrlResolver.java b/src/main/java/edu/uci/ics/crawler4j/url/UrlResolver.java deleted file mode 100644 index 5a6f65a7..00000000 --- a/src/main/java/edu/uci/ics/crawler4j/url/UrlResolver.java +++ /dev/null @@ -1,462 +0,0 @@ -/** - * This class is adopted from Htmlunit with the following copyright: - * - * Copyright (c) 2002-2012 Gargoyle Software Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package edu.uci.ics.crawler4j.url; - - -public final class UrlResolver { - - /** - * Resolves a given relative URL against a base URL. See - * RFC1808 - * Section 4 for more details. - * - * @param baseUrl The base URL in which to resolve the specification. - * @param relativeUrl The relative URL to resolve against the base URL. - * @return the resolved specification. - */ - public static String resolveUrl(final String baseUrl, final String relativeUrl) { - if (baseUrl == null) { - throw new IllegalArgumentException("Base URL must not be null"); - } - if (relativeUrl == null) { - throw new IllegalArgumentException("Relative URL must not be null"); - } - final Url url = resolveUrl(parseUrl(baseUrl.trim()), relativeUrl.trim()); - - return url.toString(); - } - - /** - * Returns the index within the specified string of the first occurrence of - * the specified search character. - * - * @param s the string to search - * @param searchChar the character to search for - * @param beginIndex the index at which to start the search - * @param endIndex the index at which to stop the search - * @return the index of the first occurrence of the character in the string or -1 - */ - private static int indexOf(final String s, final char searchChar, final int beginIndex, final int endIndex) { - for (int i = beginIndex; i < endIndex; i++) { - if (s.charAt(i) == searchChar) { - return i; - } - } - return -1; - } - - /** - * Parses a given specification using the algorithm depicted in - * RFC1808: - * - * Section 2.4: Parsing a URL - * - * An accepted method for parsing URLs is useful to clarify the - * generic-RL syntax of Section 2.2 and to describe the algorithm for - * resolving relative URLs presented in Section 4. This section - * describes the parsing rules for breaking down a URL (relative or - * absolute) into the component parts described in Section 2.1. The - * rules assume that the URL has already been separated from any - * surrounding text and copied to a "parse string". The rules are - * listed in the order in which they would be applied by the parser. - * - * @param spec The specification to parse. - * @return the parsed specification. - */ - private static Url parseUrl(final String spec) { - final Url url = new Url(); - int startIndex = 0; - int endIndex = spec.length(); - - // Section 2.4.1: Parsing the Fragment Identifier - // - // If the parse string contains a crosshatch "#" character, then the - // substring after the first (left-most) crosshatch "#" and up to the - // end of the parse string is the identifier. If the - // crosshatch is the last character, or no crosshatch is present, then - // the fragment identifier is empty. The matched substring, including - // the crosshatch character, is removed from the parse string before - // continuing. - // - // Note that the fragment identifier is not considered part of the URL. - // However, since it is often attached to the URL, parsers must be able - // to recognize and set aside fragment identifiers as part of the - // process. - final int crosshatchIndex = indexOf(spec, '#', startIndex, endIndex); - - if (crosshatchIndex >= 0) { - url.fragment_ = spec.substring(crosshatchIndex + 1, endIndex); - endIndex = crosshatchIndex; - } - // Section 2.4.2: Parsing the Scheme - // - // If the parse string contains a colon ":" after the first character - // and before any characters not allowed as part of a scheme name (i.e., - // any not an alphanumeric, plus "+", period ".", or hyphen "-"), the - // of the URL is the substring of characters up to but not - // including the first colon. These characters and the colon are then - // removed from the parse string before continuing. - final int colonIndex = indexOf(spec, ':', startIndex, endIndex); - - if (colonIndex > 0) { - final String scheme = spec.substring(startIndex, colonIndex); - if (isValidScheme(scheme)) { - url.scheme_ = scheme; - startIndex = colonIndex + 1; - } - } - // Section 2.4.3: Parsing the Network Location/Login - // - // If the parse string begins with a double-slash "//", then the - // substring of characters after the double-slash and up to, but not - // including, the next slash "/" character is the network location/login - // () of the URL. If no trailing slash "/" is present, the - // entire remaining parse string is assigned to . The double- - // slash and are removed from the parse string before - // continuing. - // - // Note: We also accept a question mark "?" or a semicolon ";" character as - // delimiters for the network location/login () of the URL. - final int locationStartIndex; - int locationEndIndex; - - if (spec.startsWith("//", startIndex)) { - locationStartIndex = startIndex + 2; - locationEndIndex = indexOf(spec, '/', locationStartIndex, endIndex); - if (locationEndIndex >= 0) { - startIndex = locationEndIndex; - } - } - else { - locationStartIndex = -1; - locationEndIndex = -1; - } - // Section 2.4.4: Parsing the Query Information - // - // If the parse string contains a question mark "?" character, then the - // substring after the first (left-most) question mark "?" and up to the - // end of the parse string is the information. If the question - // mark is the last character, or no question mark is present, then the - // query information is empty. The matched substring, including the - // question mark character, is removed from the parse string before - // continuing. - final int questionMarkIndex = indexOf(spec, '?', startIndex, endIndex); - - if (questionMarkIndex >= 0) { - if ((locationStartIndex >= 0) && (locationEndIndex < 0)) { - // The substring of characters after the double-slash and up to, but not - // including, the question mark "?" character is the network location/login - // () of the URL. - locationEndIndex = questionMarkIndex; - startIndex = questionMarkIndex; - } - url.query_ = spec.substring(questionMarkIndex + 1, endIndex); - endIndex = questionMarkIndex; - } - // Section 2.4.5: Parsing the Parameters - // - // If the parse string contains a semicolon ";" character, then the - // substring after the first (left-most) semicolon ";" and up to the end - // of the parse string is the parameters (). If the semicolon - // is the last character, or no semicolon is present, then is - // empty. The matched substring, including the semicolon character, is - // removed from the parse string before continuing. - final int semicolonIndex = indexOf(spec, ';', startIndex, endIndex); - - if (semicolonIndex >= 0) { - if ((locationStartIndex >= 0) && (locationEndIndex < 0)) { - // The substring of characters after the double-slash and up to, but not - // including, the semicolon ";" character is the network location/login - // () of the URL. - locationEndIndex = semicolonIndex; - startIndex = semicolonIndex; - } - url.parameters_ = spec.substring(semicolonIndex + 1, endIndex); - endIndex = semicolonIndex; - } - // Section 2.4.6: Parsing the Path - // - // After the above steps, all that is left of the parse string is the - // URL and the slash "/" that may precede it. Even though the - // initial slash is not part of the URL path, the parser must remember - // whether or not it was present so that later processes can - // differentiate between relative and absolute paths. Often this is - // done by simply storing the preceding slash along with the path. - if ((locationStartIndex >= 0) && (locationEndIndex < 0)) { - // The entire remaining parse string is assigned to the network - // location/login () of the URL. - locationEndIndex = endIndex; - } - else if (startIndex < endIndex) { - url.path_ = spec.substring(startIndex, endIndex); - } - // Set the network location/login () of the URL. - if ((locationStartIndex >= 0) && (locationEndIndex >= 0)) { - url.location_ = spec.substring(locationStartIndex, locationEndIndex); - } - return url; - } - - /* - * Returns true if specified string is a valid scheme name. - */ - private static boolean isValidScheme(final String scheme) { - final int length = scheme.length(); - if (length < 1) { - return false; - } - char c = scheme.charAt(0); - if (!Character.isLetter(c)) { - return false; - } - for (int i = 1; i < length; i++) { - c = scheme.charAt(i); - if (!Character.isLetterOrDigit(c) && c != '.' && c != '+' && c != '-') { - return false; - } - } - return true; - } - - /** - * Resolves a given relative URL against a base URL using the algorithm - * depicted in RFC1808: - * - * Section 4: Resolving Relative URLs - * - * This section describes an example algorithm for resolving URLs within - * a context in which the URLs may be relative, such that the result is - * always a URL in absolute form. Although this algorithm cannot - * guarantee that the resulting URL will equal that intended by the - * original author, it does guarantee that any valid URL (relative or - * absolute) can be consistently transformed to an absolute form given a - * valid base URL. - * - * @param baseUrl The base URL in which to resolve the specification. - * @param relativeUrl The relative URL to resolve against the base URL. - * @return the resolved specification. - */ - private static Url resolveUrl(final Url baseUrl, final String relativeUrl) { - final Url url = parseUrl(relativeUrl); - // Step 1: The base URL is established according to the rules of - // Section 3. If the base URL is the empty string (unknown), - // the embedded URL is interpreted as an absolute URL and - // we are done. - if (baseUrl == null) { - return url; - } - // Step 2: Both the base and embedded URLs are parsed into their - // component parts as described in Section 2.4. - // a) If the embedded URL is entirely empty, it inherits the - // entire base URL (i.e., is set equal to the base URL) - // and we are done. - if (relativeUrl.length() == 0) { - return new Url(baseUrl); - } - // b) If the embedded URL starts with a scheme name, it is - // interpreted as an absolute URL and we are done. - if (url.scheme_ != null) { - return url; - } - // c) Otherwise, the embedded URL inherits the scheme of - // the base URL. - url.scheme_ = baseUrl.scheme_; - // Step 3: If the embedded URL's is non-empty, we skip to - // Step 7. Otherwise, the embedded URL inherits the - // (if any) of the base URL. - if (url.location_ != null) { - return url; - } - url.location_ = baseUrl.location_; - // Step 4: If the embedded URL path is preceded by a slash "/", the - // path is not relative and we skip to Step 7. - if ((url.path_ != null) && ((url.path_.length() > 0) && ('/' == url.path_.charAt(0)))) { - url.path_ = removeLeadingSlashPoints(url.path_); - return url; - } - // Step 5: If the embedded URL path is empty (and not preceded by a - // slash), then the embedded URL inherits the base URL path, - // and - if (url.path_ == null) { - url.path_ = baseUrl.path_; - // a) if the embedded URL's is non-empty, we skip to - // step 7; otherwise, it inherits the of the base - // URL (if any) and - if (url.parameters_ != null) { - return url; - } - url.parameters_ = baseUrl.parameters_; - // b) if the embedded URL's is non-empty, we skip to - // step 7; otherwise, it inherits the of the base - // URL (if any) and we skip to step 7. - if (url.query_ != null) { - return url; - } - url.query_ = baseUrl.query_; - return url; - } - // Step 6: The last segment of the base URL's path (anything - // following the rightmost slash "/", or the entire path if no - // slash is present) is removed and the embedded URL's path is - // appended in its place. The following operations are - // then applied, in order, to the new path: - final String basePath = baseUrl.path_; - String path = ""; - - if (basePath != null) { - final int lastSlashIndex = basePath.lastIndexOf('/'); - - if (lastSlashIndex >= 0) { - path = basePath.substring(0, lastSlashIndex + 1); - } - } - else { - path = "/"; - } - path = path.concat(url.path_); - // a) All occurrences of "./", where "." is a complete path - // segment, are removed. - int pathSegmentIndex; - - while ((pathSegmentIndex = path.indexOf("/./")) >= 0) { - path = path.substring(0, pathSegmentIndex + 1).concat(path.substring(pathSegmentIndex + 3)); - } - // b) If the path ends with "." as a complete path segment, - // that "." is removed. - if (path.endsWith("/.")) { - path = path.substring(0, path.length() - 1); - } - // c) All occurrences of "/../", where is a - // complete path segment not equal to "..", are removed. - // Removal of these path segments is performed iteratively, - // removing the leftmost matching pattern on each iteration, - // until no matching pattern remains. - while ((pathSegmentIndex = path.indexOf("/../")) > 0) { - final String pathSegment = path.substring(0, pathSegmentIndex); - final int slashIndex = pathSegment.lastIndexOf('/'); - - if (slashIndex < 0) { - continue; - } - if (!"..".equals(pathSegment.substring(slashIndex))) { - path = path.substring(0, slashIndex + 1).concat(path.substring(pathSegmentIndex + 4)); - } - } - // d) If the path ends with "/..", where is a - // complete path segment not equal to "..", that - // "/.." is removed. - if (path.endsWith("/..")) { - final String pathSegment = path.substring(0, path.length() - 3); - final int slashIndex = pathSegment.lastIndexOf('/'); - - if (slashIndex >= 0) { - path = path.substring(0, slashIndex + 1); - } - } - - path = removeLeadingSlashPoints(path); - - url.path_ = path; - // Step 7: The resulting URL components, including any inherited from - // the base URL, are recombined to give the absolute form of - // the embedded URL. - return url; - } - - /** - * "/.." at the beginning should be removed as browsers do (not in RFC) - */ - private static String removeLeadingSlashPoints(String path) { - while (path.startsWith("/..")) { - path = path.substring(3); - } - - return path; - } - - /** - * Class Url represents a Uniform Resource Locator. - * - * @author Martin Tamme - */ - private static class Url { - - String scheme_; - String location_; - String path_; - String parameters_; - String query_; - String fragment_; - - /** - * Creates a Url object. - */ - public Url() { - } - - /** - * Creates a Url object from the specified - * Url object. - * - * @param url a Url object. - */ - public Url(final Url url) { - scheme_ = url.scheme_; - location_ = url.location_; - path_ = url.path_; - parameters_ = url.parameters_; - query_ = url.query_; - fragment_ = url.fragment_; - } - - /** - * Returns a string representation of the Url object. - * - * @return a string representation of the Url object. - */ - @Override - public String toString() { - final StringBuilder sb = new StringBuilder(); - - if (scheme_ != null) { - sb.append(scheme_); - sb.append(':'); - } - if (location_ != null) { - sb.append("//"); - sb.append(location_); - } - if (path_ != null) { - sb.append(path_); - } - if (parameters_ != null) { - sb.append(';'); - sb.append(parameters_); - } - if (query_ != null) { - sb.append('?'); - sb.append(query_); - } - if (fragment_ != null) { - sb.append('#'); - sb.append(fragment_); - } - return sb.toString(); - } - } -} diff --git a/src/test/java/com/commafeed/backend/feed/EstimateDirectionTest.java b/src/test/java/com/commafeed/backend/feed/EstimateDirectionTest.java deleted file mode 100644 index 3f8bdda0..00000000 --- a/src/test/java/com/commafeed/backend/feed/EstimateDirectionTest.java +++ /dev/null @@ -1,65 +0,0 @@ -package com.commafeed.backend.feed; - -import org.junit.Assert; -import org.junit.Test; - -import static com.commafeed.backend.feed.EstimateDirection.isRTL; - -/** - * These tests are copied and simplified from GWT - * https://github.com/google-web-toolkit/gwt/blob/master/user/test/com/google/gwt/i18n/shared/BidiUtilsTest.java Released under Apache 2.0 - * license, credit of it goes to Google and please use GWT wherever possible instead of this - */ -public class EstimateDirectionTest { - - @Test - public void testEstimateDirection() { - Assert.assertEquals(false, isRTL("")); - Assert.assertEquals(false, isRTL(" ")); - Assert.assertEquals(false, isRTL("! (...)")); - Assert.assertEquals(false, isRTL("Pure Ascii content")); - Assert.assertEquals(false, isRTL("-17.0%")); - Assert.assertEquals(false, isRTL("http://foo/bar/")); - Assert.assertEquals(false, isRTL("http://foo/bar/?s=\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0" - + "\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0" - + "\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0")); - Assert.assertEquals(true, isRTL("\u05d0")); - Assert.assertEquals(true, isRTL("\u05d0")); - Assert.assertEquals(true, isRTL("9 \u05d0 -> 17.5, 23, 45, 19")); - Assert.assertEquals(true, isRTL("http://foo/bar/ \u05d0 http://foo2/bar2/ http://foo3/bar3/")); - Assert.assertEquals(true, isRTL("\u05d0\u05d9\u05df \u05de\u05de\u05e9 " - + "\u05de\u05d4 \u05dc\u05e8\u05d0\u05d5\u05ea: " - + "\u05dc\u05d0 \u05e6\u05d9\u05dc\u05de\u05ea\u05d9 " - + "\u05d4\u05e8\u05d1\u05d4 \u05d5\u05d2\u05dd \u05d0" - + "\u05dd \u05d4\u05d9\u05d9\u05ea\u05d9 \u05de\u05e6\u05dc" - + "\u05dd, \u05d4\u05d9\u05d4 \u05e9\u05dd")); - Assert.assertEquals(true, isRTL("\u05db\u05d0\u05df - http://geek.co.il/gallery/v/2007-06" - + " - \u05d0\u05d9\u05df \u05de\u05de\u05e9 \u05de\u05d4 " - + "\u05dc\u05e8\u05d0\u05d5\u05ea: \u05dc\u05d0 \u05e6" - + "\u05d9\u05dc\u05de\u05ea\u05d9 \u05d4\u05e8\u05d1\u05d4 " - + "\u05d5\u05d2\u05dd \u05d0\u05dd \u05d4\u05d9\u05d9\u05ea" - + "\u05d9 \u05de\u05e6\u05dc\u05dd, \u05d4\u05d9\u05d4 " - + "\u05e9\u05dd \u05d1\u05e2\u05d9\u05e7\u05e8 \u05d4\u05e8" - + "\u05d1\u05d4 \u05d0\u05e0\u05e9\u05d9\u05dd. \u05de" - + "\u05d4 \u05e9\u05db\u05df - \u05d0\u05e4\u05e9\u05e8 " - + "\u05dc\u05e0\u05e6\u05dc \u05d0\u05ea \u05d4\u05d4 " - + "\u05d3\u05d6\u05de\u05e0\u05d5\u05ea \u05dc\u05d4\u05e1" - + "\u05ea\u05db\u05dc \u05e2\u05dc \u05db\u05de\u05d4 " - + "\u05ea\u05de\u05d5\u05e0\u05d5\u05ea \u05de\u05e9\u05e2" - + "\u05e9\u05e2\u05d5\u05ea \u05d9\u05e9\u05e0\u05d5\u05ea " - + "\u05d9\u05d5\u05ea\u05e8 \u05e9\u05d9\u05e9 \u05dc" - + "\u05d9 \u05d1\u05d0\u05ea\u05e8")); - Assert.assertEquals(true, isRTL("CAPTCHA \u05de\u05e9\u05d5\u05db\u05dc\u05dc " - + "\u05de\u05d3\u05d9?")); - Assert.assertEquals(true, isRTL("Yes Prime Minister \u05e2\u05d3\u05db\u05d5\u05df. " - + "\u05e9\u05d0\u05dc\u05d5 \u05d0\u05d5\u05ea\u05d9 " - + "\u05de\u05d4 \u05d0\u05e0\u05d9 \u05e8\u05d5\u05e6" - + "\u05d4 \u05de\u05ea\u05e0\u05d4 \u05dc\u05d7\u05d2")); - Assert.assertEquals(true, isRTL("17.4.02 \u05e9\u05e2\u05d4:13-20 .15-00 .\u05dc\u05d0 " - + "\u05d4\u05d9\u05d9\u05ea\u05d9 \u05db\u05d0\u05df.")); - Assert.assertEquals(true, isRTL("5710 5720 5730. \u05d4\u05d3\u05dc\u05ea. " - + "\u05d4\u05e0\u05e9\u05d9\u05e7\u05d4")); - Assert.assertEquals(true, isRTL("\u05d4\u05d3\u05dc\u05ea http://www.google.com " - + "http://www.gmail.com")); - } -} \ No newline at end of file