diff --git a/pom.xml b/pom.xml
index e02a915d..adbef93b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -466,6 +466,16 @@
cssparser
0.9.29
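+		<dependency>
+			<groupId>edu.uci.ics</groupId>
+			<artifactId>crawler4j</artifactId>
+			<version>3.5</version>
+		</dependency>
+		<dependency>
+			<groupId>com.google.gwt</groupId>
+			<artifactId>gwt-servlet</artifactId>
+			<version>2.9.0</version>
+		</dependency>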
com.google.apis
diff --git a/src/main/java/com/commafeed/backend/feed/EstimateDirection.java b/src/main/java/com/commafeed/backend/feed/EstimateDirection.java
deleted file mode 100644
index 701f6068..00000000
--- a/src/main/java/com/commafeed/backend/feed/EstimateDirection.java
+++ /dev/null
@@ -1,48 +0,0 @@
-package com.commafeed.backend.feed;
-
-import java.util.regex.Pattern;
-
-/**
- * This code is copied and simplified from GWT
- * https://github.com/google-web-toolkit/gwt/blob/master/user/src/com/google/gwt/i18n/shared/BidiUtils.java Released under Apache 2.0
- * license, credit of it goes to Google and please use GWT wherever possible instead of this
- */
-class EstimateDirection {
- private static final float RTL_DETECTION_THRESHOLD = 0.40f;
-
- private static final String LTR_CHARS = "A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02B8\u0300-\u0590\u0800-\u1FFF"
- + "\u2C00-\uFB1C\uFDFE-\uFE6F\uFEFD-\uFFFF";
- private static final String RTL_CHARS = "\u0591-\u07FF\uFB1D-\uFDFD\uFE70-\uFEFC";
-
- private static final Pattern WORD_SEPARATOR_RE = Pattern.compile("\\s+");
- private static final Pattern FIRST_STRONG_IS_RTL_RE = Pattern.compile("^[^" + LTR_CHARS + "]*[" + RTL_CHARS + ']');
- private static final Pattern IS_REQUIRED_LTR_RE = Pattern.compile("^http://.*");
- private static final Pattern HAS_ANY_LTR_RE = Pattern.compile("[" + LTR_CHARS + ']');
-
- private static boolean startsWithRtl(String str) {
- return FIRST_STRONG_IS_RTL_RE.matcher(str).matches();
- }
-
- private static boolean hasAnyLtr(String str) {
- return HAS_ANY_LTR_RE.matcher(str).matches();
- }
-
- static boolean isRTL(String str) {
- int rtlCount = 0;
- int total = 0;
- String[] tokens = WORD_SEPARATOR_RE.split(str, 20); // limit splits to 20, usually enough
- for (int i = 0; i < tokens.length; i++) {
- String token = tokens[i];
- if (startsWithRtl(token)) {
- rtlCount++;
- total++;
- } else if (IS_REQUIRED_LTR_RE.matcher(token).matches()) {
- // do nothing
- } else if (hasAnyLtr(token)) {
- total++;
- }
- }
-
- return total == 0 ? false : ((float) rtlCount / total > RTL_DETECTION_THRESHOLD ? true : false);
- }
-}
diff --git a/src/main/java/com/commafeed/backend/feed/FeedUtils.java b/src/main/java/com/commafeed/backend/feed/FeedUtils.java
index 5916b52c..f73a0826 100644
--- a/src/main/java/com/commafeed/backend/feed/FeedUtils.java
+++ b/src/main/java/com/commafeed/backend/feed/FeedUtils.java
@@ -37,6 +37,8 @@ import com.commafeed.backend.feed.FeedEntryKeyword.Mode;
import com.commafeed.backend.model.FeedEntry;
import com.commafeed.backend.model.FeedSubscription;
import com.commafeed.frontend.model.Entry;
+import com.google.gwt.i18n.client.HasDirection.Direction;
+import com.google.gwt.i18n.shared.BidiUtils;
import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;
import com.steadystate.css.parser.CSSOMParser;
@@ -326,7 +328,8 @@ public class FeedUtils {
return false;
}
- return EstimateDirection.isRTL(text);
+ Direction direction = BidiUtils.get().estimateDirection(text);
+ return direction == Direction.RTL;
}
public static String trimInvalidXmlCharacters(String xml) {
diff --git a/src/main/java/edu/uci/ics/crawler4j/url/URLCanonicalizer.java b/src/main/java/edu/uci/ics/crawler4j/url/URLCanonicalizer.java
deleted file mode 100644
index aaa57daf..00000000
--- a/src/main/java/edu/uci/ics/crawler4j/url/URLCanonicalizer.java
+++ /dev/null
@@ -1,211 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.crawler4j.url;
-
-import java.net.MalformedURLException;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.net.URL;
-import java.net.URLDecoder;
-import java.net.URLEncoder;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.SortedMap;
-import java.util.TreeMap;
-
-import org.apache.commons.lang3.StringUtils;
-
-/**
- * See http://en.wikipedia.org/wiki/URL_normalization for a reference Note: some parts of the code are adapted from:
- * http://stackoverflow.com/a/4057470/405418
- *
- * @author Yasser Ganjisaffar
- */
-public class URLCanonicalizer {
-
- public static String getCanonicalURL(String url) {
- return getCanonicalURL(url, null);
- }
-
- public static String getCanonicalURL(String href, String context) {
-
- try {
- URL canonicalURL = new URL(UrlResolver.resolveUrl(context == null ? "" : context, href));
-
- String host = canonicalURL.getHost().toLowerCase();
- if (StringUtils.isBlank(host)) {
- // This is an invalid Url.
- return null;
- }
-
- String path = canonicalURL.getPath();
-
- /*
- * Normalize: no empty segments (i.e., "//"), no segments equal to
- * ".", and no segments equal to ".." that are preceded by a segment
- * not equal to "..".
- */
- path = new URI(path).normalize().toString();
-
- /*
- * Convert '//' -> '/'
- */
- int idx = path.indexOf("//");
- while (idx >= 0) {
- path = path.replace("//", "/");
- idx = path.indexOf("//");
- }
-
- /*
- * Drop starting '/../'
- */
- while (path.startsWith("/../")) {
- path = path.substring(3);
- }
-
- /*
- * Trim
- */
- path = path.trim();
-
- final SortedMap params = createParameterMap(canonicalURL.getQuery());
- final String queryString;
-
- if (params != null && params.size() > 0) {
- String canonicalParams = canonicalize(params);
- queryString = (canonicalParams.isEmpty() ? "" : "?" + canonicalParams);
- } else {
- queryString = "";
- }
-
- /*
- * Add starting slash if needed
- */
- if (path.length() == 0) {
- path = "/" + path;
- }
-
- /*
- * Drop default port: example.com:80 -> example.com
- */
- int port = canonicalURL.getPort();
- if (port == canonicalURL.getDefaultPort()) {
- port = -1;
- }
-
- String protocol = canonicalURL.getProtocol().toLowerCase();
- String pathAndQueryString = normalizePath(path) + queryString;
-
- URL result = new URL(protocol, host, port, pathAndQueryString);
- return result.toExternalForm();
-
- } catch (MalformedURLException ex) {
- return null;
- } catch (URISyntaxException ex) {
- return null;
- }
- }
-
- /**
- * Takes a query string, separates the constituent name-value pairs, and stores them in a SortedMap ordered by lexicographical order.
- *
- * @return Null if there is no query string.
- */
- private static SortedMap createParameterMap(final String queryString) {
- if (queryString == null || queryString.isEmpty()) {
- return null;
- }
-
- final String[] pairs = queryString.split("&");
- final Map params = new HashMap(pairs.length);
-
- for (final String pair : pairs) {
- if (pair.length() == 0) {
- continue;
- }
-
- String[] tokens = pair.split("=", 2);
- switch (tokens.length) {
- case 1:
- if (pair.charAt(0) == '=') {
- params.put("", tokens[0]);
- } else {
- params.put(tokens[0], "");
- }
- break;
- case 2:
- params.put(tokens[0], tokens[1]);
- break;
- }
- }
- return new TreeMap(params);
- }
-
- /**
- * Canonicalize the query string.
- *
- * @param sortedParamMap
- * Parameter name-value pairs in lexicographical order.
- * @return Canonical form of query string.
- */
- private static String canonicalize(final SortedMap sortedParamMap) {
- if (sortedParamMap == null || sortedParamMap.isEmpty()) {
- return "";
- }
-
- final StringBuilder sb = new StringBuilder(100);
- for (Map.Entry pair : sortedParamMap.entrySet()) {
- final String key = pair.getKey().toLowerCase();
- if (key.equals("jsessionid") || key.equals("phpsessid") || key.equals("aspsessionid")) {
- continue;
- }
- if (sb.length() > 0) {
- sb.append('&');
- }
- sb.append(percentEncodeRfc3986(pair.getKey()));
- if (!pair.getValue().isEmpty()) {
- sb.append('=');
- sb.append(percentEncodeRfc3986(pair.getValue()));
- }
- }
- return sb.toString();
- }
-
- /**
- * Percent-encode values according the RFC 3986. The built-in Java URLEncoder does not encode according to the RFC, so we make the extra
- * replacements.
- *
- * @param string
- * Decoded string.
- * @return Encoded string per RFC 3986.
- */
- private static String percentEncodeRfc3986(String string) {
- try {
- string = string.replace("+", "%2B");
- string = URLDecoder.decode(string, "UTF-8");
- string = URLEncoder.encode(string, "UTF-8");
- return string.replace("+", "%20").replace("*", "%2A").replace("%7E", "~");
- } catch (Exception e) {
- return string;
- }
- }
-
- private static String normalizePath(final String path) {
- return path.replace("%7E", "~").replace(" ", "%20");
- }
-}
diff --git a/src/main/java/edu/uci/ics/crawler4j/url/UrlResolver.java b/src/main/java/edu/uci/ics/crawler4j/url/UrlResolver.java
deleted file mode 100644
index 5a6f65a7..00000000
--- a/src/main/java/edu/uci/ics/crawler4j/url/UrlResolver.java
+++ /dev/null
@@ -1,462 +0,0 @@
-/**
- * This class is adopted from Htmlunit with the following copyright:
- *
- * Copyright (c) 2002-2012 Gargoyle Software Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.crawler4j.url;
-
-
-public final class UrlResolver {
-
- /**
- * Resolves a given relative URL against a base URL. See
- * RFC1808
- * Section 4 for more details.
- *
- * @param baseUrl The base URL in which to resolve the specification.
- * @param relativeUrl The relative URL to resolve against the base URL.
- * @return the resolved specification.
- */
- public static String resolveUrl(final String baseUrl, final String relativeUrl) {
- if (baseUrl == null) {
- throw new IllegalArgumentException("Base URL must not be null");
- }
- if (relativeUrl == null) {
- throw new IllegalArgumentException("Relative URL must not be null");
- }
- final Url url = resolveUrl(parseUrl(baseUrl.trim()), relativeUrl.trim());
-
- return url.toString();
- }
-
- /**
- * Returns the index within the specified string of the first occurrence of
- * the specified search character.
- *
- * @param s the string to search
- * @param searchChar the character to search for
- * @param beginIndex the index at which to start the search
- * @param endIndex the index at which to stop the search
- * @return the index of the first occurrence of the character in the string or -1
- */
- private static int indexOf(final String s, final char searchChar, final int beginIndex, final int endIndex) {
- for (int i = beginIndex; i < endIndex; i++) {
- if (s.charAt(i) == searchChar) {
- return i;
- }
- }
- return -1;
- }
-
- /**
- * Parses a given specification using the algorithm depicted in
- * RFC1808:
- *
- * Section 2.4: Parsing a URL
- *
- * An accepted method for parsing URLs is useful to clarify the
- * generic-RL syntax of Section 2.2 and to describe the algorithm for
- * resolving relative URLs presented in Section 4. This section
- * describes the parsing rules for breaking down a URL (relative or
- * absolute) into the component parts described in Section 2.1. The
- * rules assume that the URL has already been separated from any
- * surrounding text and copied to a "parse string". The rules are
- * listed in the order in which they would be applied by the parser.
- *
- * @param spec The specification to parse.
- * @return the parsed specification.
- */
- private static Url parseUrl(final String spec) {
- final Url url = new Url();
- int startIndex = 0;
- int endIndex = spec.length();
-
- // Section 2.4.1: Parsing the Fragment Identifier
- //
- // If the parse string contains a crosshatch "#" character, then the
- // substring after the first (left-most) crosshatch "#" and up to the
- // end of the parse string is the <fragment> identifier. If the
- // crosshatch is the last character, or no crosshatch is present, then
- // the fragment identifier is empty. The matched substring, including
- // the crosshatch character, is removed from the parse string before
- // continuing.
- //
- // Note that the fragment identifier is not considered part of the URL.
- // However, since it is often attached to the URL, parsers must be able
- // to recognize and set aside fragment identifiers as part of the
- // process.
- final int crosshatchIndex = indexOf(spec, '#', startIndex, endIndex);
-
- if (crosshatchIndex >= 0) {
- url.fragment_ = spec.substring(crosshatchIndex + 1, endIndex);
- endIndex = crosshatchIndex;
- }
- // Section 2.4.2: Parsing the Scheme
- //
- // If the parse string contains a colon ":" after the first character
- // and before any characters not allowed as part of a scheme name (i.e.,
- // any not an alphanumeric, plus "+", period ".", or hyphen "-"), the
- // <scheme> of the URL is the substring of characters up to but not
- // including the first colon. These characters and the colon are then
- // removed from the parse string before continuing.
- final int colonIndex = indexOf(spec, ':', startIndex, endIndex);
-
- if (colonIndex > 0) {
- final String scheme = spec.substring(startIndex, colonIndex);
- if (isValidScheme(scheme)) {
- url.scheme_ = scheme;
- startIndex = colonIndex + 1;
- }
- }
- // Section 2.4.3: Parsing the Network Location/Login
- //
- // If the parse string begins with a double-slash "//", then the
- // substring of characters after the double-slash and up to, but not
- // including, the next slash "/" character is the network location/login
- // (<net_loc>) of the URL. If no trailing slash "/" is present, the
- // entire remaining parse string is assigned to <net_loc>. The double-
- // slash and <net_loc> are removed from the parse string before
- // continuing.
- //
- // Note: We also accept a question mark "?" or a semicolon ";" character as
- // delimiters for the network location/login (<net_loc>) of the URL.
- final int locationStartIndex;
- int locationEndIndex;
-
- if (spec.startsWith("//", startIndex)) {
- locationStartIndex = startIndex + 2;
- locationEndIndex = indexOf(spec, '/', locationStartIndex, endIndex);
- if (locationEndIndex >= 0) {
- startIndex = locationEndIndex;
- }
- }
- else {
- locationStartIndex = -1;
- locationEndIndex = -1;
- }
- // Section 2.4.4: Parsing the Query Information
- //
- // If the parse string contains a question mark "?" character, then the
- // substring after the first (left-most) question mark "?" and up to the
- // end of the parse string is the <query> information. If the question
- // mark is the last character, or no question mark is present, then the
- // query information is empty. The matched substring, including the
- // question mark character, is removed from the parse string before
- // continuing.
- final int questionMarkIndex = indexOf(spec, '?', startIndex, endIndex);
-
- if (questionMarkIndex >= 0) {
- if ((locationStartIndex >= 0) && (locationEndIndex < 0)) {
- // The substring of characters after the double-slash and up to, but not
- // including, the question mark "?" character is the network location/login
- // (<net_loc>) of the URL.
- locationEndIndex = questionMarkIndex;
- startIndex = questionMarkIndex;
- }
- url.query_ = spec.substring(questionMarkIndex + 1, endIndex);
- endIndex = questionMarkIndex;
- }
- // Section 2.4.5: Parsing the Parameters
- //
- // If the parse string contains a semicolon ";" character, then the
- // substring after the first (left-most) semicolon ";" and up to the end
- // of the parse string is the parameters (<params>). If the semicolon
- // is the last character, or no semicolon is present, then <params> is
- // empty. The matched substring, including the semicolon character, is
- // removed from the parse string before continuing.
- final int semicolonIndex = indexOf(spec, ';', startIndex, endIndex);
-
- if (semicolonIndex >= 0) {
- if ((locationStartIndex >= 0) && (locationEndIndex < 0)) {
- // The substring of characters after the double-slash and up to, but not
- // including, the semicolon ";" character is the network location/login
- // (<net_loc>) of the URL.
- locationEndIndex = semicolonIndex;
- startIndex = semicolonIndex;
- }
- url.parameters_ = spec.substring(semicolonIndex + 1, endIndex);
- endIndex = semicolonIndex;
- }
- // Section 2.4.6: Parsing the Path
- //
- // After the above steps, all that is left of the parse string is the
- // URL <path> and the slash "/" that may precede it. Even though the
- // initial slash is not part of the URL path, the parser must remember
- // whether or not it was present so that later processes can
- // differentiate between relative and absolute paths. Often this is
- // done by simply storing the preceding slash along with the path.
- if ((locationStartIndex >= 0) && (locationEndIndex < 0)) {
- // The entire remaining parse string is assigned to the network
- // location/login (<net_loc>) of the URL.
- locationEndIndex = endIndex;
- }
- else if (startIndex < endIndex) {
- url.path_ = spec.substring(startIndex, endIndex);
- }
- // Set the network location/login (<net_loc>) of the URL.
- if ((locationStartIndex >= 0) && (locationEndIndex >= 0)) {
- url.location_ = spec.substring(locationStartIndex, locationEndIndex);
- }
- return url;
- }
-
- /*
- * Returns true if specified string is a valid scheme name.
- */
- private static boolean isValidScheme(final String scheme) {
- final int length = scheme.length();
- if (length < 1) {
- return false;
- }
- char c = scheme.charAt(0);
- if (!Character.isLetter(c)) {
- return false;
- }
- for (int i = 1; i < length; i++) {
- c = scheme.charAt(i);
- if (!Character.isLetterOrDigit(c) && c != '.' && c != '+' && c != '-') {
- return false;
- }
- }
- return true;
- }
-
- /**
- * Resolves a given relative URL against a base URL using the algorithm
- * depicted in RFC1808:
- *
- * Section 4: Resolving Relative URLs
- *
- * This section describes an example algorithm for resolving URLs within
- * a context in which the URLs may be relative, such that the result is
- * always a URL in absolute form. Although this algorithm cannot
- * guarantee that the resulting URL will equal that intended by the
- * original author, it does guarantee that any valid URL (relative or
- * absolute) can be consistently transformed to an absolute form given a
- * valid base URL.
- *
- * @param baseUrl The base URL in which to resolve the specification.
- * @param relativeUrl The relative URL to resolve against the base URL.
- * @return the resolved specification.
- */
- private static Url resolveUrl(final Url baseUrl, final String relativeUrl) {
- final Url url = parseUrl(relativeUrl);
- // Step 1: The base URL is established according to the rules of
- // Section 3. If the base URL is the empty string (unknown),
- // the embedded URL is interpreted as an absolute URL and
- // we are done.
- if (baseUrl == null) {
- return url;
- }
- // Step 2: Both the base and embedded URLs are parsed into their
- // component parts as described in Section 2.4.
- // a) If the embedded URL is entirely empty, it inherits the
- // entire base URL (i.e., is set equal to the base URL)
- // and we are done.
- if (relativeUrl.length() == 0) {
- return new Url(baseUrl);
- }
- // b) If the embedded URL starts with a scheme name, it is
- // interpreted as an absolute URL and we are done.
- if (url.scheme_ != null) {
- return url;
- }
- // c) Otherwise, the embedded URL inherits the scheme of
- // the base URL.
- url.scheme_ = baseUrl.scheme_;
- // Step 3: If the embedded URL's <net_loc> is non-empty, we skip to
- // Step 7. Otherwise, the embedded URL inherits the <net_loc>
- // (if any) of the base URL.
- if (url.location_ != null) {
- return url;
- }
- url.location_ = baseUrl.location_;
- // Step 4: If the embedded URL path is preceded by a slash "/", the
- // path is not relative and we skip to Step 7.
- if ((url.path_ != null) && ((url.path_.length() > 0) && ('/' == url.path_.charAt(0)))) {
- url.path_ = removeLeadingSlashPoints(url.path_);
- return url;
- }
- // Step 5: If the embedded URL path is empty (and not preceded by a
- // slash), then the embedded URL inherits the base URL path,
- // and
- if (url.path_ == null) {
- url.path_ = baseUrl.path_;
- // a) if the embedded URL's <params> is non-empty, we skip to
- // step 7; otherwise, it inherits the <params> of the base
- // URL (if any) and
- if (url.parameters_ != null) {
- return url;
- }
- url.parameters_ = baseUrl.parameters_;
- // b) if the embedded URL's <query> is non-empty, we skip to
- // step 7; otherwise, it inherits the <query> of the base
- // URL (if any) and we skip to step 7.
- if (url.query_ != null) {
- return url;
- }
- url.query_ = baseUrl.query_;
- return url;
- }
- // Step 6: The last segment of the base URL's path (anything
- // following the rightmost slash "/", or the entire path if no
- // slash is present) is removed and the embedded URL's path is
- // appended in its place. The following operations are
- // then applied, in order, to the new path:
- final String basePath = baseUrl.path_;
- String path = "";
-
- if (basePath != null) {
- final int lastSlashIndex = basePath.lastIndexOf('/');
-
- if (lastSlashIndex >= 0) {
- path = basePath.substring(0, lastSlashIndex + 1);
- }
- }
- else {
- path = "/";
- }
- path = path.concat(url.path_);
- // a) All occurrences of "./", where "." is a complete path
- // segment, are removed.
- int pathSegmentIndex;
-
- while ((pathSegmentIndex = path.indexOf("/./")) >= 0) {
- path = path.substring(0, pathSegmentIndex + 1).concat(path.substring(pathSegmentIndex + 3));
- }
- // b) If the path ends with "." as a complete path segment,
- // that "." is removed.
- if (path.endsWith("/.")) {
- path = path.substring(0, path.length() - 1);
- }
- // c) All occurrences of "<segment>/../", where <segment> is a
- // complete path segment not equal to "..", are removed.
- // Removal of these path segments is performed iteratively,
- // removing the leftmost matching pattern on each iteration,
- // until no matching pattern remains.
- while ((pathSegmentIndex = path.indexOf("/../")) > 0) {
- final String pathSegment = path.substring(0, pathSegmentIndex);
- final int slashIndex = pathSegment.lastIndexOf('/');
-
- if (slashIndex < 0) {
- continue;
- }
- if (!"..".equals(pathSegment.substring(slashIndex))) {
- path = path.substring(0, slashIndex + 1).concat(path.substring(pathSegmentIndex + 4));
- }
- }
- // d) If the path ends with "<segment>/..", where <segment> is a
- // complete path segment not equal to "..", that
- // "<segment>/.." is removed.
- if (path.endsWith("/..")) {
- final String pathSegment = path.substring(0, path.length() - 3);
- final int slashIndex = pathSegment.lastIndexOf('/');
-
- if (slashIndex >= 0) {
- path = path.substring(0, slashIndex + 1);
- }
- }
-
- path = removeLeadingSlashPoints(path);
-
- url.path_ = path;
- // Step 7: The resulting URL components, including any inherited from
- // the base URL, are recombined to give the absolute form of
- // the embedded URL.
- return url;
- }
-
- /**
- * "/.." at the beginning should be removed as browsers do (not in RFC)
- */
- private static String removeLeadingSlashPoints(String path) {
- while (path.startsWith("/..")) {
- path = path.substring(3);
- }
-
- return path;
- }
-
- /**
- * Class Url represents a Uniform Resource Locator.
- *
- * @author Martin Tamme
- */
- private static class Url {
-
- String scheme_;
- String location_;
- String path_;
- String parameters_;
- String query_;
- String fragment_;
-
- /**
- * Creates a Url object.
- */
- public Url() {
- }
-
- /**
- * Creates a Url object from the specified
- * Url object.
- *
- * @param url a Url object.
- */
- public Url(final Url url) {
- scheme_ = url.scheme_;
- location_ = url.location_;
- path_ = url.path_;
- parameters_ = url.parameters_;
- query_ = url.query_;
- fragment_ = url.fragment_;
- }
-
- /**
- * Returns a string representation of the Url object.
- *
- * @return a string representation of the Url object.
- */
- @Override
- public String toString() {
- final StringBuilder sb = new StringBuilder();
-
- if (scheme_ != null) {
- sb.append(scheme_);
- sb.append(':');
- }
- if (location_ != null) {
- sb.append("//");
- sb.append(location_);
- }
- if (path_ != null) {
- sb.append(path_);
- }
- if (parameters_ != null) {
- sb.append(';');
- sb.append(parameters_);
- }
- if (query_ != null) {
- sb.append('?');
- sb.append(query_);
- }
- if (fragment_ != null) {
- sb.append('#');
- sb.append(fragment_);
- }
- return sb.toString();
- }
- }
-}
diff --git a/src/test/java/com/commafeed/backend/feed/EstimateDirectionTest.java b/src/test/java/com/commafeed/backend/feed/EstimateDirectionTest.java
deleted file mode 100644
index 3f8bdda0..00000000
--- a/src/test/java/com/commafeed/backend/feed/EstimateDirectionTest.java
+++ /dev/null
@@ -1,65 +0,0 @@
-package com.commafeed.backend.feed;
-
-import org.junit.Assert;
-import org.junit.Test;
-
-import static com.commafeed.backend.feed.EstimateDirection.isRTL;
-
-/**
- * These tests are copied and simplified from GWT
- * https://github.com/google-web-toolkit/gwt/blob/master/user/test/com/google/gwt/i18n/shared/BidiUtilsTest.java Released under Apache 2.0
- * license, credit of it goes to Google and please use GWT wherever possible instead of this
- */
-public class EstimateDirectionTest {
-
- @Test
- public void testEstimateDirection() {
- Assert.assertEquals(false, isRTL(""));
- Assert.assertEquals(false, isRTL(" "));
- Assert.assertEquals(false, isRTL("! (...)"));
- Assert.assertEquals(false, isRTL("Pure Ascii content"));
- Assert.assertEquals(false, isRTL("-17.0%"));
- Assert.assertEquals(false, isRTL("http://foo/bar/"));
- Assert.assertEquals(false, isRTL("http://foo/bar/?s=\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0"
- + "\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0"
- + "\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0"));
- Assert.assertEquals(true, isRTL("\u05d0"));
- Assert.assertEquals(true, isRTL("\u05d0"));
- Assert.assertEquals(true, isRTL("9 \u05d0 -> 17.5, 23, 45, 19"));
- Assert.assertEquals(true, isRTL("http://foo/bar/ \u05d0 http://foo2/bar2/ http://foo3/bar3/"));
- Assert.assertEquals(true, isRTL("\u05d0\u05d9\u05df \u05de\u05de\u05e9 "
- + "\u05de\u05d4 \u05dc\u05e8\u05d0\u05d5\u05ea: "
- + "\u05dc\u05d0 \u05e6\u05d9\u05dc\u05de\u05ea\u05d9 "
- + "\u05d4\u05e8\u05d1\u05d4 \u05d5\u05d2\u05dd \u05d0"
- + "\u05dd \u05d4\u05d9\u05d9\u05ea\u05d9 \u05de\u05e6\u05dc"
- + "\u05dd, \u05d4\u05d9\u05d4 \u05e9\u05dd"));
- Assert.assertEquals(true, isRTL("\u05db\u05d0\u05df - http://geek.co.il/gallery/v/2007-06"
- + " - \u05d0\u05d9\u05df \u05de\u05de\u05e9 \u05de\u05d4 "
- + "\u05dc\u05e8\u05d0\u05d5\u05ea: \u05dc\u05d0 \u05e6"
- + "\u05d9\u05dc\u05de\u05ea\u05d9 \u05d4\u05e8\u05d1\u05d4 "
- + "\u05d5\u05d2\u05dd \u05d0\u05dd \u05d4\u05d9\u05d9\u05ea"
- + "\u05d9 \u05de\u05e6\u05dc\u05dd, \u05d4\u05d9\u05d4 "
- + "\u05e9\u05dd \u05d1\u05e2\u05d9\u05e7\u05e8 \u05d4\u05e8"
- + "\u05d1\u05d4 \u05d0\u05e0\u05e9\u05d9\u05dd. \u05de"
- + "\u05d4 \u05e9\u05db\u05df - \u05d0\u05e4\u05e9\u05e8 "
- + "\u05dc\u05e0\u05e6\u05dc \u05d0\u05ea \u05d4\u05d4 "
- + "\u05d3\u05d6\u05de\u05e0\u05d5\u05ea \u05dc\u05d4\u05e1"
- + "\u05ea\u05db\u05dc \u05e2\u05dc \u05db\u05de\u05d4 "
- + "\u05ea\u05de\u05d5\u05e0\u05d5\u05ea \u05de\u05e9\u05e2"
- + "\u05e9\u05e2\u05d5\u05ea \u05d9\u05e9\u05e0\u05d5\u05ea "
- + "\u05d9\u05d5\u05ea\u05e8 \u05e9\u05d9\u05e9 \u05dc"
- + "\u05d9 \u05d1\u05d0\u05ea\u05e8"));
- Assert.assertEquals(true, isRTL("CAPTCHA \u05de\u05e9\u05d5\u05db\u05dc\u05dc "
- + "\u05de\u05d3\u05d9?"));
- Assert.assertEquals(true, isRTL("Yes Prime Minister \u05e2\u05d3\u05db\u05d5\u05df. "
- + "\u05e9\u05d0\u05dc\u05d5 \u05d0\u05d5\u05ea\u05d9 "
- + "\u05de\u05d4 \u05d0\u05e0\u05d9 \u05e8\u05d5\u05e6"
- + "\u05d4 \u05de\u05ea\u05e0\u05d4 \u05dc\u05d7\u05d2"));
- Assert.assertEquals(true, isRTL("17.4.02 \u05e9\u05e2\u05d4:13-20 .15-00 .\u05dc\u05d0 "
- + "\u05d4\u05d9\u05d9\u05ea\u05d9 \u05db\u05d0\u05df."));
- Assert.assertEquals(true, isRTL("5710 5720 5730. \u05d4\u05d3\u05dc\u05ea. "
- + "\u05d4\u05e0\u05e9\u05d9\u05e7\u05d4"));
- Assert.assertEquals(true, isRTL("\u05d4\u05d3\u05dc\u05ea http://www.google.com "
- + "http://www.gmail.com"));
- }
-}
\ No newline at end of file