diff --git a/commafeed-server/pom.xml b/commafeed-server/pom.xml
index 25720d2c..82ddda2e 100644
--- a/commafeed-server/pom.xml
+++ b/commafeed-server/pom.xml
@@ -426,15 +426,9 @@
0.9.30
- edu.uci.ics
- crawler4j
- 3.5
-
-
- log4j
- log4j
-
-
+ org.netpreserve
+ urlcanon
+ 0.4.0
com.google.gwt
diff --git a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedUtils.java b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedUtils.java
index 5de8adf8..2f144d39 100644
--- a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedUtils.java
+++ b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedUtils.java
@@ -28,6 +28,8 @@ import org.jsoup.nodes.Entities.EscapeMode;
import org.jsoup.safety.Cleaner;
import org.jsoup.safety.Safelist;
import org.jsoup.select.Elements;
+import org.netpreserve.urlcanon.Canonicalizer;
+import org.netpreserve.urlcanon.ParsedUrl;
import org.w3c.css.sac.InputSource;
import org.w3c.dom.css.CSSStyleDeclaration;
@@ -41,7 +43,6 @@ import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;
import com.steadystate.css.parser.CSSOMParser;
-import edu.uci.ics.crawler4j.url.URLCanonicalizer;
import lombok.extern.slf4j.Slf4j;
/**
@@ -179,7 +180,10 @@ public class FeedUtils {
if (url == null) {
return null;
}
- String normalized = URLCanonicalizer.getCanonicalURL(url);
+
+ ParsedUrl parsedUrl = ParsedUrl.parseUrl(url);
+ Canonicalizer.AGGRESSIVE.canonicalize(parsedUrl);
+ String normalized = parsedUrl.toString();
if (normalized == null) {
normalized = url;
}