diff --git a/README.md b/README.md index 92010cc9..3efb9878 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ Deployment on your own server ----------------------------- For storage, you can either use an embedded HSQLDB database or an external MySQL or PostgreSQL database. -You also need Maven 3.x (and a Java JDK) installed in order to build the application. +You also need Maven 3.x (and a Java 1.7+ JDK) installed in order to build the application. To install maven and openjdk on Ubuntu, issue the following commands diff --git a/pom.xml b/pom.xml index 8e4d0b4b..002ea6d9 100644 --- a/pom.xml +++ b/pom.xml @@ -60,8 +60,8 @@ maven-compiler-plugin 3.1 - 1.6 - 1.6 + 1.7 + 1.7 -proc:none @@ -267,6 +267,11 @@ + + edu.uci.ics + crawler4j + 3.5 + org.jdom jdom diff --git a/src/main/java/com/commafeed/backend/feeds/FeedUtils.java b/src/main/java/com/commafeed/backend/feeds/FeedUtils.java index c124125b..2c379d94 100644 --- a/src/main/java/com/commafeed/backend/feeds/FeedUtils.java +++ b/src/main/java/com/commafeed/backend/feeds/FeedUtils.java @@ -36,6 +36,8 @@ import com.google.gwt.i18n.client.HasDirection.Direction; import com.google.gwt.i18n.shared.BidiUtils; import com.steadystate.css.parser.CSSOMParser; +import edu.uci.ics.crawler4j.url.URLCanonicalizer; + public class FeedUtils { protected static Logger log = LoggerFactory.getLogger(FeedUtils.class); @@ -85,6 +87,13 @@ public class FeedUtils { return encoding; } + /** Canonicalizes a URL by delegating to crawler4j's URLCanonicalizer.getCanonicalURL. A null url short-circuits and yields null without calling the canonicalizer. NOTE(review): what getCanonicalURL returns for malformed input is not visible here; confirm whether callers must handle a null result. @param url the URL to normalize, may be null @return the value returned by URLCanonicalizer.getCanonicalURL(url), or null when url is null */ public static String normalizeURL(String url) { + if (url == null) { + return null; + } + return URLCanonicalizer.getCanonicalURL(url); + } + /** * Extract the declared encoding from the xml */ diff --git a/src/test/java/com/commafeed/backend/feeds/FeedUtilsTest.java b/src/test/java/com/commafeed/backend/feeds/FeedUtilsTest.java new file mode 100644 index 00000000..11854dff --- /dev/null +++ b/src/test/java/com/commafeed/backend/feeds/FeedUtilsTest.java @@ -0,0 +1,23 @@ +package com.commafeed.backend.feeds; + +import org.junit.Assert; +import
org.junit.Test; + /** Unit test for FeedUtils.normalizeURL: checks that URLs differing only in superficial ways normalize to the same string. */ +public class FeedUtilsTest { + /** Asserts pairwise equality of the normalized forms of two groups of equivalent URLs. */ + @Test + public void testNormalization() { + /* Group a: same URL, with a mixed-case host (urla2) and with reordered query parameters (urla3). */ String urla1 = "http://example.com/hello?a=1&b=2"; + String urla2 = "http://EXAmPLe.com/hello?a=1&b=2"; + String urla3 = "http://example.com/hello?b=2&a=1"; + /* Group b: the nested url query value is percent-encoded in urlb1 and left raw in urlb2. */ + String urlb1 = "http://ftr.fivefilters.org/makefulltextfeed.php?url=http%3A%2F%2Ffeeds.howtogeek.com%2FHowToGeek&max=10&summary=1"; + String urlb2 = "http://ftr.fivefilters.org/makefulltextfeed.php?url=http://feeds.howtogeek.com/HowToGeek&max=10&summary=1"; + /* NOTE(review): these checks compare normalized values only with each other, so they would also pass if both sides came back null; consider asserting non-null as well. */ + Assert.assertEquals(FeedUtils.normalizeURL(urla1), FeedUtils.normalizeURL(urla2)); + Assert.assertEquals(FeedUtils.normalizeURL(urla1), FeedUtils.normalizeURL(urla3)); + + Assert.assertEquals(FeedUtils.normalizeURL(urlb1), FeedUtils.normalizeURL(urlb2)); + + } +}