Initial feed URL canonicalizer setup (requires Java 1.7)

This commit is contained in:
Athou
2013-07-02 12:51:20 +02:00
parent 6e2d331286
commit d8a9022c97
4 changed files with 40 additions and 3 deletions

View File

@@ -36,7 +36,7 @@ Deployment on your own server
-----------------------------
For storage, you can either use an embedded HSQLDB database or an external MySQL or PostgreSQL database.
You also need Maven 3.x (and a Java JDK) installed in order to build the application.
You also need Maven 3.x (and a Java 1.7+ JDK) installed in order to build the application.
To install Maven and OpenJDK on Ubuntu, issue the following commands:

View File

@@ -60,8 +60,8 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
<configuration>
<source>1.6</source>
<target>1.6</target>
<source>1.7</source>
<target>1.7</target>
<compilerArgument>-proc:none</compilerArgument>
</configuration>
</plugin>
@@ -267,6 +267,11 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>edu.uci.ics</groupId>
<artifactId>crawler4j</artifactId>
<version>3.5</version>
</dependency>
<dependency>
<groupId>org.jdom</groupId>
<artifactId>jdom</artifactId>

View File

@@ -36,6 +36,8 @@ import com.google.gwt.i18n.client.HasDirection.Direction;
import com.google.gwt.i18n.shared.BidiUtils;
import com.steadystate.css.parser.CSSOMParser;
import edu.uci.ics.crawler4j.url.URLCanonicalizer;
public class FeedUtils {
protected static Logger log = LoggerFactory.getLogger(FeedUtils.class);
@@ -85,6 +87,13 @@ public class FeedUtils {
return encoding;
}
/**
 * Returns the canonical form of the given URL.
 * <p>
 * A {@code null} input is passed straight through; any other value is
 * handed to {@link URLCanonicalizer#getCanonicalURL(String)}.
 * <p>
 * NOTE(review): the canonicalizer itself may return {@code null} for input
 * it cannot parse - confirm against the crawler4j version in use before
 * relying on a non-null result.
 *
 * @param url the URL to normalize, may be {@code null}
 * @return the canonicalized URL, or {@code null} when {@code url} is {@code null}
 */
public static String normalizeURL(String url) {
	// Guard first so the canonicalizer is never invoked with null.
	return (url == null) ? null : URLCanonicalizer.getCanonicalURL(url);
}
/**
* Extract the declared encoding from the xml
*/

View File

@@ -0,0 +1,23 @@
package com.commafeed.backend.feeds;
import org.junit.Assert;
import org.junit.Test;
/**
 * Unit tests for {@link FeedUtils}.
 */
public class FeedUtilsTest {

	/**
	 * Verifies that URLs which differ only in host letter case, in query
	 * parameter order, or in percent-encoding of a query value are
	 * normalized to the same string by {@link FeedUtils#normalizeURL(String)}.
	 */
	@Test
	public void testNormalization() {
		// Group A: same resource, varying host case and parameter order.
		String reference = "http://example.com/hello?a=1&b=2";
		String mixedCaseHost = "http://EXAmPLe.com/hello?a=1&b=2";
		String reorderedQuery = "http://example.com/hello?b=2&a=1";

		// Group B: nested URL passed as a query value, encoded vs. raw.
		String encodedNestedUrl = "http://ftr.fivefilters.org/makefulltextfeed.php?url=http%3A%2F%2Ffeeds.howtogeek.com%2FHowToGeek&max=10&summary=1";
		String rawNestedUrl = "http://ftr.fivefilters.org/makefulltextfeed.php?url=http://feeds.howtogeek.com/HowToGeek&max=10&summary=1";

		assertSameNormalizedForm(reference, mixedCaseHost);
		assertSameNormalizedForm(reference, reorderedQuery);
		assertSameNormalizedForm(encodedNestedUrl, rawNestedUrl);
	}

	/**
	 * Asserts that both URLs normalize to equal strings.
	 *
	 * @param first  URL whose normalized form is used as the expected value
	 * @param second URL whose normalized form is used as the actual value
	 */
	private static void assertSameNormalizedForm(String first, String second) {
		Assert.assertEquals(FeedUtils.normalizeURL(first), FeedUtils.normalizeURL(second));
	}
}