Initial feed URL canonicalizer setup (requires Java 1.7)

This commit is contained in:
Athou
2013-07-02 12:51:20 +02:00
parent 6e2d331286
commit d8a9022c97
4 changed files with 40 additions and 3 deletions

View File

@@ -36,7 +36,7 @@ Deployment on your own server
----------------------------- -----------------------------
For storage, you can either use an embedded HSQLDB database or an external MySQL or PostgreSQL database. For storage, you can either use an embedded HSQLDB database or an external MySQL or PostgreSQL database.
You also need Maven 3.x (and a Java JDK) installed in order to build the application. You also need Maven 3.x (and a Java 1.7+ JDK) installed in order to build the application.
To install maven and openjdk on Ubuntu, issue the following commands To install maven and openjdk on Ubuntu, issue the following commands

View File

@@ -60,8 +60,8 @@
<artifactId>maven-compiler-plugin</artifactId> <artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version> <version>3.1</version>
<configuration> <configuration>
<source>1.6</source> <source>1.7</source>
<target>1.6</target> <target>1.7</target>
<compilerArgument>-proc:none</compilerArgument> <compilerArgument>-proc:none</compilerArgument>
</configuration> </configuration>
</plugin> </plugin>
@@ -267,6 +267,11 @@
</exclusion> </exclusion>
</exclusions> </exclusions>
</dependency> </dependency>
<!-- crawler4j: supplies edu.uci.ics.crawler4j.url.URLCanonicalizer, which FeedUtils.normalizeURL delegates to. -->
<dependency>
<groupId>edu.uci.ics</groupId>
<artifactId>crawler4j</artifactId>
<version>3.5</version>
</dependency>
<dependency> <dependency>
<groupId>org.jdom</groupId> <groupId>org.jdom</groupId>
<artifactId>jdom</artifactId> <artifactId>jdom</artifactId>

View File

@@ -36,6 +36,8 @@ import com.google.gwt.i18n.client.HasDirection.Direction;
import com.google.gwt.i18n.shared.BidiUtils; import com.google.gwt.i18n.shared.BidiUtils;
import com.steadystate.css.parser.CSSOMParser; import com.steadystate.css.parser.CSSOMParser;
import edu.uci.ics.crawler4j.url.URLCanonicalizer;
public class FeedUtils { public class FeedUtils {
protected static Logger log = LoggerFactory.getLogger(FeedUtils.class); protected static Logger log = LoggerFactory.getLogger(FeedUtils.class);
@@ -85,6 +87,13 @@ public class FeedUtils {
return encoding; return encoding;
} }
/**
 * Normalizes a feed URL by delegating to crawler4j's URL canonicalizer.
 *
 * @param url
 *            the URL to normalize, may be {@code null}
 * @return {@code null} when {@code url} is {@code null}; otherwise whatever
 *         {@link URLCanonicalizer#getCanonicalURL(String)} returns for it
 */
public static String normalizeURL(String url) {
    return url == null ? null : URLCanonicalizer.getCanonicalURL(url);
}
/** /**
* Extract the declared encoding from the xml * Extract the declared encoding from the xml
*/ */

View File

@@ -0,0 +1,23 @@
package com.commafeed.backend.feeds;
import org.junit.Assert;
import org.junit.Test;
/**
 * Unit tests for {@link FeedUtils}.
 */
public class FeedUtilsTest {

    /**
     * Checks that URLs which differ only in host case, in query parameter
     * order, or in percent-encoding of a query value normalize to the same
     * string.
     */
    @Test
    public void testNormalization() {
        // Same resource: reference form, mixed-case host, reordered query parameters.
        String urla1 = "http://example.com/hello?a=1&b=2";
        String urla2 = "http://EXAmPLe.com/hello?a=1&b=2";
        String urla3 = "http://example.com/hello?b=2&a=1";

        // Same resource: nested URL passed percent-encoded vs. raw in the query string.
        String urlb1 = "http://ftr.fivefilters.org/makefulltextfeed.php?url=http%3A%2F%2Ffeeds.howtogeek.com%2FHowToGeek&max=10&summary=1";
        String urlb2 = "http://ftr.fivefilters.org/makefulltextfeed.php?url=http://feeds.howtogeek.com/HowToGeek&max=10&summary=1";

        String normalizedA = FeedUtils.normalizeURL(urla1);
        String normalizedB = FeedUtils.normalizeURL(urlb1);

        // assertEquals(null, null) succeeds, so without these guards the test
        // would pass even if normalization produced no result at all.
        Assert.assertNotNull(normalizedA);
        Assert.assertNotNull(normalizedB);

        Assert.assertEquals(normalizedA, FeedUtils.normalizeURL(urla2));
        Assert.assertEquals(normalizedA, FeedUtils.normalizeURL(urla3));
        Assert.assertEquals(normalizedB, FeedUtils.normalizeURL(urlb2));
    }
}