Initial feed URL canonicalizer setup (requires Java 1.7)

This commit is contained in:
Athou
2013-07-02 12:51:20 +02:00
parent 6e2d331286
commit d8a9022c97
4 changed files with 40 additions and 3 deletions

View File

@@ -36,6 +36,8 @@ import com.google.gwt.i18n.client.HasDirection.Direction;
import com.google.gwt.i18n.shared.BidiUtils;
import com.steadystate.css.parser.CSSOMParser;
import edu.uci.ics.crawler4j.url.URLCanonicalizer;
public class FeedUtils {
protected static Logger log = LoggerFactory.getLogger(FeedUtils.class);
@@ -85,6 +87,13 @@ public class FeedUtils {
return encoding;
}
/**
 * Returns the canonical form of a feed URL.
 *
 * @param url the URL to canonicalize; may be {@code null}
 * @return {@code null} when {@code url} is {@code null}, otherwise whatever
 *         {@link URLCanonicalizer#getCanonicalURL(String)} returns for it
 */
public static String normalizeURL(String url) {
    // A null input is passed straight through and never reaches the canonicalizer.
    return (url != null) ? URLCanonicalizer.getCanonicalURL(url) : null;
}
/**
* Extract the declared encoding from the xml
*/

View File

@@ -0,0 +1,23 @@
package com.commafeed.backend.feeds;
import org.junit.Assert;
import org.junit.Test;
/**
 * Unit tests for {@link FeedUtils}.
 */
public class FeedUtilsTest {

    /**
     * Checks that URLs which differ only in host-name case, query-parameter
     * order, or percent-encoding of a query value normalize to the same string.
     */
    @Test
    public void testNormalization() {
        // Group "a": same resource, varying host case and parameter order.
        String urla1 = "http://example.com/hello?a=1&b=2";
        String urla2 = "http://EXAmPLe.com/hello?a=1&b=2";
        String urla3 = "http://example.com/hello?b=2&a=1";

        // Group "b": same resource, nested URL parameter encoded vs. raw.
        String urlb1 = "http://ftr.fivefilters.org/makefulltextfeed.php?url=http%3A%2F%2Ffeeds.howtogeek.com%2FHowToGeek&max=10&summary=1";
        String urlb2 = "http://ftr.fivefilters.org/makefulltextfeed.php?url=http://feeds.howtogeek.com/HowToGeek&max=10&summary=1";

        String normalizedA = FeedUtils.normalizeURL(urla1);
        String normalizedB = FeedUtils.normalizeURL(urlb1);

        // normalizeURL can return null. Without these guards, two null results
        // would compare equal and the test would pass without verifying
        // any canonicalization at all.
        Assert.assertNotNull(normalizedA);
        Assert.assertNotNull(normalizedB);

        Assert.assertEquals(normalizedA, FeedUtils.normalizeURL(urla2));
        Assert.assertEquals(normalizedA, FeedUtils.normalizeURL(urla3));
        Assert.assertEquals(normalizedB, FeedUtils.normalizeURL(urlb2));
    }
}