feedburner special handling for normalization

This commit is contained in:
Athou
2013-07-03 11:42:36 +02:00
parent e2f1774e1d
commit aab83043bd
2 changed files with 27 additions and 1 deletion

View File

@@ -7,6 +7,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.ObjectUtils;
@@ -99,13 +100,32 @@ public class FeedUtils {
if (normalized == null) {
return url;
}
// convert to lower case: the resulting url may no longer work in some
// cases, but that does not matter here because normalized urls are only
// compared with each other to avoid duplicates
normalized = normalized.toLowerCase();
// store all urls as http
if (normalized.startsWith("https")) {
normalized = "http" + normalized.substring(5);
}
// remove the www. part
normalized = normalized.replace("//www.", "//");
normalized = normalized.replace("feeds2.feedburner.com", "feeds.feedburner.com");
// feedproxy redirects to feedburner
normalized = normalized.replace("feedproxy.google.com",
"feeds.feedburner.com");
// feedburner feeds have a special treatment
if (normalized.contains("feedburner.com")) {
normalized = normalized.replace("feeds2.feedburner.com",
"feeds.feedburner.com");
normalized = normalized.split(Pattern.quote("?"))[0];
normalized = StringUtils.removeEnd(normalized, "/");
}
return normalized;
}

View File

@@ -18,6 +18,9 @@ public class FeedUtilsTest {
String urlc1 = "http://feeds.feedburner.com/Frandroid";
String urlc2 = "http://feeds2.feedburner.com/frandroid";
String urlc3 = "http://feedproxy.google.com/frandroid";
String urlc4 = "http://feeds.feedburner.com/Frandroid/";
String urlc5 = "http://feeds.feedburner.com/Frandroid?format=rss";
Assert.assertEquals(FeedUtils.normalizeURL(urla1), FeedUtils.normalizeURL(urla2));
Assert.assertEquals(FeedUtils.normalizeURL(urla1), FeedUtils.normalizeURL(urla3));
@@ -27,6 +30,9 @@ public class FeedUtilsTest {
Assert.assertEquals(FeedUtils.normalizeURL(urlb1), FeedUtils.normalizeURL(urlb2));
Assert.assertEquals(FeedUtils.normalizeURL(urlc1), FeedUtils.normalizeURL(urlc2));
Assert.assertEquals(FeedUtils.normalizeURL(urlc1), FeedUtils.normalizeURL(urlc3));
Assert.assertEquals(FeedUtils.normalizeURL(urlc1), FeedUtils.normalizeURL(urlc4));
Assert.assertEquals(FeedUtils.normalizeURL(urlc1), FeedUtils.normalizeURL(urlc5));
}
}