Initial feed URL canonicalizer setup (requires Java 1.7)

This commit is contained in:
Athou
2013-07-02 12:51:20 +02:00
parent 6e2d331286
commit d8a9022c97
4 changed files with 40 additions and 3 deletions

View File

@@ -36,6 +36,8 @@ import com.google.gwt.i18n.client.HasDirection.Direction;
import com.google.gwt.i18n.shared.BidiUtils;
import com.steadystate.css.parser.CSSOMParser;
import edu.uci.ics.crawler4j.url.URLCanonicalizer;
public class FeedUtils {
protected static Logger log = LoggerFactory.getLogger(FeedUtils.class);
@@ -85,6 +87,13 @@ public class FeedUtils {
return encoding;
}
/**
 * Canonicalizes a URL by delegating to crawler4j's {@code URLCanonicalizer}.
 *
 * @param url the URL to canonicalize; may be {@code null}
 * @return {@code null} when {@code url} is {@code null}; otherwise the value
 *         returned by {@code URLCanonicalizer.getCanonicalURL(url)}
 */
public static String normalizeURL(String url) {
	// Null is passed through untouched so the canonicalizer is never invoked with a null argument.
	return url == null ? null : URLCanonicalizer.getCanonicalURL(url);
}
/**
* Extract the declared encoding from the xml
*/