Handle invalid feeds that contain unescaped HTML entities

This commit is contained in:
Athou
2014-03-01 18:19:49 +01:00
parent e727ee414b
commit f4f3d9ca48
3 changed files with 275 additions and 0 deletions

View File

@@ -129,6 +129,14 @@ public class FeedUtils {
}
return encoding;
}
/**
 * Replaces every occurrence of each key of {@code HtmlEntities.NUMERIC_MAPPING} found in the
 * given text with the value that key maps to.
 * <p>
 * Judging by the names involved, the mapping goes from named HTML entities to their numeric
 * equivalents, so that feeds using entities unknown to an XML parser can still be parsed.
 * NOTE(review): the exact contents of the mapping are defined in {@code HtmlEntities}; confirm there.
 *
 * @param source
 *            the text to process, may be {@code null}
 * @return the text with all mapped entities replaced, or {@code null} if {@code source} was {@code null}
 */
public static String replaceHtmlEntitiesWithNumericEntities(String source) {
    if (source == null) {
        // nothing to replace; avoids a NullPointerException on the first replace() call
        return null;
    }
    String result = source;
    // iterate over entries so each key/value pair is read in one pass instead of a get() per key
    for (java.util.Map.Entry<String, String> mapping : HtmlEntities.NUMERIC_MAPPING.entrySet()) {
        result = result.replace(mapping.getKey(), mapping.getValue());
    }
    return result;
}
/**
* Normalize the url. The resulting url is not meant to be fetched but rather used as a means to identify a feed and avoid duplicates