From 1f15cabfa5ce575cb028c2613f8e0ab18eff77ac Mon Sep 17 00:00:00 2001 From: Athou Date: Sun, 21 Apr 2013 18:38:28 +0200 Subject: [PATCH] strip down utf8 4-byte characters --- .../backend/services/FeedUpdateService.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/main/java/com/commafeed/backend/services/FeedUpdateService.java b/src/main/java/com/commafeed/backend/services/FeedUpdateService.java index 68d77152..b9059d44 100644 --- a/src/main/java/com/commafeed/backend/services/FeedUpdateService.java +++ b/src/main/java/com/commafeed/backend/services/FeedUpdateService.java @@ -102,6 +102,7 @@ public class FeedUpdateService { private String handleContent(String content) { if (StringUtils.isNotBlank(content)) { + /* Remove surrogate chars first, before the whitelist below is configured. */ content = trimUnicodeSurrogateCharacters(content); Whitelist whitelist = Whitelist.relaxed(); whitelist.addEnforcedAttribute("a", "target", "_blank"); @@ -114,4 +115,15 @@ public class FeedUpdateService { } return content; } + + /** Returns a copy of {@code text} with every UTF-16 surrogate char (high or low) removed. Supplementary code points are stored in a Java String as a high/low surrogate pair and take 4 bytes in UTF-8, so this deletes such characters entirely (no replacement char is inserted); all other chars are kept in their original order. {@code text} must not be null, since {@code text.length()} is called unconditionally. */ private String trimUnicodeSurrogateCharacters(String text) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < text.length(); i++) { + char ch = text.charAt(i); + /* Keep the char only when it is neither half of a surrogate pair; lone (unpaired) surrogates are dropped as well. */ if (!Character.isHighSurrogate(ch) && !Character.isLowSurrogate(ch)) { + sb.append(ch); + } + } + return sb.toString(); + } }