From faaf18f5cf64f41cef0c674f6a4750e9df6ae186 Mon Sep 17 00:00:00 2001
From: Athou
Date: Mon, 22 Apr 2013 13:20:17 +0200
Subject: [PATCH] trim unicode characters from url if any

---
Review notes (patch commentary, not part of the commit message):
 * FeedUtils.trimUnicodeSurrogateCharacters() now returns null for a null
   argument. FeedRefreshWorker passes fetchedFeed.getLink() to it without a
   null check, so a feed without a link would otherwise throw a
   NullPointerException.
 * Javadoc was added to FeedUtils. The FeedUtils hunk header and the diffstat
   were updated to match; the blob id on the FeedUtils.java "index" line was
   not recomputed.
 * Line breaks, blank context lines and indentation were reconstructed from the
   hunk line counts; apply with --ignore-whitespace if context does not match.

 .../backend/feeds/FeedRefreshWorker.java      |  3 +-
 .../commafeed/backend/feeds/FeedUtils.java    | 62 +++++++++++++++++++
 .../backend/services/FeedUpdateService.java   | 35 ++---------------
 3 files changed, 67 insertions(+), 33 deletions(-)
 create mode 100644 src/main/java/com/commafeed/backend/feeds/FeedUtils.java

diff --git a/src/main/java/com/commafeed/backend/feeds/FeedRefreshWorker.java b/src/main/java/com/commafeed/backend/feeds/FeedRefreshWorker.java
index 0b4c56cd..14184598 100644
--- a/src/main/java/com/commafeed/backend/feeds/FeedRefreshWorker.java
+++ b/src/main/java/com/commafeed/backend/feeds/FeedRefreshWorker.java
@@ -123,7 +123,8 @@ public class FeedRefreshWorker {
 		feed.setDisabledUntil(disabledUntil);
 
 		if (fetchedFeed != null) {
-			feed.setLink(fetchedFeed.getLink());
+			feed.setLink(FeedUtils.trimUnicodeSurrogateCharacters(fetchedFeed
+					.getLink()));
 			feed.setLastModifiedHeader(fetchedFeed.getLastModifiedHeader());
 			feed.setEtagHeader(fetchedFeed.getEtagHeader());
 			feedUpdateService.updateEntries(feed, fetchedFeed.getEntries());
diff --git a/src/main/java/com/commafeed/backend/feeds/FeedUtils.java b/src/main/java/com/commafeed/backend/feeds/FeedUtils.java
new file mode 100644
index 00000000..a8966c5f
--- /dev/null
+++ b/src/main/java/com/commafeed/backend/feeds/FeedUtils.java
@@ -0,0 +1,62 @@
+package com.commafeed.backend.feeds;
+
+import org.apache.commons.lang.StringUtils;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document.OutputSettings;
+import org.jsoup.nodes.Entities.EscapeMode;
+import org.jsoup.safety.Whitelist;
+
+/**
+ * Static helpers for cleaning text taken from fetched feeds.
+ */
+public class FeedUtils {
+
+	/**
+	 * Sanitizes HTML content: strips surrogate characters, then cleans the
+	 * markup with jsoup's relaxed whitelist, extended to allow iframe tags and
+	 * to enforce target="_blank" on links.
+	 *
+	 * @param content
+	 *            the markup to clean, may be null or blank
+	 * @return the cleaned markup, or the input unchanged if it is null or blank
+	 */
+	public static String handleContent(String content) {
+		if (StringUtils.isNotBlank(content)) {
+			content = trimUnicodeSurrogateCharacters(content);
+			Whitelist whitelist = Whitelist.relaxed();
+			whitelist.addEnforcedAttribute("a", "target", "_blank");
+
+			whitelist.addTags("iframe");
+			whitelist.addAttributes("iframe", "src", "height", "width",
+					"allowfullscreen", "frameborder");
+
+			content = Jsoup.clean(content, "", whitelist,
+					new OutputSettings().escapeMode(EscapeMode.base));
+		}
+		return content;
+	}
+
+	/**
+	 * Removes every UTF-16 surrogate code unit (high or low) from the text,
+	 * which drops all characters outside the Basic Multilingual Plane.
+	 *
+	 * @param text
+	 *            the text to filter, may be null
+	 * @return the filtered text, or null if the input was null
+	 */
+	public static String trimUnicodeSurrogateCharacters(String text) {
+		if (text == null) {
+			// FeedRefreshWorker passes the feed link through unchecked; a
+			// missing link must not end in a NullPointerException.
+			return null;
+		}
+		StringBuilder sb = new StringBuilder(text.length());
+		for (int i = 0; i < text.length(); i++) {
+			char ch = text.charAt(i);
+			if (!Character.isHighSurrogate(ch) && !Character.isLowSurrogate(ch)) {
+				sb.append(ch);
+			}
+		}
+		return sb.toString();
+	}
+}
diff --git a/src/main/java/com/commafeed/backend/services/FeedUpdateService.java b/src/main/java/com/commafeed/backend/services/FeedUpdateService.java
index b9059d44..341ed2c6 100644
--- a/src/main/java/com/commafeed/backend/services/FeedUpdateService.java
+++ b/src/main/java/com/commafeed/backend/services/FeedUpdateService.java
@@ -10,15 +10,12 @@ import javax.inject.Inject;
 
 import org.apache.commons.lang.ObjectUtils;
 import org.apache.commons.lang.StringUtils;
-import org.jsoup.Jsoup;
-import org.jsoup.nodes.Document.OutputSettings;
-import org.jsoup.nodes.Entities.EscapeMode;
-import org.jsoup.safety.Whitelist;
 
 import com.commafeed.backend.dao.FeedDAO;
 import com.commafeed.backend.dao.FeedEntryDAO;
 import com.commafeed.backend.dao.FeedEntryStatusDAO;
 import com.commafeed.backend.dao.FeedSubscriptionDAO;
+import com.commafeed.backend.feeds.FeedUtils;
 import com.commafeed.backend.model.Feed;
 import com.commafeed.backend.model.FeedEntry;
 import com.commafeed.backend.model.FeedEntryContent;
@@ -60,9 +57,9 @@ public class FeedUpdateService {
 		}
 		if (foundEntry == null) {
 			FeedEntryContent content = entry.getContent();
-			content.setContent(handleContent(content.getContent()));
+			content.setContent(FeedUtils.handleContent(content.getContent()));
 
-			String title = handleContent(content.getTitle());
+			String title = FeedUtils.handleContent(content.getTitle());
 			if (title != null) {
 				content.setTitle(title.substring(0,
 						Math.min(2048, title.length())));
@@ -100,30 +97,4 @@ public class FeedUpdateService {
 
 	}
 
-	private String handleContent(String content) {
-		if (StringUtils.isNotBlank(content)) {
-			content = trimUnicodeSurrogateCharacters(content);
-			Whitelist whitelist = Whitelist.relaxed();
-			whitelist.addEnforcedAttribute("a", "target", "_blank");
-
-			whitelist.addTags("iframe");
-			whitelist.addAttributes("iframe", "src", "height", "width",
-					"allowfullscreen", "frameborder");
-
-			content = Jsoup.clean(content, "", whitelist,
-					new OutputSettings().escapeMode(EscapeMode.base));
-		}
-		return content;
-	}
-
-	private String trimUnicodeSurrogateCharacters(String text) {
-		StringBuilder sb = new StringBuilder();
-		for (int i = 0; i < text.length(); i++) {
-			char ch = text.charAt(i);
-			if (!Character.isHighSurrogate(ch) && !Character.isLowSurrogate(ch)) {
-				sb.append(ch);
-			}
-		}
-		return sb.toString();
-	}
 }