From 5be7c487ad2fceec69dbef125f0346344a65f5cd Mon Sep 17 00:00:00 2001 From: Athou Date: Sun, 14 Apr 2013 18:51:12 +0200 Subject: [PATCH] optimizations --- .../commafeed/backend/feeds/FeedFetcher.java | 15 ++++---- .../commafeed/backend/feeds/FeedParser.java | 35 ++++++------------- .../backend/feeds/FeedRefreshWorker.java | 2 +- .../backend/services/FeedUpdateService.java | 24 +++++++++++++ .../rest/resources/SubscriptionsREST.java | 3 +- src/main/webapp/js/controllers.js | 1 + 6 files changed, 47 insertions(+), 33 deletions(-) diff --git a/src/main/java/com/commafeed/backend/feeds/FeedFetcher.java b/src/main/java/com/commafeed/backend/feeds/FeedFetcher.java index 811952f1..f1f75268 100644 --- a/src/main/java/com/commafeed/backend/feeds/FeedFetcher.java +++ b/src/main/java/com/commafeed/backend/feeds/FeedFetcher.java @@ -26,16 +26,19 @@ public class FeedFetcher { @Inject HttpGetter getter; - public Feed fetch(String feedUrl) throws FeedException, - ClientProtocolException, IOException { + public Feed fetch(String feedUrl, boolean extractFeedUrlFromHtml) + throws FeedException, ClientProtocolException, IOException { log.debug("Fetching feed {}", feedUrl); Feed feed = null; byte[] content = getter.getBinary(feedUrl); - String extractedUrl = extractFeedUrl(StringUtils.newStringUtf8(content)); - if (extractedUrl != null) { - content = getter.getBinary(extractedUrl); - feedUrl = extractedUrl; + if (extractFeedUrlFromHtml) { + String extractedUrl = extractFeedUrl(StringUtils + .newStringUtf8(content)); + if (extractedUrl != null) { + content = getter.getBinary(extractedUrl); + feedUrl = extractedUrl; + } } feed = parser.parse(feedUrl, content); diff --git a/src/main/java/com/commafeed/backend/feeds/FeedParser.java b/src/main/java/com/commafeed/backend/feeds/FeedParser.java index 93fbf1c7..9bfa55dc 100644 --- a/src/main/java/com/commafeed/backend/feeds/FeedParser.java +++ b/src/main/java/com/commafeed/backend/feeds/FeedParser.java @@ -8,10 +8,6 @@ import java.util.List; import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.SystemUtils; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document.OutputSettings; -import org.jsoup.nodes.Entities.EscapeMode; -import org.jsoup.safety.Whitelist; import org.xml.sax.InputSource; import com.commafeed.backend.model.Feed; @@ -29,6 +25,12 @@ import com.sun.syndication.io.SyndFeedInput; public class FeedParser { + private static final Function CONTENT_TO_STRING = new Function() { + public String apply(SyndContent content) { + return content.getValue(); + } + }; + @SuppressWarnings("unchecked") public Feed parse(String feedUrl, byte[] xml) throws FeedException { Feed feed = new Feed(); @@ -55,8 +57,8 @@ public class FeedParser { entry.setUpdated(getUpdateDate(item)); FeedEntryContent content = new FeedEntryContent(); - content.setContent(handleContent(getContent(item))); - content.setTitle(handleContent(item.getTitle())); + content.setContent(getContent(item)); + content.setTitle(item.getTitle()); SyndEnclosure enclosure = (SyndEnclosure) Iterables.getFirst( item.getEnclosures(), null); if (enclosure != null) { @@ -94,27 +96,10 @@ public class FeedParser { .getDescription().getValue(); } else { content = StringUtils.join(Collections2.transform( - item.getContents(), new Function() { - public String apply(SyndContent content) { - return content.getValue(); - } - }), SystemUtils.LINE_SEPARATOR); + item.getContents(), CONTENT_TO_STRING), + SystemUtils.LINE_SEPARATOR); } return content; } - private String handleContent(String content) { - if (StringUtils.isNotBlank(content)) { - Whitelist whitelist = Whitelist.relaxed(); - whitelist.addEnforcedAttribute("a", "target", "_blank"); - - whitelist.addTags("iframe"); - whitelist.addAttributes("iframe", "src", "height", "width", - "allowfullscreen", "frameborder"); - - content = Jsoup.clean(content, "", whitelist, - new OutputSettings().escapeMode(EscapeMode.extended)); - } - return content; - } } diff --git a/src/main/java/com/commafeed/backend/feeds/FeedRefreshWorker.java b/src/main/java/com/commafeed/backend/feeds/FeedRefreshWorker.java index a91e4611..c5fbdcf9 100644 --- a/src/main/java/com/commafeed/backend/feeds/FeedRefreshWorker.java +++ b/src/main/java/com/commafeed/backend/feeds/FeedRefreshWorker.java @@ -81,7 +81,7 @@ public class FeedRefreshWorker { Feed fetchedFeed = null; try { - fetchedFeed = fetcher.fetch(feed.getUrl()); + fetchedFeed = fetcher.fetch(feed.getUrl(), false); } catch (Exception e) { message = "Unable to refresh feed " + feed.getUrl() + " : " + e.getMessage(); diff --git a/src/main/java/com/commafeed/backend/services/FeedUpdateService.java b/src/main/java/com/commafeed/backend/services/FeedUpdateService.java index 04b46d79..f2d8c28a 100644 --- a/src/main/java/com/commafeed/backend/services/FeedUpdateService.java +++ b/src/main/java/com/commafeed/backend/services/FeedUpdateService.java @@ -10,6 +10,10 @@ import javax.inject.Inject; import org.apache.commons.lang.ObjectUtils; import org.apache.commons.lang.StringUtils; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document.OutputSettings; +import org.jsoup.nodes.Entities.EscapeMode; +import org.jsoup.safety.Whitelist; import com.commafeed.backend.dao.FeedDAO; import com.commafeed.backend.dao.FeedEntryDAO; @@ -17,6 +21,7 @@ import com.commafeed.backend.dao.FeedEntryStatusDAO; import com.commafeed.backend.dao.FeedSubscriptionDAO; import com.commafeed.backend.model.Feed; import com.commafeed.backend.model.FeedEntry; +import com.commafeed.backend.model.FeedEntryContent; import com.commafeed.backend.model.FeedEntryStatus; import com.commafeed.backend.model.FeedSubscription; import com.google.common.collect.Lists; @@ -54,6 +59,10 @@ public class FeedUpdateService { } } if (foundEntry == null) { + FeedEntryContent content = entry.getContent(); + content.setContent(handleContent(content.getContent())); + content.setTitle(handleContent(content.getTitle())); + entry.setInserted(Calendar.getInstance().getTime()); addFeedToEntry(entry, feed); } else { @@ -85,4 +94,19 @@ public class FeedUpdateService { } } + + private String handleContent(String content) { + if (StringUtils.isNotBlank(content)) { + Whitelist whitelist = Whitelist.relaxed(); + whitelist.addEnforcedAttribute("a", "target", "_blank"); + + whitelist.addTags("iframe"); + whitelist.addAttributes("iframe", "src", "height", "width", + "allowfullscreen", "frameborder"); + + content = Jsoup.clean(content, "", whitelist, + new OutputSettings().escapeMode(EscapeMode.extended)); + } + return content; + } } diff --git a/src/main/java/com/commafeed/frontend/rest/resources/SubscriptionsREST.java b/src/main/java/com/commafeed/frontend/rest/resources/SubscriptionsREST.java index 46b81259..5cbc8842 100644 --- a/src/main/java/com/commafeed/frontend/rest/resources/SubscriptionsREST.java +++ b/src/main/java/com/commafeed/frontend/rest/resources/SubscriptionsREST.java @@ -42,7 +42,7 @@ public class SubscriptionsREST extends AbstractREST { url = prependHttp(url); Feed feed = null; try { - feed = feedFetcher.fetch(url); + feed = feedFetcher.fetch(url, true); } catch (Exception e) { throw new WebApplicationException(e, Response .status(Status.INTERNAL_SERVER_ERROR) @@ -59,6 +59,7 @@ public class SubscriptionsREST extends AbstractREST { Preconditions.checkNotNull(req.getUrl()); String url = prependHttp(req.getUrl()); + url = fetchFeed(url).getUrl(); FeedCategory category = EntriesREST.ALL.equals(req.getCategoryId()) ? null : feedCategoryDAO diff --git a/src/main/webapp/js/controllers.js b/src/main/webapp/js/controllers.js index ebe1febc..be11bc4b 100644 --- a/src/main/webapp/js/controllers.js +++ b/src/main/webapp/js/controllers.js @@ -46,6 +46,7 @@ module.controller('SubscribeCtrl', function($scope, SubscriptionService) { url : $scope.sub.url }, function(data) { $scope.sub.title = data.title; + $scope.sub.url = data.url; }); } };