optimizations

This commit is contained in:
Athou
2013-04-14 18:51:12 +02:00
parent 86edd54a21
commit 5be7c487ad
6 changed files with 47 additions and 33 deletions

View File

@@ -26,16 +26,19 @@ public class FeedFetcher {
@Inject @Inject
HttpGetter getter; HttpGetter getter;
public Feed fetch(String feedUrl) throws FeedException, public Feed fetch(String feedUrl, boolean extractFeedUrlFromHtml)
ClientProtocolException, IOException { throws FeedException, ClientProtocolException, IOException {
log.debug("Fetching feed {}", feedUrl); log.debug("Fetching feed {}", feedUrl);
Feed feed = null; Feed feed = null;
byte[] content = getter.getBinary(feedUrl); byte[] content = getter.getBinary(feedUrl);
String extractedUrl = extractFeedUrl(StringUtils.newStringUtf8(content)); if (extractFeedUrlFromHtml) {
if (extractedUrl != null) { String extractedUrl = extractFeedUrl(StringUtils
content = getter.getBinary(extractedUrl); .newStringUtf8(content));
feedUrl = extractedUrl; if (extractedUrl != null) {
content = getter.getBinary(extractedUrl);
feedUrl = extractedUrl;
}
} }
feed = parser.parse(feedUrl, content); feed = parser.parse(feedUrl, content);

View File

@@ -8,10 +8,6 @@ import java.util.List;
import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.SystemUtils; import org.apache.commons.lang.SystemUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document.OutputSettings;
import org.jsoup.nodes.Entities.EscapeMode;
import org.jsoup.safety.Whitelist;
import org.xml.sax.InputSource; import org.xml.sax.InputSource;
import com.commafeed.backend.model.Feed; import com.commafeed.backend.model.Feed;
@@ -29,6 +25,12 @@ import com.sun.syndication.io.SyndFeedInput;
public class FeedParser { public class FeedParser {
/**
 * Shared function object that maps a {@link SyndContent} to the string
 * returned by its {@code getValue()} method.
 * <p>
 * Held in a constant so a single instance can be handed to
 * {@code Collections2.transform(...)} when the contents of a feed item are
 * joined, rather than creating a new anonymous class instance per call.
 */
private static final Function<SyndContent, String> CONTENT_TO_STRING = new Function<SyndContent, String>() {
// Returns the raw value of the given content element; no null check is done here.
public String apply(SyndContent content) {
return content.getValue();
}
};
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public Feed parse(String feedUrl, byte[] xml) throws FeedException { public Feed parse(String feedUrl, byte[] xml) throws FeedException {
Feed feed = new Feed(); Feed feed = new Feed();
@@ -55,8 +57,8 @@ public class FeedParser {
entry.setUpdated(getUpdateDate(item)); entry.setUpdated(getUpdateDate(item));
FeedEntryContent content = new FeedEntryContent(); FeedEntryContent content = new FeedEntryContent();
content.setContent(handleContent(getContent(item))); content.setContent(getContent(item));
content.setTitle(handleContent(item.getTitle())); content.setTitle(item.getTitle());
SyndEnclosure enclosure = (SyndEnclosure) Iterables.getFirst( SyndEnclosure enclosure = (SyndEnclosure) Iterables.getFirst(
item.getEnclosures(), null); item.getEnclosures(), null);
if (enclosure != null) { if (enclosure != null) {
@@ -94,27 +96,10 @@ public class FeedParser {
.getDescription().getValue(); .getDescription().getValue();
} else { } else {
content = StringUtils.join(Collections2.transform( content = StringUtils.join(Collections2.transform(
item.getContents(), new Function<SyndContent, String>() { item.getContents(), CONTENT_TO_STRING),
public String apply(SyndContent content) { SystemUtils.LINE_SEPARATOR);
return content.getValue();
}
}), SystemUtils.LINE_SEPARATOR);
} }
return content; return content;
} }
/**
 * Runs the given HTML fragment through {@code Jsoup.clean} using a
 * customised whitelist.
 * <p>
 * The whitelist starts from {@code Whitelist.relaxed()} and is then adjusted
 * to enforce {@code target="_blank"} on {@code a} tags and to accept
 * {@code iframe} tags with the {@code src}, {@code height}, {@code width},
 * {@code allowfullscreen} and {@code frameborder} attributes.
 *
 * @param content
 *            the text to clean
 * @return the cleaned text, or {@code content} unchanged when
 *         {@code StringUtils.isNotBlank(content)} is false
 */
private String handleContent(String content) {
if (StringUtils.isNotBlank(content)) {
// A fresh whitelist is built on every call.
Whitelist whitelist = Whitelist.relaxed();
whitelist.addEnforcedAttribute("a", "target", "_blank");
whitelist.addTags("iframe");
whitelist.addAttributes("iframe", "src", "height", "width",
"allowfullscreen", "frameborder");
// Second argument is the base URI, passed as an empty string;
// output is serialised with the extended entity escape mode.
content = Jsoup.clean(content, "", whitelist,
new OutputSettings().escapeMode(EscapeMode.extended));
}
return content;
}
} }

View File

@@ -81,7 +81,7 @@ public class FeedRefreshWorker {
Feed fetchedFeed = null; Feed fetchedFeed = null;
try { try {
fetchedFeed = fetcher.fetch(feed.getUrl()); fetchedFeed = fetcher.fetch(feed.getUrl(), false);
} catch (Exception e) { } catch (Exception e) {
message = "Unable to refresh feed " + feed.getUrl() + " : " message = "Unable to refresh feed " + feed.getUrl() + " : "
+ e.getMessage(); + e.getMessage();

View File

@@ -10,6 +10,10 @@ import javax.inject.Inject;
import org.apache.commons.lang.ObjectUtils; import org.apache.commons.lang.ObjectUtils;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document.OutputSettings;
import org.jsoup.nodes.Entities.EscapeMode;
import org.jsoup.safety.Whitelist;
import com.commafeed.backend.dao.FeedDAO; import com.commafeed.backend.dao.FeedDAO;
import com.commafeed.backend.dao.FeedEntryDAO; import com.commafeed.backend.dao.FeedEntryDAO;
@@ -17,6 +21,7 @@ import com.commafeed.backend.dao.FeedEntryStatusDAO;
import com.commafeed.backend.dao.FeedSubscriptionDAO; import com.commafeed.backend.dao.FeedSubscriptionDAO;
import com.commafeed.backend.model.Feed; import com.commafeed.backend.model.Feed;
import com.commafeed.backend.model.FeedEntry; import com.commafeed.backend.model.FeedEntry;
import com.commafeed.backend.model.FeedEntryContent;
import com.commafeed.backend.model.FeedEntryStatus; import com.commafeed.backend.model.FeedEntryStatus;
import com.commafeed.backend.model.FeedSubscription; import com.commafeed.backend.model.FeedSubscription;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
@@ -54,6 +59,10 @@ public class FeedUpdateService {
} }
} }
if (foundEntry == null) { if (foundEntry == null) {
FeedEntryContent content = entry.getContent();
content.setContent(handleContent(content.getContent()));
content.setTitle(handleContent(content.getTitle()));
entry.setInserted(Calendar.getInstance().getTime()); entry.setInserted(Calendar.getInstance().getTime());
addFeedToEntry(entry, feed); addFeedToEntry(entry, feed);
} else { } else {
@@ -85,4 +94,19 @@ public class FeedUpdateService {
} }
} }
/**
 * Runs the given HTML fragment through {@code Jsoup.clean} using a
 * customised whitelist.
 * <p>
 * The whitelist starts from {@code Whitelist.relaxed()} and is then adjusted
 * to enforce {@code target="_blank"} on {@code a} tags and to accept
 * {@code iframe} tags with the {@code src}, {@code height}, {@code width},
 * {@code allowfullscreen} and {@code frameborder} attributes.
 *
 * @param content
 *            the text to clean
 * @return the cleaned text, or {@code content} unchanged when
 *         {@code StringUtils.isNotBlank(content)} is false
 */
private String handleContent(String content) {
if (StringUtils.isNotBlank(content)) {
// A fresh whitelist is built on every call.
Whitelist whitelist = Whitelist.relaxed();
whitelist.addEnforcedAttribute("a", "target", "_blank");
whitelist.addTags("iframe");
whitelist.addAttributes("iframe", "src", "height", "width",
"allowfullscreen", "frameborder");
// Second argument is the base URI, passed as an empty string;
// output is serialised with the extended entity escape mode.
content = Jsoup.clean(content, "", whitelist,
new OutputSettings().escapeMode(EscapeMode.extended));
}
return content;
}
} }

View File

@@ -42,7 +42,7 @@ public class SubscriptionsREST extends AbstractREST {
url = prependHttp(url); url = prependHttp(url);
Feed feed = null; Feed feed = null;
try { try {
feed = feedFetcher.fetch(url); feed = feedFetcher.fetch(url, true);
} catch (Exception e) { } catch (Exception e) {
throw new WebApplicationException(e, Response throw new WebApplicationException(e, Response
.status(Status.INTERNAL_SERVER_ERROR) .status(Status.INTERNAL_SERVER_ERROR)
@@ -59,6 +59,7 @@ public class SubscriptionsREST extends AbstractREST {
Preconditions.checkNotNull(req.getUrl()); Preconditions.checkNotNull(req.getUrl());
String url = prependHttp(req.getUrl()); String url = prependHttp(req.getUrl());
url = fetchFeed(url).getUrl();
FeedCategory category = EntriesREST.ALL.equals(req.getCategoryId()) ? null FeedCategory category = EntriesREST.ALL.equals(req.getCategoryId()) ? null
: feedCategoryDAO : feedCategoryDAO

View File

@@ -46,6 +46,7 @@ module.controller('SubscribeCtrl', function($scope, SubscriptionService) {
url : $scope.sub.url url : $scope.sub.url
}, function(data) { }, function(data) {
$scope.sub.title = data.title; $scope.sub.title = data.title;
$scope.sub.url = data.url;
}); });
} }
}; };