optimizations

This commit is contained in:
Athou
2013-04-14 18:51:12 +02:00
parent 86edd54a21
commit 5be7c487ad
6 changed files with 47 additions and 33 deletions

View File

@@ -26,16 +26,19 @@ public class FeedFetcher {
@Inject
HttpGetter getter;
public Feed fetch(String feedUrl) throws FeedException,
ClientProtocolException, IOException {
public Feed fetch(String feedUrl, boolean extractFeedUrlFromHtml)
throws FeedException, ClientProtocolException, IOException {
log.debug("Fetching feed {}", feedUrl);
Feed feed = null;
byte[] content = getter.getBinary(feedUrl);
String extractedUrl = extractFeedUrl(StringUtils.newStringUtf8(content));
if (extractedUrl != null) {
content = getter.getBinary(extractedUrl);
feedUrl = extractedUrl;
if (extractFeedUrlFromHtml) {
String extractedUrl = extractFeedUrl(StringUtils
.newStringUtf8(content));
if (extractedUrl != null) {
content = getter.getBinary(extractedUrl);
feedUrl = extractedUrl;
}
}
feed = parser.parse(feedUrl, content);

View File

@@ -8,10 +8,6 @@ import java.util.List;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.SystemUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document.OutputSettings;
import org.jsoup.nodes.Entities.EscapeMode;
import org.jsoup.safety.Whitelist;
import org.xml.sax.InputSource;
import com.commafeed.backend.model.Feed;
@@ -29,6 +25,12 @@ import com.sun.syndication.io.SyndFeedInput;
public class FeedParser {
/**
 * Shared, stateless function that maps a {@link SyndContent} to the string
 * returned by its {@code getValue()} accessor.
 */
private static final Function<SyndContent, String> CONTENT_TO_STRING = new Function<SyndContent, String>() {
    @Override
    public String apply(SyndContent syndContent) {
        return syndContent.getValue();
    }
};
@SuppressWarnings("unchecked")
public Feed parse(String feedUrl, byte[] xml) throws FeedException {
Feed feed = new Feed();
@@ -55,8 +57,8 @@ public class FeedParser {
entry.setUpdated(getUpdateDate(item));
FeedEntryContent content = new FeedEntryContent();
content.setContent(handleContent(getContent(item)));
content.setTitle(handleContent(item.getTitle()));
content.setContent(getContent(item));
content.setTitle(item.getTitle());
SyndEnclosure enclosure = (SyndEnclosure) Iterables.getFirst(
item.getEnclosures(), null);
if (enclosure != null) {
@@ -94,27 +96,10 @@ public class FeedParser {
.getDescription().getValue();
} else {
content = StringUtils.join(Collections2.transform(
item.getContents(), new Function<SyndContent, String>() {
public String apply(SyndContent content) {
return content.getValue();
}
}), SystemUtils.LINE_SEPARATOR);
item.getContents(), CONTENT_TO_STRING),
SystemUtils.LINE_SEPARATOR);
}
return content;
}
/**
 * Sanitizes an HTML fragment with jsoup.
 *
 * The whitelist starts from {@code Whitelist.relaxed()}, enforces
 * {@code target="_blank"} on {@code a} elements and additionally permits
 * {@code iframe} elements carrying the {@code src}, {@code height},
 * {@code width}, {@code allowfullscreen} and {@code frameborder} attributes.
 *
 * @param content the raw markup; may be null or blank
 * @return the cleaned markup, or {@code content} unchanged when it is null
 *         or blank
 */
private String handleContent(String content) {
    // Nothing to clean: hand null/blank input straight back to the caller.
    if (StringUtils.isBlank(content)) {
        return content;
    }

    Whitelist allowed = Whitelist.relaxed()
            .addEnforcedAttribute("a", "target", "_blank")
            .addTags("iframe")
            .addAttributes("iframe", "src", "height", "width",
                    "allowfullscreen", "frameborder");

    // Empty base URI; output is escaped using the extended entity set.
    OutputSettings settings = new OutputSettings()
            .escapeMode(EscapeMode.extended);
    return Jsoup.clean(content, "", allowed, settings);
}
}

View File

@@ -81,7 +81,7 @@ public class FeedRefreshWorker {
Feed fetchedFeed = null;
try {
fetchedFeed = fetcher.fetch(feed.getUrl());
fetchedFeed = fetcher.fetch(feed.getUrl(), false);
} catch (Exception e) {
message = "Unable to refresh feed " + feed.getUrl() + " : "
+ e.getMessage();

View File

@@ -10,6 +10,10 @@ import javax.inject.Inject;
import org.apache.commons.lang.ObjectUtils;
import org.apache.commons.lang.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document.OutputSettings;
import org.jsoup.nodes.Entities.EscapeMode;
import org.jsoup.safety.Whitelist;
import com.commafeed.backend.dao.FeedDAO;
import com.commafeed.backend.dao.FeedEntryDAO;
@@ -17,6 +21,7 @@ import com.commafeed.backend.dao.FeedEntryStatusDAO;
import com.commafeed.backend.dao.FeedSubscriptionDAO;
import com.commafeed.backend.model.Feed;
import com.commafeed.backend.model.FeedEntry;
import com.commafeed.backend.model.FeedEntryContent;
import com.commafeed.backend.model.FeedEntryStatus;
import com.commafeed.backend.model.FeedSubscription;
import com.google.common.collect.Lists;
@@ -54,6 +59,10 @@ public class FeedUpdateService {
}
}
if (foundEntry == null) {
FeedEntryContent content = entry.getContent();
content.setContent(handleContent(content.getContent()));
content.setTitle(handleContent(content.getTitle()));
entry.setInserted(Calendar.getInstance().getTime());
addFeedToEntry(entry, feed);
} else {
@@ -85,4 +94,19 @@ public class FeedUpdateService {
}
}
/**
 * Runs the given markup through jsoup's cleaner and returns the result.
 *
 * Null or blank input is returned as-is. Otherwise the markup is cleaned
 * against {@code Whitelist.relaxed()} extended as follows: {@code a}
 * elements get an enforced {@code target="_blank"} attribute, and
 * {@code iframe} elements are allowed with the {@code src}, {@code height},
 * {@code width}, {@code allowfullscreen} and {@code frameborder} attributes.
 *
 * @param content markup to sanitize; may be null or blank
 * @return the sanitized markup, or the original value when it is null or
 *         blank
 */
private String handleContent(String content) {
    if (StringUtils.isBlank(content)) {
        // Skip the cleaner entirely for null/blank values.
        return content;
    }

    Whitelist rules = Whitelist.relaxed()
            .addEnforcedAttribute("a", "target", "_blank")
            .addTags("iframe")
            .addAttributes("iframe", "src", "height", "width",
                    "allowfullscreen", "frameborder");

    // The cleaner is given an empty base URI and the extended escape mode.
    OutputSettings outputSettings = new OutputSettings()
            .escapeMode(EscapeMode.extended);
    return Jsoup.clean(content, "", rules, outputSettings);
}
}

View File

@@ -42,7 +42,7 @@ public class SubscriptionsREST extends AbstractREST {
url = prependHttp(url);
Feed feed = null;
try {
feed = feedFetcher.fetch(url);
feed = feedFetcher.fetch(url, true);
} catch (Exception e) {
throw new WebApplicationException(e, Response
.status(Status.INTERNAL_SERVER_ERROR)
@@ -59,6 +59,7 @@ public class SubscriptionsREST extends AbstractREST {
Preconditions.checkNotNull(req.getUrl());
String url = prependHttp(req.getUrl());
url = fetchFeed(url).getUrl();
FeedCategory category = EntriesREST.ALL.equals(req.getCategoryId()) ? null
: feedCategoryDAO

View File

@@ -46,6 +46,7 @@ module.controller('SubscribeCtrl', function($scope, SubscriptionService) {
url : $scope.sub.url
}, function(data) {
$scope.sub.title = data.title;
$scope.sub.url = data.url;
});
}
};