mirror of
https://github.com/Athou/commafeed.git
synced 2026-03-21 21:37:29 +00:00
optimizations
This commit is contained in:
@@ -26,16 +26,19 @@ public class FeedFetcher {
|
||||
@Inject
|
||||
HttpGetter getter;
|
||||
|
||||
public Feed fetch(String feedUrl) throws FeedException,
|
||||
ClientProtocolException, IOException {
|
||||
public Feed fetch(String feedUrl, boolean extractFeedUrlFromHtml)
|
||||
throws FeedException, ClientProtocolException, IOException {
|
||||
log.debug("Fetching feed {}", feedUrl);
|
||||
Feed feed = null;
|
||||
|
||||
byte[] content = getter.getBinary(feedUrl);
|
||||
String extractedUrl = extractFeedUrl(StringUtils.newStringUtf8(content));
|
||||
if (extractedUrl != null) {
|
||||
content = getter.getBinary(extractedUrl);
|
||||
feedUrl = extractedUrl;
|
||||
if (extractFeedUrlFromHtml) {
|
||||
String extractedUrl = extractFeedUrl(StringUtils
|
||||
.newStringUtf8(content));
|
||||
if (extractedUrl != null) {
|
||||
content = getter.getBinary(extractedUrl);
|
||||
feedUrl = extractedUrl;
|
||||
}
|
||||
}
|
||||
feed = parser.parse(feedUrl, content);
|
||||
|
||||
|
||||
@@ -8,10 +8,6 @@ import java.util.List;
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.commons.lang.SystemUtils;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document.OutputSettings;
|
||||
import org.jsoup.nodes.Entities.EscapeMode;
|
||||
import org.jsoup.safety.Whitelist;
|
||||
import org.xml.sax.InputSource;
|
||||
|
||||
import com.commafeed.backend.model.Feed;
|
||||
@@ -29,6 +25,12 @@ import com.sun.syndication.io.SyndFeedInput;
|
||||
|
||||
public class FeedParser {
|
||||
|
||||
/**
 * Function that maps a {@link SyndContent} to the string returned by its
 * {@code getValue()} method. Declared once as a constant so the same
 * instance can be passed wherever this mapping is needed, instead of
 * creating a new anonymous class at each call site.
 */
private static final Function<SyndContent, String> CONTENT_TO_STRING = new Function<SyndContent, String>() {
|
||||
// Returns the value of the given content element unchanged.
public String apply(SyndContent content) {
|
||||
return content.getValue();
|
||||
}
|
||||
};
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public Feed parse(String feedUrl, byte[] xml) throws FeedException {
|
||||
Feed feed = new Feed();
|
||||
@@ -55,8 +57,8 @@ public class FeedParser {
|
||||
entry.setUpdated(getUpdateDate(item));
|
||||
|
||||
FeedEntryContent content = new FeedEntryContent();
|
||||
content.setContent(handleContent(getContent(item)));
|
||||
content.setTitle(handleContent(item.getTitle()));
|
||||
content.setContent(getContent(item));
|
||||
content.setTitle(item.getTitle());
|
||||
SyndEnclosure enclosure = (SyndEnclosure) Iterables.getFirst(
|
||||
item.getEnclosures(), null);
|
||||
if (enclosure != null) {
|
||||
@@ -94,27 +96,10 @@ public class FeedParser {
|
||||
.getDescription().getValue();
|
||||
} else {
|
||||
content = StringUtils.join(Collections2.transform(
|
||||
item.getContents(), new Function<SyndContent, String>() {
|
||||
public String apply(SyndContent content) {
|
||||
return content.getValue();
|
||||
}
|
||||
}), SystemUtils.LINE_SEPARATOR);
|
||||
item.getContents(), CONTENT_TO_STRING),
|
||||
SystemUtils.LINE_SEPARATOR);
|
||||
}
|
||||
return content;
|
||||
}
|
||||
|
||||
/**
 * Cleans an HTML fragment with jsoup before it is used further.
 *
 * @param content the raw HTML or text to clean
 * @return the cleaned markup, or the input itself when
 *         {@code StringUtils.isNotBlank} reports it as blank
 */
private String handleContent(String content) {
|
||||
// Blank input skips the cleaning step and is returned as is.
if (StringUtils.isNotBlank(content)) {
|
||||
// Start from the relaxed jsoup whitelist and extend it below.
Whitelist whitelist = Whitelist.relaxed();
|
||||
// Enforce target=_blank on every anchor tag that survives cleaning.
whitelist.addEnforcedAttribute("a", "target", "_blank");
|
||||
|
||||
// Additionally allow iframe elements, limited to the attributes listed.
whitelist.addTags("iframe");
|
||||
whitelist.addAttributes("iframe", "src", "height", "width",
|
||||
"allowfullscreen", "frameborder");
|
||||
|
||||
// Clean with an empty base URI and the extended entity escape mode.
content = Jsoup.clean(content, "", whitelist,
|
||||
new OutputSettings().escapeMode(EscapeMode.extended));
|
||||
}
|
||||
return content;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -81,7 +81,7 @@ public class FeedRefreshWorker {
|
||||
|
||||
Feed fetchedFeed = null;
|
||||
try {
|
||||
fetchedFeed = fetcher.fetch(feed.getUrl());
|
||||
fetchedFeed = fetcher.fetch(feed.getUrl(), false);
|
||||
} catch (Exception e) {
|
||||
message = "Unable to refresh feed " + feed.getUrl() + " : "
|
||||
+ e.getMessage();
|
||||
|
||||
@@ -10,6 +10,10 @@ import javax.inject.Inject;
|
||||
|
||||
import org.apache.commons.lang.ObjectUtils;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document.OutputSettings;
|
||||
import org.jsoup.nodes.Entities.EscapeMode;
|
||||
import org.jsoup.safety.Whitelist;
|
||||
|
||||
import com.commafeed.backend.dao.FeedDAO;
|
||||
import com.commafeed.backend.dao.FeedEntryDAO;
|
||||
@@ -17,6 +21,7 @@ import com.commafeed.backend.dao.FeedEntryStatusDAO;
|
||||
import com.commafeed.backend.dao.FeedSubscriptionDAO;
|
||||
import com.commafeed.backend.model.Feed;
|
||||
import com.commafeed.backend.model.FeedEntry;
|
||||
import com.commafeed.backend.model.FeedEntryContent;
|
||||
import com.commafeed.backend.model.FeedEntryStatus;
|
||||
import com.commafeed.backend.model.FeedSubscription;
|
||||
import com.google.common.collect.Lists;
|
||||
@@ -54,6 +59,10 @@ public class FeedUpdateService {
|
||||
}
|
||||
}
|
||||
if (foundEntry == null) {
|
||||
FeedEntryContent content = entry.getContent();
|
||||
content.setContent(handleContent(content.getContent()));
|
||||
content.setTitle(handleContent(content.getTitle()));
|
||||
|
||||
entry.setInserted(Calendar.getInstance().getTime());
|
||||
addFeedToEntry(entry, feed);
|
||||
} else {
|
||||
@@ -85,4 +94,19 @@ public class FeedUpdateService {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
 * Cleans an HTML fragment with jsoup. In the visible code it is applied
 * to the content and the title of an entry for which no existing entry
 * was found.
 *
 * @param content the raw HTML or text to clean
 * @return the cleaned markup, or the input itself when
 *         {@code StringUtils.isNotBlank} reports it as blank
 */
private String handleContent(String content) {
|
||||
// Blank input skips the cleaning step and is returned as is.
if (StringUtils.isNotBlank(content)) {
|
||||
// Start from the relaxed jsoup whitelist and extend it below.
Whitelist whitelist = Whitelist.relaxed();
|
||||
// Enforce target=_blank on every anchor tag that survives cleaning.
whitelist.addEnforcedAttribute("a", "target", "_blank");
|
||||
|
||||
// Additionally allow iframe elements, limited to the attributes listed.
whitelist.addTags("iframe");
|
||||
whitelist.addAttributes("iframe", "src", "height", "width",
|
||||
"allowfullscreen", "frameborder");
|
||||
|
||||
// Clean with an empty base URI and the extended entity escape mode.
content = Jsoup.clean(content, "", whitelist,
|
||||
new OutputSettings().escapeMode(EscapeMode.extended));
|
||||
}
|
||||
return content;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -42,7 +42,7 @@ public class SubscriptionsREST extends AbstractREST {
|
||||
url = prependHttp(url);
|
||||
Feed feed = null;
|
||||
try {
|
||||
feed = feedFetcher.fetch(url);
|
||||
feed = feedFetcher.fetch(url, true);
|
||||
} catch (Exception e) {
|
||||
throw new WebApplicationException(e, Response
|
||||
.status(Status.INTERNAL_SERVER_ERROR)
|
||||
@@ -59,6 +59,7 @@ public class SubscriptionsREST extends AbstractREST {
|
||||
Preconditions.checkNotNull(req.getUrl());
|
||||
|
||||
String url = prependHttp(req.getUrl());
|
||||
url = fetchFeed(url).getUrl();
|
||||
|
||||
FeedCategory category = EntriesREST.ALL.equals(req.getCategoryId()) ? null
|
||||
: feedCategoryDAO
|
||||
|
||||
@@ -46,6 +46,7 @@ module.controller('SubscribeCtrl', function($scope, SubscriptionService) {
|
||||
url : $scope.sub.url
|
||||
}, function(data) {
|
||||
$scope.sub.title = data.title;
|
||||
$scope.sub.url = data.url;
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user