forked from Archives/Athou_commafeed
optimizations
This commit is contained in:
@@ -26,16 +26,19 @@ public class FeedFetcher {
|
|||||||
@Inject
|
@Inject
|
||||||
HttpGetter getter;
|
HttpGetter getter;
|
||||||
|
|
||||||
public Feed fetch(String feedUrl) throws FeedException,
|
public Feed fetch(String feedUrl, boolean extractFeedUrlFromHtml)
|
||||||
ClientProtocolException, IOException {
|
throws FeedException, ClientProtocolException, IOException {
|
||||||
log.debug("Fetching feed {}", feedUrl);
|
log.debug("Fetching feed {}", feedUrl);
|
||||||
Feed feed = null;
|
Feed feed = null;
|
||||||
|
|
||||||
byte[] content = getter.getBinary(feedUrl);
|
byte[] content = getter.getBinary(feedUrl);
|
||||||
String extractedUrl = extractFeedUrl(StringUtils.newStringUtf8(content));
|
if (extractFeedUrlFromHtml) {
|
||||||
if (extractedUrl != null) {
|
String extractedUrl = extractFeedUrl(StringUtils
|
||||||
content = getter.getBinary(extractedUrl);
|
.newStringUtf8(content));
|
||||||
feedUrl = extractedUrl;
|
if (extractedUrl != null) {
|
||||||
|
content = getter.getBinary(extractedUrl);
|
||||||
|
feedUrl = extractedUrl;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
feed = parser.parse(feedUrl, content);
|
feed = parser.parse(feedUrl, content);
|
||||||
|
|
||||||
|
|||||||
@@ -8,10 +8,6 @@ import java.util.List;
|
|||||||
import org.apache.commons.codec.digest.DigestUtils;
|
import org.apache.commons.codec.digest.DigestUtils;
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
import org.apache.commons.lang.SystemUtils;
|
import org.apache.commons.lang.SystemUtils;
|
||||||
import org.jsoup.Jsoup;
|
|
||||||
import org.jsoup.nodes.Document.OutputSettings;
|
|
||||||
import org.jsoup.nodes.Entities.EscapeMode;
|
|
||||||
import org.jsoup.safety.Whitelist;
|
|
||||||
import org.xml.sax.InputSource;
|
import org.xml.sax.InputSource;
|
||||||
|
|
||||||
import com.commafeed.backend.model.Feed;
|
import com.commafeed.backend.model.Feed;
|
||||||
@@ -29,6 +25,12 @@ import com.sun.syndication.io.SyndFeedInput;
|
|||||||
|
|
||||||
public class FeedParser {
|
public class FeedParser {
|
||||||
|
|
||||||
|
private static final Function<SyndContent, String> CONTENT_TO_STRING = new Function<SyndContent, String>() {
|
||||||
|
public String apply(SyndContent content) {
|
||||||
|
return content.getValue();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
public Feed parse(String feedUrl, byte[] xml) throws FeedException {
|
public Feed parse(String feedUrl, byte[] xml) throws FeedException {
|
||||||
Feed feed = new Feed();
|
Feed feed = new Feed();
|
||||||
@@ -55,8 +57,8 @@ public class FeedParser {
|
|||||||
entry.setUpdated(getUpdateDate(item));
|
entry.setUpdated(getUpdateDate(item));
|
||||||
|
|
||||||
FeedEntryContent content = new FeedEntryContent();
|
FeedEntryContent content = new FeedEntryContent();
|
||||||
content.setContent(handleContent(getContent(item)));
|
content.setContent(getContent(item));
|
||||||
content.setTitle(handleContent(item.getTitle()));
|
content.setTitle(item.getTitle());
|
||||||
SyndEnclosure enclosure = (SyndEnclosure) Iterables.getFirst(
|
SyndEnclosure enclosure = (SyndEnclosure) Iterables.getFirst(
|
||||||
item.getEnclosures(), null);
|
item.getEnclosures(), null);
|
||||||
if (enclosure != null) {
|
if (enclosure != null) {
|
||||||
@@ -94,27 +96,10 @@ public class FeedParser {
|
|||||||
.getDescription().getValue();
|
.getDescription().getValue();
|
||||||
} else {
|
} else {
|
||||||
content = StringUtils.join(Collections2.transform(
|
content = StringUtils.join(Collections2.transform(
|
||||||
item.getContents(), new Function<SyndContent, String>() {
|
item.getContents(), CONTENT_TO_STRING),
|
||||||
public String apply(SyndContent content) {
|
SystemUtils.LINE_SEPARATOR);
|
||||||
return content.getValue();
|
|
||||||
}
|
|
||||||
}), SystemUtils.LINE_SEPARATOR);
|
|
||||||
}
|
}
|
||||||
return content;
|
return content;
|
||||||
}
|
}
|
||||||
|
|
||||||
private String handleContent(String content) {
|
|
||||||
if (StringUtils.isNotBlank(content)) {
|
|
||||||
Whitelist whitelist = Whitelist.relaxed();
|
|
||||||
whitelist.addEnforcedAttribute("a", "target", "_blank");
|
|
||||||
|
|
||||||
whitelist.addTags("iframe");
|
|
||||||
whitelist.addAttributes("iframe", "src", "height", "width",
|
|
||||||
"allowfullscreen", "frameborder");
|
|
||||||
|
|
||||||
content = Jsoup.clean(content, "", whitelist,
|
|
||||||
new OutputSettings().escapeMode(EscapeMode.extended));
|
|
||||||
}
|
|
||||||
return content;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -81,7 +81,7 @@ public class FeedRefreshWorker {
|
|||||||
|
|
||||||
Feed fetchedFeed = null;
|
Feed fetchedFeed = null;
|
||||||
try {
|
try {
|
||||||
fetchedFeed = fetcher.fetch(feed.getUrl());
|
fetchedFeed = fetcher.fetch(feed.getUrl(), false);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
message = "Unable to refresh feed " + feed.getUrl() + " : "
|
message = "Unable to refresh feed " + feed.getUrl() + " : "
|
||||||
+ e.getMessage();
|
+ e.getMessage();
|
||||||
|
|||||||
@@ -10,6 +10,10 @@ import javax.inject.Inject;
|
|||||||
|
|
||||||
import org.apache.commons.lang.ObjectUtils;
|
import org.apache.commons.lang.ObjectUtils;
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
import org.jsoup.nodes.Document.OutputSettings;
|
||||||
|
import org.jsoup.nodes.Entities.EscapeMode;
|
||||||
|
import org.jsoup.safety.Whitelist;
|
||||||
|
|
||||||
import com.commafeed.backend.dao.FeedDAO;
|
import com.commafeed.backend.dao.FeedDAO;
|
||||||
import com.commafeed.backend.dao.FeedEntryDAO;
|
import com.commafeed.backend.dao.FeedEntryDAO;
|
||||||
@@ -17,6 +21,7 @@ import com.commafeed.backend.dao.FeedEntryStatusDAO;
|
|||||||
import com.commafeed.backend.dao.FeedSubscriptionDAO;
|
import com.commafeed.backend.dao.FeedSubscriptionDAO;
|
||||||
import com.commafeed.backend.model.Feed;
|
import com.commafeed.backend.model.Feed;
|
||||||
import com.commafeed.backend.model.FeedEntry;
|
import com.commafeed.backend.model.FeedEntry;
|
||||||
|
import com.commafeed.backend.model.FeedEntryContent;
|
||||||
import com.commafeed.backend.model.FeedEntryStatus;
|
import com.commafeed.backend.model.FeedEntryStatus;
|
||||||
import com.commafeed.backend.model.FeedSubscription;
|
import com.commafeed.backend.model.FeedSubscription;
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
@@ -54,6 +59,10 @@ public class FeedUpdateService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (foundEntry == null) {
|
if (foundEntry == null) {
|
||||||
|
FeedEntryContent content = entry.getContent();
|
||||||
|
content.setContent(handleContent(content.getContent()));
|
||||||
|
content.setTitle(handleContent(content.getTitle()));
|
||||||
|
|
||||||
entry.setInserted(Calendar.getInstance().getTime());
|
entry.setInserted(Calendar.getInstance().getTime());
|
||||||
addFeedToEntry(entry, feed);
|
addFeedToEntry(entry, feed);
|
||||||
} else {
|
} else {
|
||||||
@@ -85,4 +94,19 @@ public class FeedUpdateService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private String handleContent(String content) {
|
||||||
|
if (StringUtils.isNotBlank(content)) {
|
||||||
|
Whitelist whitelist = Whitelist.relaxed();
|
||||||
|
whitelist.addEnforcedAttribute("a", "target", "_blank");
|
||||||
|
|
||||||
|
whitelist.addTags("iframe");
|
||||||
|
whitelist.addAttributes("iframe", "src", "height", "width",
|
||||||
|
"allowfullscreen", "frameborder");
|
||||||
|
|
||||||
|
content = Jsoup.clean(content, "", whitelist,
|
||||||
|
new OutputSettings().escapeMode(EscapeMode.extended));
|
||||||
|
}
|
||||||
|
return content;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -42,7 +42,7 @@ public class SubscriptionsREST extends AbstractREST {
|
|||||||
url = prependHttp(url);
|
url = prependHttp(url);
|
||||||
Feed feed = null;
|
Feed feed = null;
|
||||||
try {
|
try {
|
||||||
feed = feedFetcher.fetch(url);
|
feed = feedFetcher.fetch(url, true);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new WebApplicationException(e, Response
|
throw new WebApplicationException(e, Response
|
||||||
.status(Status.INTERNAL_SERVER_ERROR)
|
.status(Status.INTERNAL_SERVER_ERROR)
|
||||||
@@ -59,6 +59,7 @@ public class SubscriptionsREST extends AbstractREST {
|
|||||||
Preconditions.checkNotNull(req.getUrl());
|
Preconditions.checkNotNull(req.getUrl());
|
||||||
|
|
||||||
String url = prependHttp(req.getUrl());
|
String url = prependHttp(req.getUrl());
|
||||||
|
url = fetchFeed(url).getUrl();
|
||||||
|
|
||||||
FeedCategory category = EntriesREST.ALL.equals(req.getCategoryId()) ? null
|
FeedCategory category = EntriesREST.ALL.equals(req.getCategoryId()) ? null
|
||||||
: feedCategoryDAO
|
: feedCategoryDAO
|
||||||
|
|||||||
@@ -46,6 +46,7 @@ module.controller('SubscribeCtrl', function($scope, SubscriptionService) {
|
|||||||
url : $scope.sub.url
|
url : $scope.sub.url
|
||||||
}, function(data) {
|
}, function(data) {
|
||||||
$scope.sub.title = data.title;
|
$scope.sub.title = data.title;
|
||||||
|
$scope.sub.url = data.url;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user