2013-03-20 20:33:42 +01:00
|
|
|
package com.commafeed.backend.feeds;
|
|
|
|
|
|
2013-03-27 09:37:15 +01:00
|
|
|
import javax.ejb.Stateless;
|
2013-03-20 20:33:42 +01:00
|
|
|
import javax.inject.Inject;
|
|
|
|
|
|
2013-03-23 00:26:23 +01:00
|
|
|
import org.apache.http.HttpEntity;
|
|
|
|
|
import org.apache.http.HttpResponse;
|
2013-03-20 20:33:42 +01:00
|
|
|
import org.apache.http.client.HttpClient;
|
|
|
|
|
import org.apache.http.client.methods.HttpGet;
|
|
|
|
|
import org.apache.http.impl.client.DefaultHttpClient;
|
2013-03-27 09:37:15 +01:00
|
|
|
import org.apache.http.params.HttpConnectionParams;
|
2013-03-23 00:26:23 +01:00
|
|
|
import org.apache.http.params.HttpProtocolParams;
|
|
|
|
|
import org.apache.http.util.EntityUtils;
|
2013-03-31 09:53:19 +02:00
|
|
|
import org.jsoup.Jsoup;
|
|
|
|
|
import org.jsoup.nodes.Document;
|
|
|
|
|
import org.jsoup.select.Elements;
|
2013-03-20 20:33:42 +01:00
|
|
|
import org.slf4j.Logger;
|
|
|
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
|
|
2013-03-23 16:17:19 +01:00
|
|
|
import com.commafeed.backend.model.Feed;
|
2013-03-27 09:37:15 +01:00
|
|
|
import com.sun.syndication.io.FeedException;
|
2013-03-20 20:33:42 +01:00
|
|
|
|
2013-03-27 09:37:15 +01:00
|
|
|
@Stateless
|
2013-03-20 20:33:42 +01:00
|
|
|
public class FeedFetcher {
|
|
|
|
|
|
|
|
|
|
private static Logger log = LoggerFactory.getLogger(FeedFetcher.class);
|
|
|
|
|
|
|
|
|
|
@Inject
|
|
|
|
|
FeedParser parser;
|
|
|
|
|
|
2013-03-27 09:37:15 +01:00
|
|
|
public Feed fetch(String feedUrl) throws FeedException {
|
2013-03-20 20:33:42 +01:00
|
|
|
log.debug("Fetching feed {}", feedUrl);
|
|
|
|
|
Feed feed = null;
|
|
|
|
|
|
|
|
|
|
HttpClient httpclient = new DefaultHttpClient();
|
2013-03-23 00:26:23 +01:00
|
|
|
HttpProtocolParams.setContentCharset(httpclient.getParams(), "UTF-8");
|
2013-03-27 09:37:15 +01:00
|
|
|
HttpConnectionParams
|
|
|
|
|
.setConnectionTimeout(httpclient.getParams(), 15000);
|
|
|
|
|
HttpConnectionParams.setSoTimeout(httpclient.getParams(), 15000);
|
2013-03-23 00:26:23 +01:00
|
|
|
|
2013-03-20 20:33:42 +01:00
|
|
|
try {
|
|
|
|
|
HttpGet httpget = new HttpGet(feedUrl);
|
2013-03-23 00:26:23 +01:00
|
|
|
HttpResponse response = httpclient.execute(httpget);
|
|
|
|
|
HttpEntity entity = response.getEntity();
|
|
|
|
|
String content = EntityUtils.toString(entity, "UTF-8");
|
2013-03-31 09:53:19 +02:00
|
|
|
|
|
|
|
|
String extractedUrl = extractFeedUrl(content);
|
|
|
|
|
if (extractedUrl != null) {
|
|
|
|
|
feed = fetch(extractedUrl);
|
|
|
|
|
} else {
|
|
|
|
|
feed = parser.parse(feedUrl, content);
|
|
|
|
|
}
|
2013-03-20 20:33:42 +01:00
|
|
|
} catch (Exception e) {
|
2013-03-27 09:37:15 +01:00
|
|
|
throw new FeedException(e.getMessage(), e);
|
2013-03-20 20:33:42 +01:00
|
|
|
} finally {
|
|
|
|
|
httpclient.getConnectionManager().shutdown();
|
|
|
|
|
}
|
2013-03-27 09:37:15 +01:00
|
|
|
return feed;
|
2013-03-20 20:33:42 +01:00
|
|
|
}
|
|
|
|
|
|
2013-03-31 09:53:19 +02:00
|
|
|
private String extractFeedUrl(String html) {
|
2013-03-31 14:30:44 +02:00
|
|
|
String foundUrl = null;
|
2013-03-31 09:53:19 +02:00
|
|
|
|
2013-03-31 14:30:44 +02:00
|
|
|
Document doc = Jsoup.parse(html);
|
|
|
|
|
String root = doc.children().get(0).tagName();
|
|
|
|
|
if ("html".equals(root)) {
|
|
|
|
|
Elements rss = doc.select("link[type=application/rss+xml]");
|
|
|
|
|
Elements atom = doc.select("link[type=application/atom+xml]");
|
|
|
|
|
if (!rss.isEmpty()) {
|
|
|
|
|
foundUrl = rss.get(0).attr("abs:href").toString();
|
|
|
|
|
} else if (!atom.isEmpty()) {
|
|
|
|
|
foundUrl = atom.get(0).attr("abs:href").toString();
|
|
|
|
|
}
|
2013-03-31 09:53:19 +02:00
|
|
|
}
|
2013-03-31 14:30:44 +02:00
|
|
|
return foundUrl;
|
2013-03-31 09:53:19 +02:00
|
|
|
}
|
2013-03-20 20:33:42 +01:00
|
|
|
}
|