Files
Athou_commafeed/src/main/java/com/commafeed/backend/feeds/FeedFetcher.java
2013-03-31 14:56:35 +02:00

72 lines
2.2 KiB
Java

package com.commafeed.backend.feeds;
import javax.ejb.Stateless;
import javax.inject.Inject;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.HttpConnectionParams;
import org.apache.http.params.HttpProtocolParams;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.commafeed.backend.model.Feed;
import com.sun.syndication.io.FeedException;
/**
 * Downloads feeds over HTTP and locates feed links advertised by HTML pages.
 */
@Stateless
public class FeedFetcher {

    private static final Logger log = LoggerFactory.getLogger(FeedFetcher.class);

    /** Default content charset handed to HttpClient and used when decoding the response body. */
    private static final String DEFAULT_CHARSET = "UTF-8";

    /** Value used for both the connection timeout and the socket timeout, in milliseconds. */
    private static final int TIMEOUT_MS = 15000;

    /** Parser that turns downloaded content into a {@link Feed}; injected by the container. */
    @Inject
    FeedParser parser;

    /**
     * Downloads the document at {@code feedUrl} and hands its content to the
     * injected {@link FeedParser}.
     *
     * @param feedUrl
     *            address of the feed to download
     * @return the value returned by the parser for the downloaded content
     * @throws FeedException
     *             if the request fails, the server answers with a status of 400
     *             or above, the response carries no body, or the parser raises
     *             an error
     */
    public Feed fetch(String feedUrl) throws FeedException {
        log.debug("Fetching feed {}", feedUrl);

        HttpClient httpclient = new DefaultHttpClient();
        HttpProtocolParams.setContentCharset(httpclient.getParams(), DEFAULT_CHARSET);
        HttpConnectionParams.setConnectionTimeout(httpclient.getParams(), TIMEOUT_MS);
        HttpConnectionParams.setSoTimeout(httpclient.getParams(), TIMEOUT_MS);

        try {
            HttpResponse response = httpclient.execute(new HttpGet(feedUrl));

            // An error page is not a feed: report the HTTP failure itself
            // rather than whatever the parser makes of the error body.
            int status = response.getStatusLine().getStatusCode();
            if (status >= 400) {
                throw new FeedException("HTTP " + status + " while fetching " + feedUrl);
            }

            HttpEntity entity = response.getEntity();
            if (entity == null) {
                throw new FeedException("Empty response while fetching " + feedUrl);
            }

            String content = EntityUtils.toString(entity, DEFAULT_CHARSET);
            return parser.parse(feedUrl, content);
        } catch (FeedException e) {
            // Already the exception type callers expect; do not wrap it again.
            throw e;
        } catch (Exception e) {
            throw new FeedException(e.getMessage(), e);
        } finally {
            // The client is created per call, so its connections are released here.
            httpclient.getConnectionManager().shutdown();
        }
    }

    /**
     * Looks for a feed link in an HTML page without a base URI.
     * <p>
     * Equivalent to {@link #extractFeedUrl(String, String)} with an empty base
     * URI, so only links whose {@code href} is already absolute can be found.
     *
     * @param html
     *            markup of the page, may be {@code null}
     * @return the absolute feed URL, or {@code null} if none was found
     */
    public String extractFeedUrl(String html) {
        return extractFeedUrl(html, "");
    }

    /**
     * Looks for a feed link in an HTML page.
     * <p>
     * {@code <link type="application/rss+xml">} elements are examined first,
     * then {@code <link type="application/atom+xml">} elements. Within each
     * group the first link whose {@code href} resolves to a non-empty absolute
     * URL wins.
     *
     * @param html
     *            markup of the page, may be {@code null}
     * @param baseUri
     *            URL the page was loaded from, used to resolve relative
     *            {@code href} values; {@code null} is treated as empty
     * @return the absolute feed URL, or {@code null} if none was found
     */
    public String extractFeedUrl(String html, String baseUri) {
        if (html == null) {
            return null;
        }

        Document doc = Jsoup.parse(html, baseUri == null ? "" : baseUri);
        Elements roots = doc.children();
        if (roots.isEmpty() || !"html".equals(roots.get(0).tagName())) {
            return null;
        }

        String url = firstAbsoluteHref(doc.select("link[type=application/rss+xml]"));
        if (url == null) {
            url = firstAbsoluteHref(doc.select("link[type=application/atom+xml]"));
        }
        return url;
    }

    /**
     * Returns the first non-empty {@code abs:href} among the given links, or
     * {@code null} if every link lacks a resolvable href.
     */
    private static String firstAbsoluteHref(Elements links) {
        for (int i = 0; i < links.size(); i++) {
            // "abs:href" yields an empty string when the href is missing or
            // cannot be made absolute; such links are skipped.
            String href = links.get(i).attr("abs:href");
            if (href.length() > 0) {
                return href;
            }
        }
        return null;
    }
}