use the url of the feed as the base url to resolve relative entry links when the declared link in the feed is relative

This commit is contained in:
Athou
2013-10-03 12:42:05 +02:00
parent b615847b09
commit 57abee6cf0
3 changed files with 46 additions and 12 deletions

View File

@@ -82,7 +82,7 @@ public class FeedParser {
continue;
}
entry.setGuid(FeedUtils.truncate(guid, 2048));
entry.setUrl(FeedUtils.truncate(FeedUtils.toAbsoluteUrl(item.getLink(), feed.getLink()), 2048));
entry.setUrl(FeedUtils.truncate(FeedUtils.toAbsoluteUrl(item.getLink(), feed.getLink(), feed.getUrlAfterRedirect()), 2048));
entry.setUpdated(validateDate(getEntryUpdateDate(item), true));
FeedEntryContent content = new FeedEntryContent();

View File

@@ -1,6 +1,8 @@
package com.commafeed.backend.feeds;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
@@ -15,6 +17,7 @@ import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.time.DateUtils;
import org.apache.commons.math.stat.descriptive.SummaryStatistics;
import org.apache.wicket.request.UrlUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Document.OutputSettings;
@@ -389,17 +392,37 @@ public class FeedUtils {
return url;
}
public static String toAbsoluteUrl(String url, String baseUrl) {
/**
*
* @param url
* the url of the entry
* @param feedLink
* the url of the feed as described in the feed
* @param feedUrl
* the url of the feed that we used to fetch the feed
* @return an absolute url pointing to the entry
*/
public static String toAbsoluteUrl(String url, String feedLink, String feedUrl) {
url = StringUtils.trimToNull(StringUtils.normalizeSpace(url));
if (baseUrl == null || url == null || url.startsWith("http")) {
if (url == null || url.startsWith("http")) {
return url;
}
if (url.startsWith("/") == false) {
url = "/" + url;
String baseUrl = (feedLink == null || UrlUtils.isRelative(feedLink)) ? feedUrl : feedLink;
if (baseUrl == null) {
return url;
}
return baseUrl + url;
String result = null;
try {
result = new URL(new URL(baseUrl), url).toString();
} catch (MalformedURLException e) {
log.debug("could not parse url : " + e.getMessage(), e);
result = url;
}
return result;
}
public static String getFaviconUrl(FeedSubscription subscription, String publicUrl) {

View File

@@ -15,14 +15,13 @@ public class FeedUtilsTest {
String urlb1 = "http://ftr.fivefilters.org/makefulltextfeed.php?url=http%3A%2F%2Ffeeds.howtogeek.com%2FHowToGeek&max=10&summary=1";
String urlb2 = "http://ftr.fivefilters.org/makefulltextfeed.php?url=http://feeds.howtogeek.com/HowToGeek&max=10&summary=1";
String urlc1 = "http://feeds.feedburner.com/Frandroid";
String urlc2 = "http://feeds2.feedburner.com/frandroid";
String urlc3 = "http://feedproxy.google.com/frandroid";
String urlc4 = "http://feeds.feedburner.com/Frandroid/";
String urlc5 = "http://feeds.feedburner.com/Frandroid?format=rss";
String urld1 = "http://fivefilters.org/content-only/makefulltextfeed.php?url=http://feeds.feedburner.com/Frandroid";
String urld2 = "http://fivefilters.org/content-only/makefulltextfeed.php?url=http://feeds2.feedburner.com/Frandroid";
@@ -32,14 +31,26 @@ public class FeedUtilsTest {
Assert.assertEquals(FeedUtils.normalizeURL(urla1), FeedUtils.normalizeURL(urla5));
Assert.assertEquals(FeedUtils.normalizeURL(urlb1), FeedUtils.normalizeURL(urlb2));
Assert.assertEquals(FeedUtils.normalizeURL(urlc1), FeedUtils.normalizeURL(urlc2));
Assert.assertEquals(FeedUtils.normalizeURL(urlc1), FeedUtils.normalizeURL(urlc3));
Assert.assertEquals(FeedUtils.normalizeURL(urlc1), FeedUtils.normalizeURL(urlc4));
Assert.assertEquals(FeedUtils.normalizeURL(urlc1), FeedUtils.normalizeURL(urlc5));
Assert.assertNotEquals(FeedUtils.normalizeURL(urld1), FeedUtils.normalizeURL(urld2));
}
}
@Test
public void testToAbsoluteUrl() {
String expected = "http://a.com/blog/entry/1";
Assert.assertEquals(expected, FeedUtils.toAbsoluteUrl("http://a.com/blog/entry/1", "http://a.com/feed/", "http://a.com/feed/"));
Assert.assertEquals(expected, FeedUtils.toAbsoluteUrl("http://a.com/blog/entry/1", "http://a.com/feed", "http://a.com/feed"));
Assert.assertEquals(expected, FeedUtils.toAbsoluteUrl("../blog/entry/1", "http://a.com/feed/", "http://a.com/feed/"));
Assert.assertEquals(expected, FeedUtils.toAbsoluteUrl("../blog/entry/1", "feed.xml", "http://a.com/feed/feed.xml"));
Assert.assertEquals("http://ergoemacs.org/emacs/elisp_all_about_lines.html",
FeedUtils.toAbsoluteUrl("elisp_all_about_lines.html", "blog.xml", "http://ergoemacs.org/emacs/blog.xml"));
}
}