use the url of the feed as the base url to resolve relative entry links when the declared link in the feed is relative

This commit is contained in:
Athou
2013-10-03 12:42:05 +02:00
parent b615847b09
commit 57abee6cf0
3 changed files with 46 additions and 12 deletions

View File

@@ -82,7 +82,7 @@ public class FeedParser {
continue; continue;
} }
entry.setGuid(FeedUtils.truncate(guid, 2048)); entry.setGuid(FeedUtils.truncate(guid, 2048));
entry.setUrl(FeedUtils.truncate(FeedUtils.toAbsoluteUrl(item.getLink(), feed.getLink()), 2048)); entry.setUrl(FeedUtils.truncate(FeedUtils.toAbsoluteUrl(item.getLink(), feed.getLink(), feed.getUrlAfterRedirect()), 2048));
entry.setUpdated(validateDate(getEntryUpdateDate(item), true)); entry.setUpdated(validateDate(getEntryUpdateDate(item), true));
FeedEntryContent content = new FeedEntryContent(); FeedEntryContent content = new FeedEntryContent();

View File

@@ -1,6 +1,8 @@
package com.commafeed.backend.feeds; package com.commafeed.backend.feeds;
import java.io.StringReader; import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.Date; import java.util.Date;
@@ -15,6 +17,7 @@ import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.time.DateUtils; import org.apache.commons.lang3.time.DateUtils;
import org.apache.commons.math.stat.descriptive.SummaryStatistics; import org.apache.commons.math.stat.descriptive.SummaryStatistics;
import org.apache.wicket.request.UrlUtils;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Document.OutputSettings; import org.jsoup.nodes.Document.OutputSettings;
@@ -389,17 +392,37 @@ public class FeedUtils {
return url; return url;
} }
public static String toAbsoluteUrl(String url, String baseUrl) { /**
*
* @param url
* the url of the entry
* @param feedLink
* the url of the feed as described in the feed
* @param feedUrl
* the url of the feed that we used to fetch the feed
* @return an absolute url pointing to the entry
*/
public static String toAbsoluteUrl(String url, String feedLink, String feedUrl) {
url = StringUtils.trimToNull(StringUtils.normalizeSpace(url)); url = StringUtils.trimToNull(StringUtils.normalizeSpace(url));
if (baseUrl == null || url == null || url.startsWith("http")) { if (url == null || url.startsWith("http")) {
return url; return url;
} }
if (url.startsWith("/") == false) { String baseUrl = (feedLink == null || UrlUtils.isRelative(feedLink)) ? feedUrl : feedLink;
url = "/" + url;
if (baseUrl == null) {
return url;
} }
return baseUrl + url; String result = null;
try {
result = new URL(new URL(baseUrl), url).toString();
} catch (MalformedURLException e) {
log.debug("could not parse url : " + e.getMessage(), e);
result = url;
}
return result;
} }
public static String getFaviconUrl(FeedSubscription subscription, String publicUrl) { public static String getFaviconUrl(FeedSubscription subscription, String publicUrl) {

View File

@@ -22,7 +22,6 @@ public class FeedUtilsTest {
String urlc4 = "http://feeds.feedburner.com/Frandroid/"; String urlc4 = "http://feeds.feedburner.com/Frandroid/";
String urlc5 = "http://feeds.feedburner.com/Frandroid?format=rss"; String urlc5 = "http://feeds.feedburner.com/Frandroid?format=rss";
String urld1 = "http://fivefilters.org/content-only/makefulltextfeed.php?url=http://feeds.feedburner.com/Frandroid"; String urld1 = "http://fivefilters.org/content-only/makefulltextfeed.php?url=http://feeds.feedburner.com/Frandroid";
String urld2 = "http://fivefilters.org/content-only/makefulltextfeed.php?url=http://feeds2.feedburner.com/Frandroid"; String urld2 = "http://fivefilters.org/content-only/makefulltextfeed.php?url=http://feeds2.feedburner.com/Frandroid";
@@ -41,5 +40,17 @@ public class FeedUtilsTest {
Assert.assertNotEquals(FeedUtils.normalizeURL(urld1), FeedUtils.normalizeURL(urld2)); Assert.assertNotEquals(FeedUtils.normalizeURL(urld1), FeedUtils.normalizeURL(urld2));
} }
}
@Test
public void testToAbsoluteUrl() {
String expected = "http://a.com/blog/entry/1";
Assert.assertEquals(expected, FeedUtils.toAbsoluteUrl("http://a.com/blog/entry/1", "http://a.com/feed/", "http://a.com/feed/"));
Assert.assertEquals(expected, FeedUtils.toAbsoluteUrl("http://a.com/blog/entry/1", "http://a.com/feed", "http://a.com/feed"));
Assert.assertEquals(expected, FeedUtils.toAbsoluteUrl("../blog/entry/1", "http://a.com/feed/", "http://a.com/feed/"));
Assert.assertEquals(expected, FeedUtils.toAbsoluteUrl("../blog/entry/1", "feed.xml", "http://a.com/feed/feed.xml"));
Assert.assertEquals("http://ergoemacs.org/emacs/elisp_all_about_lines.html",
FeedUtils.toAbsoluteUrl("elisp_all_about_lines.html", "blog.xml", "http://ergoemacs.org/emacs/blog.xml"));
}
}