diff --git a/src/main/java/com/commafeed/backend/feeds/FeedParser.java b/src/main/java/com/commafeed/backend/feeds/FeedParser.java index b04dbec4..4410ac28 100644 --- a/src/main/java/com/commafeed/backend/feeds/FeedParser.java +++ b/src/main/java/com/commafeed/backend/feeds/FeedParser.java @@ -82,7 +82,7 @@ public class FeedParser { continue; } entry.setGuid(FeedUtils.truncate(guid, 2048)); - entry.setUrl(FeedUtils.truncate(FeedUtils.toAbsoluteUrl(item.getLink(), feed.getLink()), 2048)); + entry.setUrl(FeedUtils.truncate(FeedUtils.toAbsoluteUrl(item.getLink(), feed.getLink(), feed.getUrlAfterRedirect()), 2048)); entry.setUpdated(validateDate(getEntryUpdateDate(item), true)); FeedEntryContent content = new FeedEntryContent(); diff --git a/src/main/java/com/commafeed/backend/feeds/FeedUtils.java b/src/main/java/com/commafeed/backend/feeds/FeedUtils.java index 8fb68fb6..20b97765 100644 --- a/src/main/java/com/commafeed/backend/feeds/FeedUtils.java +++ b/src/main/java/com/commafeed/backend/feeds/FeedUtils.java @@ -1,6 +1,8 @@ package com.commafeed.backend.feeds; import java.io.StringReader; +import java.net.MalformedURLException; +import java.net.URL; import java.util.Arrays; import java.util.Collections; import java.util.Date; @@ -15,6 +17,7 @@ import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.time.DateUtils; import org.apache.commons.math.stat.descriptive.SummaryStatistics; +import org.apache.wicket.request.UrlUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Document.OutputSettings; @@ -389,17 +392,37 @@ public class FeedUtils { return url; } - public static String toAbsoluteUrl(String url, String baseUrl) { + /** + * Resolves an entry url against a base url, which is feedLink unless feedLink is null or relative, in which case feedUrl is used. + * @param url + * the url of the entry; returned unresolved when it is null or starts with http (NOTE(review): the prefix test also matches relative paths such as httpd.html - confirm intended) + * @param feedLink + * the url of the feed as described in the feed + * @param feedUrl + * the url of the feed that we used to fetch the feed + * @return an absolute url pointing to the entry, or the url unresolved when no base url is available or the urls cannot be parsed + */ + public
static String toAbsoluteUrl(String url, String feedLink, String feedUrl) { url = StringUtils.trimToNull(StringUtils.normalizeSpace(url)); - if (baseUrl == null || url == null || url.startsWith("http")) { + if (url == null || url.startsWith("http")) { return url; } - if (url.startsWith("/") == false) { - url = "/" + url; + String baseUrl = (feedLink == null || UrlUtils.isRelative(feedLink)) ? feedUrl : feedLink; + + if (baseUrl == null) { + return url; } - return baseUrl + url; + String result = null; + try { + result = new URL(new URL(baseUrl), url).toString(); + } catch (MalformedURLException e) { + log.debug("could not parse url : " + e.getMessage(), e); + result = url; + } + + return result; } public static String getFaviconUrl(FeedSubscription subscription, String publicUrl) { diff --git a/src/test/java/com/commafeed/backend/feeds/FeedUtilsTest.java b/src/test/java/com/commafeed/backend/feeds/FeedUtilsTest.java index 3285acdc..4c5208f6 100644 --- a/src/test/java/com/commafeed/backend/feeds/FeedUtilsTest.java +++ b/src/test/java/com/commafeed/backend/feeds/FeedUtilsTest.java @@ -15,14 +15,13 @@ public class FeedUtilsTest { String urlb1 = "http://ftr.fivefilters.org/makefulltextfeed.php?url=http%3A%2F%2Ffeeds.howtogeek.com%2FHowToGeek&max=10&summary=1"; String urlb2 = "http://ftr.fivefilters.org/makefulltextfeed.php?url=http://feeds.howtogeek.com/HowToGeek&max=10&summary=1"; - + String urlc1 = "http://feeds.feedburner.com/Frandroid"; String urlc2 = "http://feeds2.feedburner.com/frandroid"; String urlc3 = "http://feedproxy.google.com/frandroid"; String urlc4 = "http://feeds.feedburner.com/Frandroid/"; String urlc5 = "http://feeds.feedburner.com/Frandroid?format=rss"; - - + String urld1 = "http://fivefilters.org/content-only/makefulltextfeed.php?url=http://feeds.feedburner.com/Frandroid"; String urld2 = "http://fivefilters.org/content-only/makefulltextfeed.php?url=http://feeds2.feedburner.com/Frandroid"; @@ -32,14 +31,26 @@ public class FeedUtilsTest {
Assert.assertEquals(FeedUtils.normalizeURL(urla1), FeedUtils.normalizeURL(urla5)); Assert.assertEquals(FeedUtils.normalizeURL(urlb1), FeedUtils.normalizeURL(urlb2)); - + Assert.assertEquals(FeedUtils.normalizeURL(urlc1), FeedUtils.normalizeURL(urlc2)); Assert.assertEquals(FeedUtils.normalizeURL(urlc1), FeedUtils.normalizeURL(urlc3)); Assert.assertEquals(FeedUtils.normalizeURL(urlc1), FeedUtils.normalizeURL(urlc4)); Assert.assertEquals(FeedUtils.normalizeURL(urlc1), FeedUtils.normalizeURL(urlc5)); - + Assert.assertNotEquals(FeedUtils.normalizeURL(urld1), FeedUtils.normalizeURL(urld2)); } -} + @Test + public void testToAbsoluteUrl() { + String expected = "http://a.com/blog/entry/1"; + Assert.assertEquals(expected, FeedUtils.toAbsoluteUrl("http://a.com/blog/entry/1", "http://a.com/feed/", "http://a.com/feed/")); + Assert.assertEquals(expected, FeedUtils.toAbsoluteUrl("http://a.com/blog/entry/1", "http://a.com/feed", "http://a.com/feed")); + Assert.assertEquals(expected, FeedUtils.toAbsoluteUrl("../blog/entry/1", "http://a.com/feed/", "http://a.com/feed/")); + Assert.assertEquals(expected, FeedUtils.toAbsoluteUrl("../blog/entry/1", "feed.xml", "http://a.com/feed/feed.xml")); + + Assert.assertEquals("http://ergoemacs.org/emacs/elisp_all_about_lines.html", + FeedUtils.toAbsoluteUrl("elisp_all_about_lines.html", "blog.xml", "http://ergoemacs.org/emacs/blog.xml")); + + } +}