diff --git a/commafeed-server/src/main/java/com/commafeed/backend/Urls.java b/commafeed-server/src/main/java/com/commafeed/backend/Urls.java new file mode 100644 index 00000000..191dd2c1 --- /dev/null +++ b/commafeed-server/src/main/java/com/commafeed/backend/Urls.java @@ -0,0 +1,95 @@ +package com.commafeed.backend; + +import java.net.URI; +import java.util.regex.Pattern; + +import org.apache.commons.lang3.StringUtils; +import org.netpreserve.urlcanon.Canonicalizer; +import org.netpreserve.urlcanon.ParsedUrl; + +import lombok.experimental.UtilityClass; + +@UtilityClass +public class Urls { + + private static final String ESCAPED_QUESTION_MARK = Pattern.quote("?"); + + public static boolean isHttp(String url) { + return url.startsWith("http://"); + } + + public static boolean isHttps(String url) { + return url.startsWith("https://"); + } + + public static boolean isAbsolute(String url) { + return isHttp(url) || isHttps(url); + } + + /** + * + * @param relativeUrl + * the url of the entry + * @param feedLink + * the url of the feed as described in the feed + * @param feedUrl + * the url of the feed that we used to fetch the feed + * @return an absolute url pointing to the entry + */ + public static String toAbsolute(String relativeUrl, String feedLink, String feedUrl) { + String baseUrl = (feedLink != null && isAbsolute(feedLink)) ? feedLink : feedUrl; + if (baseUrl == null) { + return null; + } + + return URI.create(baseUrl).resolve(relativeUrl).toString(); + } + + public static String removeTrailingSlash(String url) { + if (url.endsWith("/")) { + url = url.substring(0, url.length() - 1); + } + return url; + } + + /** + * Normalize the url. The resulting url is not meant to be fetched but rather used as a mean to identify a feed and avoid duplicates + */ + public static String normalize(String url) { + if (url == null) { + return null; + } + + ParsedUrl parsedUrl = ParsedUrl.parseUrl(url); + Canonicalizer.AGGRESSIVE.canonicalize(parsedUrl); + String normalized = parsedUrl.toString(); + if (normalized == null) { + normalized = url; + } + + // convert to lower case, the url probably won't work in some cases + // after that but we don't care we just want to compare urls to avoid + // duplicates + normalized = normalized.toLowerCase(); + + // store all urls as http + if (normalized.startsWith("https")) { + normalized = "http" + normalized.substring(5); + } + + // remove the www. part + normalized = normalized.replace("//www.", "//"); + + // feedproxy redirects to feedburner + normalized = normalized.replace("feedproxy.google.com", "feeds.feedburner.com"); + + // feedburner feeds have a special treatment + if (normalized.split(ESCAPED_QUESTION_MARK)[0].contains("feedburner.com")) { + normalized = normalized.replace("feeds2.feedburner.com", "feeds.feedburner.com"); + normalized = normalized.split(ESCAPED_QUESTION_MARK)[0]; + normalized = StringUtils.removeEnd(normalized, "/"); + } + + return normalized; + } +} diff --git a/commafeed-server/src/main/java/com/commafeed/backend/favicon/DefaultFaviconFetcher.java b/commafeed-server/src/main/java/com/commafeed/backend/favicon/DefaultFaviconFetcher.java index f04c4628..9e4c4216 100644 --- a/commafeed-server/src/main/java/com/commafeed/backend/favicon/DefaultFaviconFetcher.java +++ b/commafeed-server/src/main/java/com/commafeed/backend/favicon/DefaultFaviconFetcher.java @@ -10,7 +10,7 @@ import org.jsoup.select.Elements; import com.commafeed.backend.HttpGetter; import com.commafeed.backend.HttpGetter.HttpResult; -import com.commafeed.backend.feed.FeedUtils; +import com.commafeed.backend.Urls; import com.commafeed.backend.model.Feed; import lombok.RequiredArgsConstructor; @@ -68,7 +68,7 @@ public class DefaultFaviconFetcher extends AbstractFaviconFetcher { String contentType = null; try { - url = FeedUtils.removeTrailingSlash(url) + "/favicon.ico"; + url = Urls.removeTrailingSlash(url) + "/favicon.ico"; log.debug("getting root icon at {}", url); HttpResult result = getter.get(url); bytes = result.getContent(); diff --git a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedUtils.java b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedUtils.java index 5c021ea0..31d3d6bc 100644 --- a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedUtils.java +++ b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedUtils.java @@ -1,8 +1,6 @@ package com.commafeed.backend.feed; -import java.net.URI; import java.util.List; -import java.util.regex.Pattern; import org.apache.commons.lang3.StringUtils; import org.apache.hc.client5.http.utils.Base64; @@ -10,8 +8,6 @@ import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; -import org.netpreserve.urlcanon.Canonicalizer; -import org.netpreserve.urlcanon.ParsedUrl; import com.commafeed.backend.feed.FeedEntryKeyword.Mode; import com.commafeed.backend.feed.parser.TextDirectionDetector; @@ -29,8 +25,6 @@ import lombok.extern.slf4j.Slf4j; @Slf4j public class FeedUtils { - private static final String ESCAPED_QUESTION_MARK = Pattern.quote("?"); - public static String truncate(String string, int length) { if (string != null) { string = string.substring(0, Math.min(length, string.length())); @@ -38,59 +32,6 @@ public class FeedUtils { return string; } - public static boolean isHttp(String url) { - return url.startsWith("http://"); - } - - public static boolean isHttps(String url) { - return url.startsWith("https://"); - } - - public static boolean isAbsoluteUrl(String url) { - return isHttp(url) || isHttps(url); - } - - /** - * Normalize the url. The resulting url is not meant to be fetched but rather used as a mean to identify a feed and avoid duplicates - */ - public static String normalizeURL(String url) { - if (url == null) { - return null; - } - - ParsedUrl parsedUrl = ParsedUrl.parseUrl(url); - Canonicalizer.AGGRESSIVE.canonicalize(parsedUrl); - String normalized = parsedUrl.toString(); - if (normalized == null) { - normalized = url; - } - - // convert to lower case, the url probably won't work in some cases - // after that but we don't care we just want to compare urls to avoid - // duplicates - normalized = normalized.toLowerCase(); - - // store all urls as http - if (normalized.startsWith("https")) { - normalized = "http" + normalized.substring(5); - } - - // remove the www. part - normalized = normalized.replace("//www.", "//"); - - // feedproxy redirects to feedburner - normalized = normalized.replace("feedproxy.google.com", "feeds.feedburner.com"); - - // feedburner feeds have a special treatment - if (normalized.split(ESCAPED_QUESTION_MARK)[0].contains("feedburner.com")) { - normalized = normalized.replace("feeds2.feedburner.com", "feeds.feedburner.com"); - normalized = normalized.split(ESCAPED_QUESTION_MARK)[0]; - normalized = StringUtils.removeEnd(normalized, "/"); - } - - return normalized; - } - public static boolean isRTL(String title, String content) { String text = StringUtils.isNotBlank(content) ? content : title; if (StringUtils.isBlank(text)) { @@ -105,32 +46,6 @@ public class FeedUtils { return TextDirectionDetector.detect(stripped) == TextDirectionDetector.Direction.RIGHT_TO_LEFT; } - public static String removeTrailingSlash(String url) { - if (url.endsWith("/")) { - url = url.substring(0, url.length() - 1); - } - return url; - } - - /** - * - * @param relativeUrl - * the url of the entry - * @param feedLink - * the url of the feed as described in the feed - * @param feedUrl - * the url of the feed that we used to fetch the feed - * @return an absolute url pointing to the entry - */ - public static String toAbsoluteUrl(String relativeUrl, String feedLink, String feedUrl) { - String baseUrl = (feedLink != null && isAbsoluteUrl(feedLink)) ? feedLink : feedUrl; - if (baseUrl == null) { - return null; - } - - return URI.create(baseUrl).resolve(relativeUrl).toString(); - } - public static String getFaviconUrl(FeedSubscription subscription) { return "rest/feed/favicon/" + subscription.getId(); } diff --git a/commafeed-server/src/main/java/com/commafeed/backend/feed/parser/FeedParser.java b/commafeed-server/src/main/java/com/commafeed/backend/feed/parser/FeedParser.java index db4ddfa2..188ec477 100644 --- a/commafeed-server/src/main/java/com/commafeed/backend/feed/parser/FeedParser.java +++ b/commafeed-server/src/main/java/com/commafeed/backend/feed/parser/FeedParser.java @@ -19,7 +19,7 @@ import org.jdom2.Element; import org.jdom2.Namespace; import org.xml.sax.InputSource; -import com.commafeed.backend.feed.FeedUtils; +import com.commafeed.backend.Urls; import com.commafeed.backend.feed.parser.FeedParserResult.Content; import com.commafeed.backend.feed.parser.FeedParserResult.Enclosure; import com.commafeed.backend.feed.parser.FeedParserResult.Entry; @@ -119,7 +119,7 @@ public class FeedParser { } String url = buildEntryUrl(feed, feedUrl, item); - if (StringUtils.isBlank(url) && FeedUtils.isAbsoluteUrl(guid)) { + if (StringUtils.isBlank(url) && Urls.isAbsolute(guid)) { // if link is empty but guid is used as url, use guid url = guid; } @@ -165,14 +165,14 @@ public class FeedParser { private String buildEntryUrl(SyndFeed feed, String feedUrl, SyndEntry item) { String url = StringUtils.trimToNull(StringUtils.normalizeSpace(item.getLink())); - if (url == null || FeedUtils.isAbsoluteUrl(url)) { + if (url == null || Urls.isAbsolute(url)) { // url is absolute, nothing to do return url; } // url is relative, trying to resolve it String feedLink = StringUtils.trimToNull(StringUtils.normalizeSpace(feed.getLink())); - return FeedUtils.toAbsoluteUrl(url, feedLink, feedUrl); + return Urls.toAbsolute(url, feedLink, feedUrl); } private Instant toValidInstant(Date date, boolean nullToNow) { diff --git a/commafeed-server/src/main/java/com/commafeed/backend/service/FeedService.java b/commafeed-server/src/main/java/com/commafeed/backend/service/FeedService.java index 3034e522..32135e4b 100644 --- a/commafeed-server/src/main/java/com/commafeed/backend/service/FeedService.java +++ b/commafeed-server/src/main/java/com/commafeed/backend/service/FeedService.java @@ -8,6 +8,7 @@ import java.util.Objects; import jakarta.inject.Singleton; import com.commafeed.backend.Digests; +import com.commafeed.backend.Urls; import com.commafeed.backend.dao.FeedDAO; import com.commafeed.backend.favicon.AbstractFaviconFetcher; import com.commafeed.backend.favicon.Favicon; @@ -33,7 +34,7 @@ public class FeedService { } public synchronized Feed findOrCreate(String url) { - String normalizedUrl = FeedUtils.normalizeURL(url); + String normalizedUrl = Urls.normalize(url); String normalizedUrlHash = Digests.sha1Hex(normalizedUrl); Feed feed = feedDAO.findByUrl(normalizedUrl, normalizedUrlHash); if (feed == null) { @@ -48,7 +49,7 @@ public class FeedService { } public void update(Feed feed) { - String normalized = FeedUtils.normalizeURL(feed.getUrl()); + String normalized = Urls.normalize(feed.getUrl()); feed.setNormalizedUrl(normalized); feed.setNormalizedUrlHash(Digests.sha1Hex(normalized)); feed.setLastUpdated(Instant.now()); diff --git a/commafeed-server/src/main/java/com/commafeed/backend/service/FeedSubscriptionService.java b/commafeed-server/src/main/java/com/commafeed/backend/service/FeedSubscriptionService.java index 2912e354..fa9c8cc5 100644 --- a/commafeed-server/src/main/java/com/commafeed/backend/service/FeedSubscriptionService.java +++ b/commafeed-server/src/main/java/com/commafeed/backend/service/FeedSubscriptionService.java @@ -8,6 +8,7 @@ import java.util.stream.Collectors; import jakarta.inject.Singleton; import com.commafeed.CommaFeedConfiguration; +import com.commafeed.backend.Urls; import com.commafeed.backend.dao.FeedEntryStatusDAO; import com.commafeed.backend.dao.FeedSubscriptionDAO; import com.commafeed.backend.feed.FeedRefreshEngine; @@ -67,7 +68,7 @@ public class FeedSubscriptionService { Feed feed = feedService.findOrCreate(url); // upgrade feed to https if it was using http - if (FeedUtils.isHttp(feed.getUrl()) && FeedUtils.isHttps(url)) { + if (Urls.isHttp(feed.getUrl()) && Urls.isHttps(url)) { feed.setUrl(url); } diff --git a/commafeed-server/src/main/java/com/commafeed/frontend/resource/UserREST.java b/commafeed-server/src/main/java/com/commafeed/frontend/resource/UserREST.java index 0d548a19..f646a31f 100644 --- a/commafeed-server/src/main/java/com/commafeed/frontend/resource/UserREST.java +++ b/commafeed-server/src/main/java/com/commafeed/frontend/resource/UserREST.java @@ -32,10 +32,10 @@ import org.apache.hc.core5.net.URIBuilder; import com.commafeed.CommaFeedConfiguration; import com.commafeed.CommaFeedConstants; import com.commafeed.backend.Digests; +import com.commafeed.backend.Urls; import com.commafeed.backend.dao.UserDAO; import com.commafeed.backend.dao.UserRoleDAO; import com.commafeed.backend.dao.UserSettingsDAO; -import com.commafeed.backend.feed.FeedUtils; import com.commafeed.backend.model.User; import com.commafeed.backend.model.UserRole; import com.commafeed.backend.model.UserRole.Role; @@ -309,7 +309,7 @@ public class UserREST { } private String buildEmailContent(User user) throws URISyntaxException, MalformedURLException { - String publicUrl = FeedUtils.removeTrailingSlash(uri.getBaseUri().toString()); + String publicUrl = Urls.removeTrailingSlash(uri.getBaseUri().toString()); publicUrl += "/rest/user/passwordResetCallback"; return String.format( "You asked for password recovery for account '%s', follow this link to change your password. Ignore this if you didn't request a password recovery.", diff --git a/commafeed-server/src/test/java/com/commafeed/backend/feed/FeedUtilsTest.java b/commafeed-server/src/test/java/com/commafeed/backend/UrlsTest.java similarity index 51% rename from commafeed-server/src/test/java/com/commafeed/backend/feed/FeedUtilsTest.java rename to commafeed-server/src/test/java/com/commafeed/backend/UrlsTest.java index 96dd2fb9..04e26ba8 100644 --- a/commafeed-server/src/test/java/com/commafeed/backend/feed/FeedUtilsTest.java +++ b/commafeed-server/src/test/java/com/commafeed/backend/UrlsTest.java @@ -1,9 +1,9 @@ -package com.commafeed.backend.feed; +package com.commafeed.backend; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -class FeedUtilsTest { +class UrlsTest { @Test void testNormalization() { @@ -25,19 +25,19 @@ class FeedUtilsTest { String urld1 = "http://fivefilters.org/content-only/makefulltextfeed.php?url=http://feeds.feedburner.com/Frandroid"; String urld2 = "http://fivefilters.org/content-only/makefulltextfeed.php?url=http://feeds2.feedburner.com/Frandroid"; - Assertions.assertEquals(FeedUtils.normalizeURL(urla1), FeedUtils.normalizeURL(urla2)); - Assertions.assertEquals(FeedUtils.normalizeURL(urla1), FeedUtils.normalizeURL(urla3)); - Assertions.assertEquals(FeedUtils.normalizeURL(urla1), FeedUtils.normalizeURL(urla4)); - Assertions.assertEquals(FeedUtils.normalizeURL(urla1), FeedUtils.normalizeURL(urla5)); + Assertions.assertEquals(Urls.normalize(urla1), Urls.normalize(urla2)); + Assertions.assertEquals(Urls.normalize(urla1), Urls.normalize(urla3)); + Assertions.assertEquals(Urls.normalize(urla1), Urls.normalize(urla4)); + Assertions.assertEquals(Urls.normalize(urla1), Urls.normalize(urla5)); - Assertions.assertEquals(FeedUtils.normalizeURL(urlb1), FeedUtils.normalizeURL(urlb2)); + Assertions.assertEquals(Urls.normalize(urlb1), Urls.normalize(urlb2)); - Assertions.assertEquals(FeedUtils.normalizeURL(urlc1), FeedUtils.normalizeURL(urlc2)); - Assertions.assertEquals(FeedUtils.normalizeURL(urlc1), FeedUtils.normalizeURL(urlc3)); - Assertions.assertEquals(FeedUtils.normalizeURL(urlc1), FeedUtils.normalizeURL(urlc4)); - Assertions.assertEquals(FeedUtils.normalizeURL(urlc1), FeedUtils.normalizeURL(urlc5)); + Assertions.assertEquals(Urls.normalize(urlc1), Urls.normalize(urlc2)); + Assertions.assertEquals(Urls.normalize(urlc1), Urls.normalize(urlc3)); + Assertions.assertEquals(Urls.normalize(urlc1), Urls.normalize(urlc4)); + Assertions.assertEquals(Urls.normalize(urlc1), Urls.normalize(urlc5)); - Assertions.assertNotEquals(FeedUtils.normalizeURL(urld1), FeedUtils.normalizeURL(urld2)); + Assertions.assertNotEquals(Urls.normalize(urld1), Urls.normalize(urld2)); } @@ -46,36 +46,36 @@ class FeedUtilsTest { String expected = "http://a.com/blog/entry/1"; // usual cases - Assertions.assertEquals(expected, FeedUtils.toAbsoluteUrl("http://a.com/blog/entry/1", "http://a.com/feed/", "http://a.com/feed/")); - Assertions.assertEquals(expected, FeedUtils.toAbsoluteUrl("http://a.com/blog/entry/1", "http://a.com/feed", "http://a.com/feed")); + Assertions.assertEquals(expected, Urls.toAbsolute("http://a.com/blog/entry/1", "http://a.com/feed/", "http://a.com/feed/")); + Assertions.assertEquals(expected, Urls.toAbsolute("http://a.com/blog/entry/1", "http://a.com/feed", "http://a.com/feed")); // relative links - Assertions.assertEquals(expected, FeedUtils.toAbsoluteUrl("../blog/entry/1", "http://a.com/feed/", "http://a.com/feed/")); - Assertions.assertEquals(expected, FeedUtils.toAbsoluteUrl("../blog/entry/1", "feed.xml", "http://a.com/feed/feed.xml")); + Assertions.assertEquals(expected, Urls.toAbsolute("../blog/entry/1", "http://a.com/feed/", "http://a.com/feed/")); + Assertions.assertEquals(expected, Urls.toAbsolute("../blog/entry/1", "feed.xml", "http://a.com/feed/feed.xml")); // root-relative links - Assertions.assertEquals(expected, FeedUtils.toAbsoluteUrl("/blog/entry/1", "/feed", "http://a.com/feed")); + Assertions.assertEquals(expected, Urls.toAbsolute("/blog/entry/1", "/feed", "http://a.com/feed")); // real cases - Assertions.assertEquals("https://github.com/erusev/parsedown/releases/tag/1.3.0", FeedUtils.toAbsoluteUrl( + Assertions.assertEquals("https://github.com/erusev/parsedown/releases/tag/1.3.0", Urls.toAbsolute( "/erusev/parsedown/releases/tag/1.3.0", "/erusev/parsedown/releases", "https://github.com/erusev/parsedown/tags.atom")); Assertions.assertEquals("http://ergoemacs.org/emacs/elisp_all_about_lines.html", - FeedUtils.toAbsoluteUrl("elisp_all_about_lines.html", "blog.xml", "http://ergoemacs.org/emacs/blog.xml")); + Urls.toAbsolute("elisp_all_about_lines.html", "blog.xml", "http://ergoemacs.org/emacs/blog.xml")); } @Test void testRemoveTrailingSlash() { final String url = "http://localhost/"; - final String result = FeedUtils.removeTrailingSlash(url); + final String result = Urls.removeTrailingSlash(url); Assertions.assertEquals("http://localhost", result); } @Test void testRemoveTrailingSlashLastSlashOnly() { final String url = "http://localhost//"; - final String result = FeedUtils.removeTrailingSlash(url); + final String result = Urls.removeTrailingSlash(url); Assertions.assertEquals("http://localhost/", result); } -} +} \ No newline at end of file