forked from Archives/Athou_commafeed
extract url utils from FeedUtils
This commit is contained in:
@@ -0,0 +1,95 @@
|
||||
package com.commafeed.backend;

import java.net.URI;
import java.util.Locale;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;
import org.netpreserve.urlcanon.Canonicalizer;
import org.netpreserve.urlcanon.ParsedUrl;

import lombok.experimental.UtilityClass;

/**
 * URL helper methods extracted from {@code FeedUtils}: scheme checks, relative-url
 * resolution and normalization used to deduplicate feeds.
 */
@UtilityClass
public class Urls {

	/** "?" quoted for use with {@link String#split(String)}, which takes a regex. */
	private static final String ESCAPED_QUESTION_MARK = Pattern.quote("?");

	/**
	 * @return true if the url starts with "http://"
	 */
	public static boolean isHttp(String url) {
		return url.startsWith("http://");
	}

	/**
	 * @return true if the url starts with "https://"
	 */
	public static boolean isHttps(String url) {
		return url.startsWith("https://");
	}

	/**
	 * @return true if the url is absolute, i.e. starts with "http://" or "https://"
	 */
	public static boolean isAbsolute(String url) {
		return isHttp(url) || isHttps(url);
	}

	/**
	 * Resolves a (possibly relative) entry url against the feed's own link, falling back to the url the feed was fetched from.
	 *
	 * @param relativeUrl
	 *            the url of the entry
	 * @param feedLink
	 *            the url of the feed as described in the feed
	 * @param feedUrl
	 *            the url of the feed that we used to fetch the feed
	 * @return an absolute url pointing to the entry, or null if no absolute base url is available
	 */
	public static String toAbsolute(String relativeUrl, String feedLink, String feedUrl) {
		// prefer the feed's self-declared link, but only if it is itself absolute
		String baseUrl = (feedLink != null && isAbsolute(feedLink)) ? feedLink : feedUrl;
		if (baseUrl == null) {
			return null;
		}

		return URI.create(baseUrl).resolve(relativeUrl).toString();
	}

	/**
	 * Removes at most one trailing "/" from the url.
	 *
	 * @return the url without its final slash, or the url unchanged if it has none
	 */
	public static String removeTrailingSlash(String url) {
		if (url.endsWith("/")) {
			url = url.substring(0, url.length() - 1);
		}
		return url;
	}

	/**
	 * Normalize the url. The resulting url is not meant to be fetched but rather used as a mean to identify a feed and avoid duplicates
	 *
	 * @param url
	 *            the url to normalize, may be null
	 * @return the normalized url, or null if the input was null
	 */
	public static String normalize(String url) {
		if (url == null) {
			return null;
		}

		ParsedUrl parsedUrl = ParsedUrl.parseUrl(url);
		Canonicalizer.AGGRESSIVE.canonicalize(parsedUrl);
		String normalized = parsedUrl.toString();
		if (normalized == null) {
			normalized = url;
		}

		// convert to lower case, the url probably won't work in some cases
		// after that but we don't care we just want to compare urls to avoid
		// duplicates
		// Locale.ROOT avoids locale-dependent case mappings (e.g. the Turkish
		// dotless-i) that would make normalization differ between machines
		normalized = normalized.toLowerCase(Locale.ROOT);

		// store all urls as http
		if (normalized.startsWith("https")) {
			normalized = "http" + normalized.substring(5);
		}

		// remove the www. part
		normalized = normalized.replace("//www.", "//");

		// feedproxy redirects to feedburner
		normalized = normalized.replace("feedproxy.google.com", "feeds.feedburner.com");

		// feedburner feeds have a special treatment: drop the query string
		// entirely, but only when feedburner.com appears before the "?" so that
		// urls merely mentioning feedburner in a query parameter keep theirs
		if (normalized.split(ESCAPED_QUESTION_MARK)[0].contains("feedburner.com")) {
			normalized = normalized.replace("feeds2.feedburner.com", "feeds.feedburner.com");
			normalized = normalized.split(ESCAPED_QUESTION_MARK)[0];
			normalized = StringUtils.removeEnd(normalized, "/");
		}

		return normalized;
	}
}
|
||||
@@ -10,7 +10,7 @@ import org.jsoup.select.Elements;
|
||||
|
||||
import com.commafeed.backend.HttpGetter;
|
||||
import com.commafeed.backend.HttpGetter.HttpResult;
|
||||
import com.commafeed.backend.feed.FeedUtils;
|
||||
import com.commafeed.backend.Urls;
|
||||
import com.commafeed.backend.model.Feed;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
@@ -68,7 +68,7 @@ public class DefaultFaviconFetcher extends AbstractFaviconFetcher {
|
||||
String contentType = null;
|
||||
|
||||
try {
|
||||
url = FeedUtils.removeTrailingSlash(url) + "/favicon.ico";
|
||||
url = Urls.removeTrailingSlash(url) + "/favicon.ico";
|
||||
log.debug("getting root icon at {}", url);
|
||||
HttpResult result = getter.get(url);
|
||||
bytes = result.getContent();
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
package com.commafeed.backend.feed;
|
||||
|
||||
import java.net.URI;
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.hc.client5.http.utils.Base64;
|
||||
@@ -10,8 +8,6 @@ import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import org.netpreserve.urlcanon.Canonicalizer;
|
||||
import org.netpreserve.urlcanon.ParsedUrl;
|
||||
|
||||
import com.commafeed.backend.feed.FeedEntryKeyword.Mode;
|
||||
import com.commafeed.backend.feed.parser.TextDirectionDetector;
|
||||
@@ -29,8 +25,6 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@Slf4j
|
||||
public class FeedUtils {
|
||||
|
||||
private static final String ESCAPED_QUESTION_MARK = Pattern.quote("?");
|
||||
|
||||
public static String truncate(String string, int length) {
|
||||
if (string != null) {
|
||||
string = string.substring(0, Math.min(length, string.length()));
|
||||
@@ -38,59 +32,6 @@ public class FeedUtils {
|
||||
return string;
|
||||
}
|
||||
|
||||
public static boolean isHttp(String url) {
|
||||
return url.startsWith("http://");
|
||||
}
|
||||
|
||||
public static boolean isHttps(String url) {
|
||||
return url.startsWith("https://");
|
||||
}
|
||||
|
||||
public static boolean isAbsoluteUrl(String url) {
|
||||
return isHttp(url) || isHttps(url);
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalize the url. The resulting url is not meant to be fetched but rather used as a mean to identify a feed and avoid duplicates
|
||||
*/
|
||||
public static String normalizeURL(String url) {
|
||||
if (url == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
ParsedUrl parsedUrl = ParsedUrl.parseUrl(url);
|
||||
Canonicalizer.AGGRESSIVE.canonicalize(parsedUrl);
|
||||
String normalized = parsedUrl.toString();
|
||||
if (normalized == null) {
|
||||
normalized = url;
|
||||
}
|
||||
|
||||
// convert to lower case, the url probably won't work in some cases
|
||||
// after that but we don't care we just want to compare urls to avoid
|
||||
// duplicates
|
||||
normalized = normalized.toLowerCase();
|
||||
|
||||
// store all urls as http
|
||||
if (normalized.startsWith("https")) {
|
||||
normalized = "http" + normalized.substring(5);
|
||||
}
|
||||
|
||||
// remove the www. part
|
||||
normalized = normalized.replace("//www.", "//");
|
||||
|
||||
// feedproxy redirects to feedburner
|
||||
normalized = normalized.replace("feedproxy.google.com", "feeds.feedburner.com");
|
||||
|
||||
// feedburner feeds have a special treatment
|
||||
if (normalized.split(ESCAPED_QUESTION_MARK)[0].contains("feedburner.com")) {
|
||||
normalized = normalized.replace("feeds2.feedburner.com", "feeds.feedburner.com");
|
||||
normalized = normalized.split(ESCAPED_QUESTION_MARK)[0];
|
||||
normalized = StringUtils.removeEnd(normalized, "/");
|
||||
}
|
||||
|
||||
return normalized;
|
||||
}
|
||||
|
||||
public static boolean isRTL(String title, String content) {
|
||||
String text = StringUtils.isNotBlank(content) ? content : title;
|
||||
if (StringUtils.isBlank(text)) {
|
||||
@@ -105,32 +46,6 @@ public class FeedUtils {
|
||||
return TextDirectionDetector.detect(stripped) == TextDirectionDetector.Direction.RIGHT_TO_LEFT;
|
||||
}
|
||||
|
||||
public static String removeTrailingSlash(String url) {
|
||||
if (url.endsWith("/")) {
|
||||
url = url.substring(0, url.length() - 1);
|
||||
}
|
||||
return url;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param relativeUrl
|
||||
* the url of the entry
|
||||
* @param feedLink
|
||||
* the url of the feed as described in the feed
|
||||
* @param feedUrl
|
||||
* the url of the feed that we used to fetch the feed
|
||||
* @return an absolute url pointing to the entry
|
||||
*/
|
||||
public static String toAbsoluteUrl(String relativeUrl, String feedLink, String feedUrl) {
|
||||
String baseUrl = (feedLink != null && isAbsoluteUrl(feedLink)) ? feedLink : feedUrl;
|
||||
if (baseUrl == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return URI.create(baseUrl).resolve(relativeUrl).toString();
|
||||
}
|
||||
|
||||
public static String getFaviconUrl(FeedSubscription subscription) {
|
||||
return "rest/feed/favicon/" + subscription.getId();
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ import org.jdom2.Element;
|
||||
import org.jdom2.Namespace;
|
||||
import org.xml.sax.InputSource;
|
||||
|
||||
import com.commafeed.backend.feed.FeedUtils;
|
||||
import com.commafeed.backend.Urls;
|
||||
import com.commafeed.backend.feed.parser.FeedParserResult.Content;
|
||||
import com.commafeed.backend.feed.parser.FeedParserResult.Enclosure;
|
||||
import com.commafeed.backend.feed.parser.FeedParserResult.Entry;
|
||||
@@ -119,7 +119,7 @@ public class FeedParser {
|
||||
}
|
||||
|
||||
String url = buildEntryUrl(feed, feedUrl, item);
|
||||
if (StringUtils.isBlank(url) && FeedUtils.isAbsoluteUrl(guid)) {
|
||||
if (StringUtils.isBlank(url) && Urls.isAbsolute(guid)) {
|
||||
// if link is empty but guid is used as url, use guid
|
||||
url = guid;
|
||||
}
|
||||
@@ -165,14 +165,14 @@ public class FeedParser {
|
||||
|
||||
private String buildEntryUrl(SyndFeed feed, String feedUrl, SyndEntry item) {
|
||||
String url = StringUtils.trimToNull(StringUtils.normalizeSpace(item.getLink()));
|
||||
if (url == null || FeedUtils.isAbsoluteUrl(url)) {
|
||||
if (url == null || Urls.isAbsolute(url)) {
|
||||
// url is absolute, nothing to do
|
||||
return url;
|
||||
}
|
||||
|
||||
// url is relative, trying to resolve it
|
||||
String feedLink = StringUtils.trimToNull(StringUtils.normalizeSpace(feed.getLink()));
|
||||
return FeedUtils.toAbsoluteUrl(url, feedLink, feedUrl);
|
||||
return Urls.toAbsolute(url, feedLink, feedUrl);
|
||||
}
|
||||
|
||||
private Instant toValidInstant(Date date, boolean nullToNow) {
|
||||
|
||||
@@ -8,6 +8,7 @@ import java.util.Objects;
|
||||
import jakarta.inject.Singleton;
|
||||
|
||||
import com.commafeed.backend.Digests;
|
||||
import com.commafeed.backend.Urls;
|
||||
import com.commafeed.backend.dao.FeedDAO;
|
||||
import com.commafeed.backend.favicon.AbstractFaviconFetcher;
|
||||
import com.commafeed.backend.favicon.Favicon;
|
||||
@@ -33,7 +34,7 @@ public class FeedService {
|
||||
}
|
||||
|
||||
public synchronized Feed findOrCreate(String url) {
|
||||
String normalizedUrl = FeedUtils.normalizeURL(url);
|
||||
String normalizedUrl = Urls.normalize(url);
|
||||
String normalizedUrlHash = Digests.sha1Hex(normalizedUrl);
|
||||
Feed feed = feedDAO.findByUrl(normalizedUrl, normalizedUrlHash);
|
||||
if (feed == null) {
|
||||
@@ -48,7 +49,7 @@ public class FeedService {
|
||||
}
|
||||
|
||||
public void update(Feed feed) {
|
||||
String normalized = FeedUtils.normalizeURL(feed.getUrl());
|
||||
String normalized = Urls.normalize(feed.getUrl());
|
||||
feed.setNormalizedUrl(normalized);
|
||||
feed.setNormalizedUrlHash(Digests.sha1Hex(normalized));
|
||||
feed.setLastUpdated(Instant.now());
|
||||
|
||||
@@ -8,6 +8,7 @@ import java.util.stream.Collectors;
|
||||
import jakarta.inject.Singleton;
|
||||
|
||||
import com.commafeed.CommaFeedConfiguration;
|
||||
import com.commafeed.backend.Urls;
|
||||
import com.commafeed.backend.dao.FeedEntryStatusDAO;
|
||||
import com.commafeed.backend.dao.FeedSubscriptionDAO;
|
||||
import com.commafeed.backend.feed.FeedRefreshEngine;
|
||||
@@ -67,7 +68,7 @@ public class FeedSubscriptionService {
|
||||
Feed feed = feedService.findOrCreate(url);
|
||||
|
||||
// upgrade feed to https if it was using http
|
||||
if (FeedUtils.isHttp(feed.getUrl()) && FeedUtils.isHttps(url)) {
|
||||
if (Urls.isHttp(feed.getUrl()) && Urls.isHttps(url)) {
|
||||
feed.setUrl(url);
|
||||
}
|
||||
|
||||
|
||||
@@ -32,10 +32,10 @@ import org.apache.hc.core5.net.URIBuilder;
|
||||
import com.commafeed.CommaFeedConfiguration;
|
||||
import com.commafeed.CommaFeedConstants;
|
||||
import com.commafeed.backend.Digests;
|
||||
import com.commafeed.backend.Urls;
|
||||
import com.commafeed.backend.dao.UserDAO;
|
||||
import com.commafeed.backend.dao.UserRoleDAO;
|
||||
import com.commafeed.backend.dao.UserSettingsDAO;
|
||||
import com.commafeed.backend.feed.FeedUtils;
|
||||
import com.commafeed.backend.model.User;
|
||||
import com.commafeed.backend.model.UserRole;
|
||||
import com.commafeed.backend.model.UserRole.Role;
|
||||
@@ -309,7 +309,7 @@ public class UserREST {
|
||||
}
|
||||
|
||||
private String buildEmailContent(User user) throws URISyntaxException, MalformedURLException {
|
||||
String publicUrl = FeedUtils.removeTrailingSlash(uri.getBaseUri().toString());
|
||||
String publicUrl = Urls.removeTrailingSlash(uri.getBaseUri().toString());
|
||||
publicUrl += "/rest/user/passwordResetCallback";
|
||||
return String.format(
|
||||
"You asked for password recovery for account '%s', <a href='%s'>follow this link</a> to change your password. Ignore this if you didn't request a password recovery.",
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
package com.commafeed.backend.feed;
|
||||
package com.commafeed.backend;
|
||||
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
class FeedUtilsTest {
|
||||
class UrlsTest {
|
||||
|
||||
@Test
|
||||
void testNormalization() {
|
||||
@@ -25,19 +25,19 @@ class FeedUtilsTest {
|
||||
String urld1 = "http://fivefilters.org/content-only/makefulltextfeed.php?url=http://feeds.feedburner.com/Frandroid";
|
||||
String urld2 = "http://fivefilters.org/content-only/makefulltextfeed.php?url=http://feeds2.feedburner.com/Frandroid";
|
||||
|
||||
Assertions.assertEquals(FeedUtils.normalizeURL(urla1), FeedUtils.normalizeURL(urla2));
|
||||
Assertions.assertEquals(FeedUtils.normalizeURL(urla1), FeedUtils.normalizeURL(urla3));
|
||||
Assertions.assertEquals(FeedUtils.normalizeURL(urla1), FeedUtils.normalizeURL(urla4));
|
||||
Assertions.assertEquals(FeedUtils.normalizeURL(urla1), FeedUtils.normalizeURL(urla5));
|
||||
Assertions.assertEquals(Urls.normalize(urla1), Urls.normalize(urla2));
|
||||
Assertions.assertEquals(Urls.normalize(urla1), Urls.normalize(urla3));
|
||||
Assertions.assertEquals(Urls.normalize(urla1), Urls.normalize(urla4));
|
||||
Assertions.assertEquals(Urls.normalize(urla1), Urls.normalize(urla5));
|
||||
|
||||
Assertions.assertEquals(FeedUtils.normalizeURL(urlb1), FeedUtils.normalizeURL(urlb2));
|
||||
Assertions.assertEquals(Urls.normalize(urlb1), Urls.normalize(urlb2));
|
||||
|
||||
Assertions.assertEquals(FeedUtils.normalizeURL(urlc1), FeedUtils.normalizeURL(urlc2));
|
||||
Assertions.assertEquals(FeedUtils.normalizeURL(urlc1), FeedUtils.normalizeURL(urlc3));
|
||||
Assertions.assertEquals(FeedUtils.normalizeURL(urlc1), FeedUtils.normalizeURL(urlc4));
|
||||
Assertions.assertEquals(FeedUtils.normalizeURL(urlc1), FeedUtils.normalizeURL(urlc5));
|
||||
Assertions.assertEquals(Urls.normalize(urlc1), Urls.normalize(urlc2));
|
||||
Assertions.assertEquals(Urls.normalize(urlc1), Urls.normalize(urlc3));
|
||||
Assertions.assertEquals(Urls.normalize(urlc1), Urls.normalize(urlc4));
|
||||
Assertions.assertEquals(Urls.normalize(urlc1), Urls.normalize(urlc5));
|
||||
|
||||
Assertions.assertNotEquals(FeedUtils.normalizeURL(urld1), FeedUtils.normalizeURL(urld2));
|
||||
Assertions.assertNotEquals(Urls.normalize(urld1), Urls.normalize(urld2));
|
||||
|
||||
}
|
||||
|
||||
@@ -46,36 +46,36 @@ class FeedUtilsTest {
|
||||
String expected = "http://a.com/blog/entry/1";
|
||||
|
||||
// usual cases
|
||||
Assertions.assertEquals(expected, FeedUtils.toAbsoluteUrl("http://a.com/blog/entry/1", "http://a.com/feed/", "http://a.com/feed/"));
|
||||
Assertions.assertEquals(expected, FeedUtils.toAbsoluteUrl("http://a.com/blog/entry/1", "http://a.com/feed", "http://a.com/feed"));
|
||||
Assertions.assertEquals(expected, Urls.toAbsolute("http://a.com/blog/entry/1", "http://a.com/feed/", "http://a.com/feed/"));
|
||||
Assertions.assertEquals(expected, Urls.toAbsolute("http://a.com/blog/entry/1", "http://a.com/feed", "http://a.com/feed"));
|
||||
|
||||
// relative links
|
||||
Assertions.assertEquals(expected, FeedUtils.toAbsoluteUrl("../blog/entry/1", "http://a.com/feed/", "http://a.com/feed/"));
|
||||
Assertions.assertEquals(expected, FeedUtils.toAbsoluteUrl("../blog/entry/1", "feed.xml", "http://a.com/feed/feed.xml"));
|
||||
Assertions.assertEquals(expected, Urls.toAbsolute("../blog/entry/1", "http://a.com/feed/", "http://a.com/feed/"));
|
||||
Assertions.assertEquals(expected, Urls.toAbsolute("../blog/entry/1", "feed.xml", "http://a.com/feed/feed.xml"));
|
||||
|
||||
// root-relative links
|
||||
Assertions.assertEquals(expected, FeedUtils.toAbsoluteUrl("/blog/entry/1", "/feed", "http://a.com/feed"));
|
||||
Assertions.assertEquals(expected, Urls.toAbsolute("/blog/entry/1", "/feed", "http://a.com/feed"));
|
||||
|
||||
// real cases
|
||||
Assertions.assertEquals("https://github.com/erusev/parsedown/releases/tag/1.3.0", FeedUtils.toAbsoluteUrl(
|
||||
Assertions.assertEquals("https://github.com/erusev/parsedown/releases/tag/1.3.0", Urls.toAbsolute(
|
||||
"/erusev/parsedown/releases/tag/1.3.0", "/erusev/parsedown/releases", "https://github.com/erusev/parsedown/tags.atom"));
|
||||
Assertions.assertEquals("http://ergoemacs.org/emacs/elisp_all_about_lines.html",
|
||||
FeedUtils.toAbsoluteUrl("elisp_all_about_lines.html", "blog.xml", "http://ergoemacs.org/emacs/blog.xml"));
|
||||
Urls.toAbsolute("elisp_all_about_lines.html", "blog.xml", "http://ergoemacs.org/emacs/blog.xml"));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
void testRemoveTrailingSlash() {
|
||||
final String url = "http://localhost/";
|
||||
final String result = FeedUtils.removeTrailingSlash(url);
|
||||
final String result = Urls.removeTrailingSlash(url);
|
||||
Assertions.assertEquals("http://localhost", result);
|
||||
}
|
||||
|
||||
@Test
|
||||
void testRemoveTrailingSlashLastSlashOnly() {
|
||||
final String url = "http://localhost//";
|
||||
final String result = FeedUtils.removeTrailingSlash(url);
|
||||
final String result = Urls.removeTrailingSlash(url);
|
||||
Assertions.assertEquals("http://localhost/", result);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user