From 9c058cf6d6ed6db2350a4034c62a49da290136cc Mon Sep 17 00:00:00 2001
From: Athou
Date: Mon, 17 Nov 2025 06:39:33 +0100
Subject: [PATCH] disable xml entity expansion limits enabled in JDK24+ (#1961)

---
 .../backend/feed/parser/EncodingDetector.java     |  2 +-
 .../backend/feed/parser/FeedCleaner.java          |  2 +-
 .../commafeed/backend/feed/parser/FeedParser.java | 15 ++++++++++++---
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/commafeed-server/src/main/java/com/commafeed/backend/feed/parser/EncodingDetector.java b/commafeed-server/src/main/java/com/commafeed/backend/feed/parser/EncodingDetector.java
index 721ae4f0..c6b46d28 100644
--- a/commafeed-server/src/main/java/com/commafeed/backend/feed/parser/EncodingDetector.java
+++ b/commafeed-server/src/main/java/com/commafeed/backend/feed/parser/EncodingDetector.java
@@ -11,7 +11,7 @@ import com.ibm.icu.text.CharsetDetector;
 import com.ibm.icu.text.CharsetMatch;
 
 @Singleton
-class EncodingDetector {
+public class EncodingDetector {
 
 	/**
 	 * Detect feed encoding by using the declared encoding in the xml processing instruction and by detecting the characters used in the
diff --git a/commafeed-server/src/main/java/com/commafeed/backend/feed/parser/FeedCleaner.java b/commafeed-server/src/main/java/com/commafeed/backend/feed/parser/FeedCleaner.java
index 23236f92..97aabb4a 100644
--- a/commafeed-server/src/main/java/com/commafeed/backend/feed/parser/FeedCleaner.java
+++ b/commafeed-server/src/main/java/com/commafeed/backend/feed/parser/FeedCleaner.java
@@ -11,7 +11,7 @@ import org.apache.commons.lang3.StringUtils;
 import org.jdom2.Verifier;
 
 @Singleton
-class FeedCleaner {
+public class FeedCleaner {
 
 	private static final Pattern DOCTYPE_PATTERN = Pattern.compile("<!DOCTYPE[^>]*>", Pattern.CASE_INSENSITIVE);
 
diff --git a/commafeed-server/src/main/java/com/commafeed/backend/feed/parser/FeedParser.java b/commafeed-server/src/main/java/com/commafeed/backend/feed/parser/FeedParser.java
index f61c1557..f6d48f0e 100644
--- a/commafeed-server/src/main/java/com/commafeed/backend/feed/parser/FeedParser.java
+++ b/commafeed-server/src/main/java/com/commafeed/backend/feed/parser/FeedParser.java
@@ -14,6 +14,7 @@ import jakarta.inject.Singleton;
 
 import org.apache.commons.lang3.ArrayUtils;
 import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.SystemProperties;
 import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
 import org.jdom2.Element;
 import org.jdom2.Namespace;
@@ -38,12 +39,9 @@ import com.rometools.rome.feed.synd.SyndLink;
 import com.rometools.rome.feed.synd.SyndLinkImpl;
 import com.rometools.rome.io.SyndFeedInput;
 
-import lombok.RequiredArgsConstructor;
-
 /**
  * Parses raw xml into a FeedParserResult object
  */
-@RequiredArgsConstructor
 @Singleton
 public class FeedParser {
 
@@ -55,6 +53,17 @@ public class FeedParser {
 	private final EncodingDetector encodingDetector;
 	private final FeedCleaner feedCleaner;
 
+	public FeedParser(EncodingDetector encodingDetector, FeedCleaner feedCleaner) {
+		this.encodingDetector = encodingDetector;
+		this.feedCleaner = feedCleaner;
+
+		// disable entity expansion limits added in JDK24+ (#1961)
+		// we already strip doctype declarations in FeedCleaner to prevent xxe attacks
+		// we also already limit the size of feeds we download in HttpGetter
+		System.setProperty(SystemProperties.JDK_XML_MAX_GENERAL_ENTITY_SIZE_LIMIT, "0");
+		System.setProperty(SystemProperties.JDK_XML_TOTAL_ENTITY_SIZE_LIMIT, "0");
+	}
+
 	public FeedParserResult parse(String feedUrl, byte[] xml) throws FeedParsingException {
 		try {
 			Charset encoding = encodingDetector.getEncoding(xml);