diff --git a/commafeed-server/config.dev.yml b/commafeed-server/config.dev.yml index 2ae7c6ad..600490ea 100644 --- a/commafeed-server/config.dev.yml +++ b/commafeed-server/config.dev.yml @@ -73,6 +73,9 @@ app: # limit the number of feeds a user can subscribe to, 0 to disable maxFeedsPerUser: 0 + # don't parse feeds that are too large to prevent memory issues + maxFeedResponseSize: 5M + # cache service to use, possible values are 'noop' and 'redis' cache: noop diff --git a/commafeed-server/config.docker-warmup.yml b/commafeed-server/config.docker-warmup.yml index 8657c5eb..7a120526 100644 --- a/commafeed-server/config.docker-warmup.yml +++ b/commafeed-server/config.docker-warmup.yml @@ -73,6 +73,9 @@ app: # limit the number of feeds a user can subscribe to, 0 to disable maxFeedsPerUser: 0 + # don't parse feeds that are too large to prevent memory issues + maxFeedResponseSize: 5M + # cache service to use, possible values are 'noop' and 'redis' cache: noop diff --git a/commafeed-server/config.yml.example b/commafeed-server/config.yml.example index 96950ab3..8568b50d 100644 --- a/commafeed-server/config.yml.example +++ b/commafeed-server/config.yml.example @@ -73,6 +73,9 @@ app: # limit the number of feeds a user can subscribe to, 0 to disable maxFeedsPerUser: 0 + # don't parse feeds that are too large to prevent memory issues + maxFeedResponseSize: 5M + # cache service to use, possible values are 'noop' and 'redis' cache: noop diff --git a/commafeed-server/src/main/java/com/commafeed/CommaFeedConfiguration.java b/commafeed-server/src/main/java/com/commafeed/CommaFeedConfiguration.java index 2ecf54da..845c01cd 100644 --- a/commafeed-server/src/main/java/com/commafeed/CommaFeedConfiguration.java +++ b/commafeed-server/src/main/java/com/commafeed/CommaFeedConfiguration.java @@ -12,6 +12,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import io.dropwizard.core.Configuration; import io.dropwizard.db.DataSourceFactory; +import io.dropwizard.util.DataSize; import io.dropwizard.util.Duration; import jakarta.validation.Valid; import jakarta.validation.constraints.Min; @@ -154,6 +155,10 @@ public class CommaFeedConfiguration extends Configuration { @Valid private Integer maxFeedsPerUser = 0; + @NotNull + @Valid + private DataSize maxFeedResponseSize = DataSize.megabytes(5); + @NotNull @Min(0) @Valid diff --git a/commafeed-server/src/main/java/com/commafeed/backend/HttpGetter.java b/commafeed-server/src/main/java/com/commafeed/backend/HttpGetter.java index e253ab07..a2d64a95 100644 --- a/commafeed-server/src/main/java/com/commafeed/backend/HttpGetter.java +++ b/commafeed-server/src/main/java/com/commafeed/backend/HttpGetter.java @@ -1,6 +1,7 @@ package com.commafeed.backend; import java.io.IOException; +import java.io.InputStream; import java.net.URI; import java.util.ArrayList; import java.util.List; @@ -20,7 +21,6 @@ import org.apache.hc.core5.http.ClassicHttpRequest; import org.apache.hc.core5.http.Header; import org.apache.hc.core5.http.HttpEntity; import org.apache.hc.core5.http.NameValuePair; -import org.apache.hc.core5.http.io.entity.EntityUtils; import org.apache.hc.core5.http.io.support.ClassicRequestBuilder; import org.apache.hc.core5.http.message.BasicHeader; import org.apache.hc.core5.util.TimeValue; @@ -29,8 +29,10 @@ import org.eclipse.jetty.http.HttpStatus; import com.commafeed.CommaFeedConfiguration; import com.google.common.collect.Iterables; +import com.google.common.io.ByteStreams; import com.google.common.net.HttpHeaders; +import io.dropwizard.util.DataSize; import jakarta.inject.Inject; import jakarta.inject.Singleton; import lombok.Getter; @@ -41,19 +43,20 @@ import nl.altindag.ssl.apache5.util.Apache5SslUtils; /** * Smart HTTP getter: handles gzip, ssl, last modified and etag headers - * */ @Singleton @Slf4j public class HttpGetter { private final CloseableHttpClient client; + private final DataSize maxResponseSize; @Inject public HttpGetter(CommaFeedConfiguration config) { String userAgent = Optional.ofNullable(config.getApplicationSettings().getUserAgent()) .orElseGet(() -> String.format("CommaFeed/%s (https://github.com/Athou/commafeed)", config.getVersion())); this.client = newClient(userAgent, config.getApplicationSettings().getBackgroundThreads()); + this.maxResponseSize = config.getApplicationSettings().getMaxFeedResponseSize(); } public HttpResult getBinary(String url, int timeout) throws IOException, NotModifiedException { @@ -61,7 +64,6 @@ public class HttpGetter { } /** - * * @param url * the url to retrive * @param lastModified @@ -87,6 +89,7 @@ public class HttpGetter { context.setRequestConfig(RequestConfig.custom().setResponseTimeout(timeout, TimeUnit.MILLISECONDS).build()); HttpResponse response = client.execute(request, context, resp -> { + byte[] content = resp.getEntity() == null ? null : toByteArray(resp.getEntity(), maxResponseSize.toBytes()); int code = resp.getCode(); String lastModifiedHeader = Optional.ofNullable(resp.getFirstHeader(HttpHeaders.LAST_MODIFIED)) .map(NameValuePair::getValue) @@ -97,7 +100,6 @@ public class HttpGetter { .map(StringUtils::trimToNull) .orElse(null); - byte[] content = resp.getEntity() == null ? null : EntityUtils.toByteArray(resp.getEntity()); String contentType = Optional.ofNullable(resp.getEntity()).map(HttpEntity::getContentType).orElse(null); String urlAfterRedirect = Optional.ofNullable(context.getRedirectLocations()) .map(RedirectLocations::getAll) @@ -130,6 +132,25 @@ public class HttpGetter { response.getUrlAfterRedirect()); } + private static byte[] toByteArray(HttpEntity entity, long maxBytes) throws IOException { + if (entity.getContentLength() > maxBytes) { + throw new IOException( + "Response size (%s bytes) exceeds the maximum allowed size (%s bytes)".formatted(entity.getContentLength(), maxBytes)); + } + + try (InputStream input = entity.getContent()) { + if (input == null) { + return null; + } + + byte[] bytes = ByteStreams.limit(input, maxBytes).readAllBytes(); + if (bytes.length == maxBytes) { + throw new IOException("Response size exceeds the maximum allowed size (%s bytes)".formatted(maxBytes)); + } + return bytes; + } + } + private static CloseableHttpClient newClient(String userAgent, int poolSize) { SSLFactory sslFactory = SSLFactory.builder().withUnsafeTrustMaterial().withUnsafeHostnameVerifier().build(); diff --git a/commafeed-server/src/test/java/com/commafeed/backend/HttpGetterTest.java b/commafeed-server/src/test/java/com/commafeed/backend/HttpGetterTest.java index 964275fc..8b0655cb 100644 --- a/commafeed-server/src/test/java/com/commafeed/backend/HttpGetterTest.java +++ b/commafeed-server/src/test/java/com/commafeed/backend/HttpGetterTest.java @@ -2,6 +2,7 @@ package com.commafeed.backend; import java.io.IOException; import java.net.SocketTimeoutException; +import java.util.Arrays; import java.util.Objects; import java.util.concurrent.atomic.AtomicInteger; @@ -28,6 +29,8 @@ import com.commafeed.backend.HttpGetter.HttpResult; import com.commafeed.backend.HttpGetter.NotModifiedException; import com.google.common.net.HttpHeaders; +import io.dropwizard.util.DataSize; + @ExtendWith(MockServerExtension.class) class HttpGetterTest { @@ -48,6 +51,7 @@ class HttpGetterTest { ApplicationSettings settings = new ApplicationSettings(); settings.setUserAgent("http-getter-test"); settings.setBackgroundThreads(3); + settings.setMaxFeedResponseSize(DataSize.kilobytes(1)); CommaFeedConfiguration config = new CommaFeedConfiguration(); config.setApplicationSettings(settings); @@ -169,4 +173,14 @@ class HttpGetterTest { Assertions.assertEquals(2, calls.get()); } + @Test + void largeFeedWithContentLengthHeader() { + byte[] bytes = new byte[(int) DataSize.kilobytes(10).toBytes()]; + Arrays.fill(bytes, (byte) 1); + this.mockServerClient.when(HttpRequest.request().withMethod("GET")).respond(HttpResponse.response().withBody(bytes)); + + IOException e = Assertions.assertThrows(IOException.class, () -> getter.getBinary(this.feedUrl, TIMEOUT)); + Assertions.assertEquals("Response size (10000 bytes) exceeds the maximum allowed size (1000 bytes)", e.getMessage()); + } + } \ No newline at end of file diff --git a/commafeed-server/src/test/resources/config.test.yml b/commafeed-server/src/test/resources/config.test.yml index 0049f8ba..209f3efc 100644 --- a/commafeed-server/src/test/resources/config.test.yml +++ b/commafeed-server/src/test/resources/config.test.yml @@ -72,6 +72,9 @@ app: # limit the number of feeds a user can subscribe to, 0 to disable maxFeedsPerUser: 0 + + # don't parse feeds that are too large to prevent memory issues + maxFeedResponseSize: 5M # cache service to use, possible values are 'noop' and 'redis' cache: noop