don't parse feeds that are too large to prevent memory issues

This commit is contained in:
Athou
2024-07-16 20:34:18 +02:00
parent b3545b60ea
commit b17a17ba10
7 changed files with 56 additions and 4 deletions

View File

@@ -73,6 +73,9 @@ app:
# limit the number of feeds a user can subscribe to, 0 to disable
maxFeedsPerUser: 0
# don't parse feeds that are too large to prevent memory issues
maxFeedResponseSize: 5M
# cache service to use, possible values are 'noop' and 'redis'
cache: noop

View File

@@ -73,6 +73,9 @@ app:
# limit the number of feeds a user can subscribe to, 0 to disable
maxFeedsPerUser: 0
# don't parse feeds that are too large to prevent memory issues
maxFeedResponseSize: 5M
# cache service to use, possible values are 'noop' and 'redis'
cache: noop

View File

@@ -73,6 +73,9 @@ app:
# limit the number of feeds a user can subscribe to, 0 to disable
maxFeedsPerUser: 0
# don't parse feeds that are too large to prevent memory issues
maxFeedResponseSize: 5M
# cache service to use, possible values are 'noop' and 'redis'
cache: noop

View File

@@ -12,6 +12,7 @@ import com.fasterxml.jackson.annotation.JsonProperty;
import io.dropwizard.core.Configuration;
import io.dropwizard.db.DataSourceFactory;
import io.dropwizard.util.DataSize;
import io.dropwizard.util.Duration;
import jakarta.validation.Valid;
import jakarta.validation.constraints.Min;
@@ -154,6 +155,10 @@ public class CommaFeedConfiguration extends Configuration {
@Valid
private Integer maxFeedsPerUser = 0;
@NotNull
@Valid
private DataSize maxFeedResponseSize = DataSize.megabytes(5);
@NotNull
@Min(0)
@Valid

View File

@@ -1,6 +1,7 @@
package com.commafeed.backend;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
@@ -20,7 +21,6 @@ import org.apache.hc.core5.http.ClassicHttpRequest;
import org.apache.hc.core5.http.Header;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.NameValuePair;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.support.ClassicRequestBuilder;
import org.apache.hc.core5.http.message.BasicHeader;
import org.apache.hc.core5.util.TimeValue;
@@ -29,8 +29,10 @@ import org.eclipse.jetty.http.HttpStatus;
import com.commafeed.CommaFeedConfiguration;
import com.google.common.collect.Iterables;
import com.google.common.io.ByteStreams;
import com.google.common.net.HttpHeaders;
import io.dropwizard.util.DataSize;
import jakarta.inject.Inject;
import jakarta.inject.Singleton;
import lombok.Getter;
@@ -41,19 +43,20 @@ import nl.altindag.ssl.apache5.util.Apache5SslUtils;
/**
* Smart HTTP getter: handles gzip, ssl, last modified and etag headers
*
*/
@Singleton
@Slf4j
public class HttpGetter {
private final CloseableHttpClient client;
private final DataSize maxResponseSize;
@Inject
public HttpGetter(CommaFeedConfiguration config) {
String userAgent = Optional.ofNullable(config.getApplicationSettings().getUserAgent())
.orElseGet(() -> String.format("CommaFeed/%s (https://github.com/Athou/commafeed)", config.getVersion()));
this.client = newClient(userAgent, config.getApplicationSettings().getBackgroundThreads());
this.maxResponseSize = config.getApplicationSettings().getMaxFeedResponseSize();
}
public HttpResult getBinary(String url, int timeout) throws IOException, NotModifiedException {
@@ -61,7 +64,6 @@ public class HttpGetter {
}
/**
*
* @param url
* the url to retrive
* @param lastModified
@@ -87,6 +89,7 @@ public class HttpGetter {
context.setRequestConfig(RequestConfig.custom().setResponseTimeout(timeout, TimeUnit.MILLISECONDS).build());
HttpResponse response = client.execute(request, context, resp -> {
byte[] content = resp.getEntity() == null ? null : toByteArray(resp.getEntity(), maxResponseSize.toBytes());
int code = resp.getCode();
String lastModifiedHeader = Optional.ofNullable(resp.getFirstHeader(HttpHeaders.LAST_MODIFIED))
.map(NameValuePair::getValue)
@@ -97,7 +100,6 @@ public class HttpGetter {
.map(StringUtils::trimToNull)
.orElse(null);
byte[] content = resp.getEntity() == null ? null : EntityUtils.toByteArray(resp.getEntity());
String contentType = Optional.ofNullable(resp.getEntity()).map(HttpEntity::getContentType).orElse(null);
String urlAfterRedirect = Optional.ofNullable(context.getRedirectLocations())
.map(RedirectLocations::getAll)
@@ -130,6 +132,25 @@ public class HttpGetter {
response.getUrlAfterRedirect());
}
private static byte[] toByteArray(HttpEntity entity, long maxBytes) throws IOException {
if (entity.getContentLength() > maxBytes) {
throw new IOException(
"Response size (%s bytes) exceeds the maximum allowed size (%s bytes)".formatted(entity.getContentLength(), maxBytes));
}
try (InputStream input = entity.getContent()) {
if (input == null) {
return null;
}
byte[] bytes = ByteStreams.limit(input, maxBytes).readAllBytes();
if (bytes.length == maxBytes) {
throw new IOException("Response size exceeds the maximum allowed size (%s bytes)".formatted(maxBytes));
}
return bytes;
}
}
private static CloseableHttpClient newClient(String userAgent, int poolSize) {
SSLFactory sslFactory = SSLFactory.builder().withUnsafeTrustMaterial().withUnsafeHostnameVerifier().build();

View File

@@ -2,6 +2,7 @@ package com.commafeed.backend;
import java.io.IOException;
import java.net.SocketTimeoutException;
import java.util.Arrays;
import java.util.Objects;
import java.util.concurrent.atomic.AtomicInteger;
@@ -28,6 +29,8 @@ import com.commafeed.backend.HttpGetter.HttpResult;
import com.commafeed.backend.HttpGetter.NotModifiedException;
import com.google.common.net.HttpHeaders;
import io.dropwizard.util.DataSize;
@ExtendWith(MockServerExtension.class)
class HttpGetterTest {
@@ -48,6 +51,7 @@ class HttpGetterTest {
ApplicationSettings settings = new ApplicationSettings();
settings.setUserAgent("http-getter-test");
settings.setBackgroundThreads(3);
settings.setMaxFeedResponseSize(DataSize.kilobytes(1));
CommaFeedConfiguration config = new CommaFeedConfiguration();
config.setApplicationSettings(settings);
@@ -169,4 +173,14 @@ class HttpGetterTest {
Assertions.assertEquals(2, calls.get());
}
@Test
void largeFeedWithContentLengthHeader() {
byte[] bytes = new byte[(int) DataSize.kilobytes(10).toBytes()];
Arrays.fill(bytes, (byte) 1);
this.mockServerClient.when(HttpRequest.request().withMethod("GET")).respond(HttpResponse.response().withBody(bytes));
IOException e = Assertions.assertThrows(IOException.class, () -> getter.getBinary(this.feedUrl, TIMEOUT));
Assertions.assertEquals("Response size (10000 bytes) exceeds the maximum allowed size (1000 bytes)", e.getMessage());
}
}

View File

@@ -72,6 +72,9 @@ app:
# limit the number of feeds a user can subscribe to, 0 to disable
maxFeedsPerUser: 0
# don't parse feeds that are too large to prevent memory issues
maxFeedResponseSize: 5M
# cache service to use, possible values are 'noop' and 'redis'
cache: noop