From d9ccdf1caf427358e79dbd96cafe55a01bbb96d8 Mon Sep 17 00:00:00 2001 From: Athou Date: Mon, 18 Dec 2023 10:46:30 +0100 Subject: [PATCH] use java standard http client because apache http clients should be reused because they support pooling but we don't need that --- commafeed-server/pom.xml | 12 -- .../backend/ContentEncodingInterceptor.java | 60 ------ .../com/commafeed/backend/HttpGetter.java | 200 +++++++----------- .../commafeed/backend/feed/FeedFetcher.java | 2 +- .../com/commafeed/backend/HttpGetterTest.java | 42 ++-- .../backend/feed/FeedFetcherTest.java | 4 +- 6 files changed, 101 insertions(+), 219 deletions(-) delete mode 100644 commafeed-server/src/main/java/com/commafeed/backend/ContentEncodingInterceptor.java diff --git a/commafeed-server/pom.xml b/commafeed-server/pom.xml index 98a85a7c..5e2c4ca0 100644 --- a/commafeed-server/pom.xml +++ b/commafeed-server/pom.xml @@ -279,18 +279,6 @@ 1.1.1 - - org.apache.httpcomponents - httpclient - - - commons-logging - commons-logging - - - 4.5.14 - - io.swagger.core.v3 swagger-annotations diff --git a/commafeed-server/src/main/java/com/commafeed/backend/ContentEncodingInterceptor.java b/commafeed-server/src/main/java/com/commafeed/backend/ContentEncodingInterceptor.java deleted file mode 100644 index 09a6f4b5..00000000 --- a/commafeed-server/src/main/java/com/commafeed/backend/ContentEncodingInterceptor.java +++ /dev/null @@ -1,60 +0,0 @@ -package com.commafeed.backend; - -import java.io.IOException; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Locale; -import java.util.Set; - -import org.apache.http.Header; -import org.apache.http.HeaderElement; -import org.apache.http.HttpEntity; -import org.apache.http.HttpException; -import org.apache.http.HttpResponse; -import org.apache.http.HttpResponseInterceptor; -import org.apache.http.entity.HttpEntityWrapper; -import org.apache.http.protocol.HttpContext; - -class ContentEncodingInterceptor implements HttpResponseInterceptor { - - private static final Set ALLOWED_CONTENT_ENCODINGS = new HashSet<>(Arrays.asList("gzip", "x-gzip", "deflate", "identity")); - - @Override - public void process(HttpResponse response, HttpContext context) throws HttpException, IOException { - if (hasContent(response)) { - Header contentEncodingHeader = response.getEntity().getContentEncoding(); - if (contentEncodingHeader != null && containsUnsupportedEncodings(contentEncodingHeader)) { - overrideContentEncoding(response); - } - } - } - - private boolean containsUnsupportedEncodings(Header contentEncodingHeader) { - HeaderElement[] codecs = contentEncodingHeader.getElements(); - - for (final HeaderElement codec : codecs) { - String codecName = codec.getName().toLowerCase(Locale.US); - if (!ALLOWED_CONTENT_ENCODINGS.contains(codecName)) { - return true; - } - } - - return false; - } - - private void overrideContentEncoding(HttpResponse response) { - HttpEntity wrapped = new HttpEntityWrapper(response.getEntity()) { - @Override - public Header getContentEncoding() { - return null; - } - }; - - response.setEntity(wrapped); - } - - private boolean hasContent(HttpResponse response) { - return response.getEntity() != null && response.getEntity().getContentLength() != 0; - } - -} diff --git a/commafeed-server/src/main/java/com/commafeed/backend/HttpGetter.java b/commafeed-server/src/main/java/com/commafeed/backend/HttpGetter.java index 77fa366e..ca3977ae 100644 --- a/commafeed-server/src/main/java/com/commafeed/backend/HttpGetter.java +++ b/commafeed-server/src/main/java/com/commafeed/backend/HttpGetter.java @@ -1,30 +1,20 @@ package com.commafeed.backend; import java.io.IOException; +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpClient.Redirect; +import java.net.http.HttpClient.Version; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.time.Duration; +import java.util.Optional; -import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; -import org.apache.http.Consts; -import org.apache.http.Header; -import org.apache.http.HttpEntity; -import org.apache.http.HttpHeaders; -import org.apache.http.HttpHost; -import org.apache.http.HttpResponseInterceptor; -import org.apache.http.HttpStatus; -import org.apache.http.client.HttpResponseException; -import org.apache.http.client.config.CookieSpecs; -import org.apache.http.client.config.RequestConfig; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.client.methods.HttpUriRequest; -import org.apache.http.client.protocol.HttpClientContext; -import org.apache.http.config.ConnectionConfig; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClientBuilder; -import org.apache.http.impl.client.HttpClients; -import org.apache.http.util.EntityUtils; +import org.eclipse.jetty.http.HttpStatus; import com.commafeed.CommaFeedConfiguration; +import com.google.common.net.HttpHeaders; import jakarta.inject.Inject; import jakarta.inject.Singleton; @@ -39,25 +29,17 @@ import nl.altindag.ssl.SSLFactory; @Singleton public class HttpGetter { - private static final String ACCEPT_LANGUAGE = "en"; - private static final String PRAGMA_NO_CACHE = "No-cache"; - private static final String CACHE_CONTROL_NO_CACHE = "no-cache"; - - private static final HttpResponseInterceptor REMOVE_INCORRECT_CONTENT_ENCODING = new ContentEncodingInterceptor(); - - private static final SSLFactory SSL_FACTORY = SSLFactory.builder().withUnsafeTrustMaterial().withUnsafeHostnameVerifier().build(); - - private String userAgent; + private final HttpClient client; + private final String userAgent; @Inject public HttpGetter(CommaFeedConfiguration config) { - this.userAgent = config.getApplicationSettings().getUserAgent(); - if (this.userAgent == null) { - this.userAgent = String.format("CommaFeed/%s (https://github.com/Athou/commafeed)", config.getVersion()); - } + this.client = newClient(); + this.userAgent = Optional.ofNullable(config.getApplicationSettings().getUserAgent()) + .orElseGet(() -> String.format("CommaFeed/%s (https://github.com/Athou/commafeed)", config.getVersion())); } - public HttpResult getBinary(String url, int timeout) throws IOException, NotModifiedException { + public HttpResult getBinary(String url, int timeout) throws IOException, NotModifiedException, InterruptedException { return getBinary(url, null, null, timeout); } @@ -72,101 +54,59 @@ public class HttpGetter { * @throws NotModifiedException * if the url hasn't changed since we asked for it last time */ - public HttpResult getBinary(String url, String lastModified, String eTag, int timeout) throws IOException, NotModifiedException { - HttpResult result = null; + public HttpResult getBinary(String url, String lastModified, String eTag, int timeout) + throws IOException, NotModifiedException, InterruptedException { long start = System.currentTimeMillis(); - CloseableHttpClient client = newClient(timeout); - CloseableHttpResponse response = null; - try { - HttpGet httpget = new HttpGet(url); - HttpClientContext context = HttpClientContext.create(); - - httpget.addHeader(HttpHeaders.ACCEPT_LANGUAGE, ACCEPT_LANGUAGE); - httpget.addHeader(HttpHeaders.PRAGMA, PRAGMA_NO_CACHE); - httpget.addHeader(HttpHeaders.CACHE_CONTROL, CACHE_CONTROL_NO_CACHE); - httpget.addHeader(HttpHeaders.USER_AGENT, userAgent); - - if (lastModified != null) { - httpget.addHeader(HttpHeaders.IF_MODIFIED_SINCE, lastModified); - } - if (eTag != null) { - httpget.addHeader(HttpHeaders.IF_NONE_MATCH, eTag); - } - - try { - response = client.execute(httpget, context); - int code = response.getStatusLine().getStatusCode(); - if (code == HttpStatus.SC_NOT_MODIFIED) { - throw new NotModifiedException("'304 - not modified' http code received"); - } else if (code >= 300) { - throw new HttpResponseException(code, "Server returned HTTP error code " + code); - } - - } catch (HttpResponseException e) { - if (e.getStatusCode() == HttpStatus.SC_NOT_MODIFIED) { - throw new NotModifiedException("'304 - not modified' http code received"); - } else { - throw e; - } - } - Header lastModifiedHeader = response.getFirstHeader(HttpHeaders.LAST_MODIFIED); - String lastModifiedHeaderValue = lastModifiedHeader == null ? null : StringUtils.trimToNull(lastModifiedHeader.getValue()); - if (lastModifiedHeaderValue != null && StringUtils.equals(lastModified, lastModifiedHeaderValue)) { - throw new NotModifiedException("lastModifiedHeader is the same"); - } - - Header eTagHeader = response.getFirstHeader(HttpHeaders.ETAG); - String eTagHeaderValue = eTagHeader == null ? null : StringUtils.trimToNull(eTagHeader.getValue()); - if (eTag != null && StringUtils.equals(eTag, eTagHeaderValue)) { - throw new NotModifiedException("eTagHeader is the same"); - } - - HttpEntity entity = response.getEntity(); - byte[] content = null; - String contentType = null; - if (entity != null) { - content = EntityUtils.toByteArray(entity); - if (entity.getContentType() != null) { - contentType = entity.getContentType().getValue(); - } - } - - String urlAfterRedirect = url; - if (context.getRequest() instanceof HttpUriRequest) { - HttpUriRequest req = (HttpUriRequest) context.getRequest(); - HttpHost host = context.getTargetHost(); - urlAfterRedirect = req.getURI().isAbsolute() ? req.getURI().toString() : host.toURI() + req.getURI(); - } - - long duration = System.currentTimeMillis() - start; - result = new HttpResult(content, contentType, lastModifiedHeaderValue, eTagHeaderValue, duration, urlAfterRedirect); - } finally { - IOUtils.closeQuietly(response); - IOUtils.closeQuietly(client); + HttpRequest.Builder builder = HttpRequest.newBuilder() + .uri(URI.create(url)) + .timeout(Duration.ofMillis(timeout)) + .header(HttpHeaders.ACCEPT_LANGUAGE, "en") + .header(HttpHeaders.PRAGMA, "No-cache") + .header(HttpHeaders.CACHE_CONTROL, "no-cache") + .header(HttpHeaders.USER_AGENT, userAgent); + if (lastModified != null) { + builder.header(HttpHeaders.IF_MODIFIED_SINCE, lastModified); } - return result; + if (eTag != null) { + builder.header(HttpHeaders.IF_NONE_MATCH, eTag); + } + HttpRequest request = builder.GET().build(); + + HttpResponse response = client.send(request, HttpResponse.BodyHandlers.ofByteArray()); + int code = response.statusCode(); + if (code == HttpStatus.NOT_MODIFIED_304) { + throw new NotModifiedException("'304 - not modified' http code received"); + } else if (code >= 300) { + throw new HttpResponseException(code, "Server returned HTTP error code " + code); + } + + String lastModifiedHeader = response.headers().firstValue(HttpHeaders.LAST_MODIFIED).map(StringUtils::trimToNull).orElse(null); + if (lastModifiedHeader != null && lastModifiedHeader.equals(lastModified)) { + throw new NotModifiedException("lastModifiedHeader is the same"); + } + + String eTagHeader = response.headers().firstValue(HttpHeaders.ETAG).map(StringUtils::trimToNull).orElse(null); + if (eTagHeader != null && eTagHeader.equals(eTag)) { + throw new NotModifiedException("eTagHeader is the same"); + } + + byte[] content = response.body(); + String contentType = response.headers().firstValue(HttpHeaders.CONTENT_TYPE).orElse(null); + String urlAfterRedirect = response.request().uri().toString(); + + long duration = System.currentTimeMillis() - start; + return new HttpResult(content, contentType, lastModifiedHeader, eTagHeader, duration, urlAfterRedirect); } - public static CloseableHttpClient newClient(int timeout) { - HttpClientBuilder builder = HttpClients.custom(); - builder.useSystemProperties(); - builder.addInterceptorFirst(REMOVE_INCORRECT_CONTENT_ENCODING); - builder.disableAutomaticRetries(); - - builder.setSSLContext(SSL_FACTORY.getSslContext()); - builder.setSSLHostnameVerifier(SSL_FACTORY.getHostnameVerifier()); - - RequestConfig.Builder configBuilder = RequestConfig.custom(); - configBuilder.setCookieSpec(CookieSpecs.IGNORE_COOKIES); - configBuilder.setSocketTimeout(timeout); - configBuilder.setConnectTimeout(timeout); - configBuilder.setConnectionRequestTimeout(timeout); - builder.setDefaultRequestConfig(configBuilder.build()); - - builder.setDefaultConnectionConfig(ConnectionConfig.custom().setCharset(Consts.ISO_8859_1).build()); - - return builder.build(); + public static HttpClient newClient() { + SSLFactory sslFactory = SSLFactory.builder().withUnsafeTrustMaterial().withUnsafeHostnameVerifier().build(); + return HttpClient.newBuilder() + .version(Version.HTTP_1_1) + .followRedirects(Redirect.ALWAYS) + .sslContext(sslFactory.getSslContext()) + .sslParameters(sslFactory.getSslParameters()) + .build(); } @Getter @@ -192,6 +132,18 @@ public class HttpGetter { this.newLastModifiedHeader = newLastModifiedHeader; this.newEtagHeader = newEtagHeader; } + } + + @Getter + public static class HttpResponseException extends IOException { + private static final long serialVersionUID = 1L; + + private final int code; + + public HttpResponseException(int code, String message) { + super(message); + this.code = code; + } } diff --git a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedFetcher.java b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedFetcher.java index 2955d09e..1122380e 100644 --- a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedFetcher.java +++ b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedFetcher.java @@ -36,7 +36,7 @@ public class FeedFetcher { private final Set urlProviders; public FeedFetcherResult fetch(String feedUrl, boolean extractFeedUrlFromHtml, String lastModified, String eTag, Date lastPublishedDate, - String lastContentHash) throws FeedException, IOException, NotModifiedException { + String lastContentHash) throws FeedException, IOException, NotModifiedException, InterruptedException { log.debug("Fetching feed {}", feedUrl); int timeout = 20000; diff --git a/commafeed-server/src/test/java/com/commafeed/backend/HttpGetterTest.java b/commafeed-server/src/test/java/com/commafeed/backend/HttpGetterTest.java index c2932a1e..140d57a5 100644 --- a/commafeed-server/src/test/java/com/commafeed/backend/HttpGetterTest.java +++ b/commafeed-server/src/test/java/com/commafeed/backend/HttpGetterTest.java @@ -1,12 +1,10 @@ package com.commafeed.backend; import java.io.IOException; -import java.net.SocketTimeoutException; +import java.net.http.HttpTimeoutException; import java.util.Objects; import org.apache.commons.io.IOUtils; -import org.apache.http.HttpHeaders; -import org.apache.http.client.HttpResponseException; import org.eclipse.jetty.http.HttpStatus; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; @@ -23,12 +21,16 @@ import org.mockserver.model.MediaType; import com.commafeed.CommaFeedConfiguration; import com.commafeed.CommaFeedConfiguration.ApplicationSettings; +import com.commafeed.backend.HttpGetter.HttpResponseException; import com.commafeed.backend.HttpGetter.HttpResult; import com.commafeed.backend.HttpGetter.NotModifiedException; +import com.google.common.net.HttpHeaders; @ExtendWith(MockServerExtension.class) class HttpGetterTest { + private static final int TIMEOUT = 10000; + private MockServerClient mockServerClient; private String feedUrl; private byte[] feedContent; @@ -43,6 +45,7 @@ class HttpGetterTest { ApplicationSettings settings = new ApplicationSettings(); settings.setUserAgent("http-getter-test"); + settings.setBackgroundThreads(3); CommaFeedConfiguration config = new CommaFeedConfiguration(); config.setApplicationSettings(settings); @@ -57,12 +60,12 @@ class HttpGetterTest { void errorCodes(int code) { this.mockServerClient.when(HttpRequest.request().withMethod("GET")).respond(HttpResponse.response().withStatusCode(code)); - HttpResponseException e = Assertions.assertThrows(HttpResponseException.class, () -> getter.getBinary(this.feedUrl, 1000)); - Assertions.assertEquals(code, e.getStatusCode()); + HttpResponseException e = Assertions.assertThrows(HttpResponseException.class, () -> getter.getBinary(this.feedUrl, TIMEOUT)); + Assertions.assertEquals(code, e.getCode()); } @Test - void validFeed() throws IOException, NotModifiedException { + void validFeed() throws Exception { this.mockServerClient.when(HttpRequest.request().withMethod("GET")) .respond(HttpResponse.response() .withBody(feedContent) @@ -70,7 +73,7 @@ class HttpGetterTest { .withHeader(HttpHeaders.LAST_MODIFIED, "123456") .withHeader(HttpHeaders.ETAG, "78910")); - HttpResult result = getter.getBinary(this.feedUrl, 1000); + HttpResult result = getter.getBinary(this.feedUrl, TIMEOUT); Assertions.assertArrayEquals(feedContent, result.getContent()); Assertions.assertEquals(MediaType.APPLICATION_ATOM_XML.toString(), result.getContentType()); Assertions.assertEquals("123456", result.getLastModifiedSince()); @@ -80,7 +83,7 @@ class HttpGetterTest { } @Test - void followRedirects() throws IOException, NotModifiedException { + void followRedirects() throws Exception { this.mockServerClient.when(HttpRequest.request().withMethod("GET").withPath("/redirected")) .respond(HttpResponse.response().withBody(feedContent).withContentType(MediaType.APPLICATION_ATOM_XML)); this.mockServerClient.when(HttpRequest.request().withMethod("GET")) @@ -88,49 +91,50 @@ class HttpGetterTest { .withStatusCode(HttpStatus.MOVED_PERMANENTLY_301) .withHeader(HttpHeaders.LOCATION, "http://localhost:" + this.mockServerClient.getPort() + "/redirected")); - HttpResult result = getter.getBinary(this.feedUrl, 1000); + HttpResult result = getter.getBinary(this.feedUrl, TIMEOUT); Assertions.assertEquals("http://localhost:" + this.mockServerClient.getPort() + "/redirected", result.getUrlAfterRedirect()); } @Test void timeout() { + int smallTimeout = 500; this.mockServerClient.when(HttpRequest.request().withMethod("GET")) - .respond(HttpResponse.response().withDelay(Delay.milliseconds(2000))); + .respond(HttpResponse.response().withDelay(Delay.milliseconds(smallTimeout * 2))); - Assertions.assertThrows(SocketTimeoutException.class, () -> getter.getBinary(this.feedUrl, 1000)); + Assertions.assertThrows(HttpTimeoutException.class, () -> getter.getBinary(this.feedUrl, smallTimeout)); } @Test - void userAgent() throws IOException, NotModifiedException { + void userAgent() throws Exception { this.mockServerClient.when(HttpRequest.request().withMethod("GET").withHeader(HttpHeaders.USER_AGENT, "http-getter-test")) .respond(HttpResponse.response().withBody("ok")); - HttpResult result = getter.getBinary(this.feedUrl, 1000); + HttpResult result = getter.getBinary(this.feedUrl, TIMEOUT); Assertions.assertEquals("ok", new String(result.getContent())); } @Test - void ignoreInvalidSsl() throws IOException, NotModifiedException { + void ignoreInvalidSsl() throws Exception { this.mockServerClient.when(HttpRequest.request().withMethod("GET")).respond(HttpResponse.response().withBody("ok")); - HttpResult result = getter.getBinary("https://localhost:" + this.mockServerClient.getPort(), 1000); + HttpResult result = getter.getBinary("https://localhost:" + this.mockServerClient.getPort(), TIMEOUT); Assertions.assertEquals("ok", new String(result.getContent())); } @Test - void lastModifiedReturns304() throws IOException, NotModifiedException { + void lastModifiedReturns304() throws Exception { this.mockServerClient.when(HttpRequest.request().withMethod("GET").withHeader(HttpHeaders.IF_MODIFIED_SINCE, "123456")) .respond(HttpResponse.response().withStatusCode(HttpStatus.NOT_MODIFIED_304)); - Assertions.assertThrows(NotModifiedException.class, () -> getter.getBinary(this.feedUrl, "123456", null, 1000)); + Assertions.assertThrows(NotModifiedException.class, () -> getter.getBinary(this.feedUrl, "123456", null, TIMEOUT)); } @Test - void eTagReturns304() throws IOException, NotModifiedException { + void eTagReturns304() throws Exception { this.mockServerClient.when(HttpRequest.request().withMethod("GET").withHeader(HttpHeaders.IF_NONE_MATCH, "78910")) .respond(HttpResponse.response().withStatusCode(HttpStatus.NOT_MODIFIED_304)); - Assertions.assertThrows(NotModifiedException.class, () -> getter.getBinary(this.feedUrl, null, "78910", 1000)); + Assertions.assertThrows(NotModifiedException.class, () -> getter.getBinary(this.feedUrl, null, "78910", TIMEOUT)); } } \ No newline at end of file diff --git a/commafeed-server/src/test/java/com/commafeed/backend/feed/FeedFetcherTest.java b/commafeed-server/src/test/java/com/commafeed/backend/feed/FeedFetcherTest.java index f98dbc1f..562f5cda 100644 --- a/commafeed-server/src/test/java/com/commafeed/backend/feed/FeedFetcherTest.java +++ b/commafeed-server/src/test/java/com/commafeed/backend/feed/FeedFetcherTest.java @@ -1,6 +1,5 @@ package com.commafeed.backend.feed; -import java.io.IOException; import java.util.Date; import java.util.Set; @@ -17,7 +16,6 @@ import com.commafeed.backend.HttpGetter; import com.commafeed.backend.HttpGetter.HttpResult; import com.commafeed.backend.HttpGetter.NotModifiedException; import com.commafeed.backend.urlprovider.FeedURLProvider; -import com.rometools.rome.io.FeedException; @ExtendWith(MockitoExtension.class) class FeedFetcherTest { @@ -39,7 +37,7 @@ class FeedFetcherTest { } @Test - void updatesHeaderWhenContentDitNotChange() throws FeedException, IOException, NotModifiedException { + void updatesHeaderWhenContentDitNotChange() throws Exception { String url = "https://aaa.com"; String lastModified = "last-modified-1"; String etag = "etag-1";