use java standard http client because apache http clients should be reused because they support pooling but we don't need that

This commit is contained in:
Athou
2023-12-18 10:46:30 +01:00
parent 155e7ba1aa
commit d9ccdf1caf
6 changed files with 101 additions and 219 deletions

View File

@@ -279,18 +279,6 @@
<version>1.1.1</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<exclusions>
<exclusion>
<artifactId>commons-logging</artifactId>
<groupId>commons-logging</groupId>
</exclusion>
</exclusions>
<version>4.5.14</version>
</dependency>
<dependency>
<groupId>io.swagger.core.v3</groupId>
<artifactId>swagger-annotations</artifactId>

View File

@@ -1,60 +0,0 @@
package com.commafeed.backend;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import org.apache.http.Header;
import org.apache.http.HeaderElement;
import org.apache.http.HttpEntity;
import org.apache.http.HttpException;
import org.apache.http.HttpResponse;
import org.apache.http.HttpResponseInterceptor;
import org.apache.http.entity.HttpEntityWrapper;
import org.apache.http.protocol.HttpContext;
class ContentEncodingInterceptor implements HttpResponseInterceptor {
private static final Set<String> ALLOWED_CONTENT_ENCODINGS = new HashSet<>(Arrays.asList("gzip", "x-gzip", "deflate", "identity"));
@Override
public void process(HttpResponse response, HttpContext context) throws HttpException, IOException {
if (hasContent(response)) {
Header contentEncodingHeader = response.getEntity().getContentEncoding();
if (contentEncodingHeader != null && containsUnsupportedEncodings(contentEncodingHeader)) {
overrideContentEncoding(response);
}
}
}
private boolean containsUnsupportedEncodings(Header contentEncodingHeader) {
HeaderElement[] codecs = contentEncodingHeader.getElements();
for (final HeaderElement codec : codecs) {
String codecName = codec.getName().toLowerCase(Locale.US);
if (!ALLOWED_CONTENT_ENCODINGS.contains(codecName)) {
return true;
}
}
return false;
}
private void overrideContentEncoding(HttpResponse response) {
HttpEntity wrapped = new HttpEntityWrapper(response.getEntity()) {
@Override
public Header getContentEncoding() {
return null;
}
};
response.setEntity(wrapped);
}
private boolean hasContent(HttpResponse response) {
return response.getEntity() != null && response.getEntity().getContentLength() != 0;
}
}

View File

@@ -1,30 +1,20 @@
package com.commafeed.backend;
import java.io.IOException;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpClient.Redirect;
import java.net.http.HttpClient.Version;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.time.Duration;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.Consts;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHeaders;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponseInterceptor;
import org.apache.http.HttpStatus;
import org.apache.http.client.HttpResponseException;
import org.apache.http.client.config.CookieSpecs;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.config.ConnectionConfig;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.eclipse.jetty.http.HttpStatus;
import com.commafeed.CommaFeedConfiguration;
import com.google.common.net.HttpHeaders;
import jakarta.inject.Inject;
import jakarta.inject.Singleton;
@@ -39,25 +29,17 @@ import nl.altindag.ssl.SSLFactory;
@Singleton
public class HttpGetter {
private static final String ACCEPT_LANGUAGE = "en";
private static final String PRAGMA_NO_CACHE = "No-cache";
private static final String CACHE_CONTROL_NO_CACHE = "no-cache";
private static final HttpResponseInterceptor REMOVE_INCORRECT_CONTENT_ENCODING = new ContentEncodingInterceptor();
private static final SSLFactory SSL_FACTORY = SSLFactory.builder().withUnsafeTrustMaterial().withUnsafeHostnameVerifier().build();
private String userAgent;
private final HttpClient client;
private final String userAgent;
@Inject
public HttpGetter(CommaFeedConfiguration config) {
this.userAgent = config.getApplicationSettings().getUserAgent();
if (this.userAgent == null) {
this.userAgent = String.format("CommaFeed/%s (https://github.com/Athou/commafeed)", config.getVersion());
}
this.client = newClient();
this.userAgent = Optional.ofNullable(config.getApplicationSettings().getUserAgent())
.orElseGet(() -> String.format("CommaFeed/%s (https://github.com/Athou/commafeed)", config.getVersion()));
}
public HttpResult getBinary(String url, int timeout) throws IOException, NotModifiedException {
public HttpResult getBinary(String url, int timeout) throws IOException, NotModifiedException, InterruptedException {
return getBinary(url, null, null, timeout);
}
@@ -72,101 +54,59 @@ public class HttpGetter {
* @throws NotModifiedException
* if the url hasn't changed since we asked for it last time
*/
public HttpResult getBinary(String url, String lastModified, String eTag, int timeout) throws IOException, NotModifiedException {
HttpResult result = null;
public HttpResult getBinary(String url, String lastModified, String eTag, int timeout)
throws IOException, NotModifiedException, InterruptedException {
long start = System.currentTimeMillis();
CloseableHttpClient client = newClient(timeout);
CloseableHttpResponse response = null;
try {
HttpGet httpget = new HttpGet(url);
HttpClientContext context = HttpClientContext.create();
httpget.addHeader(HttpHeaders.ACCEPT_LANGUAGE, ACCEPT_LANGUAGE);
httpget.addHeader(HttpHeaders.PRAGMA, PRAGMA_NO_CACHE);
httpget.addHeader(HttpHeaders.CACHE_CONTROL, CACHE_CONTROL_NO_CACHE);
httpget.addHeader(HttpHeaders.USER_AGENT, userAgent);
if (lastModified != null) {
httpget.addHeader(HttpHeaders.IF_MODIFIED_SINCE, lastModified);
}
if (eTag != null) {
httpget.addHeader(HttpHeaders.IF_NONE_MATCH, eTag);
}
try {
response = client.execute(httpget, context);
int code = response.getStatusLine().getStatusCode();
if (code == HttpStatus.SC_NOT_MODIFIED) {
throw new NotModifiedException("'304 - not modified' http code received");
} else if (code >= 300) {
throw new HttpResponseException(code, "Server returned HTTP error code " + code);
}
} catch (HttpResponseException e) {
if (e.getStatusCode() == HttpStatus.SC_NOT_MODIFIED) {
throw new NotModifiedException("'304 - not modified' http code received");
} else {
throw e;
}
}
Header lastModifiedHeader = response.getFirstHeader(HttpHeaders.LAST_MODIFIED);
String lastModifiedHeaderValue = lastModifiedHeader == null ? null : StringUtils.trimToNull(lastModifiedHeader.getValue());
if (lastModifiedHeaderValue != null && StringUtils.equals(lastModified, lastModifiedHeaderValue)) {
throw new NotModifiedException("lastModifiedHeader is the same");
}
Header eTagHeader = response.getFirstHeader(HttpHeaders.ETAG);
String eTagHeaderValue = eTagHeader == null ? null : StringUtils.trimToNull(eTagHeader.getValue());
if (eTag != null && StringUtils.equals(eTag, eTagHeaderValue)) {
throw new NotModifiedException("eTagHeader is the same");
}
HttpEntity entity = response.getEntity();
byte[] content = null;
String contentType = null;
if (entity != null) {
content = EntityUtils.toByteArray(entity);
if (entity.getContentType() != null) {
contentType = entity.getContentType().getValue();
}
}
String urlAfterRedirect = url;
if (context.getRequest() instanceof HttpUriRequest) {
HttpUriRequest req = (HttpUriRequest) context.getRequest();
HttpHost host = context.getTargetHost();
urlAfterRedirect = req.getURI().isAbsolute() ? req.getURI().toString() : host.toURI() + req.getURI();
}
long duration = System.currentTimeMillis() - start;
result = new HttpResult(content, contentType, lastModifiedHeaderValue, eTagHeaderValue, duration, urlAfterRedirect);
} finally {
IOUtils.closeQuietly(response);
IOUtils.closeQuietly(client);
HttpRequest.Builder builder = HttpRequest.newBuilder()
.uri(URI.create(url))
.timeout(Duration.ofMillis(timeout))
.header(HttpHeaders.ACCEPT_LANGUAGE, "en")
.header(HttpHeaders.PRAGMA, "No-cache")
.header(HttpHeaders.CACHE_CONTROL, "no-cache")
.header(HttpHeaders.USER_AGENT, userAgent);
if (lastModified != null) {
builder.header(HttpHeaders.IF_MODIFIED_SINCE, lastModified);
}
return result;
if (eTag != null) {
builder.header(HttpHeaders.IF_NONE_MATCH, eTag);
}
HttpRequest request = builder.GET().build();
HttpResponse<byte[]> response = client.send(request, HttpResponse.BodyHandlers.ofByteArray());
int code = response.statusCode();
if (code == HttpStatus.NOT_MODIFIED_304) {
throw new NotModifiedException("'304 - not modified' http code received");
} else if (code >= 300) {
throw new HttpResponseException(code, "Server returned HTTP error code " + code);
}
String lastModifiedHeader = response.headers().firstValue(HttpHeaders.LAST_MODIFIED).map(StringUtils::trimToNull).orElse(null);
if (lastModifiedHeader != null && lastModifiedHeader.equals(lastModified)) {
throw new NotModifiedException("lastModifiedHeader is the same");
}
String eTagHeader = response.headers().firstValue(HttpHeaders.ETAG).map(StringUtils::trimToNull).orElse(null);
if (eTagHeader != null && eTagHeader.equals(eTag)) {
throw new NotModifiedException("eTagHeader is the same");
}
byte[] content = response.body();
String contentType = response.headers().firstValue(HttpHeaders.CONTENT_TYPE).orElse(null);
String urlAfterRedirect = response.request().uri().toString();
long duration = System.currentTimeMillis() - start;
return new HttpResult(content, contentType, lastModifiedHeader, eTagHeader, duration, urlAfterRedirect);
}
public static CloseableHttpClient newClient(int timeout) {
HttpClientBuilder builder = HttpClients.custom();
builder.useSystemProperties();
builder.addInterceptorFirst(REMOVE_INCORRECT_CONTENT_ENCODING);
builder.disableAutomaticRetries();
builder.setSSLContext(SSL_FACTORY.getSslContext());
builder.setSSLHostnameVerifier(SSL_FACTORY.getHostnameVerifier());
RequestConfig.Builder configBuilder = RequestConfig.custom();
configBuilder.setCookieSpec(CookieSpecs.IGNORE_COOKIES);
configBuilder.setSocketTimeout(timeout);
configBuilder.setConnectTimeout(timeout);
configBuilder.setConnectionRequestTimeout(timeout);
builder.setDefaultRequestConfig(configBuilder.build());
builder.setDefaultConnectionConfig(ConnectionConfig.custom().setCharset(Consts.ISO_8859_1).build());
return builder.build();
public static HttpClient newClient() {
SSLFactory sslFactory = SSLFactory.builder().withUnsafeTrustMaterial().withUnsafeHostnameVerifier().build();
return HttpClient.newBuilder()
.version(Version.HTTP_1_1)
.followRedirects(Redirect.ALWAYS)
.sslContext(sslFactory.getSslContext())
.sslParameters(sslFactory.getSslParameters())
.build();
}
@Getter
@@ -192,6 +132,18 @@ public class HttpGetter {
this.newLastModifiedHeader = newLastModifiedHeader;
this.newEtagHeader = newEtagHeader;
}
}
@Getter
public static class HttpResponseException extends IOException {
private static final long serialVersionUID = 1L;
private final int code;
public HttpResponseException(int code, String message) {
super(message);
this.code = code;
}
}

View File

@@ -36,7 +36,7 @@ public class FeedFetcher {
private final Set<FeedURLProvider> urlProviders;
public FeedFetcherResult fetch(String feedUrl, boolean extractFeedUrlFromHtml, String lastModified, String eTag, Date lastPublishedDate,
String lastContentHash) throws FeedException, IOException, NotModifiedException {
String lastContentHash) throws FeedException, IOException, NotModifiedException, InterruptedException {
log.debug("Fetching feed {}", feedUrl);
int timeout = 20000;

View File

@@ -1,12 +1,10 @@
package com.commafeed.backend;
import java.io.IOException;
import java.net.SocketTimeoutException;
import java.net.http.HttpTimeoutException;
import java.util.Objects;
import org.apache.commons.io.IOUtils;
import org.apache.http.HttpHeaders;
import org.apache.http.client.HttpResponseException;
import org.eclipse.jetty.http.HttpStatus;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
@@ -23,12 +21,16 @@ import org.mockserver.model.MediaType;
import com.commafeed.CommaFeedConfiguration;
import com.commafeed.CommaFeedConfiguration.ApplicationSettings;
import com.commafeed.backend.HttpGetter.HttpResponseException;
import com.commafeed.backend.HttpGetter.HttpResult;
import com.commafeed.backend.HttpGetter.NotModifiedException;
import com.google.common.net.HttpHeaders;
@ExtendWith(MockServerExtension.class)
class HttpGetterTest {
private static final int TIMEOUT = 10000;
private MockServerClient mockServerClient;
private String feedUrl;
private byte[] feedContent;
@@ -43,6 +45,7 @@ class HttpGetterTest {
ApplicationSettings settings = new ApplicationSettings();
settings.setUserAgent("http-getter-test");
settings.setBackgroundThreads(3);
CommaFeedConfiguration config = new CommaFeedConfiguration();
config.setApplicationSettings(settings);
@@ -57,12 +60,12 @@ class HttpGetterTest {
void errorCodes(int code) {
this.mockServerClient.when(HttpRequest.request().withMethod("GET")).respond(HttpResponse.response().withStatusCode(code));
HttpResponseException e = Assertions.assertThrows(HttpResponseException.class, () -> getter.getBinary(this.feedUrl, 1000));
Assertions.assertEquals(code, e.getStatusCode());
HttpResponseException e = Assertions.assertThrows(HttpResponseException.class, () -> getter.getBinary(this.feedUrl, TIMEOUT));
Assertions.assertEquals(code, e.getCode());
}
@Test
void validFeed() throws IOException, NotModifiedException {
void validFeed() throws Exception {
this.mockServerClient.when(HttpRequest.request().withMethod("GET"))
.respond(HttpResponse.response()
.withBody(feedContent)
@@ -70,7 +73,7 @@ class HttpGetterTest {
.withHeader(HttpHeaders.LAST_MODIFIED, "123456")
.withHeader(HttpHeaders.ETAG, "78910"));
HttpResult result = getter.getBinary(this.feedUrl, 1000);
HttpResult result = getter.getBinary(this.feedUrl, TIMEOUT);
Assertions.assertArrayEquals(feedContent, result.getContent());
Assertions.assertEquals(MediaType.APPLICATION_ATOM_XML.toString(), result.getContentType());
Assertions.assertEquals("123456", result.getLastModifiedSince());
@@ -80,7 +83,7 @@ class HttpGetterTest {
}
@Test
void followRedirects() throws IOException, NotModifiedException {
void followRedirects() throws Exception {
this.mockServerClient.when(HttpRequest.request().withMethod("GET").withPath("/redirected"))
.respond(HttpResponse.response().withBody(feedContent).withContentType(MediaType.APPLICATION_ATOM_XML));
this.mockServerClient.when(HttpRequest.request().withMethod("GET"))
@@ -88,49 +91,50 @@ class HttpGetterTest {
.withStatusCode(HttpStatus.MOVED_PERMANENTLY_301)
.withHeader(HttpHeaders.LOCATION, "http://localhost:" + this.mockServerClient.getPort() + "/redirected"));
HttpResult result = getter.getBinary(this.feedUrl, 1000);
HttpResult result = getter.getBinary(this.feedUrl, TIMEOUT);
Assertions.assertEquals("http://localhost:" + this.mockServerClient.getPort() + "/redirected", result.getUrlAfterRedirect());
}
@Test
void timeout() {
int smallTimeout = 500;
this.mockServerClient.when(HttpRequest.request().withMethod("GET"))
.respond(HttpResponse.response().withDelay(Delay.milliseconds(2000)));
.respond(HttpResponse.response().withDelay(Delay.milliseconds(smallTimeout * 2)));
Assertions.assertThrows(SocketTimeoutException.class, () -> getter.getBinary(this.feedUrl, 1000));
Assertions.assertThrows(HttpTimeoutException.class, () -> getter.getBinary(this.feedUrl, smallTimeout));
}
@Test
void userAgent() throws IOException, NotModifiedException {
void userAgent() throws Exception {
this.mockServerClient.when(HttpRequest.request().withMethod("GET").withHeader(HttpHeaders.USER_AGENT, "http-getter-test"))
.respond(HttpResponse.response().withBody("ok"));
HttpResult result = getter.getBinary(this.feedUrl, 1000);
HttpResult result = getter.getBinary(this.feedUrl, TIMEOUT);
Assertions.assertEquals("ok", new String(result.getContent()));
}
@Test
void ignoreInvalidSsl() throws IOException, NotModifiedException {
void ignoreInvalidSsl() throws Exception {
this.mockServerClient.when(HttpRequest.request().withMethod("GET")).respond(HttpResponse.response().withBody("ok"));
HttpResult result = getter.getBinary("https://localhost:" + this.mockServerClient.getPort(), 1000);
HttpResult result = getter.getBinary("https://localhost:" + this.mockServerClient.getPort(), TIMEOUT);
Assertions.assertEquals("ok", new String(result.getContent()));
}
@Test
void lastModifiedReturns304() throws IOException, NotModifiedException {
void lastModifiedReturns304() throws Exception {
this.mockServerClient.when(HttpRequest.request().withMethod("GET").withHeader(HttpHeaders.IF_MODIFIED_SINCE, "123456"))
.respond(HttpResponse.response().withStatusCode(HttpStatus.NOT_MODIFIED_304));
Assertions.assertThrows(NotModifiedException.class, () -> getter.getBinary(this.feedUrl, "123456", null, 1000));
Assertions.assertThrows(NotModifiedException.class, () -> getter.getBinary(this.feedUrl, "123456", null, TIMEOUT));
}
@Test
void eTagReturns304() throws IOException, NotModifiedException {
void eTagReturns304() throws Exception {
this.mockServerClient.when(HttpRequest.request().withMethod("GET").withHeader(HttpHeaders.IF_NONE_MATCH, "78910"))
.respond(HttpResponse.response().withStatusCode(HttpStatus.NOT_MODIFIED_304));
Assertions.assertThrows(NotModifiedException.class, () -> getter.getBinary(this.feedUrl, null, "78910", 1000));
Assertions.assertThrows(NotModifiedException.class, () -> getter.getBinary(this.feedUrl, null, "78910", TIMEOUT));
}
}

View File

@@ -1,6 +1,5 @@
package com.commafeed.backend.feed;
import java.io.IOException;
import java.util.Date;
import java.util.Set;
@@ -17,7 +16,6 @@ import com.commafeed.backend.HttpGetter;
import com.commafeed.backend.HttpGetter.HttpResult;
import com.commafeed.backend.HttpGetter.NotModifiedException;
import com.commafeed.backend.urlprovider.FeedURLProvider;
import com.rometools.rome.io.FeedException;
@ExtendWith(MockitoExtension.class)
class FeedFetcherTest {
@@ -39,7 +37,7 @@ class FeedFetcherTest {
}
@Test
void updatesHeaderWhenContentDitNotChange() throws FeedException, IOException, NotModifiedException {
void updatesHeaderWhenContentDitNotChange() throws Exception {
String url = "https://aaa.com";
String lastModified = "last-modified-1";
String etag = "etag-1";