prevent full feed fetching next time we fetch it if caching header values changed but content did not (#1037)

This commit is contained in:
Athou
2023-01-27 08:08:50 +01:00
parent 5c2454c331
commit ea4b120a85
5 changed files with 99 additions and 2 deletions

View File

@@ -474,6 +474,11 @@
<artifactId>mockito-core</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-junit-jupiter</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mock-server</groupId>
<artifactId>mockserver-junit-jupiter</artifactId>

View File

@@ -182,11 +182,28 @@ public class HttpGetter {
System.out.println(new String(result.content));
}
/**
 * Thrown to report that a resource was not modified.
 *
 * <p>
 * The exception can optionally carry new values for the last-modified and etag headers. Each value is
 * {@code null} unless the creator of the exception supplied one, so callers should check for {@code null}
 * before using them.
 */
@Getter
public static class NotModifiedException extends Exception {
    private static final long serialVersionUID = 1L;

    /**
     * New value of the last-modified header if that value changed, {@code null} otherwise.
     */
    private final String newLastModifiedHeader;

    /**
     * New value of the etag header if that value changed, {@code null} otherwise.
     */
    private final String newEtagHeader;

    /**
     * Creates an exception that carries no new header values.
     *
     * @param message
     *            description of why the resource is considered not modified
     */
    public NotModifiedException(String message) {
        this(message, null, null);
    }

    /**
     * Creates an exception that may carry new header values.
     *
     * @param message
     *            description of why the resource is considered not modified
     * @param newLastModifiedHeader
     *            new value of the last-modified header, or {@code null} if it did not change
     * @param newEtagHeader
     *            new value of the etag header, or {@code null} if it did not change
     */
    public NotModifiedException(String message, String newLastModifiedHeader, String newEtagHeader) {
        super(message);
        this.newLastModifiedHeader = newLastModifiedHeader;
        this.newEtagHeader = newEtagHeader;
    }
}

View File

@@ -62,16 +62,23 @@ public class FeedFetcher {
throw new IOException("Feed content is empty.");
}
boolean lastModifiedHeaderValueChanged = !StringUtils.equals(lastModified, result.getLastModifiedSince());
boolean etagHeaderValueChanged = !StringUtils.equals(eTag, result.getETag());
String hash = DigestUtils.sha1Hex(content);
if (lastContentHash != null && hash != null && lastContentHash.equals(hash)) {
log.debug("content hash not modified: {}", feedUrl);
throw new NotModifiedException("content hash not modified");
throw new NotModifiedException("content hash not modified",
lastModifiedHeaderValueChanged ? result.getLastModifiedSince() : null,
etagHeaderValueChanged ? result.getETag() : null);
}
if (lastPublishedDate != null && fetchedFeed.getFeed().getLastPublishedDate() != null
&& lastPublishedDate.getTime() == fetchedFeed.getFeed().getLastPublishedDate().getTime()) {
log.debug("publishedDate not modified: {}", feedUrl);
throw new NotModifiedException("publishedDate not modified");
throw new NotModifiedException("publishedDate not modified",
lastModifiedHeaderValueChanged ? result.getLastModifiedSince() : null,
etagHeaderValueChanged ? result.getETag() : null);
}
Feed feed = fetchedFeed.getFeed();

View File

@@ -102,6 +102,14 @@ public class FeedRefreshWorker implements Managed {
feed.setMessage(e.getMessage());
feed.setDisabledUntil(refreshIntervalCalculator.onFeedNotModified(feed));
if (e.getNewLastModifiedHeader() != null) {
feed.setLastModifiedHeader(e.getNewLastModifiedHeader());
}
if (e.getNewEtagHeader() != null) {
feed.setEtagHeader(e.getNewEtagHeader());
}
queues.giveBack(feed);
} catch (Exception e) {
String message = "Unable to refresh feed " + feed.getUrl() + " : " + e.getMessage();

View File

@@ -0,0 +1,60 @@
package com.commafeed.backend.feed;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Date;
import java.util.Set;

import org.apache.commons.codec.digest.DigestUtils;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.Mockito;
import org.mockito.junit.jupiter.MockitoExtension;

import com.commafeed.backend.HttpGetter;
import com.commafeed.backend.HttpGetter.HttpResult;
import com.commafeed.backend.HttpGetter.NotModifiedException;
import com.commafeed.backend.urlprovider.FeedURLProvider;
import com.rometools.rome.io.FeedException;
/**
 * Unit tests for {@link FeedFetcher}, run against mocked collaborators.
 */
@ExtendWith(MockitoExtension.class)
class FeedFetcherTest {

    @Mock
    private FeedParser parser;

    @Mock
    private HttpGetter getter;

    @Mock
    private Set<FeedURLProvider> urlProviders;

    private FeedFetcher fetcher;

    @BeforeEach
    void init() {
        fetcher = new FeedFetcher(parser, getter, urlProviders);
    }

    /**
     * When the fetched content hashes to the previously stored hash but the HTTP result carries different
     * last-modified and etag values, the resulting {@link NotModifiedException} must expose the new values.
     */
    @Test
    void updatesHeaderWhenContentDidNotChange() throws FeedException, IOException, NotModifiedException {
        String url = "https://aaa.com";
        String lastModified = "last-modified-1";
        String etag = "etag-1";

        // explicit charset so the bytes (and therefore the hash) do not depend on the platform default
        byte[] content = "content".getBytes(StandardCharsets.UTF_8);
        String lastContentHash = DigestUtils.sha1Hex(content);

        // the getter returns the same content but with header values that differ from the ones we sent
        // NOTE(review): 20000 presumably has to match the timeout FeedFetcher passes to getBinary - confirm
        Mockito.when(getter.getBinary(url, lastModified, etag, 20000))
                .thenReturn(new HttpResult(content, "content-type", "last-modified-2", "etag-2", 20, null));

        NotModifiedException e = Assertions.assertThrows(NotModifiedException.class,
                () -> fetcher.fetch(url, false, lastModified, etag, new Date(), lastContentHash));

        Assertions.assertEquals("last-modified-2", e.getNewLastModifiedHeader());
        Assertions.assertEquals("etag-2", e.getNewEtagHeader());
    }
}