feed refresh engine previously hardcoded values are now configurable (#1677)

This commit is contained in:
Athou
2025-02-11 22:14:32 +01:00
parent bde556d41f
commit 469420b5bf
5 changed files with 435 additions and 53 deletions

View File

@@ -366,7 +366,7 @@ Feed refresh engine settings
`commafeed.feed-refresh.interval`
Amount of time CommaFeed will wait before refreshing the same feed.
Default amount of time CommaFeed will wait before refreshing a feed.
@@ -383,10 +383,36 @@ Environment variable: `COMMAFEED_FEED_REFRESH_INTERVAL`</td>
<tr>
<td>
`commafeed.feed-refresh.max-interval`
Maximum amount of time CommaFeed will wait before refreshing a feed. This is used as an upper bound when:
<ul>
<li>an error occurs while refreshing a feed and we're backing off exponentially</li>
<li>we receive a Cache-Control header from the feed</li>
<li>we receive a Retry-After header from the feed</li>
</ul>
Environment variable: `COMMAFEED_FEED_REFRESH_MAX_INTERVAL`</td>
<td>
[Duration](https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/time/Duration.html) [🛈](#duration-note-anchor)
</td>
<td>
`24H`
</td>
</tr>
<tr>
<td>
`commafeed.feed-refresh.interval-empirical`
If true, CommaFeed will calculate the next refresh time based on the feed's average time between entries and the time since the
last entry was published. The interval will be somewhere between the default refresh interval and 24h.
If enabled, CommaFeed will calculate the next refresh time based on the feed's average time between entries and the time since
the last entry was published. The interval will be sometimes between the default refresh interval
(`commafeed.feed-refresh.interval`) and the maximum refresh interval (`commafeed.feed-refresh.max-interval`).
See <code>FeedRefreshIntervalCalculator</code> for details.
@@ -502,6 +528,52 @@ Environment variable: `COMMAFEED_FEED_REFRESH_FORCE_REFRESH_COOLDOWN_DURATION`</
<thead>
<tr>
<th align="left" colspan="3">
&nbsp;&nbsp;&nbsp;&nbsp;Feed refresh engine error handling settings
</th>
</tr>
</thead>
<tr>
<td>
`commafeed.feed-refresh.errors.retries-before-backoff`
Number of retries before backoff is applied.
Environment variable: `COMMAFEED_FEED_REFRESH_ERRORS_RETRIES_BEFORE_BACKOFF`</td>
<td>
int
</td>
<td>
`3`
</td>
</tr>
<tr>
<td>
`commafeed.feed-refresh.errors.backoff-interval`
Duration to wait before retrying after an error. Will be multiplied by the number of errors since the last successful fetch.
Environment variable: `COMMAFEED_FEED_REFRESH_ERRORS_BACKOFF_INTERVAL`</td>
<td>
[Duration](https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/time/Duration.html) [🛈](#duration-note-anchor)
</td>
<td>
`1H`
</td>
</tr>
<thead>
<tr>
<th align="left" colspan="3">
Database settings
</th>
</tr>

View File

@@ -168,20 +168,39 @@ public interface CommaFeedConfiguration {
interface FeedRefresh {
/**
* Amount of time CommaFeed will wait before refreshing the same feed.
* Default amount of time CommaFeed will wait before refreshing a feed.
*/
@WithDefault("5m")
Duration interval();
/**
* If true, CommaFeed will calculate the next refresh time based on the feed's average time between entries and the time since the
* last entry was published. The interval will be somewhere between the default refresh interval and 24h.
* Maximum amount of time CommaFeed will wait before refreshing a feed. This is used as an upper bound when:
*
* <ul>
* <li>an error occurs while refreshing a feed and we're backing off exponentially</li>
* <li>we receive a Cache-Control header from the feed</li>
* <li>we receive a Retry-After header from the feed</li>
* </ul>
*/
@WithDefault("24h")
Duration maxInterval();
/**
* If enabled, CommaFeed will calculate the next refresh time based on the feed's average time between entries and the time since
* the last entry was published. The interval will be sometimes between the default refresh interval
* (`commafeed.feed-refresh.interval`) and the maximum refresh interval (`commafeed.feed-refresh.max-interval`).
*
* See {@link FeedRefreshIntervalCalculator} for details.
*/
@WithDefault("false")
boolean intervalEmpirical();
/**
* Feed refresh engine error handling settings.
*/
@ConfigDocSection
FeedRefreshErrorHandling errors();
/**
* Amount of http threads used to fetch feeds.
*/
@@ -217,6 +236,21 @@ public interface CommaFeedConfiguration {
Duration forceRefreshCooldownDuration();
}
interface FeedRefreshErrorHandling {
/**
* Number of retries before backoff is applied.
*/
@Min(0)
@WithDefault("3")
int retriesBeforeBackoff();
/**
* Duration to wait before retrying after an error. Will be multiplied by the number of errors since the last successful fetch.
*/
@WithDefault("1h")
Duration backoffInterval();
}
interface Database {
/**
* Timeout applied to all database queries.

View File

@@ -2,83 +2,83 @@ package com.commafeed.backend.feed;
import java.time.Duration;
import java.time.Instant;
import java.time.InstantSource;
import java.time.temporal.ChronoUnit;
import org.apache.commons.lang3.ObjectUtils;
import com.commafeed.CommaFeedConfiguration;
import com.commafeed.CommaFeedConfiguration.FeedRefreshErrorHandling;
import com.google.common.primitives.Longs;
import jakarta.inject.Singleton;
@Singleton
public class FeedRefreshIntervalCalculator {
private final Duration refreshInterval;
private final boolean empiricalInterval;
private final Duration interval;
private final Duration maxInterval;
private final boolean empirical;
private final FeedRefreshErrorHandling errorHandling;
private final InstantSource instantSource;
public FeedRefreshIntervalCalculator(CommaFeedConfiguration config) {
this.refreshInterval = config.feedRefresh().interval();
this.empiricalInterval = config.feedRefresh().intervalEmpirical();
public FeedRefreshIntervalCalculator(CommaFeedConfiguration config, InstantSource instantSource) {
this.interval = config.feedRefresh().interval();
this.maxInterval = config.feedRefresh().maxInterval();
this.empirical = config.feedRefresh().intervalEmpirical();
this.errorHandling = config.feedRefresh().errors();
this.instantSource = instantSource;
}
public Instant onFetchSuccess(Instant publishedDate, Long averageEntryInterval) {
Instant defaultRefreshInterval = getDefaultRefreshInterval();
return empiricalInterval ? computeEmpiricalRefreshInterval(publishedDate, averageEntryInterval, defaultRefreshInterval)
: defaultRefreshInterval;
public Instant onFetchSuccess(Instant publishedDate, Long averageEntryInterval, Duration validFor) {
Instant instant = empirical ? computeEmpiricalRefreshInterval(publishedDate, averageEntryInterval)
: instantSource.instant().plus(interval);
return limitToMaxInterval(ObjectUtils.max(instant, instantSource.instant().plus(validFor)));
}
public Instant onFeedNotModified(Instant publishedDate, Long averageEntryInterval) {
return onFetchSuccess(publishedDate, averageEntryInterval);
return onFetchSuccess(publishedDate, averageEntryInterval, Duration.ZERO);
}
public Instant onTooManyRequests(Instant retryAfter, int errorCount) {
return ObjectUtils.max(retryAfter, onFetchError(errorCount));
return limitToMaxInterval(ObjectUtils.max(retryAfter, onFetchError(errorCount)));
}
public Instant onFetchError(int errorCount) {
int retriesBeforeDisable = 3;
if (errorCount < retriesBeforeDisable || !empiricalInterval) {
return getDefaultRefreshInterval();
if (errorCount < errorHandling.retriesBeforeBackoff()) {
return limitToMaxInterval(instantSource.instant().plus(interval));
}
int disabledHours = Math.min(24 * 7, errorCount - retriesBeforeDisable + 1);
return Instant.now().plus(Duration.ofHours(disabledHours));
Duration retryInterval = errorHandling.backoffInterval().multipliedBy(errorCount - errorHandling.retriesBeforeBackoff() + 1L);
return limitToMaxInterval(instantSource.instant().plus(retryInterval));
}
private Instant getDefaultRefreshInterval() {
return Instant.now().plus(refreshInterval);
}
private Instant computeEmpiricalRefreshInterval(Instant publishedDate, Long averageEntryInterval, Instant defaultRefreshInterval) {
Instant now = Instant.now();
private Instant computeEmpiricalRefreshInterval(Instant publishedDate, Long averageEntryInterval) {
Instant now = instantSource.instant();
if (publishedDate == null) {
// feed with no entries, recheck in 24 hours
return now.plus(Duration.ofHours(24));
} else if (ChronoUnit.DAYS.between(publishedDate, now) >= 30) {
// older than a month, recheck in 24 hours
return now.plus(Duration.ofHours(24));
} else if (ChronoUnit.DAYS.between(publishedDate, now) >= 14) {
// older than two weeks, recheck in 12 hours
return now.plus(Duration.ofHours(12));
} else if (ChronoUnit.DAYS.between(publishedDate, now) >= 7) {
// older than a week, recheck in 6 hours
return now.plus(Duration.ofHours(6));
return now.plus(maxInterval);
}
long daysSinceLastPublication = ChronoUnit.DAYS.between(publishedDate, now);
if (daysSinceLastPublication >= 30) {
return now.plus(maxInterval);
} else if (daysSinceLastPublication >= 14) {
return now.plus(maxInterval.dividedBy(2));
} else if (daysSinceLastPublication >= 7) {
return now.plus(maxInterval.dividedBy(4));
} else if (averageEntryInterval != null) {
// use average time between entries to decide when to refresh next, divided by factor
int factor = 2;
// not more than 6 hours
long date = Math.min(now.plus(Duration.ofHours(6)).toEpochMilli(), now.toEpochMilli() + averageEntryInterval / factor);
// not less than default refresh interval
date = Math.max(defaultRefreshInterval.toEpochMilli(), date);
return Instant.ofEpochMilli(date);
long millis = Longs.constrainToRange(averageEntryInterval / factor, interval.toMillis(), maxInterval.dividedBy(4).toMillis());
return now.plusMillis(millis);
} else {
// unknown case, recheck in 24 hours
return now.plus(Duration.ofHours(24));
// unknown case
return now.plus(maxInterval);
}
}
private Instant limitToMaxInterval(Instant instant) {
return ObjectUtils.min(instant, instantSource.instant().plus(maxInterval));
}
}

View File

@@ -6,7 +6,6 @@ import java.util.Collections;
import java.util.List;
import java.util.Optional;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils;
import com.codahale.metrics.Meter;
@@ -78,9 +77,8 @@ public class FeedRefreshWorker {
feed.setErrorCount(0);
feed.setMessage(null);
feed.setDisabledUntil(ObjectUtils.max(
refreshIntervalCalculator.onFetchSuccess(result.feed().lastPublishedDate(), result.feed().averageEntryInterval()),
Instant.now().plus(result.validFor())));
feed.setDisabledUntil(refreshIntervalCalculator.onFetchSuccess(result.feed().lastPublishedDate(),
result.feed().averageEntryInterval(), result.validFor()));
return new FeedRefreshWorkerResult(feed, entries);
} catch (NotModifiedException e) {

View File

@@ -0,0 +1,278 @@
package com.commafeed.backend.feed;
import java.time.Duration;
import java.time.Instant;
import java.time.InstantSource;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import org.mockito.Mock;
import org.mockito.Mockito;
import org.mockito.junit.jupiter.MockitoExtension;
import com.commafeed.CommaFeedConfiguration;
import com.commafeed.CommaFeedConfiguration.FeedRefreshErrorHandling;
@ExtendWith(MockitoExtension.class)
class FeedRefreshIntervalCalculatorTest {
private static final Instant NOW = Instant.now();
private static final Duration DEFAULT_INTERVAL = Duration.ofHours(1);
private static final Duration MAX_INTERVAL = Duration.ofDays(1);
@Mock
private InstantSource instantSource;
@Mock
private CommaFeedConfiguration config;
@Mock
private FeedRefreshErrorHandling errorHandling;
private FeedRefreshIntervalCalculator calculator;
@BeforeEach
void setUp() {
Mockito.when(instantSource.instant()).thenReturn(NOW);
Mockito.when(config.feedRefresh()).thenReturn(Mockito.mock(CommaFeedConfiguration.FeedRefresh.class));
Mockito.when(config.feedRefresh().interval()).thenReturn(DEFAULT_INTERVAL);
Mockito.when(config.feedRefresh().maxInterval()).thenReturn(MAX_INTERVAL);
Mockito.when(config.feedRefresh().errors()).thenReturn(errorHandling);
calculator = new FeedRefreshIntervalCalculator(config, instantSource);
}
@Nested
class FetchSuccess {
@Nested
class EmpiricalDisabled {
@ParameterizedTest
@ValueSource(longs = { 0, 1, 300, 86400000L })
void withoutValidFor(long averageEntryInterval) {
// averageEntryInterval is ignored when empirical is disabled
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(5)), averageEntryInterval, Duration.ZERO);
Assertions.assertEquals(NOW.plus(DEFAULT_INTERVAL), result);
}
@Test
void withValidForGreaterThanMaxInterval() {
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(5)), 1L, MAX_INTERVAL.plusDays(1));
Assertions.assertEquals(NOW.plus(MAX_INTERVAL), result);
}
@Test
void withValidForLowerThanMaxInterval() {
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(5)), 1L, MAX_INTERVAL.minusSeconds(1));
Assertions.assertEquals(NOW.plus(MAX_INTERVAL).minusSeconds(1), result);
}
}
@Nested
class EmpiricalEnabled {
@BeforeEach
void setUp() {
Mockito.when(config.feedRefresh().intervalEmpirical()).thenReturn(true);
calculator = new FeedRefreshIntervalCalculator(config, instantSource);
}
@Test
void withNullPublishedDate() {
Instant result = calculator.onFetchSuccess(null, 1L, Duration.ZERO);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL), result);
}
@Test
void with31DaysOldPublishedDate() {
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(31)), 1L, Duration.ZERO);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL), result);
}
@Test
void with15DaysOldPublishedDate() {
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(15)), 1L, Duration.ZERO);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL.dividedBy(2)), result);
}
@Test
void with8DaysOldPublishedDate() {
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(8)), 1L, Duration.ZERO);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL.dividedBy(4)), result);
}
@Nested
class FiveDaysOld {
@Test
void averageBetweenBounds() {
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(5)), Duration.ofHours(4).toMillis(),
Duration.ZERO);
Assertions.assertEquals(NOW.plus(Duration.ofHours(2)), result);
}
@Test
void averageBelowMinimum() {
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(5)), 10L, Duration.ZERO);
Assertions.assertEquals(NOW.plus(DEFAULT_INTERVAL), result);
}
@Test
void averageAboveMaximum() {
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(5)), Long.MAX_VALUE, Duration.ZERO);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL.dividedBy(4)), result);
}
@Test
void noAverage() {
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(5)), null, Duration.ZERO);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL), result);
}
}
}
}
@Nested
class FeedNotModified {
@Nested
class EmpiricalDisabled {
@ParameterizedTest
@ValueSource(longs = { 0, 1, 300, 86400000L })
void withoutValidFor(long averageEntryInterval) {
// averageEntryInterval is ignored when empirical is disabled
Instant result = calculator.onFeedNotModified(NOW.minus(Duration.ofDays(5)), averageEntryInterval);
Assertions.assertEquals(NOW.plus(DEFAULT_INTERVAL), result);
}
}
@Nested
class EmpiricalEnabled {
@BeforeEach
void setUp() {
Mockito.when(config.feedRefresh().intervalEmpirical()).thenReturn(true);
calculator = new FeedRefreshIntervalCalculator(config, instantSource);
}
@Test
void withNullPublishedDate() {
Instant result = calculator.onFeedNotModified(null, 1L);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL), result);
}
@Test
void with31DaysOldPublishedDate() {
Instant result = calculator.onFeedNotModified(NOW.minus(Duration.ofDays(31)), 1L);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL), result);
}
@Test
void with15DaysOldPublishedDate() {
Instant result = calculator.onFeedNotModified(NOW.minus(Duration.ofDays(15)), 1L);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL.dividedBy(2)), result);
}
@Test
void with8DaysOldPublishedDate() {
Instant result = calculator.onFeedNotModified(NOW.minus(Duration.ofDays(8)), 1L);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL.dividedBy(4)), result);
}
@Nested
class FiveDaysOld {
@Test
void averageBetweenBounds() {
Instant result = calculator.onFeedNotModified(NOW.minus(Duration.ofDays(5)), Duration.ofHours(4).toMillis());
Assertions.assertEquals(NOW.plus(Duration.ofHours(2)), result);
}
@Test
void averageBelowMinimum() {
Instant result = calculator.onFeedNotModified(NOW.minus(Duration.ofDays(5)), 10L);
Assertions.assertEquals(NOW.plus(DEFAULT_INTERVAL), result);
}
@Test
void averageAboveMaximum() {
Instant result = calculator.onFeedNotModified(NOW.minus(Duration.ofDays(5)), Long.MAX_VALUE);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL.dividedBy(4)), result);
}
@Test
void noAverage() {
Instant result = calculator.onFeedNotModified(NOW.minus(Duration.ofDays(5)), null);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL), result);
}
}
}
}
@Nested
class FetchError {
@BeforeEach
void setUp() {
Mockito.when(config.feedRefresh().errors().retriesBeforeBackoff()).thenReturn(3);
}
@Test
void lowErrorCount() {
Instant result = calculator.onFetchError(1);
Assertions.assertEquals(NOW.plus(DEFAULT_INTERVAL), result);
}
@Test
void highErrorCount() {
Mockito.when(config.feedRefresh().errors().backoffInterval()).thenReturn(Duration.ofHours(1));
Instant result = calculator.onFetchError(5);
Assertions.assertEquals(NOW.plus(Duration.ofHours(3)), result);
}
@Test
void veryHighErrorCount() {
Mockito.when(config.feedRefresh().errors().backoffInterval()).thenReturn(Duration.ofHours(1));
Instant result = calculator.onFetchError(100000);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL), result);
}
}
@Nested
class TooManyRequests {
@BeforeEach
void setUp() {
Mockito.when(config.feedRefresh().errors().retriesBeforeBackoff()).thenReturn(3);
}
@Test
void withRetryAfterZero() {
Instant result = calculator.onTooManyRequests(NOW, 1);
Assertions.assertEquals(NOW.plus(DEFAULT_INTERVAL), result);
}
@Test
void withRetryAfterLowerThanInterval() {
Instant retryAfter = NOW.plus(DEFAULT_INTERVAL.minusSeconds(10));
Instant result = calculator.onTooManyRequests(retryAfter, 1);
Assertions.assertEquals(NOW.plus(DEFAULT_INTERVAL), result);
}
@Test
void withRetryAfterBetweenBounds() {
Instant retryAfter = NOW.plus(DEFAULT_INTERVAL.plusSeconds(10));
Instant result = calculator.onTooManyRequests(retryAfter, 1);
Assertions.assertEquals(retryAfter, result);
}
@Test
void withRetryAfterGreaterThanMaxInterval() {
Instant retryAfter = NOW.plus(MAX_INTERVAL.plusSeconds(10));
Instant result = calculator.onTooManyRequests(retryAfter, 1);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL), result);
}
}
}