feed refresh engine previously hardcoded values are now configurable (#1677)

This commit is contained in:
Athou
2025-02-11 22:14:32 +01:00
parent bde556d41f
commit 469420b5bf
5 changed files with 435 additions and 53 deletions

View File

@@ -168,20 +168,39 @@ public interface CommaFeedConfiguration {
interface FeedRefresh {
/**
* Amount of time CommaFeed will wait before refreshing the same feed.
* Default amount of time CommaFeed will wait before refreshing a feed.
*/
@WithDefault("5m")
Duration interval();
/**
* If true, CommaFeed will calculate the next refresh time based on the feed's average time between entries and the time since the
* last entry was published. The interval will be somewhere between the default refresh interval and 24h.
* Maximum amount of time CommaFeed will wait before refreshing a feed. This is used as an upper bound when:
*
* <ul>
* <li>an error occurs while refreshing a feed and we're backing off exponentially</li>
* <li>we receive a Cache-Control header from the feed</li>
* <li>we receive a Retry-After header from the feed</li>
* </ul>
*/
@WithDefault("24h")
Duration maxInterval();
/**
* If enabled, CommaFeed will calculate the next refresh time based on the feed's average time between entries and the time since
* the last entry was published. The interval will be sometimes between the default refresh interval
* (`commafeed.feed-refresh.interval`) and the maximum refresh interval (`commafeed.feed-refresh.max-interval`).
*
* See {@link FeedRefreshIntervalCalculator} for details.
*/
@WithDefault("false")
boolean intervalEmpirical();
/**
* Feed refresh engine error handling settings.
*/
@ConfigDocSection
FeedRefreshErrorHandling errors();
/**
* Amount of http threads used to fetch feeds.
*/
@@ -217,6 +236,21 @@ public interface CommaFeedConfiguration {
Duration forceRefreshCooldownDuration();
}
interface FeedRefreshErrorHandling {
/**
* Number of retries before backoff is applied.
*/
@Min(0)
@WithDefault("3")
int retriesBeforeBackoff();
/**
* Duration to wait before retrying after an error. Will be multiplied by the number of errors since the last successful fetch.
*/
@WithDefault("1h")
Duration backoffInterval();
}
interface Database {
/**
* Timeout applied to all database queries.

View File

@@ -2,83 +2,83 @@ package com.commafeed.backend.feed;
import java.time.Duration;
import java.time.Instant;
import java.time.InstantSource;
import java.time.temporal.ChronoUnit;
import org.apache.commons.lang3.ObjectUtils;
import com.commafeed.CommaFeedConfiguration;
import com.commafeed.CommaFeedConfiguration.FeedRefreshErrorHandling;
import com.google.common.primitives.Longs;
import jakarta.inject.Singleton;
@Singleton
public class FeedRefreshIntervalCalculator {
private final Duration refreshInterval;
private final boolean empiricalInterval;
private final Duration interval;
private final Duration maxInterval;
private final boolean empirical;
private final FeedRefreshErrorHandling errorHandling;
private final InstantSource instantSource;
public FeedRefreshIntervalCalculator(CommaFeedConfiguration config) {
this.refreshInterval = config.feedRefresh().interval();
this.empiricalInterval = config.feedRefresh().intervalEmpirical();
public FeedRefreshIntervalCalculator(CommaFeedConfiguration config, InstantSource instantSource) {
this.interval = config.feedRefresh().interval();
this.maxInterval = config.feedRefresh().maxInterval();
this.empirical = config.feedRefresh().intervalEmpirical();
this.errorHandling = config.feedRefresh().errors();
this.instantSource = instantSource;
}
public Instant onFetchSuccess(Instant publishedDate, Long averageEntryInterval) {
Instant defaultRefreshInterval = getDefaultRefreshInterval();
return empiricalInterval ? computeEmpiricalRefreshInterval(publishedDate, averageEntryInterval, defaultRefreshInterval)
: defaultRefreshInterval;
public Instant onFetchSuccess(Instant publishedDate, Long averageEntryInterval, Duration validFor) {
Instant instant = empirical ? computeEmpiricalRefreshInterval(publishedDate, averageEntryInterval)
: instantSource.instant().plus(interval);
return limitToMaxInterval(ObjectUtils.max(instant, instantSource.instant().plus(validFor)));
}
public Instant onFeedNotModified(Instant publishedDate, Long averageEntryInterval) {
return onFetchSuccess(publishedDate, averageEntryInterval);
return onFetchSuccess(publishedDate, averageEntryInterval, Duration.ZERO);
}
public Instant onTooManyRequests(Instant retryAfter, int errorCount) {
return ObjectUtils.max(retryAfter, onFetchError(errorCount));
return limitToMaxInterval(ObjectUtils.max(retryAfter, onFetchError(errorCount)));
}
public Instant onFetchError(int errorCount) {
int retriesBeforeDisable = 3;
if (errorCount < retriesBeforeDisable || !empiricalInterval) {
return getDefaultRefreshInterval();
if (errorCount < errorHandling.retriesBeforeBackoff()) {
return limitToMaxInterval(instantSource.instant().plus(interval));
}
int disabledHours = Math.min(24 * 7, errorCount - retriesBeforeDisable + 1);
return Instant.now().plus(Duration.ofHours(disabledHours));
Duration retryInterval = errorHandling.backoffInterval().multipliedBy(errorCount - errorHandling.retriesBeforeBackoff() + 1L);
return limitToMaxInterval(instantSource.instant().plus(retryInterval));
}
private Instant getDefaultRefreshInterval() {
return Instant.now().plus(refreshInterval);
}
private Instant computeEmpiricalRefreshInterval(Instant publishedDate, Long averageEntryInterval, Instant defaultRefreshInterval) {
Instant now = Instant.now();
private Instant computeEmpiricalRefreshInterval(Instant publishedDate, Long averageEntryInterval) {
Instant now = instantSource.instant();
if (publishedDate == null) {
// feed with no entries, recheck in 24 hours
return now.plus(Duration.ofHours(24));
} else if (ChronoUnit.DAYS.between(publishedDate, now) >= 30) {
// older than a month, recheck in 24 hours
return now.plus(Duration.ofHours(24));
} else if (ChronoUnit.DAYS.between(publishedDate, now) >= 14) {
// older than two weeks, recheck in 12 hours
return now.plus(Duration.ofHours(12));
} else if (ChronoUnit.DAYS.between(publishedDate, now) >= 7) {
// older than a week, recheck in 6 hours
return now.plus(Duration.ofHours(6));
return now.plus(maxInterval);
}
long daysSinceLastPublication = ChronoUnit.DAYS.between(publishedDate, now);
if (daysSinceLastPublication >= 30) {
return now.plus(maxInterval);
} else if (daysSinceLastPublication >= 14) {
return now.plus(maxInterval.dividedBy(2));
} else if (daysSinceLastPublication >= 7) {
return now.plus(maxInterval.dividedBy(4));
} else if (averageEntryInterval != null) {
// use average time between entries to decide when to refresh next, divided by factor
int factor = 2;
// not more than 6 hours
long date = Math.min(now.plus(Duration.ofHours(6)).toEpochMilli(), now.toEpochMilli() + averageEntryInterval / factor);
// not less than default refresh interval
date = Math.max(defaultRefreshInterval.toEpochMilli(), date);
return Instant.ofEpochMilli(date);
long millis = Longs.constrainToRange(averageEntryInterval / factor, interval.toMillis(), maxInterval.dividedBy(4).toMillis());
return now.plusMillis(millis);
} else {
// unknown case, recheck in 24 hours
return now.plus(Duration.ofHours(24));
// unknown case
return now.plus(maxInterval);
}
}
private Instant limitToMaxInterval(Instant instant) {
return ObjectUtils.min(instant, instantSource.instant().plus(maxInterval));
}
}

View File

@@ -6,7 +6,6 @@ import java.util.Collections;
import java.util.List;
import java.util.Optional;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils;
import com.codahale.metrics.Meter;
@@ -78,9 +77,8 @@ public class FeedRefreshWorker {
feed.setErrorCount(0);
feed.setMessage(null);
feed.setDisabledUntil(ObjectUtils.max(
refreshIntervalCalculator.onFetchSuccess(result.feed().lastPublishedDate(), result.feed().averageEntryInterval()),
Instant.now().plus(result.validFor())));
feed.setDisabledUntil(refreshIntervalCalculator.onFetchSuccess(result.feed().lastPublishedDate(),
result.feed().averageEntryInterval(), result.validFor()));
return new FeedRefreshWorkerResult(feed, entries);
} catch (NotModifiedException e) {

View File

@@ -0,0 +1,278 @@
package com.commafeed.backend.feed;
import java.time.Duration;
import java.time.Instant;
import java.time.InstantSource;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import org.mockito.Mock;
import org.mockito.Mockito;
import org.mockito.junit.jupiter.MockitoExtension;
import com.commafeed.CommaFeedConfiguration;
import com.commafeed.CommaFeedConfiguration.FeedRefreshErrorHandling;
@ExtendWith(MockitoExtension.class)
class FeedRefreshIntervalCalculatorTest {
private static final Instant NOW = Instant.now();
private static final Duration DEFAULT_INTERVAL = Duration.ofHours(1);
private static final Duration MAX_INTERVAL = Duration.ofDays(1);
@Mock
private InstantSource instantSource;
@Mock
private CommaFeedConfiguration config;
@Mock
private FeedRefreshErrorHandling errorHandling;
private FeedRefreshIntervalCalculator calculator;
@BeforeEach
void setUp() {
Mockito.when(instantSource.instant()).thenReturn(NOW);
Mockito.when(config.feedRefresh()).thenReturn(Mockito.mock(CommaFeedConfiguration.FeedRefresh.class));
Mockito.when(config.feedRefresh().interval()).thenReturn(DEFAULT_INTERVAL);
Mockito.when(config.feedRefresh().maxInterval()).thenReturn(MAX_INTERVAL);
Mockito.when(config.feedRefresh().errors()).thenReturn(errorHandling);
calculator = new FeedRefreshIntervalCalculator(config, instantSource);
}
@Nested
class FetchSuccess {
@Nested
class EmpiricalDisabled {
@ParameterizedTest
@ValueSource(longs = { 0, 1, 300, 86400000L })
void withoutValidFor(long averageEntryInterval) {
// averageEntryInterval is ignored when empirical is disabled
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(5)), averageEntryInterval, Duration.ZERO);
Assertions.assertEquals(NOW.plus(DEFAULT_INTERVAL), result);
}
@Test
void withValidForGreaterThanMaxInterval() {
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(5)), 1L, MAX_INTERVAL.plusDays(1));
Assertions.assertEquals(NOW.plus(MAX_INTERVAL), result);
}
@Test
void withValidForLowerThanMaxInterval() {
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(5)), 1L, MAX_INTERVAL.minusSeconds(1));
Assertions.assertEquals(NOW.plus(MAX_INTERVAL).minusSeconds(1), result);
}
}
@Nested
class EmpiricalEnabled {
@BeforeEach
void setUp() {
Mockito.when(config.feedRefresh().intervalEmpirical()).thenReturn(true);
calculator = new FeedRefreshIntervalCalculator(config, instantSource);
}
@Test
void withNullPublishedDate() {
Instant result = calculator.onFetchSuccess(null, 1L, Duration.ZERO);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL), result);
}
@Test
void with31DaysOldPublishedDate() {
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(31)), 1L, Duration.ZERO);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL), result);
}
@Test
void with15DaysOldPublishedDate() {
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(15)), 1L, Duration.ZERO);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL.dividedBy(2)), result);
}
@Test
void with8DaysOldPublishedDate() {
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(8)), 1L, Duration.ZERO);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL.dividedBy(4)), result);
}
@Nested
class FiveDaysOld {
@Test
void averageBetweenBounds() {
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(5)), Duration.ofHours(4).toMillis(),
Duration.ZERO);
Assertions.assertEquals(NOW.plus(Duration.ofHours(2)), result);
}
@Test
void averageBelowMinimum() {
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(5)), 10L, Duration.ZERO);
Assertions.assertEquals(NOW.plus(DEFAULT_INTERVAL), result);
}
@Test
void averageAboveMaximum() {
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(5)), Long.MAX_VALUE, Duration.ZERO);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL.dividedBy(4)), result);
}
@Test
void noAverage() {
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(5)), null, Duration.ZERO);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL), result);
}
}
}
}
@Nested
class FeedNotModified {
@Nested
class EmpiricalDisabled {
@ParameterizedTest
@ValueSource(longs = { 0, 1, 300, 86400000L })
void withoutValidFor(long averageEntryInterval) {
// averageEntryInterval is ignored when empirical is disabled
Instant result = calculator.onFeedNotModified(NOW.minus(Duration.ofDays(5)), averageEntryInterval);
Assertions.assertEquals(NOW.plus(DEFAULT_INTERVAL), result);
}
}
@Nested
class EmpiricalEnabled {
@BeforeEach
void setUp() {
Mockito.when(config.feedRefresh().intervalEmpirical()).thenReturn(true);
calculator = new FeedRefreshIntervalCalculator(config, instantSource);
}
@Test
void withNullPublishedDate() {
Instant result = calculator.onFeedNotModified(null, 1L);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL), result);
}
@Test
void with31DaysOldPublishedDate() {
Instant result = calculator.onFeedNotModified(NOW.minus(Duration.ofDays(31)), 1L);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL), result);
}
@Test
void with15DaysOldPublishedDate() {
Instant result = calculator.onFeedNotModified(NOW.minus(Duration.ofDays(15)), 1L);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL.dividedBy(2)), result);
}
@Test
void with8DaysOldPublishedDate() {
Instant result = calculator.onFeedNotModified(NOW.minus(Duration.ofDays(8)), 1L);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL.dividedBy(4)), result);
}
@Nested
class FiveDaysOld {
@Test
void averageBetweenBounds() {
Instant result = calculator.onFeedNotModified(NOW.minus(Duration.ofDays(5)), Duration.ofHours(4).toMillis());
Assertions.assertEquals(NOW.plus(Duration.ofHours(2)), result);
}
@Test
void averageBelowMinimum() {
Instant result = calculator.onFeedNotModified(NOW.minus(Duration.ofDays(5)), 10L);
Assertions.assertEquals(NOW.plus(DEFAULT_INTERVAL), result);
}
@Test
void averageAboveMaximum() {
Instant result = calculator.onFeedNotModified(NOW.minus(Duration.ofDays(5)), Long.MAX_VALUE);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL.dividedBy(4)), result);
}
@Test
void noAverage() {
Instant result = calculator.onFeedNotModified(NOW.minus(Duration.ofDays(5)), null);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL), result);
}
}
}
}
@Nested
class FetchError {
@BeforeEach
void setUp() {
Mockito.when(config.feedRefresh().errors().retriesBeforeBackoff()).thenReturn(3);
}
@Test
void lowErrorCount() {
Instant result = calculator.onFetchError(1);
Assertions.assertEquals(NOW.plus(DEFAULT_INTERVAL), result);
}
@Test
void highErrorCount() {
Mockito.when(config.feedRefresh().errors().backoffInterval()).thenReturn(Duration.ofHours(1));
Instant result = calculator.onFetchError(5);
Assertions.assertEquals(NOW.plus(Duration.ofHours(3)), result);
}
@Test
void veryHighErrorCount() {
Mockito.when(config.feedRefresh().errors().backoffInterval()).thenReturn(Duration.ofHours(1));
Instant result = calculator.onFetchError(100000);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL), result);
}
}
@Nested
class TooManyRequests {
@BeforeEach
void setUp() {
Mockito.when(config.feedRefresh().errors().retriesBeforeBackoff()).thenReturn(3);
}
@Test
void withRetryAfterZero() {
Instant result = calculator.onTooManyRequests(NOW, 1);
Assertions.assertEquals(NOW.plus(DEFAULT_INTERVAL), result);
}
@Test
void withRetryAfterLowerThanInterval() {
Instant retryAfter = NOW.plus(DEFAULT_INTERVAL.minusSeconds(10));
Instant result = calculator.onTooManyRequests(retryAfter, 1);
Assertions.assertEquals(NOW.plus(DEFAULT_INTERVAL), result);
}
@Test
void withRetryAfterBetweenBounds() {
Instant retryAfter = NOW.plus(DEFAULT_INTERVAL.plusSeconds(10));
Instant result = calculator.onTooManyRequests(retryAfter, 1);
Assertions.assertEquals(retryAfter, result);
}
@Test
void withRetryAfterGreaterThanMaxInterval() {
Instant retryAfter = NOW.plus(MAX_INTERVAL.plusSeconds(10));
Instant result = calculator.onTooManyRequests(retryAfter, 1);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL), result);
}
}
}