feed refresh engine previously hardcoded values are now configurable (#1677)

This commit is contained in:
Athou
2025-02-11 22:14:32 +01:00
parent bde556d41f
commit 469420b5bf
5 changed files with 435 additions and 53 deletions

View File

@@ -366,7 +366,7 @@ Feed refresh engine settings
`commafeed.feed-refresh.interval` `commafeed.feed-refresh.interval`
Amount of time CommaFeed will wait before refreshing the same feed. Default amount of time CommaFeed will wait before refreshing a feed.
@@ -383,10 +383,36 @@ Environment variable: `COMMAFEED_FEED_REFRESH_INTERVAL`</td>
<tr> <tr>
<td> <td>
`commafeed.feed-refresh.max-interval`
Maximum amount of time CommaFeed will wait before refreshing a feed. This is used as an upper bound when:
<ul>
<li>an error occurs while refreshing a feed and we're backing off exponentially</li>
<li>we receive a Cache-Control header from the feed</li>
<li>we receive a Retry-After header from the feed</li>
</ul>
Environment variable: `COMMAFEED_FEED_REFRESH_MAX_INTERVAL`</td>
<td>
[Duration](https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/time/Duration.html) [🛈](#duration-note-anchor)
</td>
<td>
`24H`
</td>
</tr>
<tr>
<td>
`commafeed.feed-refresh.interval-empirical` `commafeed.feed-refresh.interval-empirical`
If true, CommaFeed will calculate the next refresh time based on the feed's average time between entries and the time since the If enabled, CommaFeed will calculate the next refresh time based on the feed's average time between entries and the time since
last entry was published. The interval will be somewhere between the default refresh interval and 24h. the last entry was published. The interval will be sometimes between the default refresh interval
(`commafeed.feed-refresh.interval`) and the maximum refresh interval (`commafeed.feed-refresh.max-interval`).
See <code>FeedRefreshIntervalCalculator</code> for details. See <code>FeedRefreshIntervalCalculator</code> for details.
@@ -502,6 +528,52 @@ Environment variable: `COMMAFEED_FEED_REFRESH_FORCE_REFRESH_COOLDOWN_DURATION`</
<thead> <thead>
<tr> <tr>
<th align="left" colspan="3"> <th align="left" colspan="3">
&nbsp;&nbsp;&nbsp;&nbsp;Feed refresh engine error handling settings
</th>
</tr>
</thead>
<tr>
<td>
`commafeed.feed-refresh.errors.retries-before-backoff`
Number of retries before backoff is applied.
Environment variable: `COMMAFEED_FEED_REFRESH_ERRORS_RETRIES_BEFORE_BACKOFF`</td>
<td>
int
</td>
<td>
`3`
</td>
</tr>
<tr>
<td>
`commafeed.feed-refresh.errors.backoff-interval`
Duration to wait before retrying after an error. Will be multiplied by the number of errors since the last successful fetch.
Environment variable: `COMMAFEED_FEED_REFRESH_ERRORS_BACKOFF_INTERVAL`</td>
<td>
[Duration](https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/time/Duration.html) [🛈](#duration-note-anchor)
</td>
<td>
`1H`
</td>
</tr>
<thead>
<tr>
<th align="left" colspan="3">
Database settings Database settings
</th> </th>
</tr> </tr>

View File

@@ -168,20 +168,39 @@ public interface CommaFeedConfiguration {
interface FeedRefresh { interface FeedRefresh {
/** /**
* Amount of time CommaFeed will wait before refreshing the same feed. * Default amount of time CommaFeed will wait before refreshing a feed.
*/ */
@WithDefault("5m") @WithDefault("5m")
Duration interval(); Duration interval();
/** /**
* If true, CommaFeed will calculate the next refresh time based on the feed's average time between entries and the time since the * Maximum amount of time CommaFeed will wait before refreshing a feed. This is used as an upper bound when:
* last entry was published. The interval will be somewhere between the default refresh interval and 24h. *
* <ul>
* <li>an error occurs while refreshing a feed and we're backing off exponentially</li>
* <li>we receive a Cache-Control header from the feed</li>
* <li>we receive a Retry-After header from the feed</li>
* </ul>
*/
@WithDefault("24h")
Duration maxInterval();
/**
* If enabled, CommaFeed will calculate the next refresh time based on the feed's average time between entries and the time since
* the last entry was published. The interval will be sometimes between the default refresh interval
* (`commafeed.feed-refresh.interval`) and the maximum refresh interval (`commafeed.feed-refresh.max-interval`).
* *
* See {@link FeedRefreshIntervalCalculator} for details. * See {@link FeedRefreshIntervalCalculator} for details.
*/ */
@WithDefault("false") @WithDefault("false")
boolean intervalEmpirical(); boolean intervalEmpirical();
/**
* Feed refresh engine error handling settings.
*/
@ConfigDocSection
FeedRefreshErrorHandling errors();
/** /**
* Amount of http threads used to fetch feeds. * Amount of http threads used to fetch feeds.
*/ */
@@ -217,6 +236,21 @@ public interface CommaFeedConfiguration {
Duration forceRefreshCooldownDuration(); Duration forceRefreshCooldownDuration();
} }
interface FeedRefreshErrorHandling {
/**
* Number of retries before backoff is applied.
*/
@Min(0)
@WithDefault("3")
int retriesBeforeBackoff();
/**
* Duration to wait before retrying after an error. Will be multiplied by the number of errors since the last successful fetch.
*/
@WithDefault("1h")
Duration backoffInterval();
}
interface Database { interface Database {
/** /**
* Timeout applied to all database queries. * Timeout applied to all database queries.

View File

@@ -2,83 +2,83 @@ package com.commafeed.backend.feed;
import java.time.Duration; import java.time.Duration;
import java.time.Instant; import java.time.Instant;
import java.time.InstantSource;
import java.time.temporal.ChronoUnit; import java.time.temporal.ChronoUnit;
import org.apache.commons.lang3.ObjectUtils; import org.apache.commons.lang3.ObjectUtils;
import com.commafeed.CommaFeedConfiguration; import com.commafeed.CommaFeedConfiguration;
import com.commafeed.CommaFeedConfiguration.FeedRefreshErrorHandling;
import com.google.common.primitives.Longs;
import jakarta.inject.Singleton; import jakarta.inject.Singleton;
@Singleton @Singleton
public class FeedRefreshIntervalCalculator { public class FeedRefreshIntervalCalculator {
private final Duration refreshInterval; private final Duration interval;
private final boolean empiricalInterval; private final Duration maxInterval;
private final boolean empirical;
private final FeedRefreshErrorHandling errorHandling;
private final InstantSource instantSource;
public FeedRefreshIntervalCalculator(CommaFeedConfiguration config) { public FeedRefreshIntervalCalculator(CommaFeedConfiguration config, InstantSource instantSource) {
this.refreshInterval = config.feedRefresh().interval(); this.interval = config.feedRefresh().interval();
this.empiricalInterval = config.feedRefresh().intervalEmpirical(); this.maxInterval = config.feedRefresh().maxInterval();
this.empirical = config.feedRefresh().intervalEmpirical();
this.errorHandling = config.feedRefresh().errors();
this.instantSource = instantSource;
} }
public Instant onFetchSuccess(Instant publishedDate, Long averageEntryInterval) { public Instant onFetchSuccess(Instant publishedDate, Long averageEntryInterval, Duration validFor) {
Instant defaultRefreshInterval = getDefaultRefreshInterval(); Instant instant = empirical ? computeEmpiricalRefreshInterval(publishedDate, averageEntryInterval)
return empiricalInterval ? computeEmpiricalRefreshInterval(publishedDate, averageEntryInterval, defaultRefreshInterval) : instantSource.instant().plus(interval);
: defaultRefreshInterval; return limitToMaxInterval(ObjectUtils.max(instant, instantSource.instant().plus(validFor)));
} }
public Instant onFeedNotModified(Instant publishedDate, Long averageEntryInterval) { public Instant onFeedNotModified(Instant publishedDate, Long averageEntryInterval) {
return onFetchSuccess(publishedDate, averageEntryInterval); return onFetchSuccess(publishedDate, averageEntryInterval, Duration.ZERO);
} }
public Instant onTooManyRequests(Instant retryAfter, int errorCount) { public Instant onTooManyRequests(Instant retryAfter, int errorCount) {
return ObjectUtils.max(retryAfter, onFetchError(errorCount)); return limitToMaxInterval(ObjectUtils.max(retryAfter, onFetchError(errorCount)));
} }
public Instant onFetchError(int errorCount) { public Instant onFetchError(int errorCount) {
int retriesBeforeDisable = 3; if (errorCount < errorHandling.retriesBeforeBackoff()) {
if (errorCount < retriesBeforeDisable || !empiricalInterval) { return limitToMaxInterval(instantSource.instant().plus(interval));
return getDefaultRefreshInterval();
} }
int disabledHours = Math.min(24 * 7, errorCount - retriesBeforeDisable + 1); Duration retryInterval = errorHandling.backoffInterval().multipliedBy(errorCount - errorHandling.retriesBeforeBackoff() + 1L);
return Instant.now().plus(Duration.ofHours(disabledHours)); return limitToMaxInterval(instantSource.instant().plus(retryInterval));
} }
private Instant getDefaultRefreshInterval() { private Instant computeEmpiricalRefreshInterval(Instant publishedDate, Long averageEntryInterval) {
return Instant.now().plus(refreshInterval); Instant now = instantSource.instant();
}
private Instant computeEmpiricalRefreshInterval(Instant publishedDate, Long averageEntryInterval, Instant defaultRefreshInterval) {
Instant now = Instant.now();
if (publishedDate == null) { if (publishedDate == null) {
// feed with no entries, recheck in 24 hours return now.plus(maxInterval);
return now.plus(Duration.ofHours(24)); }
} else if (ChronoUnit.DAYS.between(publishedDate, now) >= 30) {
// older than a month, recheck in 24 hours long daysSinceLastPublication = ChronoUnit.DAYS.between(publishedDate, now);
return now.plus(Duration.ofHours(24)); if (daysSinceLastPublication >= 30) {
} else if (ChronoUnit.DAYS.between(publishedDate, now) >= 14) { return now.plus(maxInterval);
// older than two weeks, recheck in 12 hours } else if (daysSinceLastPublication >= 14) {
return now.plus(Duration.ofHours(12)); return now.plus(maxInterval.dividedBy(2));
} else if (ChronoUnit.DAYS.between(publishedDate, now) >= 7) { } else if (daysSinceLastPublication >= 7) {
// older than a week, recheck in 6 hours return now.plus(maxInterval.dividedBy(4));
return now.plus(Duration.ofHours(6));
} else if (averageEntryInterval != null) { } else if (averageEntryInterval != null) {
// use average time between entries to decide when to refresh next, divided by factor // use average time between entries to decide when to refresh next, divided by factor
int factor = 2; int factor = 2;
long millis = Longs.constrainToRange(averageEntryInterval / factor, interval.toMillis(), maxInterval.dividedBy(4).toMillis());
// not more than 6 hours return now.plusMillis(millis);
long date = Math.min(now.plus(Duration.ofHours(6)).toEpochMilli(), now.toEpochMilli() + averageEntryInterval / factor);
// not less than default refresh interval
date = Math.max(defaultRefreshInterval.toEpochMilli(), date);
return Instant.ofEpochMilli(date);
} else { } else {
// unknown case, recheck in 24 hours // unknown case
return now.plus(Duration.ofHours(24)); return now.plus(maxInterval);
} }
} }
private Instant limitToMaxInterval(Instant instant) {
return ObjectUtils.min(instant, instantSource.instant().plus(maxInterval));
}
} }

View File

@@ -6,7 +6,6 @@ import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Optional; import java.util.Optional;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import com.codahale.metrics.Meter; import com.codahale.metrics.Meter;
@@ -78,9 +77,8 @@ public class FeedRefreshWorker {
feed.setErrorCount(0); feed.setErrorCount(0);
feed.setMessage(null); feed.setMessage(null);
feed.setDisabledUntil(ObjectUtils.max( feed.setDisabledUntil(refreshIntervalCalculator.onFetchSuccess(result.feed().lastPublishedDate(),
refreshIntervalCalculator.onFetchSuccess(result.feed().lastPublishedDate(), result.feed().averageEntryInterval()), result.feed().averageEntryInterval(), result.validFor()));
Instant.now().plus(result.validFor())));
return new FeedRefreshWorkerResult(feed, entries); return new FeedRefreshWorkerResult(feed, entries);
} catch (NotModifiedException e) { } catch (NotModifiedException e) {

View File

@@ -0,0 +1,278 @@
package com.commafeed.backend.feed;
import java.time.Duration;
import java.time.Instant;
import java.time.InstantSource;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import org.mockito.Mock;
import org.mockito.Mockito;
import org.mockito.junit.jupiter.MockitoExtension;
import com.commafeed.CommaFeedConfiguration;
import com.commafeed.CommaFeedConfiguration.FeedRefreshErrorHandling;
@ExtendWith(MockitoExtension.class)
class FeedRefreshIntervalCalculatorTest {
private static final Instant NOW = Instant.now();
private static final Duration DEFAULT_INTERVAL = Duration.ofHours(1);
private static final Duration MAX_INTERVAL = Duration.ofDays(1);
@Mock
private InstantSource instantSource;
@Mock
private CommaFeedConfiguration config;
@Mock
private FeedRefreshErrorHandling errorHandling;
private FeedRefreshIntervalCalculator calculator;
@BeforeEach
void setUp() {
Mockito.when(instantSource.instant()).thenReturn(NOW);
Mockito.when(config.feedRefresh()).thenReturn(Mockito.mock(CommaFeedConfiguration.FeedRefresh.class));
Mockito.when(config.feedRefresh().interval()).thenReturn(DEFAULT_INTERVAL);
Mockito.when(config.feedRefresh().maxInterval()).thenReturn(MAX_INTERVAL);
Mockito.when(config.feedRefresh().errors()).thenReturn(errorHandling);
calculator = new FeedRefreshIntervalCalculator(config, instantSource);
}
@Nested
class FetchSuccess {
@Nested
class EmpiricalDisabled {
@ParameterizedTest
@ValueSource(longs = { 0, 1, 300, 86400000L })
void withoutValidFor(long averageEntryInterval) {
// averageEntryInterval is ignored when empirical is disabled
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(5)), averageEntryInterval, Duration.ZERO);
Assertions.assertEquals(NOW.plus(DEFAULT_INTERVAL), result);
}
@Test
void withValidForGreaterThanMaxInterval() {
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(5)), 1L, MAX_INTERVAL.plusDays(1));
Assertions.assertEquals(NOW.plus(MAX_INTERVAL), result);
}
@Test
void withValidForLowerThanMaxInterval() {
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(5)), 1L, MAX_INTERVAL.minusSeconds(1));
Assertions.assertEquals(NOW.plus(MAX_INTERVAL).minusSeconds(1), result);
}
}
@Nested
class EmpiricalEnabled {
@BeforeEach
void setUp() {
Mockito.when(config.feedRefresh().intervalEmpirical()).thenReturn(true);
calculator = new FeedRefreshIntervalCalculator(config, instantSource);
}
@Test
void withNullPublishedDate() {
Instant result = calculator.onFetchSuccess(null, 1L, Duration.ZERO);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL), result);
}
@Test
void with31DaysOldPublishedDate() {
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(31)), 1L, Duration.ZERO);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL), result);
}
@Test
void with15DaysOldPublishedDate() {
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(15)), 1L, Duration.ZERO);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL.dividedBy(2)), result);
}
@Test
void with8DaysOldPublishedDate() {
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(8)), 1L, Duration.ZERO);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL.dividedBy(4)), result);
}
@Nested
class FiveDaysOld {
@Test
void averageBetweenBounds() {
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(5)), Duration.ofHours(4).toMillis(),
Duration.ZERO);
Assertions.assertEquals(NOW.plus(Duration.ofHours(2)), result);
}
@Test
void averageBelowMinimum() {
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(5)), 10L, Duration.ZERO);
Assertions.assertEquals(NOW.plus(DEFAULT_INTERVAL), result);
}
@Test
void averageAboveMaximum() {
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(5)), Long.MAX_VALUE, Duration.ZERO);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL.dividedBy(4)), result);
}
@Test
void noAverage() {
Instant result = calculator.onFetchSuccess(NOW.minus(Duration.ofDays(5)), null, Duration.ZERO);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL), result);
}
}
}
}
@Nested
class FeedNotModified {
@Nested
class EmpiricalDisabled {
@ParameterizedTest
@ValueSource(longs = { 0, 1, 300, 86400000L })
void withoutValidFor(long averageEntryInterval) {
// averageEntryInterval is ignored when empirical is disabled
Instant result = calculator.onFeedNotModified(NOW.minus(Duration.ofDays(5)), averageEntryInterval);
Assertions.assertEquals(NOW.plus(DEFAULT_INTERVAL), result);
}
}
@Nested
class EmpiricalEnabled {
@BeforeEach
void setUp() {
Mockito.when(config.feedRefresh().intervalEmpirical()).thenReturn(true);
calculator = new FeedRefreshIntervalCalculator(config, instantSource);
}
@Test
void withNullPublishedDate() {
Instant result = calculator.onFeedNotModified(null, 1L);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL), result);
}
@Test
void with31DaysOldPublishedDate() {
Instant result = calculator.onFeedNotModified(NOW.minus(Duration.ofDays(31)), 1L);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL), result);
}
@Test
void with15DaysOldPublishedDate() {
Instant result = calculator.onFeedNotModified(NOW.minus(Duration.ofDays(15)), 1L);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL.dividedBy(2)), result);
}
@Test
void with8DaysOldPublishedDate() {
Instant result = calculator.onFeedNotModified(NOW.minus(Duration.ofDays(8)), 1L);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL.dividedBy(4)), result);
}
@Nested
class FiveDaysOld {
@Test
void averageBetweenBounds() {
Instant result = calculator.onFeedNotModified(NOW.minus(Duration.ofDays(5)), Duration.ofHours(4).toMillis());
Assertions.assertEquals(NOW.plus(Duration.ofHours(2)), result);
}
@Test
void averageBelowMinimum() {
Instant result = calculator.onFeedNotModified(NOW.minus(Duration.ofDays(5)), 10L);
Assertions.assertEquals(NOW.plus(DEFAULT_INTERVAL), result);
}
@Test
void averageAboveMaximum() {
Instant result = calculator.onFeedNotModified(NOW.minus(Duration.ofDays(5)), Long.MAX_VALUE);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL.dividedBy(4)), result);
}
@Test
void noAverage() {
Instant result = calculator.onFeedNotModified(NOW.minus(Duration.ofDays(5)), null);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL), result);
}
}
}
}
@Nested
class FetchError {
@BeforeEach
void setUp() {
Mockito.when(config.feedRefresh().errors().retriesBeforeBackoff()).thenReturn(3);
}
@Test
void lowErrorCount() {
Instant result = calculator.onFetchError(1);
Assertions.assertEquals(NOW.plus(DEFAULT_INTERVAL), result);
}
@Test
void highErrorCount() {
Mockito.when(config.feedRefresh().errors().backoffInterval()).thenReturn(Duration.ofHours(1));
Instant result = calculator.onFetchError(5);
Assertions.assertEquals(NOW.plus(Duration.ofHours(3)), result);
}
@Test
void veryHighErrorCount() {
Mockito.when(config.feedRefresh().errors().backoffInterval()).thenReturn(Duration.ofHours(1));
Instant result = calculator.onFetchError(100000);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL), result);
}
}
@Nested
class TooManyRequests {
@BeforeEach
void setUp() {
Mockito.when(config.feedRefresh().errors().retriesBeforeBackoff()).thenReturn(3);
}
@Test
void withRetryAfterZero() {
Instant result = calculator.onTooManyRequests(NOW, 1);
Assertions.assertEquals(NOW.plus(DEFAULT_INTERVAL), result);
}
@Test
void withRetryAfterLowerThanInterval() {
Instant retryAfter = NOW.plus(DEFAULT_INTERVAL.minusSeconds(10));
Instant result = calculator.onTooManyRequests(retryAfter, 1);
Assertions.assertEquals(NOW.plus(DEFAULT_INTERVAL), result);
}
@Test
void withRetryAfterBetweenBounds() {
Instant retryAfter = NOW.plus(DEFAULT_INTERVAL.plusSeconds(10));
Instant result = calculator.onTooManyRequests(retryAfter, 1);
Assertions.assertEquals(retryAfter, result);
}
@Test
void withRetryAfterGreaterThanMaxInterval() {
Instant retryAfter = NOW.plus(MAX_INTERVAL.plusSeconds(10));
Instant result = calculator.onTooManyRequests(retryAfter, 1);
Assertions.assertEquals(NOW.plus(MAX_INTERVAL), result);
}
}
}