store and use urlAfterRedirect if different than the actual url

This commit is contained in:
Athou
2013-08-22 15:55:05 +02:00
parent a14484ee03
commit 966caae727
4 changed files with 28 additions and 11 deletions

View File

@@ -12,6 +12,8 @@ import lombok.extern.slf4j.Slf4j;
import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang.time.DateUtils; import org.apache.commons.lang.time.DateUtils;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils;
import com.codahale.metrics.MetricRegistry; import com.codahale.metrics.MetricRegistry;
import com.commafeed.backend.HttpGetter.NotModifiedException; import com.commafeed.backend.HttpGetter.NotModifiedException;
@@ -86,17 +88,21 @@ public class FeedRefreshWorker {
int refreshInterval = applicationSettingsService.get().getRefreshIntervalMinutes(); int refreshInterval = applicationSettingsService.get().getRefreshIntervalMinutes();
Date disabledUntil = DateUtils.addMinutes(new Date(), refreshInterval); Date disabledUntil = DateUtils.addMinutes(new Date(), refreshInterval);
try { try {
FetchedFeed fetchedFeed = fetcher.fetch(feed.getUrl(), false, feed.getLastModifiedHeader(), feed.getEtagHeader(), String url = ObjectUtils.firstNonNull(feed.getUrlAfterRedirect(), feed.getUrl());
FetchedFeed fetchedFeed = fetcher.fetch(url, false, feed.getLastModifiedHeader(), feed.getEtagHeader(),
feed.getLastPublishedDate(), feed.getLastContentHash()); feed.getLastPublishedDate(), feed.getLastContentHash());
// stops here if NotModifiedException or any other exception is // stops here if NotModifiedException or any other exception is thrown
// thrown
List<FeedEntry> entries = fetchedFeed.getEntries(); List<FeedEntry> entries = fetchedFeed.getEntries();
if (applicationSettingsService.get().isHeavyLoad()) { if (applicationSettingsService.get().isHeavyLoad()) {
disabledUntil = FeedUtils.buildDisabledUntil(fetchedFeed.getFeed().getLastEntryDate(), fetchedFeed.getFeed() disabledUntil = FeedUtils.buildDisabledUntil(fetchedFeed.getFeed().getLastEntryDate(), fetchedFeed.getFeed()
.getAverageEntryInterval(), disabledUntil); .getAverageEntryInterval(), disabledUntil);
} }
String urlAfterRedirect = fetchedFeed.getUrlAfterRedirect();
if (StringUtils.equals(url, urlAfterRedirect)) {
urlAfterRedirect = null;
}
feed.setUrlAfterRedirect(urlAfterRedirect);
feed.setLink(fetchedFeed.getFeed().getLink()); feed.setLink(fetchedFeed.getFeed().getLink());
feed.setLastModifiedHeader(fetchedFeed.getFeed().getLastModifiedHeader()); feed.setLastModifiedHeader(fetchedFeed.getFeed().getLastModifiedHeader());
feed.setEtagHeader(fetchedFeed.getFeed().getEtagHeader()); feed.setEtagHeader(fetchedFeed.getFeed().getEtagHeader());

View File

@@ -33,6 +33,12 @@ public class Feed extends AbstractModel {
@Column(length = 2048, nullable = false) @Column(length = 2048, nullable = false)
private String url; private String url;
/**
 * Caches the url after potential http 30x redirects.
 * <p>
 * This value is {@code null} when the refresh worker finds that the url returned by the fetch is
 * equal to the url it requested, so the column must accept nulls. The matching migration also adds
 * the column without a NOT NULL constraint.
 */
@Column(name = "url_after_redirect", length = 2048)
private String urlAfterRedirect;
@Column(length = 2048, nullable = false) @Column(length = 2048, nullable = false)
private String normalizedUrl; private String normalizedUrl;
@@ -130,11 +136,4 @@ public class Feed extends AbstractModel {
@Temporal(TemporalType.TIMESTAMP) @Temporal(TemporalType.TIMESTAMP)
private Date pushLastPing; private Date pushLastPing;
public Feed() {
}
public Feed(String url) {
this.url = url;
}
} }

View File

@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<databaseChangeLog xmlns="http://www.liquibase.org/xml/ns/dbchangelog" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-3.0.xsd">
<!--
  Adds the url_after_redirect column (VARCHAR(2048)) to the FEEDS table.
  The column name and length match the Feed entity's "url_after_redirect" mapping (length 2048).
  No constraints or default value are declared here, so rows that already exist
  presumably end up with NULL in this column (verify against the target database).
-->
<changeSet author="athou" id="add-url-after-redirect">
<addColumn tableName="FEEDS">
<column name="url_after_redirect" type="VARCHAR(2048)" />
</addColumn>
</changeSet>
</databaseChangeLog>

View File

@@ -6,5 +6,6 @@
<include file="changelogs/db.changelog-1.0.xml" /> <include file="changelogs/db.changelog-1.0.xml" />
<include file="changelogs/db.changelog-1.1.xml" /> <include file="changelogs/db.changelog-1.1.xml" />
<include file="changelogs/db.changelog-1.2.xml" /> <include file="changelogs/db.changelog-1.2.xml" />
<include file="changelogs/db.changelog-1.3.xml" />
</databaseChangeLog> </databaseChangeLog>