Don't reuse an existing content when fields other than the hashed title and content differ (#800)

This commit is contained in:
Athou
2022-01-02 21:45:21 +01:00
parent 78b637c83b
commit a77a860e0c
3 changed files with 38 additions and 21 deletions

View File

@@ -24,11 +24,8 @@ public class FeedEntryContentDAO extends GenericDAO<FeedEntryContent> {
super(sessionFactory);
}
public Long findExisting(String contentHash, String titleHash) {
return query().select(content.id)
.from(content)
.where(content.contentHash.eq(contentHash), content.titleHash.eq(titleHash))
.fetchFirst();
public List<FeedEntryContent> findExisting(String contentHash, String titleHash) {
return query().select(content).from(content).where(content.contentHash.eq(contentHash), content.titleHash.eq(titleHash)).fetch();
}
public int deleteWithoutEntries(int max) {

View File

@@ -8,6 +8,7 @@ import javax.persistence.Lob;
import javax.persistence.OneToMany;
import javax.persistence.Table;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.hibernate.annotations.Type;
import lombok.Getter;
@@ -60,4 +61,22 @@ public class FeedEntryContent extends AbstractModel {
@OneToMany(mappedBy = "content")
private Set<FeedEntry> entries;
/**
 * Tells whether another content carries the same data as this one.
 * <p>
 * The comparison covers title, content, author, enclosure url and type, media description,
 * media thumbnail url, width and height, and categories. Fields that are not listed here
 * (for example {@code entries}) take no part in the comparison.
 *
 * @param c
 *            the content to compare with, may be {@code null}
 * @return {@code true} if {@code c} is not {@code null} and every compared field is equal
 */
public boolean equivalentTo(FeedEntryContent c) {
	if (c != null) {
		EqualsBuilder comparison = new EqualsBuilder();
		comparison.append(title, c.title);
		comparison.append(content, c.content);
		comparison.append(author, c.author);
		comparison.append(enclosureUrl, c.enclosureUrl);
		comparison.append(enclosureType, c.enclosureType);
		comparison.append(mediaDescription, c.mediaDescription);
		comparison.append(mediaThumbnailUrl, c.mediaThumbnailUrl);
		comparison.append(mediaThumbnailWidth, c.mediaThumbnailWidth);
		comparison.append(mediaThumbnailHeight, c.mediaThumbnailHeight);
		comparison.append(categories, c.categories);
		return comparison.isEquals();
	}
	// nothing to compare against
	return false;
}
}

View File

@@ -1,5 +1,8 @@
package com.commafeed.backend.service;
import java.util.List;
import java.util.Optional;
import javax.inject.Inject;
import javax.inject.Singleton;
@@ -22,26 +25,24 @@ public class FeedEntryContentService {
* this is NOT thread-safe
*/
public FeedEntryContent findOrCreate(FeedEntryContent content, String baseUrl) {
content.setAuthor(FeedUtils.truncate(FeedUtils.handleContent(content.getAuthor(), baseUrl, true), 128));
content.setTitle(FeedUtils.truncate(FeedUtils.handleContent(content.getTitle(), baseUrl, true), 2048));
content.setContent(FeedUtils.handleContent(content.getContent(), baseUrl, false));
content.setMediaDescription(FeedUtils.handleContent(content.getMediaDescription(), baseUrl, false));
String contentHash = DigestUtils.sha1Hex(StringUtils.trimToEmpty(content.getContent()));
content.setContentHash(contentHash);
String titleHash = DigestUtils.sha1Hex(StringUtils.trimToEmpty(content.getTitle()));
Long existingId = feedEntryContentDAO.findExisting(contentHash, titleHash);
content.setTitleHash(titleHash);
FeedEntryContent result = null;
if (existingId == null) {
content.setContentHash(contentHash);
content.setTitleHash(titleHash);
content.setAuthor(FeedUtils.truncate(FeedUtils.handleContent(content.getAuthor(), baseUrl, true), 128));
content.setTitle(FeedUtils.truncate(FeedUtils.handleContent(content.getTitle(), baseUrl, true), 2048));
content.setContent(FeedUtils.handleContent(content.getContent(), baseUrl, false));
content.setMediaDescription(FeedUtils.handleContent(content.getMediaDescription(), baseUrl, false));
result = content;
feedEntryContentDAO.saveOrUpdate(result);
} else {
result = new FeedEntryContent();
result.setId(existingId);
List<FeedEntryContent> existing = feedEntryContentDAO.findExisting(contentHash, titleHash);
Optional<FeedEntryContent> equivalentContent = existing.stream().filter(c -> content.equivalentTo(c)).findFirst();
if (equivalentContent.isPresent()) {
return equivalentContent.get();
}
return result;
feedEntryContentDAO.saveOrUpdate(content);
return content;
}
}