Don't reuse an existing content if its other fields are different (#800)

This commit is contained in:
Athou
2022-01-02 21:45:21 +01:00
parent 78b637c83b
commit a77a860e0c
3 changed files with 38 additions and 21 deletions

View File

@@ -24,11 +24,8 @@ public class FeedEntryContentDAO extends GenericDAO<FeedEntryContent> {
super(sessionFactory); super(sessionFactory);
} }
public Long findExisting(String contentHash, String titleHash) { public List<FeedEntryContent> findExisting(String contentHash, String titleHash) {
return query().select(content.id) return query().select(content).from(content).where(content.contentHash.eq(contentHash), content.titleHash.eq(titleHash)).fetch();
.from(content)
.where(content.contentHash.eq(contentHash), content.titleHash.eq(titleHash))
.fetchFirst();
} }
public int deleteWithoutEntries(int max) { public int deleteWithoutEntries(int max) {

View File

@@ -8,6 +8,7 @@ import javax.persistence.Lob;
import javax.persistence.OneToMany; import javax.persistence.OneToMany;
import javax.persistence.Table; import javax.persistence.Table;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.hibernate.annotations.Type; import org.hibernate.annotations.Type;
import lombok.Getter; import lombok.Getter;
@@ -60,4 +61,22 @@ public class FeedEntryContent extends AbstractModel {
@OneToMany(mappedBy = "content") @OneToMany(mappedBy = "content")
private Set<FeedEntry> entries; private Set<FeedEntry> entries;
/**
 * Tells whether another content carries the same values as this one.
 * <p>
 * The comparison covers title, content, author, enclosure url and type, media description, media thumbnail url,
 * width and height, and categories. No other field of the two objects is looked at.
 *
 * @param c
 *            the content to compare against, may be null
 * @return true when every compared field matches, false when c is null or at least one compared field differs
 */
public boolean equivalentTo(FeedEntryContent c) {
    if (c == null) {
        // nothing to compare against
        return false;
    }

    EqualsBuilder comparison = new EqualsBuilder();
    comparison.append(title, c.title);
    comparison.append(content, c.content);
    comparison.append(author, c.author);
    comparison.append(enclosureUrl, c.enclosureUrl);
    comparison.append(enclosureType, c.enclosureType);
    comparison.append(mediaDescription, c.mediaDescription);
    comparison.append(mediaThumbnailUrl, c.mediaThumbnailUrl);
    comparison.append(mediaThumbnailWidth, c.mediaThumbnailWidth);
    comparison.append(mediaThumbnailHeight, c.mediaThumbnailHeight);
    comparison.append(categories, c.categories);
    return comparison.isEquals();
}
} }

View File

@@ -1,5 +1,8 @@
package com.commafeed.backend.service; package com.commafeed.backend.service;
import java.util.List;
import java.util.Optional;
import javax.inject.Inject; import javax.inject.Inject;
import javax.inject.Singleton; import javax.inject.Singleton;
@@ -22,26 +25,24 @@ public class FeedEntryContentService {
* this is NOT thread-safe * this is NOT thread-safe
*/ */
public FeedEntryContent findOrCreate(FeedEntryContent content, String baseUrl) { public FeedEntryContent findOrCreate(FeedEntryContent content, String baseUrl) {
content.setAuthor(FeedUtils.truncate(FeedUtils.handleContent(content.getAuthor(), baseUrl, true), 128));
content.setTitle(FeedUtils.truncate(FeedUtils.handleContent(content.getTitle(), baseUrl, true), 2048));
content.setContent(FeedUtils.handleContent(content.getContent(), baseUrl, false));
content.setMediaDescription(FeedUtils.handleContent(content.getMediaDescription(), baseUrl, false));
String contentHash = DigestUtils.sha1Hex(StringUtils.trimToEmpty(content.getContent())); String contentHash = DigestUtils.sha1Hex(StringUtils.trimToEmpty(content.getContent()));
content.setContentHash(contentHash);
String titleHash = DigestUtils.sha1Hex(StringUtils.trimToEmpty(content.getTitle())); String titleHash = DigestUtils.sha1Hex(StringUtils.trimToEmpty(content.getTitle()));
Long existingId = feedEntryContentDAO.findExisting(contentHash, titleHash); content.setTitleHash(titleHash);
FeedEntryContent result = null; List<FeedEntryContent> existing = feedEntryContentDAO.findExisting(contentHash, titleHash);
if (existingId == null) { Optional<FeedEntryContent> equivalentContent = existing.stream().filter(c -> content.equivalentTo(c)).findFirst();
content.setContentHash(contentHash); if (equivalentContent.isPresent()) {
content.setTitleHash(titleHash); return equivalentContent.get();
content.setAuthor(FeedUtils.truncate(FeedUtils.handleContent(content.getAuthor(), baseUrl, true), 128));
content.setTitle(FeedUtils.truncate(FeedUtils.handleContent(content.getTitle(), baseUrl, true), 2048));
content.setContent(FeedUtils.handleContent(content.getContent(), baseUrl, false));
content.setMediaDescription(FeedUtils.handleContent(content.getMediaDescription(), baseUrl, false));
result = content;
feedEntryContentDAO.saveOrUpdate(result);
} else {
result = new FeedEntryContent();
result.setId(existingId);
} }
return result;
feedEntryContentDAO.saveOrUpdate(content);
return content;
} }
} }