compare feed entry content after cleanup because that's what is saved in the database

This commit is contained in:
Athou
2024-01-07 14:57:40 +01:00
parent ed45746f52
commit 789857b09f
2 changed files with 34 additions and 38 deletions

View File

@@ -3,6 +3,7 @@ package com.commafeed.backend.model;
import java.sql.Types;
import java.util.Set;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.hibernate.annotations.JdbcTypeCode;
import jakarta.persistence.Column;
@@ -60,4 +61,22 @@ public class FeedEntryContent extends AbstractModel {
@OneToMany(mappedBy = "content")
private Set<FeedEntry> entries;
/**
 * Checks whether another content holds the same values as this one.
 * <p>
 * Only the fields appended below take part in the comparison: title, content, author, categories, the enclosure
 * url and type, and the media description and thumbnail url, width and height.
 *
 * @param c
 *            the content to compare against, may be null
 * @return true when every compared field is equal, false when at least one differs or when {@code c} is null
 */
public boolean equivalentTo(FeedEntryContent c) {
    if (c != null) {
        EqualsBuilder comparison = new EqualsBuilder();

        // textual fields
        comparison.append(title, c.title);
        comparison.append(content, c.content);
        comparison.append(author, c.author);
        comparison.append(categories, c.categories);

        // enclosure
        comparison.append(enclosureUrl, c.enclosureUrl);
        comparison.append(enclosureType, c.enclosureType);

        // media
        comparison.append(mediaDescription, c.mediaDescription);
        comparison.append(mediaThumbnailUrl, c.mediaThumbnailUrl);
        comparison.append(mediaThumbnailWidth, c.mediaThumbnailWidth);
        comparison.append(mediaThumbnailHeight, c.mediaThumbnailHeight);

        return comparison.isEquals();
    }

    // nothing to compare against
    return false;
}
}

View File

@@ -1,11 +1,9 @@
package com.commafeed.backend.service;
import java.util.List;
import java.util.Optional;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.builder.EqualsBuilder;
import com.commafeed.backend.dao.FeedEntryContentDAO;
import com.commafeed.backend.feed.FeedUtils;
@@ -29,25 +27,25 @@ public class FeedEntryContentService {
* this is NOT thread-safe
*/
public FeedEntryContent findOrCreate(Content content, String baseUrl) {
// NOTE(review): this span appears to interleave two versions of the method body (a diff rendering whose +/-
// markers were lost) -- confirm against the repository before treating it as compilable code.
// Variant working on local values: the title is cleaned and truncated to 2048 characters, the content is
// cleaned, and both hashes are the SHA-1 of the cleaned, trimmed strings (a null value is hashed as "").
String title = FeedUtils.truncate(cleaningService.clean(content.title(), baseUrl, true), 2048);
String titleHash = DigestUtils.sha1Hex(StringUtils.trimToEmpty(title));
String contentString = cleaningService.clean(content.content(), baseUrl, false);
String contentHash = DigestUtils.sha1Hex(StringUtils.trimToEmpty(contentString));
// candidates are looked up by hash, then confirmed through isEquivalent
List<FeedEntryContent> existing = feedEntryContentDAO.findExisting(contentHash, titleHash);
Optional<FeedEntryContent> equivalentContent = existing.stream()
.filter(c -> isEquivalent(c, content, title, contentString))
// Variant working on a built entity: buildContent produces the FeedEntryContent first, candidates are looked up
// with that entity's content and title hashes, and the first candidate accepted by equivalentTo is kept.
FeedEntryContent entryContent = buildContent(content, baseUrl);
Optional<FeedEntryContent> existing = feedEntryContentDAO.findExisting(entryContent.getContentHash(), entryContent.getTitleHash())
.stream()
.filter(entryContent::equivalentTo)
.findFirst();
if (equivalentContent.isPresent()) {
return equivalentContent.get();
// a matching stored content is returned as is; otherwise the newly built one is handed to the DAO and returned
if (existing.isPresent()) {
return existing.get();
} else {
feedEntryContentDAO.saveOrUpdate(entryContent);
return entryContent;
}
}
private FeedEntryContent buildContent(Content content, String baseUrl) {
FeedEntryContent entryContent = new FeedEntryContent();
entryContent.setTitle(title);
entryContent.setTitleHash(titleHash);
entryContent.setContent(contentString);
entryContent.setContentHash(contentHash);
entryContent.setTitleHash(DigestUtils.sha1Hex(StringUtils.trimToEmpty(content.title())));
entryContent.setContentHash(DigestUtils.sha1Hex(StringUtils.trimToEmpty(content.content())));
entryContent.setTitle(FeedUtils.truncate(cleaningService.clean(content.title(), baseUrl, true), 2048));
entryContent.setContent(cleaningService.clean(content.content(), baseUrl, false));
entryContent.setAuthor(FeedUtils.truncate(cleaningService.clean(content.author(), baseUrl, true), 128));
entryContent.setCategories(FeedUtils.truncate(content.categories(), 4096));
@@ -65,28 +63,7 @@ public class FeedEntryContentService {
entryContent.setMediaThumbnailHeight(media.thumbnailHeight());
}
feedEntryContentDAO.saveOrUpdate(entryContent);
return entryContent;
}
/**
 * Checks whether a stored content matches the values of an incoming feed content.
 * <p>
 * Title and content are compared against the values passed in by the caller; author, categories, enclosure and media
 * are compared against the values read from {@code c}. When {@code c} has no enclosure or no media, the matching
 * stored fields must be null as well, so a stored content that carries an enclosure or media data is never reported
 * as equivalent to an incoming content that has none.
 *
 * @param content
 *            the stored content
 * @param c
 *            the incoming feed content
 * @param title
 *            the title to compare with the stored title
 * @param contentString
 *            the content to compare with the stored content
 * @return true when every compared field is equal
 */
private boolean isEquivalent(FeedEntryContent content, Content c, String title, String contentString) {
    // NOTE(review): author and categories are compared against the raw values of c, while buildContent in this class
    // stores the author after cleaning/truncation and the categories after truncation -- confirm this cannot cause
    // spurious mismatches; it cannot be aligned here because baseUrl is not available to this method.
    EqualsBuilder builder = new EqualsBuilder().append(content.getTitle(), title)
            .append(content.getContent(), contentString)
            .append(content.getAuthor(), c.author())
            .append(content.getCategories(), c.categories());

    if (c.enclosure() != null) {
        builder.append(content.getEnclosureUrl(), c.enclosure().url()).append(content.getEnclosureType(), c.enclosure().type());
    } else {
        // no incoming enclosure: the stored enclosure fields have to be absent too
        builder.append(content.getEnclosureUrl(), (Object) null).append(content.getEnclosureType(), (Object) null);
    }

    if (c.media() != null) {
        builder.append(content.getMediaDescription(), c.media().description())
                .append(content.getMediaThumbnailUrl(), c.media().thumbnailUrl())
                .append(content.getMediaThumbnailWidth(), c.media().thumbnailWidth())
                .append(content.getMediaThumbnailHeight(), c.media().thumbnailHeight());
    } else {
        // no incoming media: the stored media fields have to be absent too
        builder.append(content.getMediaDescription(), (Object) null)
                .append(content.getMediaThumbnailUrl(), (Object) null)
                .append(content.getMediaThumbnailWidth(), (Object) null)
                .append(content.getMediaThumbnailHeight(), (Object) null);
    }

    return builder.build();
}
}