From 789857b09f6610bfcca929b0c4ad93ededdd9f16 Mon Sep 17 00:00:00 2001
From: Athou
Date: Sun, 7 Jan 2024 14:57:40 +0100
Subject: [PATCH] compare feed entry content after cleanup because that's what saved in the database

---
 .../backend/model/FeedEntryContent.java       | 19 +++++++
 .../service/FeedEntryContentService.java      | 53 ++++++-------------
 2 files changed, 34 insertions(+), 38 deletions(-)

diff --git a/commafeed-server/src/main/java/com/commafeed/backend/model/FeedEntryContent.java b/commafeed-server/src/main/java/com/commafeed/backend/model/FeedEntryContent.java
index 04376ce2..e03e9226 100644
--- a/commafeed-server/src/main/java/com/commafeed/backend/model/FeedEntryContent.java
+++ b/commafeed-server/src/main/java/com/commafeed/backend/model/FeedEntryContent.java
@@ -3,6 +3,7 @@ package com.commafeed.backend.model;
 import java.sql.Types;
 import java.util.Set;
 
+import org.apache.commons.lang3.builder.EqualsBuilder;
 import org.hibernate.annotations.JdbcTypeCode;
 
 import jakarta.persistence.Column;
@@ -60,4 +61,22 @@ public class FeedEntryContent extends AbstractModel {
 	@OneToMany(mappedBy = "content")
 	private Set<FeedEntry> entries;
 
+	public boolean equivalentTo(FeedEntryContent c) {
+		if (c == null) {
+			return false;
+		}
+
+		return new EqualsBuilder().append(title, c.title)
+				.append(content, c.content)
+				.append(author, c.author)
+				.append(categories, c.categories)
+				.append(enclosureUrl, c.enclosureUrl)
+				.append(enclosureType, c.enclosureType)
+				.append(mediaDescription, c.mediaDescription)
+				.append(mediaThumbnailUrl, c.mediaThumbnailUrl)
+				.append(mediaThumbnailWidth, c.mediaThumbnailWidth)
+				.append(mediaThumbnailHeight, c.mediaThumbnailHeight)
+				.build();
+	}
+
 }
diff --git a/commafeed-server/src/main/java/com/commafeed/backend/service/FeedEntryContentService.java b/commafeed-server/src/main/java/com/commafeed/backend/service/FeedEntryContentService.java
index 4baff814..52974fa7 100644
--- a/commafeed-server/src/main/java/com/commafeed/backend/service/FeedEntryContentService.java
+++ b/commafeed-server/src/main/java/com/commafeed/backend/service/FeedEntryContentService.java
@@ -1,11 +1,9 @@
 package com.commafeed.backend.service;
 
-import java.util.List;
 import java.util.Optional;
 
 import org.apache.commons.codec.digest.DigestUtils;
 import org.apache.commons.lang3.StringUtils;
-import org.apache.commons.lang3.builder.EqualsBuilder;
 
 import com.commafeed.backend.dao.FeedEntryContentDAO;
 import com.commafeed.backend.feed.FeedUtils;
@@ -29,25 +27,25 @@ public class FeedEntryContentService {
 	 * this is NOT thread-safe
 	 */
 	public FeedEntryContent findOrCreate(Content content, String baseUrl) {
-		String title = FeedUtils.truncate(cleaningService.clean(content.title(), baseUrl, true), 2048);
-		String titleHash = DigestUtils.sha1Hex(StringUtils.trimToEmpty(title));
-
-		String contentString = cleaningService.clean(content.content(), baseUrl, false);
-		String contentHash = DigestUtils.sha1Hex(StringUtils.trimToEmpty(contentString));
-
-		List<FeedEntryContent> existing = feedEntryContentDAO.findExisting(contentHash, titleHash);
-		Optional<FeedEntryContent> equivalentContent = existing.stream()
-				.filter(c -> isEquivalent(c, content, title, contentString))
+		FeedEntryContent entryContent = buildContent(content, baseUrl);
+		Optional<FeedEntryContent> existing = feedEntryContentDAO.findExisting(entryContent.getContentHash(), entryContent.getTitleHash())
+				.stream()
+				.filter(entryContent::equivalentTo)
 				.findFirst();
-		if (equivalentContent.isPresent()) {
-			return equivalentContent.get();
+		if (existing.isPresent()) {
+			return existing.get();
+		} else {
+			feedEntryContentDAO.saveOrUpdate(entryContent);
+			return entryContent;
 		}
+	}
 
+	private FeedEntryContent buildContent(Content content, String baseUrl) {
 		FeedEntryContent entryContent = new FeedEntryContent();
-		entryContent.setTitle(title);
-		entryContent.setTitleHash(titleHash);
-		entryContent.setContent(contentString);
-		entryContent.setContentHash(contentHash);
+		entryContent.setTitleHash(DigestUtils.sha1Hex(StringUtils.trimToEmpty(content.title())));
+		entryContent.setContentHash(DigestUtils.sha1Hex(StringUtils.trimToEmpty(content.content())));
+		entryContent.setTitle(FeedUtils.truncate(cleaningService.clean(content.title(), baseUrl, true), 2048));
+		entryContent.setContent(cleaningService.clean(content.content(), baseUrl, false));
 		entryContent.setAuthor(FeedUtils.truncate(cleaningService.clean(content.author(), baseUrl, true), 128));
 		entryContent.setCategories(FeedUtils.truncate(content.categories(), 4096));
 
@@ -65,28 +63,7 @@ public class FeedEntryContentService {
 			entryContent.setMediaThumbnailHeight(media.thumbnailHeight());
 		}
 
-		feedEntryContentDAO.saveOrUpdate(entryContent);
 		return entryContent;
 	}
 
-	private boolean isEquivalent(FeedEntryContent content, Content c, String title, String contentString) {
-		EqualsBuilder builder = new EqualsBuilder().append(content.getTitle(), title)
-				.append(content.getContent(), contentString)
-				.append(content.getAuthor(), c.author())
-				.append(content.getCategories(), c.categories());
-
-		if (c.enclosure() != null) {
-			builder.append(content.getEnclosureUrl(), c.enclosure().url()).append(content.getEnclosureType(), c.enclosure().type());
-		}
-
-		if (c.media() != null) {
-			builder.append(content.getMediaDescription(), c.media().description())
-					.append(content.getMediaThumbnailUrl(), c.media().thumbnailUrl())
-					.append(content.getMediaThumbnailWidth(), c.media().thumbnailWidth())
-					.append(content.getMediaThumbnailHeight(), c.media().thumbnailHeight());
-		}
-
-		return builder.build();
-	}
-
 }