diff --git a/src/main/java/com/commafeed/backend/dao/FeedEntryContentDAO.java b/src/main/java/com/commafeed/backend/dao/FeedEntryContentDAO.java index a03591e9..4a244e28 100644 --- a/src/main/java/com/commafeed/backend/dao/FeedEntryContentDAO.java +++ b/src/main/java/com/commafeed/backend/dao/FeedEntryContentDAO.java @@ -9,8 +9,6 @@ import javax.persistence.criteria.JoinType; import javax.persistence.criteria.Predicate; import javax.persistence.criteria.Root; -import org.apache.commons.codec.digest.DigestUtils; - import com.commafeed.backend.model.FeedEntry; import com.commafeed.backend.model.FeedEntryContent; import com.commafeed.backend.model.FeedEntryContent_; @@ -19,27 +17,15 @@ import com.google.common.collect.Iterables; public class FeedEntryContentDAO extends GenericDAO { - public FeedEntryContent findExisting(FeedEntryContent content) { + public FeedEntryContent findExisting(String contentHash, String titleHash) { CriteriaQuery query = builder.createQuery(getType()); Root root = query.from(getType()); - Predicate p1 = builder.equal(root.get(FeedEntryContent_.contentHash), DigestUtils.sha1Hex(content.getContent())); - Predicate p2 = null; - if (content.getTitle() == null) { - p2 = builder.isNull(root.get(FeedEntryContent_.title)); - } else { - p2 = builder.equal(root.get(FeedEntryContent_.title), content.getTitle()); - } + Predicate p1 = builder.equal(root.get(FeedEntryContent_.contentHash), contentHash); + Predicate p2 = builder.equal(root.get(FeedEntryContent_.titleHash), titleHash); - Predicate p3 = null; - if (content.getAuthor() == null) { - p3 = builder.isNull(root.get(FeedEntryContent_.author)); - } else { - p3 = builder.equal(root.get(FeedEntryContent_.author), content.getAuthor()); - } - - query.where(p1, p2, p3); + query.where(p1, p2); TypedQuery q = em.createQuery(query); return Iterables.getFirst(q.getResultList(), null); diff --git a/src/main/java/com/commafeed/backend/feeds/FeedParser.java b/src/main/java/com/commafeed/backend/feeds/FeedParser.java index e3bea81c..81617b71 100644 --- a/src/main/java/com/commafeed/backend/feeds/FeedParser.java +++ b/src/main/java/com/commafeed/backend/feeds/FeedParser.java @@ -90,7 +90,7 @@ public class FeedParser { FeedEntryContent content = new FeedEntryContent(); content.setContent(getContent(item)); content.setTitle(getTitle(item)); - content.setAuthor(item.getAuthor()); + content.setAuthor(StringUtils.trimToNull(item.getAuthor())); SyndEnclosure enclosure = (SyndEnclosure) Iterables.getFirst(item.getEnclosures(), null); if (enclosure != null) { content.setEnclosureUrl(FeedUtils.truncate(enclosure.getUrl(), 2048)); @@ -187,7 +187,7 @@ public class FeedParser { } else { content = StringUtils.join(Collections2.transform(item.getContents(), CONTENT_TO_STRING), SystemUtils.LINE_SEPARATOR); } - return StringUtils.trimToEmpty(content); + return StringUtils.trimToNull(content); } private String getTitle(SyndEntry item) { @@ -200,7 +200,7 @@ public class FeedParser { title = "(no title)"; } } - return title; + return StringUtils.trimToNull(title); } @SuppressWarnings("unchecked") diff --git a/src/main/java/com/commafeed/backend/feeds/FeedRefreshUpdater.java b/src/main/java/com/commafeed/backend/feeds/FeedRefreshUpdater.java index bab91020..b8c0c632 100644 --- a/src/main/java/com/commafeed/backend/feeds/FeedRefreshUpdater.java +++ b/src/main/java/com/commafeed/backend/feeds/FeedRefreshUpdater.java @@ -151,7 +151,7 @@ public class FeedRefreshUpdater { // lock on content, make sure we are not updating the same entry // twice at the same time FeedEntryContent content = entry.getContent(); - String key2 = DigestUtils.sha1Hex(content.getContent() + content.getTitle() + content.getAuthor()); + String key2 = DigestUtils.sha1Hex(StringUtils.trimToEmpty(content.getContent() + content.getTitle())); Iterator iterator = locks.bulkGet(Arrays.asList(key1, key2)).iterator(); Lock lock1 = iterator.next(); diff --git a/src/main/java/com/commafeed/backend/model/FeedEntryContent.java b/src/main/java/com/commafeed/backend/model/FeedEntryContent.java index fc6a545e..551f0dc5 100644 --- a/src/main/java/com/commafeed/backend/model/FeedEntryContent.java +++ b/src/main/java/com/commafeed/backend/model/FeedEntryContent.java @@ -21,6 +21,9 @@ public class FeedEntryContent extends AbstractModel { @Column(length = 2048) private String title; + + @Column(length = 40) + private String titleHash; @Lob @Column(length = Integer.MAX_VALUE) @@ -97,4 +100,12 @@ public class FeedEntryContent extends AbstractModel { this.entries = entries; } + public String getTitleHash() { + return titleHash; + } + + public void setTitleHash(String titleHash) { + this.titleHash = titleHash; + } + } diff --git a/src/main/java/com/commafeed/backend/services/FeedEntryContentService.java b/src/main/java/com/commafeed/backend/services/FeedEntryContentService.java index 18ec32f4..eeeeae6a 100644 --- a/src/main/java/com/commafeed/backend/services/FeedEntryContentService.java +++ b/src/main/java/com/commafeed/backend/services/FeedEntryContentService.java @@ -3,6 +3,7 @@ package com.commafeed.backend.services; import javax.inject.Inject; import org.apache.commons.codec.digest.DigestUtils; +import org.apache.commons.lang.StringUtils; import com.commafeed.backend.dao.FeedEntryContentDAO; import com.commafeed.backend.feeds.FeedUtils; @@ -17,12 +18,16 @@ public class FeedEntryContentService { * this is NOT thread-safe */ public FeedEntryContent findOrCreate(FeedEntryContent content, String baseUrl) { - - FeedEntryContent existing = feedEntryContentDAO.findExisting(content); + + String contentHash = DigestUtils.sha1Hex(StringUtils.trimToEmpty(content.getContent())); + String titleHash = DigestUtils.sha1Hex(StringUtils.trimToEmpty(content.getTitle())); + FeedEntryContent existing = feedEntryContentDAO.findExisting(contentHash, titleHash); if (existing == null) { + content.setContentHash(contentHash); + content.setTitleHash(titleHash); + content.setAuthor(FeedUtils.truncate(FeedUtils.handleContent(content.getAuthor(), baseUrl, true), 128)); content.setTitle(FeedUtils.truncate(FeedUtils.handleContent(content.getTitle(), baseUrl, true), 2048)); - content.setContentHash(DigestUtils.sha1Hex(content.getContent())); content.setContent(FeedUtils.handleContent(content.getContent(), baseUrl, false)); existing = content; feedEntryContentDAO.saveOrUpdate(existing); diff --git a/src/main/resources/changelogs/db.changelog-1.2.xml b/src/main/resources/changelogs/db.changelog-1.2.xml index fcb8bd54..34297474 100644 --- a/src/main/resources/changelogs/db.changelog-1.2.xml +++ b/src/main/resources/changelogs/db.changelog-1.2.xml @@ -90,4 +90,15 @@ + + + + + + + + + + +