forked from Archives/Athou_commafeed
index title hash
This commit is contained in:
@@ -9,8 +9,6 @@ import javax.persistence.criteria.JoinType;
|
|||||||
import javax.persistence.criteria.Predicate;
|
import javax.persistence.criteria.Predicate;
|
||||||
import javax.persistence.criteria.Root;
|
import javax.persistence.criteria.Root;
|
||||||
|
|
||||||
import org.apache.commons.codec.digest.DigestUtils;
|
|
||||||
|
|
||||||
import com.commafeed.backend.model.FeedEntry;
|
import com.commafeed.backend.model.FeedEntry;
|
||||||
import com.commafeed.backend.model.FeedEntryContent;
|
import com.commafeed.backend.model.FeedEntryContent;
|
||||||
import com.commafeed.backend.model.FeedEntryContent_;
|
import com.commafeed.backend.model.FeedEntryContent_;
|
||||||
@@ -19,27 +17,15 @@ import com.google.common.collect.Iterables;
|
|||||||
|
|
||||||
public class FeedEntryContentDAO extends GenericDAO<FeedEntryContent> {
|
public class FeedEntryContentDAO extends GenericDAO<FeedEntryContent> {
|
||||||
|
|
||||||
public FeedEntryContent findExisting(FeedEntryContent content) {
|
public FeedEntryContent findExisting(String contentHash, String titleHash) {
|
||||||
|
|
||||||
CriteriaQuery<FeedEntryContent> query = builder.createQuery(getType());
|
CriteriaQuery<FeedEntryContent> query = builder.createQuery(getType());
|
||||||
Root<FeedEntryContent> root = query.from(getType());
|
Root<FeedEntryContent> root = query.from(getType());
|
||||||
|
|
||||||
Predicate p1 = builder.equal(root.get(FeedEntryContent_.contentHash), DigestUtils.sha1Hex(content.getContent()));
|
Predicate p1 = builder.equal(root.get(FeedEntryContent_.contentHash), contentHash);
|
||||||
Predicate p2 = null;
|
Predicate p2 = builder.equal(root.get(FeedEntryContent_.titleHash), titleHash);
|
||||||
if (content.getTitle() == null) {
|
|
||||||
p2 = builder.isNull(root.get(FeedEntryContent_.title));
|
|
||||||
} else {
|
|
||||||
p2 = builder.equal(root.get(FeedEntryContent_.title), content.getTitle());
|
|
||||||
}
|
|
||||||
|
|
||||||
Predicate p3 = null;
|
query.where(p1, p2);
|
||||||
if (content.getAuthor() == null) {
|
|
||||||
p3 = builder.isNull(root.get(FeedEntryContent_.author));
|
|
||||||
} else {
|
|
||||||
p3 = builder.equal(root.get(FeedEntryContent_.author), content.getAuthor());
|
|
||||||
}
|
|
||||||
|
|
||||||
query.where(p1, p2, p3);
|
|
||||||
TypedQuery<FeedEntryContent> q = em.createQuery(query);
|
TypedQuery<FeedEntryContent> q = em.createQuery(query);
|
||||||
return Iterables.getFirst(q.getResultList(), null);
|
return Iterables.getFirst(q.getResultList(), null);
|
||||||
|
|
||||||
|
|||||||
@@ -90,7 +90,7 @@ public class FeedParser {
|
|||||||
FeedEntryContent content = new FeedEntryContent();
|
FeedEntryContent content = new FeedEntryContent();
|
||||||
content.setContent(getContent(item));
|
content.setContent(getContent(item));
|
||||||
content.setTitle(getTitle(item));
|
content.setTitle(getTitle(item));
|
||||||
content.setAuthor(item.getAuthor());
|
content.setAuthor(StringUtils.trimToNull(item.getAuthor()));
|
||||||
SyndEnclosure enclosure = (SyndEnclosure) Iterables.getFirst(item.getEnclosures(), null);
|
SyndEnclosure enclosure = (SyndEnclosure) Iterables.getFirst(item.getEnclosures(), null);
|
||||||
if (enclosure != null) {
|
if (enclosure != null) {
|
||||||
content.setEnclosureUrl(FeedUtils.truncate(enclosure.getUrl(), 2048));
|
content.setEnclosureUrl(FeedUtils.truncate(enclosure.getUrl(), 2048));
|
||||||
@@ -187,7 +187,7 @@ public class FeedParser {
|
|||||||
} else {
|
} else {
|
||||||
content = StringUtils.join(Collections2.transform(item.getContents(), CONTENT_TO_STRING), SystemUtils.LINE_SEPARATOR);
|
content = StringUtils.join(Collections2.transform(item.getContents(), CONTENT_TO_STRING), SystemUtils.LINE_SEPARATOR);
|
||||||
}
|
}
|
||||||
return StringUtils.trimToEmpty(content);
|
return StringUtils.trimToNull(content);
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getTitle(SyndEntry item) {
|
private String getTitle(SyndEntry item) {
|
||||||
@@ -200,7 +200,7 @@ public class FeedParser {
|
|||||||
title = "(no title)";
|
title = "(no title)";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return title;
|
return StringUtils.trimToNull(title);
|
||||||
}
|
}
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
|
|||||||
@@ -151,7 +151,7 @@ public class FeedRefreshUpdater {
|
|||||||
// lock on content, make sure we are not updating the same entry
|
// lock on content, make sure we are not updating the same entry
|
||||||
// twice at the same time
|
// twice at the same time
|
||||||
FeedEntryContent content = entry.getContent();
|
FeedEntryContent content = entry.getContent();
|
||||||
String key2 = DigestUtils.sha1Hex(content.getContent() + content.getTitle() + content.getAuthor());
|
String key2 = DigestUtils.sha1Hex(StringUtils.trimToEmpty(content.getContent() + content.getTitle()));
|
||||||
|
|
||||||
Iterator<Lock> iterator = locks.bulkGet(Arrays.asList(key1, key2)).iterator();
|
Iterator<Lock> iterator = locks.bulkGet(Arrays.asList(key1, key2)).iterator();
|
||||||
Lock lock1 = iterator.next();
|
Lock lock1 = iterator.next();
|
||||||
|
|||||||
@@ -22,6 +22,9 @@ public class FeedEntryContent extends AbstractModel {
|
|||||||
@Column(length = 2048)
|
@Column(length = 2048)
|
||||||
private String title;
|
private String title;
|
||||||
|
|
||||||
|
@Column(length = 40)
|
||||||
|
private String titleHash;
|
||||||
|
|
||||||
@Lob
|
@Lob
|
||||||
@Column(length = Integer.MAX_VALUE)
|
@Column(length = Integer.MAX_VALUE)
|
||||||
private String content;
|
private String content;
|
||||||
@@ -97,4 +100,12 @@ public class FeedEntryContent extends AbstractModel {
|
|||||||
this.entries = entries;
|
this.entries = entries;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getTitleHash() {
|
||||||
|
return titleHash;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTitleHash(String titleHash) {
|
||||||
|
this.titleHash = titleHash;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package com.commafeed.backend.services;
|
|||||||
import javax.inject.Inject;
|
import javax.inject.Inject;
|
||||||
|
|
||||||
import org.apache.commons.codec.digest.DigestUtils;
|
import org.apache.commons.codec.digest.DigestUtils;
|
||||||
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
|
||||||
import com.commafeed.backend.dao.FeedEntryContentDAO;
|
import com.commafeed.backend.dao.FeedEntryContentDAO;
|
||||||
import com.commafeed.backend.feeds.FeedUtils;
|
import com.commafeed.backend.feeds.FeedUtils;
|
||||||
@@ -18,11 +19,15 @@ public class FeedEntryContentService {
|
|||||||
*/
|
*/
|
||||||
public FeedEntryContent findOrCreate(FeedEntryContent content, String baseUrl) {
|
public FeedEntryContent findOrCreate(FeedEntryContent content, String baseUrl) {
|
||||||
|
|
||||||
FeedEntryContent existing = feedEntryContentDAO.findExisting(content);
|
String contentHash = DigestUtils.sha1Hex(StringUtils.trimToEmpty(content.getContent()));
|
||||||
|
String titleHash = DigestUtils.sha1Hex(StringUtils.trimToEmpty(content.getTitle()));
|
||||||
|
FeedEntryContent existing = feedEntryContentDAO.findExisting(contentHash, titleHash);
|
||||||
if (existing == null) {
|
if (existing == null) {
|
||||||
|
content.setContentHash(contentHash);
|
||||||
|
content.setTitleHash(titleHash);
|
||||||
|
|
||||||
content.setAuthor(FeedUtils.truncate(FeedUtils.handleContent(content.getAuthor(), baseUrl, true), 128));
|
content.setAuthor(FeedUtils.truncate(FeedUtils.handleContent(content.getAuthor(), baseUrl, true), 128));
|
||||||
content.setTitle(FeedUtils.truncate(FeedUtils.handleContent(content.getTitle(), baseUrl, true), 2048));
|
content.setTitle(FeedUtils.truncate(FeedUtils.handleContent(content.getTitle(), baseUrl, true), 2048));
|
||||||
content.setContentHash(DigestUtils.sha1Hex(content.getContent()));
|
|
||||||
content.setContent(FeedUtils.handleContent(content.getContent(), baseUrl, false));
|
content.setContent(FeedUtils.handleContent(content.getContent(), baseUrl, false));
|
||||||
existing = content;
|
existing = content;
|
||||||
feedEntryContentDAO.saveOrUpdate(existing);
|
feedEntryContentDAO.saveOrUpdate(existing);
|
||||||
|
|||||||
@@ -90,4 +90,15 @@
|
|||||||
</createIndex>
|
</createIndex>
|
||||||
</changeSet>
|
</changeSet>
|
||||||
|
|
||||||
|
<changeSet author="athou" id="add-title-hashes">
|
||||||
|
<addColumn tableName="FEEDENTRYCONTENTS">
|
||||||
|
<column name="titleHash" type="VARCHAR(40)" />
|
||||||
|
</addColumn>
|
||||||
|
<createIndex tableName="FEEDENTRYCONTENTS" indexName="content_title_index">
|
||||||
|
<column name="contentHash" />
|
||||||
|
<column name="titleHash" />
|
||||||
|
</createIndex>
|
||||||
|
<dropIndex tableName="FEEDENTRYCONTENTS" indexName="content_hash_index" />
|
||||||
|
</changeSet>
|
||||||
|
|
||||||
</databaseChangeLog>
|
</databaseChangeLog>
|
||||||
|
|||||||
Reference in New Issue
Block a user