Interface for deleting duplicates

This commit is contained in:
Athou
2013-07-08 16:38:09 +02:00
parent c8ff6e1d20
commit 52270d50d9
6 changed files with 146 additions and 2 deletions

View File

@@ -7,10 +7,15 @@ import javax.ejb.Stateless;
import javax.persistence.Query;
import javax.persistence.TypedQuery;
import javax.persistence.criteria.CriteriaQuery;
import javax.persistence.criteria.Expression;
import javax.persistence.criteria.JoinType;
import javax.persistence.criteria.Path;
import javax.persistence.criteria.Predicate;
import javax.persistence.criteria.Root;
import javax.persistence.criteria.SetJoin;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlRootElement;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang.StringUtils;
@@ -26,6 +31,13 @@ import com.google.common.collect.Lists;
@Stateless
public class FeedDAO extends GenericDAO<Feed> {
// Result row for FeedDAO.findDuplicates(): one normalized-URL hash plus
// every Feed sharing it. Public fields are serialized directly
// (XmlAccessType.FIELD) so this can be returned from a REST endpoint.
@XmlRootElement
@XmlAccessorType(XmlAccessType.FIELD)
public static class FeedCount {
// hash value shared by two or more feeds (Feed_.normalizedUrlHash)
public String normalizedUrlHash;
// all feeds whose normalizedUrlHash equals the value above
public List<Feed> feeds;
}
private List<Predicate> getUpdatablePredicates(Root<Feed> root,
Date threshold) {
@@ -123,4 +135,29 @@ public class FeedDAO extends GenericDAO<Feed> {
return deleted;
}
/**
 * Finds feeds sharing the same normalized URL hash (i.e. duplicates).
 *
 * @param offset first duplicated hash to return (for paging)
 * @param limit maximum number of duplicated hashes to return
 * @return one {@link FeedCount} per duplicated hash, each carrying the
 *         list of feeds that share it; empty list when no duplicates
 */
public List<FeedCount> findDuplicates(int offset, int limit) {
	// Step 1: group feeds by normalizedUrlHash and keep only the
	// hashes that appear on more than one row.
	CriteriaQuery<String> query = builder.createQuery(String.class);
	Root<Feed> root = query.from(getType());
	Path<String> hashPath = root.get(Feed_.normalizedUrlHash);
	Expression<Long> count = builder.count(hashPath);
	query.select(hashPath);
	query.groupBy(hashPath);
	query.having(builder.greaterThan(count, 1L));
	TypedQuery<String> q = em.createQuery(query);
	limit(q, offset, limit);
	List<String> normalizedUrlHashes = q.getResultList();

	// Step 2: load the feeds behind each duplicated hash.
	List<FeedCount> result = Lists.newArrayList();
	for (String hash : normalizedUrlHashes) {
		FeedCount fc = new FeedCount();
		fc.normalizedUrlHash = hash;
		fc.feeds = findByField(Feed_.normalizedUrlHash, hash);
		// BUG FIX: fc was built but never added, so the method
		// always returned an empty list.
		result.add(fc);
	}
	return result;
}
}

View File

@@ -21,6 +21,7 @@ import com.commafeed.backend.DatabaseCleaner;
import com.commafeed.backend.MetricsBean;
import com.commafeed.backend.StartupBean;
import com.commafeed.backend.dao.FeedDAO;
import com.commafeed.backend.dao.FeedDAO.FeedCount;
import com.commafeed.backend.dao.UserDAO;
import com.commafeed.backend.dao.UserRoleDAO;
import com.commafeed.backend.feeds.FeedRefreshTaskGiver;
@@ -38,8 +39,8 @@ import com.commafeed.frontend.SecurityCheck;
import com.commafeed.frontend.model.UserModel;
import com.commafeed.frontend.model.request.FeedMergeRequest;
import com.commafeed.frontend.model.request.IDRequest;
import com.google.api.client.util.Lists;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.wordnik.swagger.annotations.Api;
@@ -268,17 +269,36 @@ public class AdminREST extends AbstractResourceREST {
return Response.ok(map).build();
}
@Path("/cleanup/findDuplicateFeeds")
@GET
@ApiOperation(value = "Find duplicate feeds")
public Response findDuplicateFeeds(@QueryParam("page") int page,
		@QueryParam("limit") int limit) {
	// Page through duplicate groups: page N starts at row N * limit.
	int offset = page * limit;
	List<FeedCount> duplicates = feedDAO.findDuplicates(offset, limit);
	return Response.ok(duplicates).build();
}
@Path("/cleanup/merge")
@POST
@ApiOperation(value = "Merge feeds", notes = "Merge feeds together")
public Response mergeFeeds(@ApiParam(required = true) FeedMergeRequest request) {
public Response mergeFeeds(
@ApiParam(required = true) FeedMergeRequest request) {
Feed into = feedDAO.findById(request.getIntoFeedId());
if (into == null) {
return Response.status(Status.BAD_REQUEST)
.entity("'into feed' not found").build();
}
List<Feed> feeds = Lists.newArrayList();
for (Long feedId : request.getFeedIds()) {
Feed feed = feedDAO.findById(feedId);
feeds.add(feed);
}
if (feeds.isEmpty()) {
return Response.status(Status.BAD_REQUEST)
.entity("'from feeds' empty").build();
}
cleaner.mergeFeeds(into, feeds);
return Response.ok().build();