From a841c802613014dd4cd7fc41e0f86bd8f2ff13b1 Mon Sep 17 00:00:00 2001 From: Athou Date: Sun, 7 Jan 2024 15:28:48 +0100 Subject: [PATCH] simplify trie building --- .../commafeed/backend/feed/parser/FeedCleaner.java | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/commafeed-server/src/main/java/com/commafeed/backend/feed/parser/FeedCleaner.java b/commafeed-server/src/main/java/com/commafeed/backend/feed/parser/FeedCleaner.java index 6a175239..02c7d84d 100644 --- a/commafeed-server/src/main/java/com/commafeed/backend/feed/parser/FeedCleaner.java +++ b/commafeed-server/src/main/java/com/commafeed/backend/feed/parser/FeedCleaner.java @@ -4,7 +4,6 @@ import java.util.Collection; import org.ahocorasick.trie.Emit; import org.ahocorasick.trie.Trie; -import org.ahocorasick.trie.Trie.TrieBuilder; import org.apache.commons.lang3.StringUtils; import jakarta.inject.Singleton; @@ -39,19 +38,12 @@ class FeedCleaner { return sb.toString(); } + // https://stackoverflow.com/a/40836618/1885506 public String replaceHtmlEntitiesWithNumericEntities(String source) { // Create a buffer sufficiently large that re-allocations are minimized. StringBuilder sb = new StringBuilder(source.length() << 1); - TrieBuilder builder = Trie.builder(); - builder.ignoreOverlaps(); - - for (String key : HtmlEntities.HTML_ENTITIES) { - builder.addKeyword(key); - } - - Trie trie = builder.build(); - Collection emits = trie.parseText(source); + Collection emits = Trie.builder().ignoreOverlaps().addKeywords(HtmlEntities.HTML_ENTITIES).build().parseText(source); int prevIndex = 0; for (Emit emit : emits) {