mirror of
https://github.com/Athou/commafeed.git
synced 2026-03-21 21:37:29 +00:00
only compute rtl once by storing it in the database on fetch
This commit is contained in:
@@ -17,7 +17,6 @@ import org.netpreserve.urlcanon.ParsedUrl;
|
||||
|
||||
import com.commafeed.backend.feed.FeedEntryKeyword.Mode;
|
||||
import com.commafeed.backend.feed.parser.TextDirectionDetector;
|
||||
import com.commafeed.backend.model.FeedEntry;
|
||||
import com.commafeed.backend.model.FeedSubscription;
|
||||
import com.commafeed.frontend.model.Entry;
|
||||
|
||||
@@ -92,24 +91,18 @@ public class FeedUtils {
|
||||
return normalized;
|
||||
}
|
||||
|
||||
public static boolean isRTL(FeedEntry entry) {
|
||||
String text = entry.getContent().getContent();
|
||||
|
||||
if (StringUtils.isBlank(text)) {
|
||||
text = entry.getContent().getTitle();
|
||||
}
|
||||
|
||||
public static boolean isRTL(String title, String content) {
|
||||
String text = StringUtils.isNotBlank(content) ? content : title;
|
||||
if (StringUtils.isBlank(text)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
text = Jsoup.parse(text).text();
|
||||
if (StringUtils.isBlank(text)) {
|
||||
String stripped = Jsoup.parse(text).text();
|
||||
if (StringUtils.isBlank(stripped)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
TextDirectionDetector.Direction direction = TextDirectionDetector.detect(text);
|
||||
return direction == TextDirectionDetector.Direction.RIGHT_TO_LEFT;
|
||||
return TextDirectionDetector.detect(stripped) == TextDirectionDetector.Direction.RIGHT_TO_LEFT;
|
||||
}
|
||||
|
||||
public static String removeTrailingSlash(String url) {
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
package com.commafeed.backend.feed.parser;
|
||||
|
||||
import java.text.Bidi;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.lang3.math.NumberUtils;
|
||||
@@ -22,8 +21,8 @@ public class TextDirectionDetector {
|
||||
return Direction.LEFT_TO_RIGHT;
|
||||
}
|
||||
|
||||
AtomicLong rtl = new AtomicLong();
|
||||
AtomicLong total = new AtomicLong();
|
||||
long rtl = 0;
|
||||
long total = 0;
|
||||
for (String token : WORDS_PATTERN.split(input)) {
|
||||
// skip urls
|
||||
if (URL_PATTERN.matcher(token).matches()) {
|
||||
@@ -39,18 +38,18 @@ public class TextDirectionDetector {
|
||||
if (requiresBidi) {
|
||||
Bidi bidi = new Bidi(token, Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT);
|
||||
if (bidi.getBaseLevel() == 1) {
|
||||
rtl.incrementAndGet();
|
||||
rtl++;
|
||||
}
|
||||
}
|
||||
|
||||
total.incrementAndGet();
|
||||
total++;
|
||||
}
|
||||
|
||||
if (total.longValue() == 0) {
|
||||
if (total == 0) {
|
||||
return Direction.LEFT_TO_RIGHT;
|
||||
}
|
||||
|
||||
double ratio = rtl.doubleValue() / total.doubleValue();
|
||||
double ratio = (double) rtl / total;
|
||||
return ratio > RTL_THRESHOLD ? Direction.RIGHT_TO_LEFT : Direction.LEFT_TO_RIGHT;
|
||||
}
|
||||
|
||||
|
||||
@@ -6,8 +6,12 @@ import java.util.Set;
|
||||
import org.apache.commons.lang3.builder.EqualsBuilder;
|
||||
import org.hibernate.annotations.JdbcTypeCode;
|
||||
|
||||
import com.commafeed.backend.feed.FeedUtils;
|
||||
|
||||
import jakarta.persistence.Column;
|
||||
import jakarta.persistence.Entity;
|
||||
import jakarta.persistence.EnumType;
|
||||
import jakarta.persistence.Enumerated;
|
||||
import jakarta.persistence.Lob;
|
||||
import jakarta.persistence.OneToMany;
|
||||
import jakarta.persistence.Table;
|
||||
@@ -21,6 +25,10 @@ import lombok.Setter;
|
||||
@Setter
|
||||
public class FeedEntryContent extends AbstractModel {
|
||||
|
||||
public enum Direction {
|
||||
ltr, rtl, unknown
|
||||
}
|
||||
|
||||
@Column(length = 2048)
|
||||
private String title;
|
||||
|
||||
@@ -58,6 +66,10 @@ public class FeedEntryContent extends AbstractModel {
|
||||
@Column(length = 4096)
|
||||
private String categories;
|
||||
|
||||
@Column
|
||||
@Enumerated(EnumType.STRING)
|
||||
private Direction direction = Direction.unknown;
|
||||
|
||||
@OneToMany(mappedBy = "content")
|
||||
private Set<FeedEntry> entries;
|
||||
|
||||
@@ -79,4 +91,14 @@ public class FeedEntryContent extends AbstractModel {
|
||||
.build();
|
||||
}
|
||||
|
||||
public boolean isRTL() {
|
||||
if (direction == Direction.rtl) {
|
||||
return true;
|
||||
} else if (direction == Direction.ltr) {
|
||||
return false;
|
||||
} else {
|
||||
// detect on the fly for content that was inserted before the direction field was added
|
||||
return FeedUtils.isRTL(title, content);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -47,6 +47,8 @@ public class FeedEntryContentService {
|
||||
entryContent.setContent(cleaningService.clean(content.content(), baseUrl, false));
|
||||
entryContent.setAuthor(FeedUtils.truncate(cleaningService.clean(content.author(), baseUrl, true), 128));
|
||||
entryContent.setCategories(FeedUtils.truncate(content.categories(), 4096));
|
||||
entryContent.setDirection(
|
||||
FeedUtils.isRTL(content.title(), content.content()) ? FeedEntryContent.Direction.rtl : FeedEntryContent.Direction.ltr);
|
||||
|
||||
Enclosure enclosure = content.enclosure();
|
||||
if (enclosure != null) {
|
||||
|
||||
@@ -128,7 +128,7 @@ public class Entry implements Serializable {
|
||||
entry.setTags(status.getTags().stream().map(FeedEntryTag::getName).toList());
|
||||
|
||||
if (content != null) {
|
||||
entry.setRtl(FeedUtils.isRTL(feedEntry));
|
||||
entry.setRtl(content.isRTL());
|
||||
entry.setTitle(content.getTitle());
|
||||
entry.setContent(proxyImages ? FeedUtils.proxyImages(content.getContent()) : content.getContent());
|
||||
entry.setAuthor(content.getAuthor());
|
||||
|
||||
@@ -10,4 +10,10 @@
|
||||
</column>
|
||||
</addColumn>
|
||||
</changeSet>
|
||||
|
||||
<changeSet id="content-direction" author="athou">
|
||||
<addColumn tableName="FEEDENTRYCONTENTS">
|
||||
<column name="direction" type="varchar(16)" />
|
||||
</addColumn>
|
||||
</changeSet>
|
||||
</databaseChangeLog>
|
||||
|
||||
Reference in New Issue
Block a user