diff --git a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedUtils.java b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedUtils.java index 7c23e734..a6441402 100644 --- a/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedUtils.java +++ b/commafeed-server/src/main/java/com/commafeed/backend/feed/FeedUtils.java @@ -17,7 +17,6 @@ import org.netpreserve.urlcanon.ParsedUrl; import com.commafeed.backend.feed.FeedEntryKeyword.Mode; import com.commafeed.backend.feed.parser.TextDirectionDetector; -import com.commafeed.backend.model.FeedEntry; import com.commafeed.backend.model.FeedSubscription; import com.commafeed.frontend.model.Entry; @@ -92,24 +91,18 @@ public class FeedUtils { return normalized; } - public static boolean isRTL(FeedEntry entry) { - String text = entry.getContent().getContent(); - - if (StringUtils.isBlank(text)) { - text = entry.getContent().getTitle(); - } - + public static boolean isRTL(String title, String content) { + String text = StringUtils.isNotBlank(content) ? content : title; if (StringUtils.isBlank(text)) { return false; } - text = Jsoup.parse(text).text(); - if (StringUtils.isBlank(text)) { + String stripped = Jsoup.parse(text).text(); + if (StringUtils.isBlank(stripped)) { return false; } - TextDirectionDetector.Direction direction = TextDirectionDetector.detect(text); - return direction == TextDirectionDetector.Direction.RIGHT_TO_LEFT; + return TextDirectionDetector.detect(stripped) == TextDirectionDetector.Direction.RIGHT_TO_LEFT; } public static String removeTrailingSlash(String url) { diff --git a/commafeed-server/src/main/java/com/commafeed/backend/feed/parser/TextDirectionDetector.java b/commafeed-server/src/main/java/com/commafeed/backend/feed/parser/TextDirectionDetector.java index b2a482a9..693dbda3 100644 --- a/commafeed-server/src/main/java/com/commafeed/backend/feed/parser/TextDirectionDetector.java +++ b/commafeed-server/src/main/java/com/commafeed/backend/feed/parser/TextDirectionDetector.java @@ -1,7 +1,6 @@ package com.commafeed.backend.feed.parser; import java.text.Bidi; -import java.util.concurrent.atomic.AtomicLong; import java.util.regex.Pattern; import org.apache.commons.lang3.math.NumberUtils; @@ -22,8 +21,8 @@ public class TextDirectionDetector { return Direction.LEFT_TO_RIGHT; } - AtomicLong rtl = new AtomicLong(); - AtomicLong total = new AtomicLong(); + long rtl = 0; + long total = 0; for (String token : WORDS_PATTERN.split(input)) { // skip urls if (URL_PATTERN.matcher(token).matches()) { @@ -39,18 +38,18 @@ public class TextDirectionDetector { if (requiresBidi) { Bidi bidi = new Bidi(token, Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT); if (bidi.getBaseLevel() == 1) { - rtl.incrementAndGet(); + rtl++; } } - total.incrementAndGet(); + total++; } - if (total.longValue() == 0) { + if (total == 0) { return Direction.LEFT_TO_RIGHT; } - double ratio = rtl.doubleValue() / total.doubleValue(); + double ratio = (double) rtl / total; return ratio > RTL_THRESHOLD ? Direction.RIGHT_TO_LEFT : Direction.LEFT_TO_RIGHT; } diff --git a/commafeed-server/src/main/java/com/commafeed/backend/model/FeedEntryContent.java b/commafeed-server/src/main/java/com/commafeed/backend/model/FeedEntryContent.java index e03e9226..e28611c1 100644 --- a/commafeed-server/src/main/java/com/commafeed/backend/model/FeedEntryContent.java +++ b/commafeed-server/src/main/java/com/commafeed/backend/model/FeedEntryContent.java @@ -6,8 +6,12 @@ import java.util.Set; import org.apache.commons.lang3.builder.EqualsBuilder; import org.hibernate.annotations.JdbcTypeCode; +import com.commafeed.backend.feed.FeedUtils; + import jakarta.persistence.Column; import jakarta.persistence.Entity; +import jakarta.persistence.EnumType; +import jakarta.persistence.Enumerated; import jakarta.persistence.Lob; import jakarta.persistence.OneToMany; import jakarta.persistence.Table; @@ -21,6 +25,10 @@ import lombok.Setter; @Setter public class FeedEntryContent extends AbstractModel { + public enum Direction { + ltr, rtl, unknown + } + @Column(length = 2048) private String title; @@ -58,6 +66,10 @@ public class FeedEntryContent extends AbstractModel { @Column(length = 4096) private String categories; + @Column + @Enumerated(EnumType.STRING) + private Direction direction = Direction.unknown; + @OneToMany(mappedBy = "content") private Set entries; @@ -79,4 +91,14 @@ public class FeedEntryContent extends AbstractModel { .build(); } + public boolean isRTL() { + if (direction == Direction.rtl) { + return true; + } else if (direction == Direction.ltr) { + return false; + } else { + // detect on the fly for content that was inserted before the direction field was added + return FeedUtils.isRTL(title, content); + } + } } diff --git a/commafeed-server/src/main/java/com/commafeed/backend/service/FeedEntryContentService.java b/commafeed-server/src/main/java/com/commafeed/backend/service/FeedEntryContentService.java index 879f42f3..94c1f11e 100644 --- a/commafeed-server/src/main/java/com/commafeed/backend/service/FeedEntryContentService.java +++ b/commafeed-server/src/main/java/com/commafeed/backend/service/FeedEntryContentService.java @@ -47,6 +47,8 @@ public class FeedEntryContentService { entryContent.setContent(cleaningService.clean(content.content(), baseUrl, false)); entryContent.setAuthor(FeedUtils.truncate(cleaningService.clean(content.author(), baseUrl, true), 128)); entryContent.setCategories(FeedUtils.truncate(content.categories(), 4096)); + entryContent.setDirection( + FeedUtils.isRTL(content.title(), content.content()) ? FeedEntryContent.Direction.rtl : FeedEntryContent.Direction.ltr); Enclosure enclosure = content.enclosure(); if (enclosure != null) { diff --git a/commafeed-server/src/main/java/com/commafeed/frontend/model/Entry.java b/commafeed-server/src/main/java/com/commafeed/frontend/model/Entry.java index b7add91f..9fdde296 100644 --- a/commafeed-server/src/main/java/com/commafeed/frontend/model/Entry.java +++ b/commafeed-server/src/main/java/com/commafeed/frontend/model/Entry.java @@ -128,7 +128,7 @@ public class Entry implements Serializable { entry.setTags(status.getTags().stream().map(FeedEntryTag::getName).toList()); if (content != null) { - entry.setRtl(FeedUtils.isRTL(feedEntry)); + entry.setRtl(content.isRTL()); entry.setTitle(content.getTitle()); entry.setContent(proxyImages ? FeedUtils.proxyImages(content.getContent()) : content.getContent()); entry.setAuthor(content.getAuthor()); diff --git a/commafeed-server/src/main/resources/changelogs/db.changelog-5.2.xml b/commafeed-server/src/main/resources/changelogs/db.changelog-5.2.xml index 339a746b..83834ddf 100644 --- a/commafeed-server/src/main/resources/changelogs/db.changelog-5.2.xml +++ b/commafeed-server/src/main/resources/changelogs/db.changelog-5.2.xml @@ -10,4 +10,10 @@ + + + + + +