mirror of
https://github.com/Athou/commafeed.git
synced 2026-03-21 21:37:29 +00:00
only compute rtl once by storing it in the database on fetch
This commit is contained in:
@@ -17,7 +17,6 @@ import org.netpreserve.urlcanon.ParsedUrl;
|
|||||||
|
|
||||||
import com.commafeed.backend.feed.FeedEntryKeyword.Mode;
|
import com.commafeed.backend.feed.FeedEntryKeyword.Mode;
|
||||||
import com.commafeed.backend.feed.parser.TextDirectionDetector;
|
import com.commafeed.backend.feed.parser.TextDirectionDetector;
|
||||||
import com.commafeed.backend.model.FeedEntry;
|
|
||||||
import com.commafeed.backend.model.FeedSubscription;
|
import com.commafeed.backend.model.FeedSubscription;
|
||||||
import com.commafeed.frontend.model.Entry;
|
import com.commafeed.frontend.model.Entry;
|
||||||
|
|
||||||
@@ -92,24 +91,18 @@ public class FeedUtils {
|
|||||||
return normalized;
|
return normalized;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isRTL(FeedEntry entry) {
|
public static boolean isRTL(String title, String content) {
|
||||||
String text = entry.getContent().getContent();
|
String text = StringUtils.isNotBlank(content) ? content : title;
|
||||||
|
|
||||||
if (StringUtils.isBlank(text)) {
|
|
||||||
text = entry.getContent().getTitle();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (StringUtils.isBlank(text)) {
|
if (StringUtils.isBlank(text)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
text = Jsoup.parse(text).text();
|
String stripped = Jsoup.parse(text).text();
|
||||||
if (StringUtils.isBlank(text)) {
|
if (StringUtils.isBlank(stripped)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
TextDirectionDetector.Direction direction = TextDirectionDetector.detect(text);
|
return TextDirectionDetector.detect(stripped) == TextDirectionDetector.Direction.RIGHT_TO_LEFT;
|
||||||
return direction == TextDirectionDetector.Direction.RIGHT_TO_LEFT;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String removeTrailingSlash(String url) {
|
public static String removeTrailingSlash(String url) {
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
package com.commafeed.backend.feed.parser;
|
package com.commafeed.backend.feed.parser;
|
||||||
|
|
||||||
import java.text.Bidi;
|
import java.text.Bidi;
|
||||||
import java.util.concurrent.atomic.AtomicLong;
|
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.apache.commons.lang3.math.NumberUtils;
|
import org.apache.commons.lang3.math.NumberUtils;
|
||||||
@@ -22,8 +21,8 @@ public class TextDirectionDetector {
|
|||||||
return Direction.LEFT_TO_RIGHT;
|
return Direction.LEFT_TO_RIGHT;
|
||||||
}
|
}
|
||||||
|
|
||||||
AtomicLong rtl = new AtomicLong();
|
long rtl = 0;
|
||||||
AtomicLong total = new AtomicLong();
|
long total = 0;
|
||||||
for (String token : WORDS_PATTERN.split(input)) {
|
for (String token : WORDS_PATTERN.split(input)) {
|
||||||
// skip urls
|
// skip urls
|
||||||
if (URL_PATTERN.matcher(token).matches()) {
|
if (URL_PATTERN.matcher(token).matches()) {
|
||||||
@@ -39,18 +38,18 @@ public class TextDirectionDetector {
|
|||||||
if (requiresBidi) {
|
if (requiresBidi) {
|
||||||
Bidi bidi = new Bidi(token, Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT);
|
Bidi bidi = new Bidi(token, Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT);
|
||||||
if (bidi.getBaseLevel() == 1) {
|
if (bidi.getBaseLevel() == 1) {
|
||||||
rtl.incrementAndGet();
|
rtl++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
total.incrementAndGet();
|
total++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (total.longValue() == 0) {
|
if (total == 0) {
|
||||||
return Direction.LEFT_TO_RIGHT;
|
return Direction.LEFT_TO_RIGHT;
|
||||||
}
|
}
|
||||||
|
|
||||||
double ratio = rtl.doubleValue() / total.doubleValue();
|
double ratio = (double) rtl / total;
|
||||||
return ratio > RTL_THRESHOLD ? Direction.RIGHT_TO_LEFT : Direction.LEFT_TO_RIGHT;
|
return ratio > RTL_THRESHOLD ? Direction.RIGHT_TO_LEFT : Direction.LEFT_TO_RIGHT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -6,8 +6,12 @@ import java.util.Set;
|
|||||||
import org.apache.commons.lang3.builder.EqualsBuilder;
|
import org.apache.commons.lang3.builder.EqualsBuilder;
|
||||||
import org.hibernate.annotations.JdbcTypeCode;
|
import org.hibernate.annotations.JdbcTypeCode;
|
||||||
|
|
||||||
|
import com.commafeed.backend.feed.FeedUtils;
|
||||||
|
|
||||||
import jakarta.persistence.Column;
|
import jakarta.persistence.Column;
|
||||||
import jakarta.persistence.Entity;
|
import jakarta.persistence.Entity;
|
||||||
|
import jakarta.persistence.EnumType;
|
||||||
|
import jakarta.persistence.Enumerated;
|
||||||
import jakarta.persistence.Lob;
|
import jakarta.persistence.Lob;
|
||||||
import jakarta.persistence.OneToMany;
|
import jakarta.persistence.OneToMany;
|
||||||
import jakarta.persistence.Table;
|
import jakarta.persistence.Table;
|
||||||
@@ -21,6 +25,10 @@ import lombok.Setter;
|
|||||||
@Setter
|
@Setter
|
||||||
public class FeedEntryContent extends AbstractModel {
|
public class FeedEntryContent extends AbstractModel {
|
||||||
|
|
||||||
|
public enum Direction {
|
||||||
|
ltr, rtl, unknown
|
||||||
|
}
|
||||||
|
|
||||||
@Column(length = 2048)
|
@Column(length = 2048)
|
||||||
private String title;
|
private String title;
|
||||||
|
|
||||||
@@ -58,6 +66,10 @@ public class FeedEntryContent extends AbstractModel {
|
|||||||
@Column(length = 4096)
|
@Column(length = 4096)
|
||||||
private String categories;
|
private String categories;
|
||||||
|
|
||||||
|
@Column
|
||||||
|
@Enumerated(EnumType.STRING)
|
||||||
|
private Direction direction = Direction.unknown;
|
||||||
|
|
||||||
@OneToMany(mappedBy = "content")
|
@OneToMany(mappedBy = "content")
|
||||||
private Set<FeedEntry> entries;
|
private Set<FeedEntry> entries;
|
||||||
|
|
||||||
@@ -79,4 +91,14 @@ public class FeedEntryContent extends AbstractModel {
|
|||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean isRTL() {
|
||||||
|
if (direction == Direction.rtl) {
|
||||||
|
return true;
|
||||||
|
} else if (direction == Direction.ltr) {
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
// detect on the fly for content that was inserted before the direction field was added
|
||||||
|
return FeedUtils.isRTL(title, content);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -47,6 +47,8 @@ public class FeedEntryContentService {
|
|||||||
entryContent.setContent(cleaningService.clean(content.content(), baseUrl, false));
|
entryContent.setContent(cleaningService.clean(content.content(), baseUrl, false));
|
||||||
entryContent.setAuthor(FeedUtils.truncate(cleaningService.clean(content.author(), baseUrl, true), 128));
|
entryContent.setAuthor(FeedUtils.truncate(cleaningService.clean(content.author(), baseUrl, true), 128));
|
||||||
entryContent.setCategories(FeedUtils.truncate(content.categories(), 4096));
|
entryContent.setCategories(FeedUtils.truncate(content.categories(), 4096));
|
||||||
|
entryContent.setDirection(
|
||||||
|
FeedUtils.isRTL(content.title(), content.content()) ? FeedEntryContent.Direction.rtl : FeedEntryContent.Direction.ltr);
|
||||||
|
|
||||||
Enclosure enclosure = content.enclosure();
|
Enclosure enclosure = content.enclosure();
|
||||||
if (enclosure != null) {
|
if (enclosure != null) {
|
||||||
|
|||||||
@@ -128,7 +128,7 @@ public class Entry implements Serializable {
|
|||||||
entry.setTags(status.getTags().stream().map(FeedEntryTag::getName).toList());
|
entry.setTags(status.getTags().stream().map(FeedEntryTag::getName).toList());
|
||||||
|
|
||||||
if (content != null) {
|
if (content != null) {
|
||||||
entry.setRtl(FeedUtils.isRTL(feedEntry));
|
entry.setRtl(content.isRTL());
|
||||||
entry.setTitle(content.getTitle());
|
entry.setTitle(content.getTitle());
|
||||||
entry.setContent(proxyImages ? FeedUtils.proxyImages(content.getContent()) : content.getContent());
|
entry.setContent(proxyImages ? FeedUtils.proxyImages(content.getContent()) : content.getContent());
|
||||||
entry.setAuthor(content.getAuthor());
|
entry.setAuthor(content.getAuthor());
|
||||||
|
|||||||
@@ -10,4 +10,10 @@
|
|||||||
</column>
|
</column>
|
||||||
</addColumn>
|
</addColumn>
|
||||||
</changeSet>
|
</changeSet>
|
||||||
|
|
||||||
|
<changeSet id="content-direction" author="athou">
|
||||||
|
<addColumn tableName="FEEDENTRYCONTENTS">
|
||||||
|
<column name="direction" type="varchar(16)" />
|
||||||
|
</addColumn>
|
||||||
|
</changeSet>
|
||||||
</databaseChangeLog>
|
</databaseChangeLog>
|
||||||
|
|||||||
Reference in New Issue
Block a user