diff --git a/src/main/java/run/halo/app/model/entity/BasePost.java b/src/main/java/run/halo/app/model/entity/BasePost.java index 94a480fc25..de485fe498 100644 --- a/src/main/java/run/halo/app/model/entity/BasePost.java +++ b/src/main/java/run/halo/app/model/entity/BasePost.java @@ -143,7 +143,7 @@ public class BasePost extends BaseEntity { private Integer topPriority; /** - * Likes + * Likes. */ @Column(name = "likes") @ColumnDefault("0") @@ -169,7 +169,7 @@ public class BasePost extends BaseEntity { private String metaDescription; /** - * Content word count + * Content word count. */ @Column(name = "word_count") @ColumnDefault("0") @@ -188,6 +188,7 @@ public class BasePost extends BaseEntity { @Transient private PatchedContent content; + @Override public void prePersist() { super.prePersist(); @@ -243,6 +244,7 @@ public void prePersist() { if (version == null || version < 0) { version = 1; } + // Clear the value of the deprecated attributes this.originalContent = ""; this.formatContent = ""; diff --git a/src/main/java/run/halo/app/service/impl/BasePostServiceImpl.java b/src/main/java/run/halo/app/service/impl/BasePostServiceImpl.java index 157d13316d..5e148a2c08 100644 --- a/src/main/java/run/halo/app/service/impl/BasePostServiceImpl.java +++ b/src/main/java/run/halo/app/service/impl/BasePostServiceImpl.java @@ -64,6 +64,10 @@ public abstract class BasePostServiceImpl private static final Pattern BLANK_PATTERN = Pattern.compile("\\s"); + private static final String CHINESE_REGEX = "[^\\x00-\\xff]"; + + private static final String PUNCTUATION_REGEX = "[\\p{P}\\p{S}\\p{Z}\\s]+"; + public BasePostServiceImpl(BasePostRepository basePostRepository, OptionService optionService, ContentService contentService, @@ -301,7 +305,6 @@ public POST createOrUpdateBy(POST post) { PatchedContent postContent = post.getContent(); // word count stat post.setWordCount(htmlFormatWordCount(postContent.getContent())); - POST savedPost; // Create or update post if (ServiceUtils.isEmptyId(post.getId())) { @@ -484,7 +487,7 @@ protected void generateAndSetSummaryIfAbsent(POST } } - // CS304 issue link : https://github.com/halo-dev/halo/issues/1224 + // CS304 issue link : https://github.com/halo-dev/halo/issues/1759 /** * @param htmlContent the markdown style content @@ -498,6 +501,39 @@ public static long htmlFormatWordCount(String htmlContent) { String cleanContent = HaloUtils.cleanHtmlTag(htmlContent); + String tempString = cleanContent.replaceAll(CHINESE_REGEX, ""); + + String otherString = cleanContent.replaceAll(CHINESE_REGEX, " "); + + int chineseWordCount = cleanContent.length() - tempString.length(); + + String[] otherWords = otherString.split(PUNCTUATION_REGEX); + + int otherWordLength = otherWords.length; + + if (otherWordLength > 0 && otherWords[0].length() == 0) { + otherWordLength--; + } + + if (otherWords.length > 1 && otherWords[otherWords.length - 1].length() == 0) { + otherWordLength--; + } + + return chineseWordCount + otherWordLength; + } + + /** + * @param htmlContent the markdown style content + * @return character count except space and line separator + */ + + public static long htmlFormatCharacterCount(String htmlContent) { + if (htmlContent == null) { + return 0; + } + + String cleanContent = HaloUtils.cleanHtmlTag(htmlContent); + Matcher matcher = BLANK_PATTERN.matcher(cleanContent); int count = 0; diff --git a/src/test/java/run/halo/app/service/impl/HTMLWordCountTest.java b/src/test/java/run/halo/app/service/impl/HTMLWordCountTest.java index 6d6d7d50e5..3624663914 100644 --- a/src/test/java/run/halo/app/service/impl/HTMLWordCountTest.java +++ b/src/test/java/run/halo/app/service/impl/HTMLWordCountTest.java @@ -59,6 +59,20 @@ public class HTMLWordCountTest { String emptyString = ""; + String englishString = "I have a red apple"; + + String hybridString = "I have a red apple哈哈"; + + + String complexText2 = "Hi,Jessica!这个project的schedule有些问题。"; + + String complexText3 = "The company had a meeting yesterday。Why did you ask for leave?"; + + String complexText4 = "这是一个句子,但是只有中文。"; + + String complexText5 = + "The wind and the moon are all beautiful, love and hate are all romantic."; + @Test void pictureTest() { assertEquals("图片字数测试".length(), @@ -128,4 +142,42 @@ void emptyTest() { assertEquals(0, BasePostServiceImpl.htmlFormatWordCount(MarkdownUtils.renderHtml(emptyString))); } -} + + @Test + void englishTest() { + assertEquals(5, + BasePostServiceImpl.htmlFormatWordCount(MarkdownUtils.renderHtml(englishString))); + } + + @Test + void hybridTest() { + assertEquals(7, + BasePostServiceImpl.htmlFormatWordCount(MarkdownUtils.renderHtml(hybridString))); + } + + @Test + void englishCharacterTest() { + assertEquals(14, + BasePostServiceImpl.htmlFormatCharacterCount(MarkdownUtils.renderHtml(englishString))); + } + + @Test + void hybridCharacterTest() { + assertEquals(16, + BasePostServiceImpl.htmlFormatCharacterCount(MarkdownUtils.renderHtml(hybridString))); + } + + @Test + void moreComplexTest() { + assertEquals(14, + BasePostServiceImpl.htmlFormatWordCount(MarkdownUtils.renderHtml(complexText2))); + assertEquals(14, + BasePostServiceImpl.htmlFormatWordCount(MarkdownUtils.renderHtml(complexText3))); + assertEquals(14, + BasePostServiceImpl.htmlFormatWordCount(MarkdownUtils.renderHtml(complexText4))); + assertEquals(14, + BasePostServiceImpl.htmlFormatWordCount(MarkdownUtils.renderHtml(complexText5))); + } + + +} \ No newline at end of file