chore: improve article auto-excerpt, allow inline code to wrap

This commit is contained in:
radishzzz 2025-04-15 16:53:29 +01:00
parent dfc30d7f85
commit 018f1c9b6c
11 changed files with 230 additions and 58 deletions

View file

@ -30,6 +30,15 @@ const EXCERPT_LENGTHS: Record<ExcerptScene, {
},
}
// Maps the HTML entities that can remain in rendered text to their literal characters.
// The entries are applied one after another in insertion order, so '&amp;' must stay
// last: decoding it earlier would turn escaped text such as '&amp;quot;' into '&quot;'
// and then into '"' on a later pass (double decoding).
const HTML_ENTITIES: Record<string, string> = {
  '&lt;': '<',
  '&gt;': '>',
  '&quot;': '"',
  '&apos;': '\'',
  '&nbsp;': ' ',
  '&amp;': '&',
}
// Generate an excerpt from Markdown content
export function generateExcerpt(
content: string,
@ -39,33 +48,41 @@ export function generateExcerpt(
if (!content)
return ''
// Remove Markdown headings
const contentWithoutHeadings = content
.replace(/^#{1,6}\s+\S.*$/gm, '')
.replace(/\n{2,}/g, '\n\n')
const length = isCJKLang(lang)
? EXCERPT_LENGTHS[scene].cjk
: EXCERPT_LENGTHS[scene].other
// Remove all HTML tags and decode HTML entities
const plainText = parser.render(content)
// Remove all HTML tags
let plainText = parser.render(contentWithoutHeadings)
.replace(/<[^>]*>/g, '')
.replace(/&lt;/g, '<')
.replace(/&gt;/g, '>')
.replace(/&amp;/g, '&')
.replace(/&quot;/g, '"')
.replace(/&apos;/g, '\'')
.replace(/&nbsp;/g, ' ')
// Decode HTML entities using the mapping table
Object.entries(HTML_ENTITIES).forEach(([entity, char]) => {
plainText = plainText.replace(new RegExp(entity, 'g'), char)
})
// Collapse every run of whitespace (including line breaks) into a single space
const normalizedText = plainText.replace(/\s+/g, ' ')
// Remove spaces after CJK punctuation marks
.replace(/([。?!:"」』])\s+/g, '$1')
const excerpt = normalizedText.slice(0, length).trim()
// Add ellipsis if text was truncated
return normalizedText.length > length ? `${excerpt}...` : excerpt
// Remove trailing punctuation from the excerpt
if (normalizedText.length > length)
return `${excerpt.replace(/\p{P}+$/u, '')}...`
return excerpt
}
// Automatically generate a description for the article
// Automatically generate the article description
export function generateDescription(
post: CollectionEntry<'posts'>,
scene: ExcerptScene,
): string {
// If the article already has a description, return it directly
// Prioritize existing description
if (post.data.description)
return post.data.description