import type { CollectionEntry } from 'astro:content' import MarkdownIt from 'markdown-it' import { defaultLocale } from '@/config' type ExcerptScene = 'list' | 'meta' | 'og' | 'feed' const parser = new MarkdownIt() const isCJKLang = (lang: string) => ['zh', 'zh-tw', 'ja'].includes(lang) // Excerpt length in different scenarios const EXCERPT_LENGTHS: Record = { list: { cjk: 120, other: 240, }, meta: { cjk: 120, other: 240, }, og: { cjk: 70, other: 140, }, feed: { cjk: 70, other: 140, }, } const HTML_ENTITIES: Record = { '<': '<', '>': '>', '&': '&', '"': '"', ''': '\'', ' ': ' ', } // Generate an excerpt from Markdown content export function generateExcerpt( content: string, scene: ExcerptScene, lang: string, ): string { if (!content) return '' // Remove Markdown headings const contentWithoutHeadings = content .replace(/^#{1,6}\s+\S.*$/gm, '') .replace(/\n{2,}/g, '\n\n') const length = isCJKLang(lang) ? EXCERPT_LENGTHS[scene].cjk : EXCERPT_LENGTHS[scene].other // Remove all HTML tags let plainText = parser.render(contentWithoutHeadings) .replace(/<[^>]*>/g, '') // Decode HTML entities using the mapping table Object.entries(HTML_ENTITIES).forEach(([entity, char]) => { plainText = plainText.replace(new RegExp(entity, 'g'), char) }) // Replace line breaks with spaces const normalizedText = plainText.replace(/\s+/g, ' ') // Remove spaces after CJK punctuation marks .replace(/([。?!:"」』])\s+/g, '$1') const excerpt = normalizedText.slice(0, length).trim() // Remove trailing punctuation from the excerpt if (normalizedText.length > length) return `${excerpt.replace(/\p{P}+$/u, '')}...` return excerpt } // Automatically Generate article description export function generateDescription( post: CollectionEntry<'posts'>, scene: ExcerptScene, ): string { // Prioritize existing description if (post.data.description) return post.data.description const lang = (!post.data.lang || post.data.lang === '') ? defaultLocale : post.data.lang return generateExcerpt(post.body || '', scene, lang) }