fix: prevent HTML tags from being escaped in article descriptions

This commit is contained in:
radishzzz 2025-03-23 18:50:14 +00:00
parent 08438bd71a
commit cb9a212cc7
2 changed files with 13 additions and 10 deletions

View file

@@ -62,6 +62,7 @@
"abbrlink", "abbrlink",
"antfu", "antfu",
"apiflash", "apiflash",
"apos",
"Artículos", "Artículos",
"astrodotbuild", "astrodotbuild",
"astrojs", "astrojs",

View file

@@ -1,12 +1,12 @@
import type { CollectionEntry } from 'astro:content' import type { CollectionEntry } from 'astro:content'
import { defaultLocale } from '@/config' import { defaultLocale } from '@/config'
import MarkdownIt from 'markdown-it' import MarkdownIt from 'markdown-it'
import sanitizeHtml from 'sanitize-html'
const parser = new MarkdownIt()
type ExcerptScene = 'list' | 'meta' | 'og' | 'rss' type ExcerptScene = 'list' | 'meta' | 'og' | 'rss'
const parser = new MarkdownIt()
const isCJKLang = (lang: string) => ['zh', 'zh-tw', 'ja'].includes(lang)
// Excerpt length in different scenarios // Excerpt length in different scenarios
const EXCERPT_LENGTHS: Record<ExcerptScene, { const EXCERPT_LENGTHS: Record<ExcerptScene, {
cjk: number cjk: number
@@ -30,8 +30,6 @@ const EXCERPT_LENGTHS: Record<ExcerptScene, {
}, },
} }
const isCJKLang = (lang: string) => ['zh', 'zh-tw', 'ja'].includes(lang)
// Generate an excerpt from Markdown content // Generate an excerpt from Markdown content
export function generateExcerpt( export function generateExcerpt(
content: string, content: string,
@@ -45,11 +43,15 @@ export function generateExcerpt(
? EXCERPT_LENGTHS[scene].cjk ? EXCERPT_LENGTHS[scene].cjk
: EXCERPT_LENGTHS[scene].other : EXCERPT_LENGTHS[scene].other
// Convert Markdown to plain text // Remove all HTML tags and decode HTML entities
const plainText = sanitizeHtml(parser.render(content), { const plainText = parser.render(content)
allowedTags: [], .replace(/<[^>]*>/g, '')
allowedAttributes: {}, .replace(/&lt;/g, '<')
}) .replace(/&gt;/g, '>')
.replace(/&amp;/g, '&')
.replace(/&quot;/g, '"')
.replace(/&apos;/g, '\'')
.replace(/&nbsp;/g, ' ')
// Replace line breaks with spaces // Replace line breaks with spaces
const normalizedText = plainText.replace(/\s+/g, ' ') const normalizedText = plainText.replace(/\s+/g, ' ')