remove duplicate overview from TMDB detail hero, fix Wikipedia edit text and entity decoding

This commit is contained in:
2026-04-18 09:15:57 +03:00
parent 48cef31467
commit a7e36478a0
2 changed files with 35 additions and 18 deletions
+28 -5
View File
@@ -116,18 +116,20 @@ export async function wikipediaSection(
*/ */
function stripWikiHtml(raw: string): string { function stripWikiHtml(raw: string): string {
if (!raw) return '' if (!raw) return ''
return raw let text = raw
// 1. Drop entirely-non-prose blocks. Each gets matched as a unit // 1. Drop entirely-non-prose blocks. Each gets matched as a unit
// so their text content (heading words, edit-link text, etc.) // so their text content (heading words, edit-link text, etc.)
// disappears with the wrapping tags. // disappears with the wrapping tags.
.replace(/<style[\s\S]*?<\/style>/gi, '') .replace(/<style[\s\S]*?<\/style>/gi, '')
.replace(/<script[\s\S]*?<\/script>/gi, '') .replace(/<script[\s\S]*?<\/script>/gi, '')
.replace(/<sup[^>]*class="[^"]*reference[^"]*"[^>]*>[\s\S]*?<\/sup>/gi, '') .replace(/<sup[^>]*class="[^"]*reference[^"]*"[^>]*>[\s\S]*?<\/sup>/gi, '')
.replace(/<span[^>]*class="[^"]*mw-editsection[^"]*"[^>]*>[\s\S]*?<\/span>/gi, '') // Edit links: match both double and single quoted class attrs, and
// also catch the bracket wrappers that sometimes leak through.
.replace(/<span[^>]*class=["'][^"']*mw-editsection[^"']*["'][^>]*>[\s\S]*?<\/span>/gi, '')
.replace(/<h[1-6][^>]*>[\s\S]*?<\/h[1-6]>/gi, '\n\n') .replace(/<h[1-6][^>]*>[\s\S]*?<\/h[1-6]>/gi, '\n\n')
.replace(/<table[\s\S]*?<\/table>/gi, '') .replace(/<table[\s\S]*?<\/table>/gi, '')
.replace(/<figure[\s\S]*?<\/figure>/gi, '') .replace(/<figure[\s\S]*?<\/figure>/gi, '')
.replace(/<div[^>]*class="[^"]*(?:thumb|infobox|navbox|hatnote|reflist|mw-references|gallery)[^"]*"[\s\S]*?<\/div>/gi, '') .replace(/<div[^>]*class=["'][^"']*(?:thumb|infobox|navbox|hatnote|reflist|mw-references|gallery)[^"']*["'][\s\S]*?<\/div>/gi, '')
// 2. Preserve paragraph + line breaks before flattening tags. // 2. Preserve paragraph + line breaks before flattening tags.
.replace(/<\/p>/gi, '\n\n') .replace(/<\/p>/gi, '\n\n')
.replace(/<br\s*\/?>/gi, '\n') .replace(/<br\s*\/?>/gi, '\n')
@@ -139,15 +141,36 @@ function stripWikiHtml(raw: string): string {
.replace(/\[\s*\d+\s*\]/g, '') .replace(/\[\s*\d+\s*\]/g, '')
.replace(/\[\s*[a-z]\s*\]/g, '') .replace(/\[\s*[a-z]\s*\]/g, '')
.replace(/\[\s*(citation needed|clarification needed|when\?|who\?|why\?)\s*\]/gi, '') .replace(/\[\s*(citation needed|clarification needed|when\?|who\?|why\?)\s*\]/gi, '')
// 5. HTML entities. // 5. Stray edit markers that leaked through (e.g. standalone
// brackets or "Edit" at the start of the text).
.replace(/^\s*\[?\s*edit\s*\]?\s*/i, '')
.replace(/\n\s*\[?\s*edit\s*\]?\s*/gi, '\n')
// 6. Decode ALL HTML entities (numeric, named, hex) via DOM so we
// don't have to maintain an exhaustive regex list.
if (typeof document !== 'undefined') {
const tmp = document.createElement('textarea')
tmp.innerHTML = text
text = tmp.value
} else {
// Fallback for SSR / test environments: cover the common ones.
text = text
.replace(/&nbsp;/g, ' ') .replace(/&nbsp;/g, ' ')
.replace(/&amp;/g, '&') .replace(/&amp;/g, '&')
.replace(/&quot;/g, '"') .replace(/&quot;/g, '"')
.replace(/&apos;/g, "'")
.replace(/&#39;/g, "'") .replace(/&#39;/g, "'")
.replace(/&#039;/g, "'")
.replace(/&#8212;/g, '-')
.replace(/&#8217;/g, "'")
.replace(/&#8230;/g, '...')
.replace(/&lt;/g, '<') .replace(/&lt;/g, '<')
.replace(/&gt;/g, '>') .replace(/&gt;/g, '>')
// 6. Whitespace cleanup. Collapse runs of spaces, trim stray }
// 7. Whitespace cleanup. Collapse runs of spaces, trim stray
// indents around newlines, normalise blank-line gaps to one. // indents around newlines, normalise blank-line gaps to one.
return text
.replace(/[ \t]+/g, ' ') .replace(/[ \t]+/g, ' ')
.replace(/[ \t]*\n[ \t]*/g, '\n') .replace(/[ \t]*\n[ \t]*/g, '\n')
.replace(/\n{3,}/g, '\n\n') .replace(/\n{3,}/g, '\n\n')
-6
View File
@@ -286,12 +286,6 @@ export default function TmdbDetailPage({ tmdbId, kind }: Props) {
</div> </div>
)} )}
{overview && (
<p className="text-[14px] text-white/85 leading-[1.65] mb-6 line-clamp-4 max-w-xl drop-shadow-sm">
{overview}
</p>
)}
<div className="flex items-center gap-2.5 flex-wrap"> <div className="flex items-center gap-2.5 flex-wrap">
<RequestButton tmdbId={tmdbId} kind={kind} tmdbData={data as any} /> <RequestButton tmdbId={tmdbId} kind={kind} tmdbData={data as any} />
{matchedLocal && ( {matchedLocal && (