Remove duplicate overview from TMDB detail hero; fix Wikipedia edit text and entity decoding
This commit is contained in:
+28
-5
@@ -116,18 +116,20 @@ export async function wikipediaSection(
|
|||||||
*/
|
*/
|
||||||
function stripWikiHtml(raw: string): string {
|
function stripWikiHtml(raw: string): string {
|
||||||
if (!raw) return ''
|
if (!raw) return ''
|
||||||
return raw
|
let text = raw
|
||||||
// 1. Drop entirely-non-prose blocks. Each gets matched as a unit
|
// 1. Drop entirely-non-prose blocks. Each gets matched as a unit
|
||||||
// so their text content (heading words, edit-link text, etc.)
|
// so their text content (heading words, edit-link text, etc.)
|
||||||
// disappears with the wrapping tags.
|
// disappears with the wrapping tags.
|
||||||
.replace(/<style[\s\S]*?<\/style>/gi, '')
|
.replace(/<style[\s\S]*?<\/style>/gi, '')
|
||||||
.replace(/<script[\s\S]*?<\/script>/gi, '')
|
.replace(/<script[\s\S]*?<\/script>/gi, '')
|
||||||
.replace(/<sup[^>]*class="[^"]*reference[^"]*"[^>]*>[\s\S]*?<\/sup>/gi, '')
|
.replace(/<sup[^>]*class="[^"]*reference[^"]*"[^>]*>[\s\S]*?<\/sup>/gi, '')
|
||||||
.replace(/<span[^>]*class="[^"]*mw-editsection[^"]*"[^>]*>[\s\S]*?<\/span>/gi, '')
|
// Edit links: match both double and single quoted class attrs, and
|
||||||
|
// also catch the bracket wrappers that sometimes leak through.
|
||||||
|
.replace(/<span[^>]*class=["'][^"']*mw-editsection[^"']*["'][^>]*>[\s\S]*?<\/span>/gi, '')
|
||||||
.replace(/<h[1-6][^>]*>[\s\S]*?<\/h[1-6]>/gi, '\n\n')
|
.replace(/<h[1-6][^>]*>[\s\S]*?<\/h[1-6]>/gi, '\n\n')
|
||||||
.replace(/<table[\s\S]*?<\/table>/gi, '')
|
.replace(/<table[\s\S]*?<\/table>/gi, '')
|
||||||
.replace(/<figure[\s\S]*?<\/figure>/gi, '')
|
.replace(/<figure[\s\S]*?<\/figure>/gi, '')
|
||||||
.replace(/<div[^>]*class="[^"]*(?:thumb|infobox|navbox|hatnote|reflist|mw-references|gallery)[^"]*"[\s\S]*?<\/div>/gi, '')
|
.replace(/<div[^>]*class=["'][^"']*(?:thumb|infobox|navbox|hatnote|reflist|mw-references|gallery)[^"']*["'][\s\S]*?<\/div>/gi, '')
|
||||||
// 2. Preserve paragraph + line breaks before flattening tags.
|
// 2. Preserve paragraph + line breaks before flattening tags.
|
||||||
.replace(/<\/p>/gi, '\n\n')
|
.replace(/<\/p>/gi, '\n\n')
|
||||||
.replace(/<br\s*\/?>/gi, '\n')
|
.replace(/<br\s*\/?>/gi, '\n')
|
||||||
@@ -139,15 +141,36 @@ function stripWikiHtml(raw: string): string {
|
|||||||
.replace(/\[\s*\d+\s*\]/g, '')
|
.replace(/\[\s*\d+\s*\]/g, '')
|
||||||
.replace(/\[\s*[a-z]\s*\]/g, '')
|
.replace(/\[\s*[a-z]\s*\]/g, '')
|
||||||
.replace(/\[\s*(citation needed|clarification needed|when\?|who\?|why\?)\s*\]/gi, '')
|
.replace(/\[\s*(citation needed|clarification needed|when\?|who\?|why\?)\s*\]/gi, '')
|
||||||
// 5. HTML entities.
|
// 5. Stray edit markers that leaked through (e.g. standalone
|
||||||
|
// brackets or "Edit" at the start of the text).
|
||||||
|
.replace(/^\s*\[?\s*edit\s*\]?\s*/i, '')
|
||||||
|
.replace(/\n\s*\[?\s*edit\s*\]?\s*/gi, '\n')
|
||||||
|
|
||||||
|
// 6. Decode ALL HTML entities (numeric, named, hex) via DOM so we
|
||||||
|
// don't have to maintain an exhaustive regex list.
|
||||||
|
if (typeof document !== 'undefined') {
|
||||||
|
const tmp = document.createElement('textarea')
|
||||||
|
tmp.innerHTML = text
|
||||||
|
text = tmp.value
|
||||||
|
} else {
|
||||||
|
// Fallback for SSR / test environments: cover the common ones.
|
||||||
|
text = text
|
||||||
.replace(/&nbsp;/g, ' ')
|
.replace(/&nbsp;/g, ' ')
|
||||||
.replace(/&amp;/g, '&')
|
.replace(/&amp;/g, '&')
|
||||||
.replace(/&quot;/g, '"')
|
.replace(/&quot;/g, '"')
|
||||||
|
.replace(/'/g, "'")
|
||||||
.replace(/'/g, "'")
|
.replace(/'/g, "'")
|
||||||
|
.replace(/'/g, "'")
|
||||||
|
.replace(/—/g, '-')
|
||||||
|
.replace(/’/g, "'")
|
||||||
|
.replace(/…/g, '...')
|
||||||
.replace(/&lt;/g, '<')
|
.replace(/&lt;/g, '<')
|
||||||
.replace(/&gt;/g, '>')
|
.replace(/&gt;/g, '>')
|
||||||
// 6. Whitespace cleanup. Collapse runs of spaces, trim stray
|
}
|
||||||
|
|
||||||
|
// 7. Whitespace cleanup. Collapse runs of spaces, trim stray
|
||||||
// indents around newlines, normalise blank-line gaps to one.
|
// indents around newlines, normalise blank-line gaps to one.
|
||||||
|
return text
|
||||||
.replace(/[ \t]+/g, ' ')
|
.replace(/[ \t]+/g, ' ')
|
||||||
.replace(/[ \t]*\n[ \t]*/g, '\n')
|
.replace(/[ \t]*\n[ \t]*/g, '\n')
|
||||||
.replace(/\n{3,}/g, '\n\n')
|
.replace(/\n{3,}/g, '\n\n')
|
||||||
|
|||||||
@@ -286,12 +286,6 @@ export default function TmdbDetailPage({ tmdbId, kind }: Props) {
|
|||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{overview && (
|
|
||||||
<p className="text-[14px] text-white/85 leading-[1.65] mb-6 line-clamp-4 max-w-xl drop-shadow-sm">
|
|
||||||
{overview}
|
|
||||||
</p>
|
|
||||||
)}
|
|
||||||
|
|
||||||
<div className="flex items-center gap-2.5 flex-wrap">
|
<div className="flex items-center gap-2.5 flex-wrap">
|
||||||
<RequestButton tmdbId={tmdbId} kind={kind} tmdbData={data as any} />
|
<RequestButton tmdbId={tmdbId} kind={kind} tmdbData={data as any} />
|
||||||
{matchedLocal && (
|
{matchedLocal && (
|
||||||
|
|||||||
Reference in New Issue
Block a user