diff --git a/package-lock.json b/package-lock.json index fe0f06b..b440720 100644 --- a/package-lock.json +++ b/package-lock.json @@ -21,12 +21,14 @@ "marked": "12.0.0", "motion": "^12.29.2", "react": "^19.2.4", - "react-dom": "^19.2.4" + "react-dom": "^19.2.4", + "turndown": "^7.2.2" }, "devDependencies": { "@tailwindcss/postcss": "^4.1.18", "@tauri-apps/cli": "^2.9.6", "@types/node": "^22.14.0", + "@types/turndown": "^5.0.6", "@vitejs/plugin-react": "^5.0.0", "autoprefixer": "^10.4.23", "postcss": "^8.5.6", @@ -822,6 +824,12 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@mixmark-io/domino": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@mixmark-io/domino/-/domino-2.2.0.tgz", + "integrity": "sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw==", + "license": "BSD-2-Clause" + }, "node_modules/@rolldown/pluginutils": { "version": "1.0.0-beta.53", "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-beta.53.tgz", @@ -1802,6 +1810,13 @@ "undici-types": "~6.21.0" } }, + "node_modules/@types/turndown": { + "version": "5.0.6", + "resolved": "https://registry.npmjs.org/@types/turndown/-/turndown-5.0.6.tgz", + "integrity": "sha512-ru00MoyeeouE5BX4gRL+6m/BsDfbRayOskWqUvh7CLGW+UXxHQItqALa38kKnOiZPqJrtzJUgAC2+F0rL1S4Pg==", + "dev": true, + "license": "MIT" + }, "node_modules/@vitejs/plugin-react": { "version": "5.1.2", "resolved": "https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-5.1.2.tgz", @@ -2890,6 +2905,15 @@ "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", "license": "0BSD" }, + "node_modules/turndown": { + "version": "7.2.2", + "resolved": "https://registry.npmjs.org/turndown/-/turndown-7.2.2.tgz", + "integrity": "sha512-1F7db8BiExOKxjSMU2b7if62D/XOyQyZbPKq/nUwopfgnHlqXHqQ0lvfUTeUIr1lZJzOPFn43dODyMSIfvWRKQ==", + "license": "MIT", + "dependencies": { + "@mixmark-io/domino": "^2.2.0" + } + }, "node_modules/typescript": { "version": "5.8.3", "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.8.3.tgz", diff --git a/package.json b/package.json index 2c4098d..7bc702d 100644 --- a/package.json +++ b/package.json @@ -18,21 +18,23 @@ "@tauri-apps/plugin-dialog": "^2.0.0", "@tauri-apps/plugin-fs": "^2.0.0", "@tauri-apps/plugin-http": "^2.5.6", + "@tauri-apps/plugin-opener": "^2.2.6", "@tauri-apps/plugin-shell": "^2.3.4", "@tauri-apps/plugin-store": "^2.4.2", "@tauri-apps/plugin-window-state": "^2.4.1", - "@tauri-apps/plugin-opener": "^2.2.6", "docx": "^8.5.0", "lucide-react": "^0.563.0", "marked": "12.0.0", "motion": "^12.29.2", "react": "^19.2.4", - "react-dom": "^19.2.4" + "react-dom": "^19.2.4", + "turndown": "^7.2.2" }, "devDependencies": { "@tailwindcss/postcss": "^4.1.18", "@tauri-apps/cli": "^2.9.6", "@types/node": "^22.14.0", + "@types/turndown": "^5.0.6", "@vitejs/plugin-react": "^5.0.0", "autoprefixer": "^10.4.23", "postcss": "^8.5.6", diff --git a/src-tauri/capabilities/default.json b/src-tauri/capabilities/default.json index 5797308..5e7e099 100644 --- a/src-tauri/capabilities/default.json +++ b/src-tauri/capabilities/default.json @@ -27,25 +27,23 @@ "fs:allow-exe-write", "store:default", "window-state:default", - "http:default", - "http:allow-fetch", "shell:default", "shell:allow-open", "opener:default", { - "identifier": "http:allow-fetch", + "identifier": "http:default", "allow": [ { - "url": "https://fonts.google.com/*" + "url": "https://*" }, { - "url": "https://github.com/*" + "url": "http://*" }, { - "url": "https://*.githubusercontent.com/*" + "url": "https://*:*" }, { - "url": "https://fonts.googleapis.com/*" + "url": "http://*:*" } ] }, diff --git a/src-tauri/tauri.conf.json b/src-tauri/tauri.conf.json index 467e0f5..3e6b493 100644 --- a/src-tauri/tauri.conf.json +++ b/src-tauri/tauri.conf.json @@ -28,7 +28,7 @@ } ], "security": { - "csp": "default-src 'self'; connect-src 'self' ipc: http://ipc.localhost https://fonts.googleapis.com https://fonts.gstatic.com https://github.com https://raw.githubusercontent.com https://fonts.google.com; font-src 'self' https://fonts.gstatic.com data:; style-src 'self' 'unsafe-inline' data: blob: https://fonts.googleapis.com; img-src 'self' data: blob:; script-src 'self' 'unsafe-inline'; frame-src 'self' blob: about:;", + "csp": "default-src 'self'; connect-src 'self' ipc: http://ipc.localhost https: http:; font-src 'self' https://fonts.gstatic.com data:; style-src 'self' 'unsafe-inline' data: blob: https://fonts.googleapis.com; img-src 'self' data: blob: https: http:; script-src 'self' 'unsafe-inline'; frame-src 'self' blob: about:;", "dangerousDisableAssetCspModification": true } }, diff --git a/src/App.tsx b/src/App.tsx index b5259d0..fd6ef18 100644 --- a/src/App.tsx +++ b/src/App.tsx @@ -10,7 +10,9 @@ import { useTemplates } from './hooks/useTemplates'; import { useDialog } from './hooks/useDialog'; // @ts-ignore import { parse } from 'marked'; -import { Sparkles, Loader2, FileType, Keyboard, X, RefreshCw } from 'lucide-react'; +import { Sparkles, Loader2, FileType, Keyboard, X, RefreshCw, AlertCircle } from 'lucide-react'; +import { detectContentType } from './utils/contentDetector'; +import { htmlToMarkdown } from './utils/htmlToMarkdown'; import { useKeyboardNavigation } from './hooks/useKeyboardNavigation'; @@ -92,6 +94,7 @@ const App: React.FC = () => { const [error, setError] = useState(null); const [showShortcuts, setShowShortcuts] = useState(false); const [statusMessage, setStatusMessage] = useState(''); + const [uploadError, setUploadError] = useState(null); const { uiZoom, setUiZoom, isLoaded } = useSettings(); const { templates, categories, isLoading: templatesLoading, error: templatesError, refresh, openFolder } = useTemplates(); @@ -135,9 +138,32 @@ const App: React.FC = () => { - const handleFileLoaded = (text: string, fileName: string = '') => { - setContent(text); - setInputFileName(fileName); + const handleFileLoaded = (text: string, fullFileName: string = '') => { + setUploadError(null); + + const ext = fullFileName.includes('.') + ? fullFileName.split('.').pop()?.toLowerCase() || '' + : ''; + const displayName = fullFileName.replace(/\.[^/.]+$/, '') || fullFileName; + + const detection = detectContentType(text, ext); + + if (detection.error) { + setUploadError(detection.error); + return; + } + + let processedContent = text; + if (detection.type === 'html') { + try { + processedContent = htmlToMarkdown(text); + } catch (err) { + console.error('HTML conversion failed:', err); + } + } + + setContent(processedContent); + setInputFileName(displayName); setAppState(AppState.CONFIG); }; @@ -174,6 +200,7 @@ const App: React.FC = () => { setGeneratedHtml(''); setSelectedStyle(null); setInputFileName(''); + setUploadError(null); }; const handleBackToConfig = () => { @@ -371,6 +398,17 @@ const App: React.FC = () => { + {uploadError && ( + + + )} )} diff --git a/src/components/ExportOptionsModal.tsx b/src/components/ExportOptionsModal.tsx index d82b541..563bd4f 100644 --- a/src/components/ExportOptionsModal.tsx +++ b/src/components/ExportOptionsModal.tsx @@ -23,7 +23,7 @@ export default function ExportOptionsModal({ isOpen, onClose, onExport }: Export ref={dialogRef} onClick={handleBackdropClick} aria-labelledby="export-title" - className="fixed inset-0 z-50 p-4" + className="fixed inset-0 z-50 p-4 m-0 w-full h-full border-none bg-black/50 flex items-center justify-center" >
= ({ onFileLoaded }) => { const handleFile = (file: File) => { setError(null); - if (!file.name.endsWith('.md') && !file.name.endsWith('.txt') && !file.name.endsWith('.markdown')) { - setError('Please upload a Markdown (.md) or Text (.txt) file.'); + const ext = file.name.split('.').pop()?.toLowerCase() || ''; + if (!['md', 'txt', 'markdown', 'html', 'htm'].includes(ext)) { + setError('Please upload a Markdown (.md), HTML (.html), or Text (.txt) file.'); return; } @@ -26,9 +27,7 @@ export const FileUpload: React.FC = ({ onFileLoaded }) => { reader.onload = (e) => { const text = e.target?.result; if (typeof text === 'string') { - // Extract filename without extension - const fileName = file.name.replace(/\.[^/.]+$/, ''); - onFileLoaded(text, fileName); + onFileLoaded(text, file.name); } }; reader.onerror = () => setError('Error reading file.'); @@ -132,7 +131,7 @@ export const FileUpload: React.FC = ({ onFileLoaded }) => { type="file" className="hidden" onChange={handleChange} - accept=".md,.txt,.markdown" + accept=".md,.txt,.markdown,.html,.htm" aria-label="Select file" /> @@ -172,7 +171,7 @@ export const FileUpload: React.FC = ({ onFileLoaded }) => { animate={{ opacity: 1 }} transition={{ delay: 0.7 }} > - Markdown or Plain Text files + Markdown, HTML, or Plain Text files = ({ const [showExportModal, setShowExportModal] = useState(false); const [focusedElement, setFocusedElement] = useState<'back' | 'fonts' | 'save'>('save'); const [exportError, setExportError] = useState(null); + const [missingFonts, setMissingFonts] = useState([]); + const [showFontWarning, setShowFontWarning] = useState(false); // Get current style from templates const style = templates.find(s => s.id === selectedStyleId) || templates[0] || null; - // Extract used fonts for display + // Extract used fonts for display (heading, body, and code) const usedFonts = style ? Array.from(new Set([ style.typography?.fonts?.heading || style.wordConfig?.heading1?.font || 'Arial', - style.typography?.fonts?.body || style.wordConfig?.body?.font || 'Arial' + style.typography?.fonts?.body || style.wordConfig?.body?.font || 'Arial', + style.typography?.fonts?.code || 'JetBrains Mono' ])).filter(Boolean) : []; useKeyboardNavigation({ @@ -165,6 +168,24 @@ export const Preview: React.FC = ({ }, []); const handleSave = async () => { + // Check if required fonts are installed using Local Font Access API + let missing: string[] = []; + try { + if ('queryLocalFonts' in window) { + const localFonts = await (window as any).queryLocalFonts(); + const installed = new Set(localFonts.map((f: any) => f.family)); + missing = usedFonts.filter(font => !installed.has(font)); + } + } catch { + // Permission denied or API unavailable - skip check + } + + if (missing.length > 0) { + setMissingFonts(missing); + setShowFontWarning(true); + return; + } + setShowExportModal(true); }; @@ -440,6 +461,48 @@ export const Preview: React.FC = ({
)} + + {showFontWarning && ( +
+ +
+
+ +
+

Missing Fonts

+
+

+ The following fonts are not installed on your system: +

+
    + {missingFonts.map(font => ( +
  • - {font}
  • + ))} +
+

+ Download and install them using the font buttons at the top of the page before opening the exported document. +

+
+ + +
+
+
+ )} ); }; diff --git a/src/components/StyleSelector.tsx b/src/components/StyleSelector.tsx index 689e764..3ee2282 100644 --- a/src/components/StyleSelector.tsx +++ b/src/components/StyleSelector.tsx @@ -229,6 +229,22 @@ export const StyleSelector: React.FC = ({
${SAMPLE_CONTENT}
+ `; @@ -384,6 +400,7 @@ export const StyleSelector: React.FC = ({ role="listbox" aria-label="Typography styles" aria-activedescendant={selectedStyle ? `style-${selectedStyle}` : undefined} + className="space-y-2" > {filteredStyles.length === 0 ? (
diff --git a/src/utils/contentDetector.ts b/src/utils/contentDetector.ts new file mode 100644 index 0000000..bd7ec4b --- /dev/null +++ b/src/utils/contentDetector.ts @@ -0,0 +1,136 @@ +export type ContentType = 'html' | 'markdown' | 'text'; + +export interface DetectionResult { + type: ContentType; + error?: string; + detectedFormat?: string; +} + +const BINARY_SIGNATURES: [string, string][] = [ + ['%PDF', 'PDF document'], + ['PK', 'Word document or ZIP archive'], + ['\x89PNG', 'PNG image'], + ['\xFF\xD8', 'JPEG image'], + ['GIF8', 'GIF image'], + ['RIFF', 'media file'], + ['Rar!', 'RAR archive'], +]; + +function detectBinaryFormat(content: string): string | null { + if (content.includes('\0')) { + for (const [sig, name] of BINARY_SIGNATURES) { + if (content.startsWith(sig)) return name; + } + return 'binary file'; + } + + let nonPrintable = 0; + const len = Math.min(content.length, 512); + for (let i = 0; i < len; i++) { + const code = content.charCodeAt(i); + if (code < 32 && code !== 9 && code !== 10 && code !== 13) { + nonPrintable++; + } + } + return nonPrintable / len > 0.1 ? 'binary file' : null; +} + +function stripCodeBlocks(content: string): string { + return content.replace(/```[\s\S]*?```/g, ''); +} + +const STRUCTURAL_TAG_RE = /<(div|p|table|tr|td|th|thead|tbody|tfoot|ul|ol|li|h[1-6]|section|article|header|footer|nav|main|aside|form|blockquote|pre|dl|dt|dd|figure|figcaption|hr)\b[^>]*>/gi; +const INLINE_TAG_RE = /<(span|b|i|u|strong|em|a|img|br|code|sub|sup|small|mark|del|ins|s|abbr)\b[^>]*>/gi; + +function countStructuralTags(content: string): number { + return (content.match(STRUCTURAL_TAG_RE) || []).length; +} + +function countInlineTags(content: string): number { + return (content.match(INLINE_TAG_RE) || []).length; +} + +function countMarkdownSyntax(content: string): number { + let score = 0; + const lines = content.split('\n'); + + for (const line of lines) { + const t = line.trim(); + if (/^#{1,6}\s/.test(t)) score += 3; + if (/^[-*+]\s/.test(t)) score += 2; + if (/^\d+\.\s/.test(t)) score += 2; + if (/^>\s/.test(t)) score += 2; + if (/^(---|\*\*\*|___)$/.test(t)) score += 2; + if (/^```/.test(t)) score += 3; + } + + const sample = content.substring(0, 5000); + score += (sample.match(/\*\*[^*]+\*\*/g) || []).length; + score += (sample.match(/\[([^\]]+)\]\(([^)]+)\)/g) || []).length * 2; + score += (sample.match(/!\[([^\]]*)\]\(([^)]+)\)/g) || []).length * 2; + + return score; +} + +export function detectContentType(content: string, extension: string): DetectionResult { + if (!content || !content.trim()) { + return { type: 'text' }; + } + + const binaryFormat = detectBinaryFormat(content); + if (binaryFormat) { + return { + type: 'text', + error: `This appears to be a ${binaryFormat}. TypoGenie accepts Markdown, HTML, and plain text files.`, + detectedFormat: binaryFormat, + }; + } + + // Full HTML document detection + const trimmed = content.trimStart().toLowerCase(); + if (trimmed.startsWith('= 3 && mdScore >= 5) { + return { type: 'markdown' }; + } + + // Strong HTML signal + if (structural >= 3) { + return { type: 'html' }; + } + + // Moderate HTML: few structural tags but heavy inline tags (Blogger/Google Docs style) + if (structural >= 1 && inline >= 10) { + return { type: 'html' }; + } + + // Strong markdown signal + if (mdScore >= 3) { + return { type: 'markdown' }; + } + + // Weak HTML with no markdown at all + if (structural >= 1 && mdScore === 0) { + return { type: 'html' }; + } + + // Extension as tiebreaker + if (extension === 'html' || extension === 'htm') { + return { type: 'html' }; + } + if (extension === 'md' || extension === 'markdown') { + return { type: 'markdown' }; + } + + return { type: 'text' }; +} diff --git a/src/utils/docxConverter.ts b/src/utils/docxConverter.ts index 1343e28..c5efda8 100644 --- a/src/utils/docxConverter.ts +++ b/src/utils/docxConverter.ts @@ -3,10 +3,11 @@ import { Document, Paragraph, TextRun, AlignmentType, HeadingLevel, BorderStyle, UnderlineType, ShadingType, LevelFormat, Packer, Table, TableCell, TableRow, WidthType, VerticalAlign, - ExternalHyperlink, TableBorders + ExternalHyperlink, TableBorders, ImageRun } from 'docx'; import { DocxStyleConfig, PaperSize, TemplateElementStyle } from '../types'; import { resolveColor, resolveFont } from '../services/templateRenderer'; +import { fetch as tauriFetch } from '@tauri-apps/plugin-http'; const pt = (points: number) => points * 2; const inchesToTwips = (inches: number) => Math.round(inches * 1440); @@ -185,6 +186,72 @@ export const generateDocxDocument = async ( const parser = new DOMParser(); const doc = parser.parseFromString(htmlContent, 'text/html'); + // Pre-fetch all images for embedding + const imageCache = new Map(); + const imgElements = doc.querySelectorAll('img'); + for (const img of Array.from(imgElements)) { + const src = img.getAttribute('src'); + if (!src || src.startsWith('data:')) continue; + + // Get dimensions from HTML attributes first + const htmlW = parseInt(img.getAttribute('data-original-width') || img.getAttribute('width') || '0'); + const htmlH = parseInt(img.getAttribute('data-original-height') || img.getAttribute('height') || '0'); + + try { + // Step 1: Fetch the image bytes + let data: Uint8Array | null = null; + + // Try Tauri HTTP plugin + try { + const resp = await tauriFetch(src, { method: 'GET' }); + if (resp.ok) { + data = new Uint8Array(await resp.arrayBuffer()); + } + } catch (e1) { + console.warn('tauriFetch failed, trying standard fetch:', e1); + } + + // Fallback to standard fetch + if (!data) { + try { + const resp = await globalThis.fetch(src, { mode: 'no-cors' }); + // no-cors gives opaque response, try cors mode + const resp2 = await globalThis.fetch(src); + if (resp2.ok) { + data = new Uint8Array(await resp2.arrayBuffer()); + } + } catch (e2) { + console.warn('Standard fetch also failed:', e2); + } + } + + if (!data || data.length === 0) { + console.warn('No image data received for:', src); + continue; + } + + // Step 2: Determine dimensions + let width = htmlW; + let height = htmlH; + if (!width || !height) { + try { + const bitmap = await createImageBitmap(new Blob([data])); + width = bitmap.width; + height = bitmap.height; + bitmap.close(); + } catch { + width = width || 600; + height = height || 400; + } + } + + imageCache.set(src, { data, width, height }); + console.log('Image cached:', src.substring(0, 60), width, 'x', height, data.length, 'bytes'); + } catch (err) { + console.warn('Image embed failed for:', src, err); + } + } + const children: (Paragraph | Table)[] = []; // Track separate ordered lists for independent numbering @@ -284,6 +351,26 @@ export const generateDocxDocument = async ( return elementConfig?.allCaps || false; }; + // Create an ImageRun from a cached image, scaled to fit the page + // Note: docx library transformation uses PIXELS (it converts to EMU internally) + const createInlineImageRun = (src: string): ImageRun | null => { + const cached = imageCache.get(src); + if (!cached) return null; + let width = cached.width; + let height = cached.height; + // Max width in pixels at 96 DPI + const pageWidthTwips = paperSize === 'A4' ? mmToTwips(210) : inchesToTwips(8.5); + const leftMargin = (options.page?.margins?.left || 72) * 20; + const rightMargin = (options.page?.margins?.right || 72) * 20; + const maxWidthPx = ((pageWidthTwips - leftMargin - rightMargin) / 1440) * 96; + if (width > maxWidthPx) { + const scale = maxWidthPx / width; + width = Math.round(maxWidthPx); + height = Math.round(height * scale); + } + return new ImageRun({ data: cached.data, transformation: { width, height } }); + }; + // Process text runs with support for links and formatting const processTextRuns = (element: HTMLElement, baseFormatting: any = {}, elementType?: string): (TextRun | ExternalHyperlink)[] => { const runs: (TextRun | ExternalHyperlink)[] = []; @@ -360,6 +447,14 @@ export const generateDocxDocument = async ( } else if (node.nodeType === Node.ELEMENT_NODE) { const childEl = node as HTMLElement; const childTag = childEl.tagName.toLowerCase(); + if (childTag === 'img') { + const imgSrc = childEl.getAttribute('src'); + if (imgSrc) { + const imgRun = createInlineImageRun(imgSrc); + if (imgRun) linkRuns.push(imgRun as any); + } + return; + } const childFmt = { ...fmt }; if (childTag === 'strong' || childTag === 'b') childFmt.bold = true; if (childTag === 'em' || childTag === 'i') childFmt.italics = true; @@ -498,6 +593,14 @@ export const generateDocxDocument = async ( } else if (node.nodeType === Node.ELEMENT_NODE) { const childEl = node as HTMLElement; const childTag = childEl.tagName.toLowerCase(); + if (childTag === 'img') { + const imgSrc = childEl.getAttribute('src'); + if (imgSrc) { + const imgRun = createInlineImageRun(imgSrc); + if (imgRun) linkRuns.push(imgRun as any); + } + return; + } const childFmt = { ...fmt }; if (childTag === 'strong' || childTag === 'b') childFmt.bold = true; if (childTag === 'em' || childTag === 'i') childFmt.italics = true; @@ -522,6 +625,16 @@ export const generateDocxDocument = async ( } } + // Handle standalone images in text runs + if (tag === 'img') { + const imgSrc = el.getAttribute('src'); + if (imgSrc) { + const imgRun = createInlineImageRun(imgSrc); + if (imgRun) runs.push(imgRun as any); + } + return; + } + const style = el.getAttribute('style') || ''; const colorMatch = style.match(/color:\s*#?([a-fA-F0-9]{6})/); if (colorMatch) fmt.color = colorMatch[1]; @@ -560,7 +673,12 @@ export const generateDocxDocument = async ( } })); - const cellBorders: any = {}; + const cellBorders: any = { + top: { style: BorderStyle.NIL, size: 0, color: '000000' }, + bottom: { style: BorderStyle.NIL, size: 0, color: '000000' }, + left: { style: BorderStyle.NIL, size: 0, color: '000000' }, + right: { style: BorderStyle.NIL, size: 0, color: '000000' } + }; if (cfg?.border) { const b = { color: resolveColorToHex(cfg.border.color) || '000000', style: mapBorderStyle(cfg.border.style), size: cfg.border.width * 8 }; cellBorders.top = b; @@ -1145,9 +1263,13 @@ export const generateDocxDocument = async ( }); } + // Center paragraphs that only contain an image + const isImageOnly = el.querySelector('img') !== null && !el.textContent?.trim(); + results.push(new Paragraph({ children: runs.length > 0 ? runs : [new TextRun({ text: el.textContent || '' })], - alignment: mapAlignment(body.align), + alignment: isImageOnly ? AlignmentType.CENTER : mapAlignment(body.align), + indent: isImageOnly ? undefined : (elements?.p?.indent ? { firstLine: elements.p.indent * 20 } : undefined), spacing: { before: (body.spacing?.before || 0) * 20, after: (body.spacing?.after || 0) * 20, @@ -1203,20 +1325,33 @@ export const generateDocxDocument = async ( return results; } - // Images - produce accessible placeholder text + // Images - embed if fetched, otherwise placeholder if (tag === 'img') { - const alt = el.getAttribute('alt') || ''; - const placeholderText = alt ? `[Image: ${alt}]` : '[Image]'; - results.push(new Paragraph({ - children: [new TextRun({ - text: placeholderText, - font: body.font, - size: pt(body.size), - color: formatColor(resolveColorToHex(body.color) || '666666'), - italics: true, - })], - spacing: { before: 120, after: 120 }, - })); + const src = el.getAttribute('src'); + const cached = src ? imageCache.get(src) : null; + + if (cached) { + const imgRun = createInlineImageRun(src); + if (imgRun) { + results.push(new Paragraph({ + children: [imgRun], + spacing: { before: 120, after: 120 }, + })); + } + } else { + const alt = el.getAttribute('alt') || ''; + const placeholderText = alt ? `[Image: ${alt}]` : '[Image]'; + results.push(new Paragraph({ + children: [new TextRun({ + text: placeholderText, + font: body.font, + size: pt(body.size), + color: formatColor(resolveColorToHex(body.color) || '666666'), + italics: true, + })], + spacing: { before: 120, after: 120 }, + })); + } return results; } diff --git a/src/utils/htmlToMarkdown.ts b/src/utils/htmlToMarkdown.ts new file mode 100644 index 0000000..7a37c2f --- /dev/null +++ b/src/utils/htmlToMarkdown.ts @@ -0,0 +1,38 @@ +import TurndownService from 'turndown'; + +export function htmlToMarkdown(html: string): string { + const turndown = new TurndownService({ + headingStyle: 'atx', + hr: '---', + bulletListMarker: '-', + codeBlockStyle: 'fenced', + emDelimiter: '*', + strongDelimiter: '**', + }); + + // Strip meaningless wrapper spans (Blogger, Google Docs, Word paste, etc.) + turndown.addRule('stripDecorativeSpans', { + filter: (node) => { + if (node.nodeName !== 'SPAN') return false; + const style = node.getAttribute('style') || ''; + if (!style) return true; + const meaningless = /font-family:\s*inherit|font-size:\s*(medium|inherit)|font-weight:\s*normal|color:\s*(black|inherit)/i; + const props = style.split(';').map(p => p.trim()).filter(Boolean); + return props.length > 0 && props.every(p => meaningless.test(p)); + }, + replacement: (content) => content, + }); + + let markdown = turndown.turndown(html); + + // Clean up excessive blank lines + markdown = markdown.replace(/\n{3,}/g, '\n\n'); + // Convert non-breaking spaces to   entities (NOT regular spaces) + // Regular spaces would trigger markdown code block detection at 4+ indent + //   entities pass through Marked.js as HTML and render as visible spaces + markdown = markdown.replace(/\u00A0/g, ' '); + // Clean up trailing whitespace on lines + markdown = markdown.replace(/[ \t]+$/gm, ''); + + return markdown.trim(); +}