diff --git a/README.md b/README.md index 4fc17cb..94d0149 100644 --- a/README.md +++ b/README.md @@ -228,7 +228,7 @@ This means all 165+ styles automatically meet WCAG AAA contrast requirements reg **TypoGenie is fully portable** - no installation, no registry entries, no files scattered across your system. Just download and run: -- 🪟 **Windows**: [`TypoGenie-v1.2.0-Portable.zip`](https://git.lashman.live/lashman/typogenie/releases) - Extract and run, nothing else needed +- 🪟 **Windows**: [`TypoGenie-v1.2.1-Portable.zip`](https://git.lashman.live/lashman/typogenie/releases) - Extract and run, nothing else needed **How it works:** ``` diff --git a/package-lock.json b/package-lock.json index b440720..058c315 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "typogenie", - "version": "1.0.0", + "version": "1.2.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "typogenie", - "version": "1.0.0", + "version": "1.2.0", "dependencies": { "@tauri-apps/api": "^2.0.0", "@tauri-apps/plugin-dialog": "^2.0.0", @@ -22,7 +22,8 @@ "motion": "^12.29.2", "react": "^19.2.4", "react-dom": "^19.2.4", - "turndown": "^7.2.2" + "turndown": "^7.2.2", + "turndown-plugin-gfm": "^1.0.2" }, "devDependencies": { "@tailwindcss/postcss": "^4.1.18", @@ -2914,6 +2915,12 @@ "@mixmark-io/domino": "^2.2.0" } }, + "node_modules/turndown-plugin-gfm": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/turndown-plugin-gfm/-/turndown-plugin-gfm-1.0.2.tgz", + "integrity": "sha512-vwz9tfvF7XN/jE0dGoBei3FXWuvll78ohzCZQuOb+ZjWrs3a0XhQVomJEb2Qh4VHTPNRO4GPZh0V7VRbiWwkRg==", + "license": "MIT" + }, "node_modules/typescript": { "version": "5.8.3", "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.8.3.tgz", diff --git a/package.json b/package.json index ebbe0b7..4f486c1 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "typogenie", "private": true, - "version": "1.2.0", + "version": "1.2.1", "type": "module", "scripts": { "dev": "vite", @@ -28,7 +28,8 @@ "motion": "^12.29.2", "react": "^19.2.4", "react-dom": "^19.2.4", - "turndown": "^7.2.2" + "turndown": "^7.2.2", + "turndown-plugin-gfm": "^1.0.2" }, "devDependencies": { "@tailwindcss/postcss": "^4.1.18", diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock index b9dc9cd..c3dccef 100644 --- a/src-tauri/Cargo.lock +++ b/src-tauri/Cargo.lock @@ -4918,7 +4918,7 @@ checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" [[package]] name = "typogenie" -version = "1.0.0" +version = "1.2.1" dependencies = [ "log", "opener", diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index 7c21bb0..7d92921 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "typogenie" -version = "1.2.0" +version = "1.2.1" description = "TypoGenie - Portable Markdown to Word document converter" authors = ["TypoGenie Contributors"] license = "CC0-1.0" diff --git a/src-tauri/tauri.conf.json b/src-tauri/tauri.conf.json index b109c02..c6b6306 100644 --- a/src-tauri/tauri.conf.json +++ b/src-tauri/tauri.conf.json @@ -1,7 +1,7 @@ { "$schema": "../node_modules/@tauri-apps/cli/config.schema.json", "productName": "TypoGenie", - "version": "1.2.0", + "version": "1.2.1", "identifier": "live.lashman.typogenie", "build": { "frontendDist": "../dist", diff --git a/src/App.tsx b/src/App.tsx index fd6ef18..3426e63 100644 --- a/src/App.tsx +++ b/src/App.tsx @@ -156,7 +156,7 @@ const App: React.FC = () => { let processedContent = text; if (detection.type === 'html') { try { - processedContent = htmlToMarkdown(text); + processedContent = htmlToMarkdown(text, true); } catch (err) { console.error('HTML conversion failed:', err); } diff --git a/src/components/Preview.tsx b/src/components/Preview.tsx index 414f339..c21f410 100644 --- a/src/components/Preview.tsx +++ b/src/components/Preview.tsx @@ -273,7 +273,6 @@ export const Preview: React.FC = ({ // Track blob URL for cleanup const blobUrlRef = useRef(null); - // Render preview whenever dependencies change useEffect(() => { if (!iframeRef.current || !style) return; @@ -322,7 +321,7 @@ export const Preview: React.FC = ({ `.page {`, ` width: ${paperSize === 'A4' ? '210mm' : '8.5in'};`, ` min-height: ${paperSize === 'A4' ? '297mm' : '11in'};`, - ` padding: 25mm;`, + ` padding: ${style.page?.margins ? `${style.page.margins.top}pt ${style.page.margins.right}pt ${style.page.margins.bottom}pt ${style.page.margins.left}pt` : '25mm'};`, ` box-shadow: 0 20px 25px -5px rgba(0, 0, 0, 0.4);`, ` box-sizing: border-box;`, ` margin: 0 auto;`, diff --git a/src/utils/docxConverter.ts b/src/utils/docxConverter.ts index c5efda8..039413f 100644 --- a/src/utils/docxConverter.ts +++ b/src/utils/docxConverter.ts @@ -193,9 +193,9 @@ export const generateDocxDocument = async ( const src = img.getAttribute('src'); if (!src || src.startsWith('data:')) continue; - // Get dimensions from HTML attributes first - const htmlW = parseInt(img.getAttribute('data-original-width') || img.getAttribute('width') || '0'); - const htmlH = parseInt(img.getAttribute('data-original-height') || img.getAttribute('height') || '0'); + // Get display dimensions from HTML attributes (prefer width/height over data-original-*) + const htmlW = parseInt(img.getAttribute('width') || '0'); + const htmlH = parseInt(img.getAttribute('height') || '0'); try { // Step 1: Fetch the image bytes @@ -447,6 +447,10 @@ export const generateDocxDocument = async ( } else if (node.nodeType === Node.ELEMENT_NODE) { const childEl = node as HTMLElement; const childTag = childEl.tagName.toLowerCase(); + if (childTag === 'br') { + linkRuns.push(new TextRun({ break: 1 }) as any); + return; + } if (childTag === 'img') { const imgSrc = childEl.getAttribute('src'); if (imgSrc) { @@ -566,9 +570,15 @@ export const generateDocxDocument = async ( if (tag === 's' || tag === 'strike') fmt.strike = true; if (tag === 'sub') fmt.subScript = true; if (tag === 'sup') fmt.superScript = true; + if (tag === 'br') { + runs.push(new TextRun({ break: 1 }) as any); + return; + } if (tag === 'code') { fmt.font = codeFontResolved; fmt.color = codeTextColor; + if (elements?.code?.size) fmt.size = pt(elements.code.size); + if (codeBgColor) fmt.shading = { fill: codeBgColor, type: ShadingType.CLEAR }; } // Handle links @@ -593,6 +603,10 @@ export const generateDocxDocument = async ( } else if (node.nodeType === Node.ELEMENT_NODE) { const childEl = node as HTMLElement; const childTag = childEl.tagName.toLowerCase(); + if (childTag === 'br') { + linkRuns.push(new TextRun({ break: 1 }) as any); + return; + } if (childTag === 'img') { const imgSrc = childEl.getAttribute('src'); if (imgSrc) { @@ -669,7 +683,7 @@ export const generateDocxDocument = async ( spacing: { before: 0, after: 0, - line: Math.round((cfg?.spacing?.line || 1.2) * 240), + line: Math.round(Math.max(cfg?.spacing?.line || 1.2, 1.5) * 240), } })); @@ -814,20 +828,27 @@ export const generateDocxDocument = async ( console.log('TABLE DOCX: Processing table'); - // Get table-level border config - const tableBorderConfig = elements?.table?.border; - const tableBorderColor = resolveColorToHex(tableBorderConfig?.color) || (isDark ? '444444' : 'CCCCCC'); - const tableBorderWidth = tableBorderConfig?.width || 1; - const tableBorderStyle = mapBorderStyle(tableBorderConfig?.style || 'single'); + // Get table-level border config - check both generic and per-side borders + const tblCfg = elements?.table; + const defaultBorderColor = isDark ? '444444' : 'CCCCCC'; + const makeBorder = (cfg: any, fallbackColor: string) => ({ + color: resolveColorToHex(cfg?.color) || fallbackColor, + size: (cfg?.width || 1) * 8, + style: mapBorderStyle(cfg?.style || 'single') + }); - // Create table-level borders (outer border only by default) + const noBorder = { style: BorderStyle.NONE, size: 0, color: 'auto' }; + const htmlBorderAttr = tableEl.getAttribute('border'); + const hasHtmlBorder = htmlBorderAttr && parseInt(htmlBorderAttr) > 0; + const genericBorder = tblCfg?.border ? makeBorder(tblCfg.border, defaultBorderColor) : null; const tableBorders = { - top: { color: tableBorderColor, size: tableBorderWidth * 8, style: tableBorderStyle }, - bottom: { color: tableBorderColor, size: tableBorderWidth * 8, style: tableBorderStyle }, - left: { color: tableBorderColor, size: tableBorderWidth * 8, style: tableBorderStyle }, - right: { color: tableBorderColor, size: tableBorderWidth * 8, style: tableBorderStyle }, - insideHorizontal: { style: BorderStyle.NIL, size: 0 }, - insideVertical: { style: BorderStyle.NIL, size: 0 } + top: tblCfg?.borderTop ? makeBorder(tblCfg.borderTop, defaultBorderColor) : (genericBorder || (hasHtmlBorder ? { color: defaultBorderColor, size: 4, style: BorderStyle.SINGLE } : noBorder)), + bottom: tblCfg?.borderBottom ? makeBorder(tblCfg.borderBottom, defaultBorderColor) : (genericBorder || (hasHtmlBorder ? { color: defaultBorderColor, size: 4, style: BorderStyle.SINGLE } : noBorder)), + left: tblCfg?.borderLeft ? makeBorder(tblCfg.borderLeft, defaultBorderColor) : (genericBorder || (hasHtmlBorder ? { color: defaultBorderColor, size: 4, style: BorderStyle.SINGLE } : noBorder)), + right: tblCfg?.borderRight ? makeBorder(tblCfg.borderRight, defaultBorderColor) : (genericBorder || (hasHtmlBorder ? { color: defaultBorderColor, size: 4, style: BorderStyle.SINGLE } : noBorder)), + insideHorizontal: elements?.th?.borderBottom ? makeBorder(elements.th.borderBottom, defaultBorderColor) : + (hasHtmlBorder ? { color: defaultBorderColor, size: 4, style: BorderStyle.SINGLE } : noBorder), + insideVertical: hasHtmlBorder ? { color: defaultBorderColor, size: 4, style: BorderStyle.SINGLE } : noBorder }; for (const rowEl of Array.from(tableEl.querySelectorAll('tr'))) { @@ -855,8 +876,11 @@ export const generateDocxDocument = async ( bold: isHeader || undefined }); - // Get background from config - const cellBg = resolveColorToHex(cellConfig?.background); + // Get background: HTML bgcolor attribute takes priority, then template config + const htmlBgColor = cell.getAttribute('bgcolor'); + const cellBg = htmlBgColor + ? formatColor(htmlBgColor.replace('#', '')) + : resolveColorToHex(cellConfig?.background); console.log(`TABLE CELL DOCX [${isHeader ? 'TH' : 'TD'}]:`, { text: cell.textContent?.substring(0, 30) + (cell.textContent && cell.textContent.length > 30 ? '...' : ''), @@ -867,7 +891,8 @@ export const generateDocxDocument = async ( bold: isHeader || undefined }); - // Resolve cell-specific borders from template + // Resolve cell-specific borders from template config only + // (HTML border is handled at table level via insideH/insideV to avoid overriding thick outer borders) const cellBorders: any = {}; if (cellConfig?.border) { const b = { color: resolveColorToHex(cellConfig.border.color) || '000000', style: mapBorderStyle(cellConfig.border.style), size: (cellConfig.border.width || 1) * 8 }; @@ -884,10 +909,10 @@ export const generateDocxDocument = async ( cells.push(new TableCell({ children: [new Paragraph({ children: cellRuns.length > 0 ? cellRuns : [new TextRun({ text: cell.textContent || '' })], - alignment: isHeader ? AlignmentType.CENTER : mapAlignment(cellConfig?.align), + alignment: mapAlignment(cellConfig?.align || cell.getAttribute('align') || (cell.getAttribute('style')?.match(/text-align:\s*(\w+)/)?.[1]) || undefined), spacing: { after: 0, - line: Math.round((body.spacing?.line || 1.2) * 240) + line: Math.round(Math.max(elements?.table?.spacing?.line || body.spacing?.line || 1.2, 1.5) * 240) } })], shading: cellBg ? { fill: cellBg, type: ShadingType.CLEAR } : undefined, @@ -1029,8 +1054,8 @@ export const generateDocxDocument = async ( } } - const liSpacingBefore = (elements?.li?.spacing?.before || 4) * 20; - const liSpacingAfter = (elements?.li?.spacing?.after || 4) * 20; + const liSpacingBefore = (elements?.li?.spacing?.before ?? 4) * 20; + const liSpacingAfter = (elements?.li?.spacing?.after ?? 4) * 20; const liLineHeight = (elements?.li?.spacing?.line || body.spacing?.line || 1.2) * 240; // Log the actual text runs and their styling @@ -1233,9 +1258,12 @@ export const generateDocxDocument = async ( return results; } - // Tables + // Tables - with spacing paragraphs before/after if (tag === 'table') { + const tblSpacing = elements?.table?.spacing; + results.push(new Paragraph({ spacing: { before: (tblSpacing?.before || 18) * 20, after: 0 }, children: [] })); results.push(processTable(el)); + results.push(new Paragraph({ spacing: { before: 0, after: (tblSpacing?.after || 18) * 20 }, children: [] })); return results; } @@ -1266,62 +1294,110 @@ export const generateDocxDocument = async ( // Center paragraphs that only contain an image const isImageOnly = el.querySelector('img') !== null && !el.textContent?.trim(); + const pSpacing = elements?.p?.spacing || body.spacing; + // When template spacing is 0, CSS generator skips the margin, so browser default 1em applies + const pAfter = (pSpacing?.after || body.size) * 20; results.push(new Paragraph({ children: runs.length > 0 ? runs : [new TextRun({ text: el.textContent || '' })], - alignment: isImageOnly ? AlignmentType.CENTER : mapAlignment(body.align), + alignment: isImageOnly ? AlignmentType.CENTER : mapAlignment(elements?.p?.align || body.align), indent: isImageOnly ? undefined : (elements?.p?.indent ? { firstLine: elements.p.indent * 20 } : undefined), spacing: { - before: (body.spacing?.before || 0) * 20, - after: (body.spacing?.after || 0) * 20, - line: Math.round((body.spacing?.line || 1.2) * 240) + before: (pSpacing?.before || 0) * 20, + after: pAfter, + line: Math.round((pSpacing?.line || 1.2) * 240) }, shading: bgMatch ? { fill: formatColor(resolveColorToHex(bgMatch[1])), type: ShadingType.CLEAR } : undefined })); return results; } - // Blockquotes + // Blockquotes - process each inner

as a separate paragraph with blockquote styling + // Word groups adjacent paragraphs with identical borders, showing top/bottom only on outer edges if (tag === 'blockquote') { const bqConfig = elements?.blockquote; - const runs = processTextRuns(el, { - font: bqConfig?.font ? resolveFont(bqConfig.font, fonts || {}) : body.font, - size: pt(bqConfig?.size || body.size), - color: formatColor(resolveColorToHex(bqConfig?.color || body.color)), - italics: true + const bqFont = bqConfig?.font ? resolveFont(bqConfig.font, fonts || {}) : body.font; + const bqSize = pt(bqConfig?.size || body.size); + const bqColor = formatColor(resolveColorToHex(bqConfig?.color || body.color)); + const bqFmt = { font: bqFont, size: bqSize, color: bqColor, italics: bqConfig?.italic !== false }; + + console.log('DOCX BLOCKQUOTE:', { + font: bqFont, size: bqSize, color: bqColor, childCount: el.children.length }); - const borderColor = resolveColorToHex(bqConfig?.borderLeft?.color) || accentColor; - const borderWidth = bqConfig?.borderLeft?.width || 3; + const bqBorder: any = {}; + if (bqConfig?.border) { + const b = { color: resolveColorToHex(bqConfig.border.color) || accentColor, space: 6, style: mapBorderStyle(bqConfig.border.style), size: (bqConfig.border.width || 1) * 8 }; + bqBorder.top = b; bqBorder.bottom = b; bqBorder.left = b; bqBorder.right = b; + } + if (bqConfig?.borderTop) bqBorder.top = { color: resolveColorToHex(bqConfig.borderTop.color) || accentColor, space: 6, style: mapBorderStyle(bqConfig.borderTop.style), size: (bqConfig.borderTop.width || 1) * 8 }; + if (bqConfig?.borderBottom) bqBorder.bottom = { color: resolveColorToHex(bqConfig.borderBottom.color) || accentColor, space: 6, style: mapBorderStyle(bqConfig.borderBottom.style), size: (bqConfig.borderBottom.width || 1) * 8 }; + if (bqConfig?.borderLeft) bqBorder.left = { color: resolveColorToHex(bqConfig.borderLeft.color) || accentColor, space: 10, style: mapBorderStyle(bqConfig.borderLeft.style), size: (bqConfig.borderLeft.width || 1) * 8 }; + if (bqConfig?.borderRight) bqBorder.right = { color: resolveColorToHex(bqConfig.borderRight.color) || accentColor, space: 6, style: mapBorderStyle(bqConfig.borderRight.style), size: (bqConfig.borderRight.width || 1) * 8 }; - const debugKey = 'blockquote-debug'; - if (!visitedTags.has(debugKey)) { - visitedTags.add(debugKey); - console.log('DOCX BLOCKQUOTE CONFIG:', { - font: bqConfig?.font, - size: bqConfig?.size, - color: formatColor(resolveColorToHex(bqConfig?.color)), - border: { color: borderColor, width: borderWidth }, - background: bqConfig?.background + const bqLine = Math.max(bqConfig?.spacing?.line || body.spacing?.line || 1.2, 1.5); + const bqBorderObj = Object.keys(bqBorder).length > 0 ? bqBorder : undefined; + const bqShading = bqConfig?.background ? { fill: resolveColorToHex(bqConfig.background), type: ShadingType.CLEAR } : undefined; + const bqSpacing = { + before: 0, + after: body.size * 20, // 1em gap between inner paragraphs (matches browser default) + line: Math.round(bqLine * 240) + }; + + // Process children - each

becomes its own paragraph with blockquote styling + const childEls = Array.from(el.children); + const makeBqParagraph = (runs: any[], isFirst: boolean, isLast: boolean, align?: any) => new Paragraph({ + children: runs, + alignment: align || mapAlignment(bqConfig?.align), + indent: bqConfig?.indent ? { left: bqConfig.indent * 20 } : undefined, + border: bqBorderObj, + shading: bqShading, + spacing: { + ...bqSpacing, + before: isFirst ? (bqConfig?.spacing?.before || 12) * 20 : bqSpacing.before, + after: isLast ? (bqConfig?.spacing?.after || 12) * 20 : bqSpacing.after, + } + }); + + if (childEls.length === 0) { + // No child elements - process as single paragraph with full blockquote styling + const runs = processTextRuns(el, bqFmt); + results.push(makeBqParagraph(runs, true, true, mapAlignment(bqConfig?.align))); + } else { + childEls.forEach((child, i) => { + const childEl = child as HTMLElement; + const childTagName = childEl.tagName.toLowerCase(); + // Nested blockquotes - recurse + if (childTagName === 'blockquote') { + results.push(...processNode(childEl)); + return; + } + const isP = childTagName === 'p'; + // CSS specificity:

rules override inherited blockquote styles + // Only italic inherits since .page p doesn't set font-style + const childFmt = isP ? { + font: body.font, + size: pt(body.size), + color: formatColor(resolveColorToHex(body.color)), + italics: bqConfig?.italic !== false + } : bqFmt; + const childAlign = isP ? mapAlignment(elements?.p?.align || body.align) : mapAlignment(bqConfig?.align); + const runs = processTextRuns(childEl, childFmt); + if (runs.length > 0) { + results.push(makeBqParagraph(runs, i === 0, i === childEls.length - 1, childAlign)); + } }); } - - results.push(new Paragraph({ - children: runs, - indent: { left: 720 }, - border: { left: { color: borderColor, space: 10, style: BorderStyle.SINGLE, size: borderWidth * 8 } }, - shading: bqConfig?.background ? { fill: resolveColorToHex(bqConfig.background), type: ShadingType.CLEAR } : (isDark ? undefined : { fill: 'F8F8F8', type: ShadingType.CLEAR }), - spacing: { - before: (bqConfig?.spacing?.before || 12) * 20, - after: (bqConfig?.spacing?.after || 12) * 20, - line: Math.round((bqConfig?.spacing?.line || body.spacing?.line || 1.2) * 240) - } - })); return results; } - // Lists + // Lists - with spacing before/after the list container if (tag === 'ul' || tag === 'ol') { + const listCfg = tag === 'ul' ? elements?.ul : elements?.ol; + const listBefore = (listCfg?.spacing?.before ?? 12) * 20; + const listAfter = (listCfg?.spacing?.after ?? 12) * 20; + if (listBefore) results.push(new Paragraph({ spacing: { before: listBefore, after: 0 }, children: [] })); results.push(...processList(el, tag === 'ol', 0)); + if (listAfter) results.push(new Paragraph({ spacing: { before: 0, after: listAfter }, children: [] })); return results; } @@ -1335,7 +1411,8 @@ export const generateDocxDocument = async ( if (imgRun) { results.push(new Paragraph({ children: [imgRun], - spacing: { before: 120, after: 120 }, + alignment: AlignmentType.CENTER, + spacing: { before: (elements?.img?.spacing?.before ?? 18) * 20, after: (elements?.img?.spacing?.after ?? 18) * 20 }, })); } } else { @@ -1369,12 +1446,82 @@ export const generateDocxDocument = async ( }, spacing: { before: (hrConfig?.spacing?.before || 12) * 20, - after: (hrConfig?.spacing?.after || 12) * 20 + after: 0 } })); return results; } + // Divs - split into paragraphs for text/inline content, recurse for nested block elements + if (tag === 'div') { + const style = el.getAttribute('style') || ''; + const alignMatch = style.match(/text-align:\s*(left|center|right|justify)/i); + const divAlign = alignMatch ? alignMatch[1].toLowerCase() : undefined; + const divSpacing = elements?.p?.spacing || body.spacing; + const blockTags = new Set(['div', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'blockquote', 'ul', 'ol', 'table', 'pre', 'hr']); + + // Check if div has nested block elements + const hasBlockChildren = Array.from(el.children).some(c => blockTags.has(c.tagName.toLowerCase())); + + if (hasBlockChildren) { + // Split: group consecutive inline/text nodes into paragraphs, recurse block elements + let inlineNodes: Node[] = []; + + const flushInline = () => { + if (inlineNodes.length === 0) return; + // Create a temp container in the parsed document to process inline nodes + const temp = doc.createElement('span'); + inlineNodes.forEach(n => temp.appendChild(n.cloneNode(true))); + const text = temp.textContent?.trim(); + if (text) { + const runs = processTextRuns(temp as any, { + font: body.font, size: pt(body.size), color: formatColor(resolveColorToHex(body.color)) + }); + if (runs.length > 0) { + results.push(new Paragraph({ + children: runs, + alignment: divAlign ? mapAlignment(divAlign === 'justify' ? 'both' : divAlign) : mapAlignment(body.align), + spacing: { before: 0, after: (divSpacing?.after || body.size) * 20, line: Math.round((divSpacing?.line || 1.2) * 240) } + })); + } + } + inlineNodes = []; + }; + + for (const child of Array.from(el.childNodes)) { + if (child.nodeType === Node.ELEMENT_NODE && blockTags.has((child as HTMLElement).tagName.toLowerCase())) { + flushInline(); + results.push(...processNode(child)); + } else { + inlineNodes.push(child); + } + } + flushInline(); + } else { + // No nested blocks - treat entire div as one paragraph + const hasContent = el.textContent?.trim(); + if (hasContent) { + const runs = processTextRuns(el, { + font: body.font, size: pt(body.size), color: formatColor(resolveColorToHex(body.color)) + }); + if (runs.length > 0) { + const isImgOnly = el.querySelector('img') !== null && !hasContent; + results.push(new Paragraph({ + children: runs, + alignment: isImgOnly ? AlignmentType.CENTER : (divAlign ? mapAlignment(divAlign === 'justify' ? 'both' : divAlign) : mapAlignment(body.align)), + spacing: { before: 0, after: (divSpacing?.after || body.size) * 20, line: Math.round((divSpacing?.line || 1.2) * 240) } + })); + return results; + } + } + // No text - process children for images etc. + for (const child of Array.from(el.childNodes)) { + results.push(...processNode(child)); + } + } + return results; + } + // Default: process children for (const child of Array.from(el.childNodes)) { results.push(...processNode(child)); diff --git a/src/utils/htmlToMarkdown.ts b/src/utils/htmlToMarkdown.ts index 7a37c2f..cde9fc6 100644 --- a/src/utils/htmlToMarkdown.ts +++ b/src/utils/htmlToMarkdown.ts @@ -1,6 +1,8 @@ import TurndownService from 'turndown'; +// @ts-ignore +import { gfm } from 'turndown-plugin-gfm'; -export function htmlToMarkdown(html: string): string { +export function htmlToMarkdown(html: string, preserveAlignment = false): string { const turndown = new TurndownService({ headingStyle: 'atx', hr: '---', @@ -10,6 +12,39 @@ export function htmlToMarkdown(html: string): string { strongDelimiter: '**', }); + // Enable GFM tables + turndown.use(gfm); + + // Convert Blogger caption tables (image + caption) to image + italic caption + // These are layout tables, not data tables - without this rule, GFM converts them + // to markdown tables which then get data table borders in the export + turndown.addRule('bloggerCaptionTable', { + filter: (node) => { + return node.nodeName === 'TABLE' && + (node as HTMLElement).classList.contains('tr-caption-container'); + }, + replacement: (_content, node) => { + const el = node as HTMLElement; + const img = el.querySelector('img'); + const caption = el.querySelector('.tr-caption'); + let result = '\n\n'; + if (img) { + const src = img.getAttribute('src') || ''; + const alt = img.getAttribute('alt') || ''; + const link = img.closest('a'); + if (link) { + result += `[![${alt}](${src})](${link.getAttribute('href')})\n`; + } else { + result += `![${alt}](${src})\n`; + } + } + if (caption && caption.textContent?.trim()) { + result += `\n*${caption.textContent.trim()}*\n`; + } + return result + '\n'; + }, + }); + // Strip meaningless wrapper spans (Blogger, Google Docs, Word paste, etc.) turndown.addRule('stripDecorativeSpans', { filter: (node) => { @@ -23,6 +58,21 @@ export function htmlToMarkdown(html: string): string { replacement: (content) => content, }); + // For HTML content: preserve divs with text-align as raw HTML pass-through + if (preserveAlignment) { + turndown.addRule('preserveAlignment', { + filter: (node) => { + if (node.nodeName !== 'DIV') return false; + const style = node.getAttribute('style') || ''; + return /text-align:\s*(right|center)/i.test(style); + }, + replacement: (_content, node) => { + const el = node as HTMLElement; + return '\n\n' + el.outerHTML + '\n\n'; + }, + }); + } + let markdown = turndown.turndown(html); // Clean up excessive blank lines @@ -31,8 +81,9 @@ export function htmlToMarkdown(html: string): string { // Regular spaces would trigger markdown code block detection at 4+ indent //   entities pass through Marked.js as HTML and render as visible spaces markdown = markdown.replace(/\u00A0/g, ' '); - // Clean up trailing whitespace on lines - markdown = markdown.replace(/[ \t]+$/gm, ''); + // Clean up whitespace-only lines but DON'T strip trailing spaces on content lines + // Turndown uses two trailing spaces for
line breaks - stripping them breaks line breaks + markdown = markdown.replace(/^[ \t]+$/gm, ''); return markdown.trim(); }