1 Commits
v1.2.0 ... main

Author SHA1 Message Date
a3f4ffdec8 improved html conversion fidelity and docx export accuracy 2026-03-22 08:58:25 +02:00
10 changed files with 280 additions and 75 deletions

View File

@@ -228,7 +228,7 @@ This means all 165+ styles automatically meet WCAG AAA contrast requirements reg
**TypoGenie is fully portable** - no installation, no registry entries, no files scattered across your system.
Just download and run:
- 🪟 **Windows**: [`TypoGenie-v1.2.0-Portable.zip`](https://git.lashman.live/lashman/typogenie/releases) - Extract and run, nothing else needed
- 🪟 **Windows**: [`TypoGenie-v1.2.1-Portable.zip`](https://git.lashman.live/lashman/typogenie/releases) - Extract and run, nothing else needed
**How it works:**
```

13
package-lock.json generated
View File

@@ -1,12 +1,12 @@
{
"name": "typogenie",
"version": "1.0.0",
"version": "1.2.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "typogenie",
"version": "1.0.0",
"version": "1.2.0",
"dependencies": {
"@tauri-apps/api": "^2.0.0",
"@tauri-apps/plugin-dialog": "^2.0.0",
@@ -22,7 +22,8 @@
"motion": "^12.29.2",
"react": "^19.2.4",
"react-dom": "^19.2.4",
"turndown": "^7.2.2"
"turndown": "^7.2.2",
"turndown-plugin-gfm": "^1.0.2"
},
"devDependencies": {
"@tailwindcss/postcss": "^4.1.18",
@@ -2914,6 +2915,12 @@
"@mixmark-io/domino": "^2.2.0"
}
},
"node_modules/turndown-plugin-gfm": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/turndown-plugin-gfm/-/turndown-plugin-gfm-1.0.2.tgz",
"integrity": "sha512-vwz9tfvF7XN/jE0dGoBei3FXWuvll78ohzCZQuOb+ZjWrs3a0XhQVomJEb2Qh4VHTPNRO4GPZh0V7VRbiWwkRg==",
"license": "MIT"
},
"node_modules/typescript": {
"version": "5.8.3",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.8.3.tgz",

View File

@@ -1,7 +1,7 @@
{
"name": "typogenie",
"private": true,
"version": "1.2.0",
"version": "1.2.1",
"type": "module",
"scripts": {
"dev": "vite",
@@ -28,7 +28,8 @@
"motion": "^12.29.2",
"react": "^19.2.4",
"react-dom": "^19.2.4",
"turndown": "^7.2.2"
"turndown": "^7.2.2",
"turndown-plugin-gfm": "^1.0.2"
},
"devDependencies": {
"@tailwindcss/postcss": "^4.1.18",

2
src-tauri/Cargo.lock generated
View File

@@ -4918,7 +4918,7 @@ checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
[[package]]
name = "typogenie"
version = "1.0.0"
version = "1.2.1"
dependencies = [
"log",
"opener",

View File

@@ -1,6 +1,6 @@
[package]
name = "typogenie"
version = "1.2.0"
version = "1.2.1"
description = "TypoGenie - Portable Markdown to Word document converter"
authors = ["TypoGenie Contributors"]
license = "CC0-1.0"

View File

@@ -1,7 +1,7 @@
{
"$schema": "../node_modules/@tauri-apps/cli/config.schema.json",
"productName": "TypoGenie",
"version": "1.2.0",
"version": "1.2.1",
"identifier": "live.lashman.typogenie",
"build": {
"frontendDist": "../dist",

View File

@@ -156,7 +156,7 @@ const App: React.FC = () => {
let processedContent = text;
if (detection.type === 'html') {
try {
processedContent = htmlToMarkdown(text);
processedContent = htmlToMarkdown(text, true);
} catch (err) {
console.error('HTML conversion failed:', err);
}

View File

@@ -273,7 +273,6 @@ export const Preview: React.FC<PreviewProps> = ({
// Track blob URL for cleanup
const blobUrlRef = useRef<string | null>(null);
// Render preview whenever dependencies change
useEffect(() => {
if (!iframeRef.current || !style) return;
@@ -322,7 +321,7 @@ export const Preview: React.FC<PreviewProps> = ({
`.page {`,
` width: ${paperSize === 'A4' ? '210mm' : '8.5in'};`,
` min-height: ${paperSize === 'A4' ? '297mm' : '11in'};`,
` padding: 25mm;`,
` padding: ${style.page?.margins ? `${style.page.margins.top}pt ${style.page.margins.right}pt ${style.page.margins.bottom}pt ${style.page.margins.left}pt` : '25mm'};`,
` box-shadow: 0 20px 25px -5px rgba(0, 0, 0, 0.4);`,
` box-sizing: border-box;`,
` margin: 0 auto;`,

View File

@@ -193,9 +193,9 @@ export const generateDocxDocument = async (
const src = img.getAttribute('src');
if (!src || src.startsWith('data:')) continue;
// Get dimensions from HTML attributes first
const htmlW = parseInt(img.getAttribute('data-original-width') || img.getAttribute('width') || '0');
const htmlH = parseInt(img.getAttribute('data-original-height') || img.getAttribute('height') || '0');
// Get display dimensions from HTML attributes (prefer width/height over data-original-*)
const htmlW = parseInt(img.getAttribute('width') || '0');
const htmlH = parseInt(img.getAttribute('height') || '0');
try {
// Step 1: Fetch the image bytes
@@ -447,6 +447,10 @@ export const generateDocxDocument = async (
} else if (node.nodeType === Node.ELEMENT_NODE) {
const childEl = node as HTMLElement;
const childTag = childEl.tagName.toLowerCase();
if (childTag === 'br') {
linkRuns.push(new TextRun({ break: 1 }) as any);
return;
}
if (childTag === 'img') {
const imgSrc = childEl.getAttribute('src');
if (imgSrc) {
@@ -566,9 +570,15 @@ export const generateDocxDocument = async (
if (tag === 's' || tag === 'strike') fmt.strike = true;
if (tag === 'sub') fmt.subScript = true;
if (tag === 'sup') fmt.superScript = true;
if (tag === 'br') {
runs.push(new TextRun({ break: 1 }) as any);
return;
}
if (tag === 'code') {
fmt.font = codeFontResolved;
fmt.color = codeTextColor;
if (elements?.code?.size) fmt.size = pt(elements.code.size);
if (codeBgColor) fmt.shading = { fill: codeBgColor, type: ShadingType.CLEAR };
}
// Handle links
@@ -593,6 +603,10 @@ export const generateDocxDocument = async (
} else if (node.nodeType === Node.ELEMENT_NODE) {
const childEl = node as HTMLElement;
const childTag = childEl.tagName.toLowerCase();
if (childTag === 'br') {
linkRuns.push(new TextRun({ break: 1 }) as any);
return;
}
if (childTag === 'img') {
const imgSrc = childEl.getAttribute('src');
if (imgSrc) {
@@ -669,7 +683,7 @@ export const generateDocxDocument = async (
spacing: {
before: 0,
after: 0,
line: Math.round((cfg?.spacing?.line || 1.2) * 240),
line: Math.round(Math.max(cfg?.spacing?.line || 1.2, 1.5) * 240),
}
}));
@@ -814,20 +828,27 @@ export const generateDocxDocument = async (
console.log('TABLE DOCX: Processing table');
// Get table-level border config
const tableBorderConfig = elements?.table?.border;
const tableBorderColor = resolveColorToHex(tableBorderConfig?.color) || (isDark ? '444444' : 'CCCCCC');
const tableBorderWidth = tableBorderConfig?.width || 1;
const tableBorderStyle = mapBorderStyle(tableBorderConfig?.style || 'single');
// Get table-level border config - check both generic and per-side borders
const tblCfg = elements?.table;
const defaultBorderColor = isDark ? '444444' : 'CCCCCC';
// Build a docx border spec from a template border config.
// Falls back to `fallbackColor` when the config colour does not resolve, to a width of 1
// when none is set, and to the 'single' style when none is set.
// NOTE(review): the `* 8` presumably converts points to eighths of a point - confirm units.
const makeBorder = (cfg: any, fallbackColor: string) => {
  const color = resolveColorToHex(cfg?.color) || fallbackColor;
  const size = (cfg?.width || 1) * 8;
  const style = mapBorderStyle(cfg?.style || 'single');
  return { color, size, style };
};
// Create table-level borders (outer border only by default)
const noBorder = { style: BorderStyle.NONE, size: 0, color: 'auto' };
const htmlBorderAttr = tableEl.getAttribute('border');
const hasHtmlBorder = htmlBorderAttr && parseInt(htmlBorderAttr) > 0;
const genericBorder = tblCfg?.border ? makeBorder(tblCfg.border, defaultBorderColor) : null;
const tableBorders = {
top: { color: tableBorderColor, size: tableBorderWidth * 8, style: tableBorderStyle },
bottom: { color: tableBorderColor, size: tableBorderWidth * 8, style: tableBorderStyle },
left: { color: tableBorderColor, size: tableBorderWidth * 8, style: tableBorderStyle },
right: { color: tableBorderColor, size: tableBorderWidth * 8, style: tableBorderStyle },
insideHorizontal: { style: BorderStyle.NIL, size: 0 },
insideVertical: { style: BorderStyle.NIL, size: 0 }
top: tblCfg?.borderTop ? makeBorder(tblCfg.borderTop, defaultBorderColor) : (genericBorder || (hasHtmlBorder ? { color: defaultBorderColor, size: 4, style: BorderStyle.SINGLE } : noBorder)),
bottom: tblCfg?.borderBottom ? makeBorder(tblCfg.borderBottom, defaultBorderColor) : (genericBorder || (hasHtmlBorder ? { color: defaultBorderColor, size: 4, style: BorderStyle.SINGLE } : noBorder)),
left: tblCfg?.borderLeft ? makeBorder(tblCfg.borderLeft, defaultBorderColor) : (genericBorder || (hasHtmlBorder ? { color: defaultBorderColor, size: 4, style: BorderStyle.SINGLE } : noBorder)),
right: tblCfg?.borderRight ? makeBorder(tblCfg.borderRight, defaultBorderColor) : (genericBorder || (hasHtmlBorder ? { color: defaultBorderColor, size: 4, style: BorderStyle.SINGLE } : noBorder)),
insideHorizontal: elements?.th?.borderBottom ? makeBorder(elements.th.borderBottom, defaultBorderColor) :
(hasHtmlBorder ? { color: defaultBorderColor, size: 4, style: BorderStyle.SINGLE } : noBorder),
insideVertical: hasHtmlBorder ? { color: defaultBorderColor, size: 4, style: BorderStyle.SINGLE } : noBorder
};
for (const rowEl of Array.from(tableEl.querySelectorAll('tr'))) {
@@ -855,8 +876,11 @@ export const generateDocxDocument = async (
bold: isHeader || undefined
});
// Get background from config
const cellBg = resolveColorToHex(cellConfig?.background);
// Get background: HTML bgcolor attribute takes priority, then template config
const htmlBgColor = cell.getAttribute('bgcolor');
const cellBg = htmlBgColor
? formatColor(htmlBgColor.replace('#', ''))
: resolveColorToHex(cellConfig?.background);
console.log(`TABLE CELL DOCX [${isHeader ? 'TH' : 'TD'}]:`, {
text: cell.textContent?.substring(0, 30) + (cell.textContent && cell.textContent.length > 30 ? '...' : ''),
@@ -867,7 +891,8 @@ export const generateDocxDocument = async (
bold: isHeader || undefined
});
// Resolve cell-specific borders from template
// Resolve cell-specific borders from template config only
// (HTML border is handled at table level via insideH/insideV to avoid overriding thick outer borders)
const cellBorders: any = {};
if (cellConfig?.border) {
const b = { color: resolveColorToHex(cellConfig.border.color) || '000000', style: mapBorderStyle(cellConfig.border.style), size: (cellConfig.border.width || 1) * 8 };
@@ -884,10 +909,10 @@ export const generateDocxDocument = async (
cells.push(new TableCell({
children: [new Paragraph({
children: cellRuns.length > 0 ? cellRuns : [new TextRun({ text: cell.textContent || '' })],
alignment: isHeader ? AlignmentType.CENTER : mapAlignment(cellConfig?.align),
alignment: mapAlignment(cellConfig?.align || cell.getAttribute('align') || (cell.getAttribute('style')?.match(/text-align:\s*(\w+)/)?.[1]) || undefined),
spacing: {
after: 0,
line: Math.round((body.spacing?.line || 1.2) * 240)
line: Math.round(Math.max(elements?.table?.spacing?.line || body.spacing?.line || 1.2, 1.5) * 240)
}
})],
shading: cellBg ? { fill: cellBg, type: ShadingType.CLEAR } : undefined,
@@ -1029,8 +1054,8 @@ export const generateDocxDocument = async (
}
}
const liSpacingBefore = (elements?.li?.spacing?.before || 4) * 20;
const liSpacingAfter = (elements?.li?.spacing?.after || 4) * 20;
const liSpacingBefore = (elements?.li?.spacing?.before ?? 4) * 20;
const liSpacingAfter = (elements?.li?.spacing?.after ?? 4) * 20;
const liLineHeight = (elements?.li?.spacing?.line || body.spacing?.line || 1.2) * 240;
// Log the actual text runs and their styling
@@ -1233,9 +1258,12 @@ export const generateDocxDocument = async (
return results;
}
// Tables
// Tables - wrapped in empty spacer paragraphs that carry the before/after gap.
// A missing (or zero) template value falls back to 18; values are multiplied by 20 for docx.
if (tag === 'table') {
  const tableSpacing = elements?.table?.spacing;

  // Leading spacer
  const gapBefore = (tableSpacing?.before || 18) * 20;
  results.push(new Paragraph({ spacing: { before: gapBefore, after: 0 }, children: [] }));

  results.push(processTable(el));

  // Trailing spacer
  const gapAfter = (tableSpacing?.after || 18) * 20;
  results.push(new Paragraph({ spacing: { before: 0, after: gapAfter }, children: [] }));

  return results;
}
@@ -1266,62 +1294,110 @@ export const generateDocxDocument = async (
// Center paragraphs that only contain an image
const isImageOnly = el.querySelector('img') !== null && !el.textContent?.trim();
const pSpacing = elements?.p?.spacing || body.spacing;
// When template spacing is 0, CSS generator skips the margin, so browser default 1em applies
const pAfter = (pSpacing?.after || body.size) * 20;
results.push(new Paragraph({
children: runs.length > 0 ? runs : [new TextRun({ text: el.textContent || '' })],
alignment: isImageOnly ? AlignmentType.CENTER : mapAlignment(body.align),
alignment: isImageOnly ? AlignmentType.CENTER : mapAlignment(elements?.p?.align || body.align),
indent: isImageOnly ? undefined : (elements?.p?.indent ? { firstLine: elements.p.indent * 20 } : undefined),
spacing: {
before: (body.spacing?.before || 0) * 20,
after: (body.spacing?.after || 0) * 20,
line: Math.round((body.spacing?.line || 1.2) * 240)
before: (pSpacing?.before || 0) * 20,
after: pAfter,
line: Math.round((pSpacing?.line || 1.2) * 240)
},
shading: bgMatch ? { fill: formatColor(resolveColorToHex(bgMatch[1])), type: ShadingType.CLEAR } : undefined
}));
return results;
}
// Blockquotes
// Blockquotes - process each inner <p> as a separate paragraph with blockquote styling
// Word groups adjacent paragraphs with identical borders, showing top/bottom only on outer edges
if (tag === 'blockquote') {
const bqConfig = elements?.blockquote;
const runs = processTextRuns(el, {
font: bqConfig?.font ? resolveFont(bqConfig.font, fonts || {}) : body.font,
size: pt(bqConfig?.size || body.size),
color: formatColor(resolveColorToHex(bqConfig?.color || body.color)),
italics: true
const bqFont = bqConfig?.font ? resolveFont(bqConfig.font, fonts || {}) : body.font;
const bqSize = pt(bqConfig?.size || body.size);
const bqColor = formatColor(resolveColorToHex(bqConfig?.color || body.color));
const bqFmt = { font: bqFont, size: bqSize, color: bqColor, italics: bqConfig?.italic !== false };
console.log('DOCX BLOCKQUOTE:', {
font: bqFont, size: bqSize, color: bqColor, childCount: el.children.length
});
const borderColor = resolveColorToHex(bqConfig?.borderLeft?.color) || accentColor;
const borderWidth = bqConfig?.borderLeft?.width || 3;
const debugKey = 'blockquote-debug';
if (!visitedTags.has(debugKey)) {
visitedTags.add(debugKey);
console.log('DOCX BLOCKQUOTE CONFIG:', {
font: bqConfig?.font,
size: bqConfig?.size,
color: formatColor(resolveColorToHex(bqConfig?.color)),
border: { color: borderColor, width: borderWidth },
background: bqConfig?.background
});
const bqBorder: any = {};
if (bqConfig?.border) {
const b = { color: resolveColorToHex(bqConfig.border.color) || accentColor, space: 6, style: mapBorderStyle(bqConfig.border.style), size: (bqConfig.border.width || 1) * 8 };
bqBorder.top = b; bqBorder.bottom = b; bqBorder.left = b; bqBorder.right = b;
}
if (bqConfig?.borderTop) bqBorder.top = { color: resolveColorToHex(bqConfig.borderTop.color) || accentColor, space: 6, style: mapBorderStyle(bqConfig.borderTop.style), size: (bqConfig.borderTop.width || 1) * 8 };
if (bqConfig?.borderBottom) bqBorder.bottom = { color: resolveColorToHex(bqConfig.borderBottom.color) || accentColor, space: 6, style: mapBorderStyle(bqConfig.borderBottom.style), size: (bqConfig.borderBottom.width || 1) * 8 };
if (bqConfig?.borderLeft) bqBorder.left = { color: resolveColorToHex(bqConfig.borderLeft.color) || accentColor, space: 10, style: mapBorderStyle(bqConfig.borderLeft.style), size: (bqConfig.borderLeft.width || 1) * 8 };
if (bqConfig?.borderRight) bqBorder.right = { color: resolveColorToHex(bqConfig.borderRight.color) || accentColor, space: 6, style: mapBorderStyle(bqConfig.borderRight.style), size: (bqConfig.borderRight.width || 1) * 8 };
results.push(new Paragraph({
const bqLine = Math.max(bqConfig?.spacing?.line || body.spacing?.line || 1.2, 1.5);
const bqBorderObj = Object.keys(bqBorder).length > 0 ? bqBorder : undefined;
const bqShading = bqConfig?.background ? { fill: resolveColorToHex(bqConfig.background), type: ShadingType.CLEAR } : undefined;
const bqSpacing = {
before: 0,
after: body.size * 20, // 1em gap between inner paragraphs (matches browser default)
line: Math.round(bqLine * 240)
};
// Process children - each <p> becomes its own paragraph with blockquote styling
const childEls = Array.from(el.children);
const makeBqParagraph = (runs: any[], isFirst: boolean, isLast: boolean, align?: any) => new Paragraph({
children: runs,
indent: { left: 720 },
border: { left: { color: borderColor, space: 10, style: BorderStyle.SINGLE, size: borderWidth * 8 } },
shading: bqConfig?.background ? { fill: resolveColorToHex(bqConfig.background), type: ShadingType.CLEAR } : (isDark ? undefined : { fill: 'F8F8F8', type: ShadingType.CLEAR }),
alignment: align || mapAlignment(bqConfig?.align),
indent: bqConfig?.indent ? { left: bqConfig.indent * 20 } : undefined,
border: bqBorderObj,
shading: bqShading,
spacing: {
before: (bqConfig?.spacing?.before || 12) * 20,
after: (bqConfig?.spacing?.after || 12) * 20,
line: Math.round((bqConfig?.spacing?.line || body.spacing?.line || 1.2) * 240)
...bqSpacing,
before: isFirst ? (bqConfig?.spacing?.before || 12) * 20 : bqSpacing.before,
after: isLast ? (bqConfig?.spacing?.after || 12) * 20 : bqSpacing.after,
}
});
if (childEls.length === 0) {
// No child elements - process as single paragraph with full blockquote styling
const runs = processTextRuns(el, bqFmt);
results.push(makeBqParagraph(runs, true, true, mapAlignment(bqConfig?.align)));
} else {
childEls.forEach((child, i) => {
const childEl = child as HTMLElement;
const childTagName = childEl.tagName.toLowerCase();
// Nested blockquotes - recurse
if (childTagName === 'blockquote') {
results.push(...processNode(childEl));
return;
}
const isP = childTagName === 'p';
// CSS specificity: <p> rules override inherited blockquote styles
// Only italic inherits since .page p doesn't set font-style
const childFmt = isP ? {
font: body.font,
size: pt(body.size),
color: formatColor(resolveColorToHex(body.color)),
italics: bqConfig?.italic !== false
} : bqFmt;
const childAlign = isP ? mapAlignment(elements?.p?.align || body.align) : mapAlignment(bqConfig?.align);
const runs = processTextRuns(childEl, childFmt);
if (runs.length > 0) {
results.push(makeBqParagraph(runs, i === 0, i === childEls.length - 1, childAlign));
}
});
}
}));
return results;
}
// Lists
// Lists - the list items are bracketed by empty spacer paragraphs.
// Spacing defaults to 12 only when the template leaves it undefined/null; an explicit 0
// is respected and suppresses the corresponding spacer entirely.
if (tag === 'ul' || tag === 'ol') {
  const isOrdered = tag === 'ol';
  const listCfg = isOrdered ? elements?.ol : elements?.ul;
  const listBefore = (listCfg?.spacing?.before ?? 12) * 20;
  const listAfter = (listCfg?.spacing?.after ?? 12) * 20;

  if (listBefore) {
    results.push(new Paragraph({ spacing: { before: listBefore, after: 0 }, children: [] }));
  }
  results.push(...processList(el, isOrdered, 0));
  if (listAfter) {
    results.push(new Paragraph({ spacing: { before: 0, after: listAfter }, children: [] }));
  }
  return results;
}
@@ -1335,7 +1411,8 @@ export const generateDocxDocument = async (
if (imgRun) {
results.push(new Paragraph({
children: [imgRun],
spacing: { before: 120, after: 120 },
alignment: AlignmentType.CENTER,
spacing: { before: (elements?.img?.spacing?.before ?? 18) * 20, after: (elements?.img?.spacing?.after ?? 18) * 20 },
}));
}
} else {
@@ -1369,12 +1446,82 @@ export const generateDocxDocument = async (
},
spacing: {
before: (hrConfig?.spacing?.before || 12) * 20,
after: (hrConfig?.spacing?.after || 12) * 20
after: 0
}
}));
return results;
}
// Divs - split into paragraphs for text/inline content, recurse for nested block elements.
// An inline `text-align` on the div overrides the body alignment for the paragraphs created
// here; nested block elements are handed back to processNode and style themselves.
if (tag === 'div') {
  const style = el.getAttribute('style') || '';
  const alignMatch = style.match(/text-align:\s*(left|center|right|justify)/i);
  const divAlign = alignMatch ? alignMatch[1].toLowerCase() : undefined;
  const divSpacing = elements?.p?.spacing || body.spacing;
  const blockTags = new Set(['div', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'blockquote', 'ul', 'ol', 'table', 'pre', 'hr']);

  // Body-text run formatting used for all text that sits directly inside the div
  const bodyRunFormat = () => ({
    font: body.font, size: pt(body.size), color: formatColor(resolveColorToHex(body.color))
  });

  // Wrap runs in a paragraph using the div alignment (CSS 'justify' is passed on as 'both')
  // and paragraph spacing; a missing/zero `after` falls back to the body font size.
  const makeDivParagraph = (runs: any[]) => new Paragraph({
    children: runs,
    alignment: divAlign ? mapAlignment(divAlign === 'justify' ? 'both' : divAlign) : mapAlignment(body.align),
    spacing: { before: 0, after: (divSpacing?.after || body.size) * 20, line: Math.round((divSpacing?.line || 1.2) * 240) }
  });

  // Check if div has nested block elements
  const hasBlockChildren = Array.from(el.children).some(c => blockTags.has(c.tagName.toLowerCase()));

  if (hasBlockChildren) {
    // Split: group consecutive inline/text nodes into paragraphs, recurse block elements
    let inlineNodes: Node[] = [];

    const flushInline = () => {
      if (inlineNodes.length === 0) return;
      // Clone the pending nodes into a temp container in the parsed document so they can
      // be processed as one inline run sequence without detaching them from the div
      const temp = doc.createElement('span');
      inlineNodes.forEach(n => temp.appendChild(n.cloneNode(true)));
      // Whitespace-only groups (e.g. newlines between block children) produce no paragraph
      if (temp.textContent?.trim()) {
        const runs = processTextRuns(temp as any, bodyRunFormat());
        if (runs.length > 0) {
          results.push(makeDivParagraph(runs));
        }
      }
      inlineNodes = [];
    };

    for (const child of Array.from(el.childNodes)) {
      if (child.nodeType === Node.ELEMENT_NODE && blockTags.has((child as HTMLElement).tagName.toLowerCase())) {
        flushInline();
        results.push(...processNode(child));
      } else {
        inlineNodes.push(child);
      }
    }
    flushInline();
  } else {
    // No nested blocks - treat entire div as one paragraph when it has text.
    // (The previous image-only centering check lived inside this text-present branch and
    // could never be true; text-free divs are handled by the child loop below instead.)
    if (el.textContent?.trim()) {
      const runs = processTextRuns(el, bodyRunFormat());
      if (runs.length > 0) {
        results.push(makeDivParagraph(runs));
        return results;
      }
    }
    // No text - process children for images etc.
    for (const child of Array.from(el.childNodes)) {
      results.push(...processNode(child));
    }
  }
  return results;
}
// Default: process children
for (const child of Array.from(el.childNodes)) {
results.push(...processNode(child));

View File

@@ -1,6 +1,8 @@
import TurndownService from 'turndown';
// @ts-ignore
import { gfm } from 'turndown-plugin-gfm';
export function htmlToMarkdown(html: string): string {
export function htmlToMarkdown(html: string, preserveAlignment = false): string {
const turndown = new TurndownService({
headingStyle: 'atx',
hr: '---',
@@ -10,6 +12,39 @@ export function htmlToMarkdown(html: string): string {
strongDelimiter: '**',
});
// Enable GFM tables
turndown.use(gfm);
// Convert Blogger caption tables (image + caption) to image + italic caption.
// These are layout tables, not data tables - without this rule, GFM converts them
// to markdown tables which then get data table borders in the export.
turndown.addRule('bloggerCaptionTable', {
  filter: (node) =>
    node.nodeName === 'TABLE' &&
    (node as HTMLElement).classList.contains('tr-caption-container'),
  replacement: (_content, node) => {
    const el = node as HTMLElement;
    const img = el.querySelector('img');
    const caption = el.querySelector('.tr-caption');
    let result = '\n\n';

    if (img) {
      const src = img.getAttribute('src') || '';
      const alt = img.getAttribute('alt') || '';
      const image = `![${alt}](${src})`;
      // Only wrap the image in a link when the enclosing anchor is part of this table and
      // actually has a target. An anchor without href would otherwise emit "(null)", and an
      // anchor outside the table is not part of the node this rule replaces.
      const link = img.closest('a');
      const href = link && el.contains(link) ? link.getAttribute('href') : null;
      result += href ? `[${image}](${href})\n` : `${image}\n`;
    }

    const captionText = caption?.textContent?.trim();
    if (captionText) {
      result += `\n*${captionText}*\n`;
    }
    return result + '\n';
  },
});
// Strip meaningless wrapper spans (Blogger, Google Docs, Word paste, etc.)
turndown.addRule('stripDecorativeSpans', {
filter: (node) => {
@@ -23,6 +58,21 @@ export function htmlToMarkdown(html: string): string {
replacement: (content) => content,
});
// For HTML content: pass <div>s whose inline style sets text-align to right or center
// through as raw HTML (outerHTML) instead of converting them to markdown.
if (preserveAlignment) {
  const alignedStyle = /text-align:\s*(right|center)/i;
  turndown.addRule('preserveAlignment', {
    filter: (node) =>
      node.nodeName === 'DIV' && alignedStyle.test(node.getAttribute('style') || ''),
    replacement: (_content, node) => `\n\n${(node as HTMLElement).outerHTML}\n\n`,
  });
}
let markdown = turndown.turndown(html);
// Clean up excessive blank lines
@@ -31,8 +81,9 @@ export function htmlToMarkdown(html: string): string {
// Regular spaces would trigger markdown code block detection at 4+ indent
// &nbsp; entities pass through Marked.js as HTML and render as visible spaces
markdown = markdown.replace(/\u00A0/g, '&nbsp;');
// Clean up trailing whitespace on lines
markdown = markdown.replace(/[ \t]+$/gm, '');
// Clean up whitespace-only lines but DON'T strip trailing spaces on content lines
// Turndown uses two trailing spaces for <br> line breaks - stripping them breaks line breaks
markdown = markdown.replace(/^[ \t]+$/gm, '');
return markdown.trim();
}