Improved HTML conversion fidelity and DOCX export accuracy
This commit is contained in:
@@ -228,7 +228,7 @@ This means all 165+ styles automatically meet WCAG AAA contrast requirements reg
|
||||
**TypoGenie is fully portable** - no installation, no registry entries, no files scattered across your system.
|
||||
|
||||
Just download and run:
|
||||
- 🪟 **Windows**: [`TypoGenie-v1.2.0-Portable.zip`](https://git.lashman.live/lashman/typogenie/releases) - Extract and run, nothing else needed
|
||||
- 🪟 **Windows**: [`TypoGenie-v1.2.1-Portable.zip`](https://git.lashman.live/lashman/typogenie/releases) - Extract and run, nothing else needed
|
||||
|
||||
**How it works:**
|
||||
```
|
||||
|
||||
Generated
+10
-3
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "typogenie",
|
||||
"version": "1.0.0",
|
||||
"version": "1.2.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "typogenie",
|
||||
"version": "1.0.0",
|
||||
"version": "1.2.0",
|
||||
"dependencies": {
|
||||
"@tauri-apps/api": "^2.0.0",
|
||||
"@tauri-apps/plugin-dialog": "^2.0.0",
|
||||
@@ -22,7 +22,8 @@
|
||||
"motion": "^12.29.2",
|
||||
"react": "^19.2.4",
|
||||
"react-dom": "^19.2.4",
|
||||
"turndown": "^7.2.2"
|
||||
"turndown": "^7.2.2",
|
||||
"turndown-plugin-gfm": "^1.0.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@tailwindcss/postcss": "^4.1.18",
|
||||
@@ -2914,6 +2915,12 @@
|
||||
"@mixmark-io/domino": "^2.2.0"
|
||||
}
|
||||
},
|
||||
"node_modules/turndown-plugin-gfm": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/turndown-plugin-gfm/-/turndown-plugin-gfm-1.0.2.tgz",
|
||||
"integrity": "sha512-vwz9tfvF7XN/jE0dGoBei3FXWuvll78ohzCZQuOb+ZjWrs3a0XhQVomJEb2Qh4VHTPNRO4GPZh0V7VRbiWwkRg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/typescript": {
|
||||
"version": "5.8.3",
|
||||
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.8.3.tgz",
|
||||
|
||||
+3
-2
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "typogenie",
|
||||
"private": true,
|
||||
"version": "1.2.0",
|
||||
"version": "1.2.1",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "vite",
|
||||
@@ -28,7 +28,8 @@
|
||||
"motion": "^12.29.2",
|
||||
"react": "^19.2.4",
|
||||
"react-dom": "^19.2.4",
|
||||
"turndown": "^7.2.2"
|
||||
"turndown": "^7.2.2",
|
||||
"turndown-plugin-gfm": "^1.0.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@tailwindcss/postcss": "^4.1.18",
|
||||
|
||||
Generated
+1
-1
@@ -4918,7 +4918,7 @@ checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
|
||||
|
||||
[[package]]
|
||||
name = "typogenie"
|
||||
version = "1.0.0"
|
||||
version = "1.2.1"
|
||||
dependencies = [
|
||||
"log",
|
||||
"opener",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "typogenie"
|
||||
version = "1.2.0"
|
||||
version = "1.2.1"
|
||||
description = "TypoGenie - Portable Markdown to Word document converter"
|
||||
authors = ["TypoGenie Contributors"]
|
||||
license = "CC0-1.0"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"$schema": "../node_modules/@tauri-apps/cli/config.schema.json",
|
||||
"productName": "TypoGenie",
|
||||
"version": "1.2.0",
|
||||
"version": "1.2.1",
|
||||
"identifier": "live.lashman.typogenie",
|
||||
"build": {
|
||||
"frontendDist": "../dist",
|
||||
|
||||
+1
-1
@@ -156,7 +156,7 @@ const App: React.FC = () => {
|
||||
let processedContent = text;
|
||||
if (detection.type === 'html') {
|
||||
try {
|
||||
processedContent = htmlToMarkdown(text);
|
||||
processedContent = htmlToMarkdown(text, true);
|
||||
} catch (err) {
|
||||
console.error('HTML conversion failed:', err);
|
||||
}
|
||||
|
||||
@@ -273,7 +273,6 @@ export const Preview: React.FC<PreviewProps> = ({
|
||||
|
||||
// Track blob URL for cleanup
|
||||
const blobUrlRef = useRef<string | null>(null);
|
||||
|
||||
// Render preview whenever dependencies change
|
||||
useEffect(() => {
|
||||
if (!iframeRef.current || !style) return;
|
||||
@@ -322,7 +321,7 @@ export const Preview: React.FC<PreviewProps> = ({
|
||||
`.page {`,
|
||||
` width: ${paperSize === 'A4' ? '210mm' : '8.5in'};`,
|
||||
` min-height: ${paperSize === 'A4' ? '297mm' : '11in'};`,
|
||||
` padding: 25mm;`,
|
||||
` padding: ${style.page?.margins ? `${style.page.margins.top}pt ${style.page.margins.right}pt ${style.page.margins.bottom}pt ${style.page.margins.left}pt` : '25mm'};`,
|
||||
` box-shadow: 0 20px 25px -5px rgba(0, 0, 0, 0.4);`,
|
||||
` box-sizing: border-box;`,
|
||||
` margin: 0 auto;`,
|
||||
|
||||
+205
-58
@@ -193,9 +193,9 @@ export const generateDocxDocument = async (
|
||||
const src = img.getAttribute('src');
|
||||
if (!src || src.startsWith('data:')) continue;
|
||||
|
||||
// Get dimensions from HTML attributes first
|
||||
const htmlW = parseInt(img.getAttribute('data-original-width') || img.getAttribute('width') || '0');
|
||||
const htmlH = parseInt(img.getAttribute('data-original-height') || img.getAttribute('height') || '0');
|
||||
// Get display dimensions from HTML attributes (prefer width/height over data-original-*)
|
||||
const htmlW = parseInt(img.getAttribute('width') || '0');
|
||||
const htmlH = parseInt(img.getAttribute('height') || '0');
|
||||
|
||||
try {
|
||||
// Step 1: Fetch the image bytes
|
||||
@@ -447,6 +447,10 @@ export const generateDocxDocument = async (
|
||||
} else if (node.nodeType === Node.ELEMENT_NODE) {
|
||||
const childEl = node as HTMLElement;
|
||||
const childTag = childEl.tagName.toLowerCase();
|
||||
if (childTag === 'br') {
|
||||
linkRuns.push(new TextRun({ break: 1 }) as any);
|
||||
return;
|
||||
}
|
||||
if (childTag === 'img') {
|
||||
const imgSrc = childEl.getAttribute('src');
|
||||
if (imgSrc) {
|
||||
@@ -566,9 +570,15 @@ export const generateDocxDocument = async (
|
||||
if (tag === 's' || tag === 'strike') fmt.strike = true;
|
||||
if (tag === 'sub') fmt.subScript = true;
|
||||
if (tag === 'sup') fmt.superScript = true;
|
||||
if (tag === 'br') {
|
||||
runs.push(new TextRun({ break: 1 }) as any);
|
||||
return;
|
||||
}
|
||||
if (tag === 'code') {
|
||||
fmt.font = codeFontResolved;
|
||||
fmt.color = codeTextColor;
|
||||
if (elements?.code?.size) fmt.size = pt(elements.code.size);
|
||||
if (codeBgColor) fmt.shading = { fill: codeBgColor, type: ShadingType.CLEAR };
|
||||
}
|
||||
|
||||
// Handle links
|
||||
@@ -593,6 +603,10 @@ export const generateDocxDocument = async (
|
||||
} else if (node.nodeType === Node.ELEMENT_NODE) {
|
||||
const childEl = node as HTMLElement;
|
||||
const childTag = childEl.tagName.toLowerCase();
|
||||
if (childTag === 'br') {
|
||||
linkRuns.push(new TextRun({ break: 1 }) as any);
|
||||
return;
|
||||
}
|
||||
if (childTag === 'img') {
|
||||
const imgSrc = childEl.getAttribute('src');
|
||||
if (imgSrc) {
|
||||
@@ -669,7 +683,7 @@ export const generateDocxDocument = async (
|
||||
spacing: {
|
||||
before: 0,
|
||||
after: 0,
|
||||
line: Math.round((cfg?.spacing?.line || 1.2) * 240),
|
||||
line: Math.round(Math.max(cfg?.spacing?.line || 1.2, 1.5) * 240),
|
||||
}
|
||||
}));
|
||||
|
||||
@@ -814,20 +828,27 @@ export const generateDocxDocument = async (
|
||||
|
||||
console.log('TABLE DOCX: Processing table');
|
||||
|
||||
// Get table-level border config
|
||||
const tableBorderConfig = elements?.table?.border;
|
||||
const tableBorderColor = resolveColorToHex(tableBorderConfig?.color) || (isDark ? '444444' : 'CCCCCC');
|
||||
const tableBorderWidth = tableBorderConfig?.width || 1;
|
||||
const tableBorderStyle = mapBorderStyle(tableBorderConfig?.style || 'single');
|
||||
// Get table-level border config - check both generic and per-side borders
|
||||
const tblCfg = elements?.table;
|
||||
const defaultBorderColor = isDark ? '444444' : 'CCCCCC';
|
||||
const makeBorder = (cfg: any, fallbackColor: string) => ({
|
||||
color: resolveColorToHex(cfg?.color) || fallbackColor,
|
||||
size: (cfg?.width || 1) * 8,
|
||||
style: mapBorderStyle(cfg?.style || 'single')
|
||||
});
|
||||
|
||||
// Create table-level borders (outer border only by default)
|
||||
const noBorder = { style: BorderStyle.NONE, size: 0, color: 'auto' };
|
||||
const htmlBorderAttr = tableEl.getAttribute('border');
|
||||
const hasHtmlBorder = htmlBorderAttr && parseInt(htmlBorderAttr) > 0;
|
||||
const genericBorder = tblCfg?.border ? makeBorder(tblCfg.border, defaultBorderColor) : null;
|
||||
const tableBorders = {
|
||||
top: { color: tableBorderColor, size: tableBorderWidth * 8, style: tableBorderStyle },
|
||||
bottom: { color: tableBorderColor, size: tableBorderWidth * 8, style: tableBorderStyle },
|
||||
left: { color: tableBorderColor, size: tableBorderWidth * 8, style: tableBorderStyle },
|
||||
right: { color: tableBorderColor, size: tableBorderWidth * 8, style: tableBorderStyle },
|
||||
insideHorizontal: { style: BorderStyle.NIL, size: 0 },
|
||||
insideVertical: { style: BorderStyle.NIL, size: 0 }
|
||||
top: tblCfg?.borderTop ? makeBorder(tblCfg.borderTop, defaultBorderColor) : (genericBorder || (hasHtmlBorder ? { color: defaultBorderColor, size: 4, style: BorderStyle.SINGLE } : noBorder)),
|
||||
bottom: tblCfg?.borderBottom ? makeBorder(tblCfg.borderBottom, defaultBorderColor) : (genericBorder || (hasHtmlBorder ? { color: defaultBorderColor, size: 4, style: BorderStyle.SINGLE } : noBorder)),
|
||||
left: tblCfg?.borderLeft ? makeBorder(tblCfg.borderLeft, defaultBorderColor) : (genericBorder || (hasHtmlBorder ? { color: defaultBorderColor, size: 4, style: BorderStyle.SINGLE } : noBorder)),
|
||||
right: tblCfg?.borderRight ? makeBorder(tblCfg.borderRight, defaultBorderColor) : (genericBorder || (hasHtmlBorder ? { color: defaultBorderColor, size: 4, style: BorderStyle.SINGLE } : noBorder)),
|
||||
insideHorizontal: elements?.th?.borderBottom ? makeBorder(elements.th.borderBottom, defaultBorderColor) :
|
||||
(hasHtmlBorder ? { color: defaultBorderColor, size: 4, style: BorderStyle.SINGLE } : noBorder),
|
||||
insideVertical: hasHtmlBorder ? { color: defaultBorderColor, size: 4, style: BorderStyle.SINGLE } : noBorder
|
||||
};
|
||||
|
||||
for (const rowEl of Array.from(tableEl.querySelectorAll('tr'))) {
|
||||
@@ -855,8 +876,11 @@ export const generateDocxDocument = async (
|
||||
bold: isHeader || undefined
|
||||
});
|
||||
|
||||
// Get background from config
|
||||
const cellBg = resolveColorToHex(cellConfig?.background);
|
||||
// Get background: HTML bgcolor attribute takes priority, then template config
|
||||
const htmlBgColor = cell.getAttribute('bgcolor');
|
||||
const cellBg = htmlBgColor
|
||||
? formatColor(htmlBgColor.replace('#', ''))
|
||||
: resolveColorToHex(cellConfig?.background);
|
||||
|
||||
console.log(`TABLE CELL DOCX [${isHeader ? 'TH' : 'TD'}]:`, {
|
||||
text: cell.textContent?.substring(0, 30) + (cell.textContent && cell.textContent.length > 30 ? '...' : ''),
|
||||
@@ -867,7 +891,8 @@ export const generateDocxDocument = async (
|
||||
bold: isHeader || undefined
|
||||
});
|
||||
|
||||
// Resolve cell-specific borders from template
|
||||
// Resolve cell-specific borders from template config only
|
||||
// (HTML border is handled at table level via insideH/insideV to avoid overriding thick outer borders)
|
||||
const cellBorders: any = {};
|
||||
if (cellConfig?.border) {
|
||||
const b = { color: resolveColorToHex(cellConfig.border.color) || '000000', style: mapBorderStyle(cellConfig.border.style), size: (cellConfig.border.width || 1) * 8 };
|
||||
@@ -884,10 +909,10 @@ export const generateDocxDocument = async (
|
||||
cells.push(new TableCell({
|
||||
children: [new Paragraph({
|
||||
children: cellRuns.length > 0 ? cellRuns : [new TextRun({ text: cell.textContent || '' })],
|
||||
alignment: isHeader ? AlignmentType.CENTER : mapAlignment(cellConfig?.align),
|
||||
alignment: mapAlignment(cellConfig?.align || cell.getAttribute('align') || (cell.getAttribute('style')?.match(/text-align:\s*(\w+)/)?.[1]) || undefined),
|
||||
spacing: {
|
||||
after: 0,
|
||||
line: Math.round((body.spacing?.line || 1.2) * 240)
|
||||
line: Math.round(Math.max(elements?.table?.spacing?.line || body.spacing?.line || 1.2, 1.5) * 240)
|
||||
}
|
||||
})],
|
||||
shading: cellBg ? { fill: cellBg, type: ShadingType.CLEAR } : undefined,
|
||||
@@ -1029,8 +1054,8 @@ export const generateDocxDocument = async (
|
||||
}
|
||||
}
|
||||
|
||||
const liSpacingBefore = (elements?.li?.spacing?.before || 4) * 20;
|
||||
const liSpacingAfter = (elements?.li?.spacing?.after || 4) * 20;
|
||||
const liSpacingBefore = (elements?.li?.spacing?.before ?? 4) * 20;
|
||||
const liSpacingAfter = (elements?.li?.spacing?.after ?? 4) * 20;
|
||||
const liLineHeight = (elements?.li?.spacing?.line || body.spacing?.line || 1.2) * 240;
|
||||
|
||||
// Log the actual text runs and their styling
|
||||
@@ -1233,9 +1258,12 @@ export const generateDocxDocument = async (
|
||||
return results;
|
||||
}
|
||||
|
||||
// Tables
|
||||
// Tables - with spacing paragraphs before/after
|
||||
if (tag === 'table') {
|
||||
const tblSpacing = elements?.table?.spacing;
|
||||
results.push(new Paragraph({ spacing: { before: (tblSpacing?.before || 18) * 20, after: 0 }, children: [] }));
|
||||
results.push(processTable(el));
|
||||
results.push(new Paragraph({ spacing: { before: 0, after: (tblSpacing?.after || 18) * 20 }, children: [] }));
|
||||
return results;
|
||||
}
|
||||
|
||||
@@ -1266,62 +1294,110 @@ export const generateDocxDocument = async (
|
||||
// Center paragraphs that only contain an image
|
||||
const isImageOnly = el.querySelector('img') !== null && !el.textContent?.trim();
|
||||
|
||||
const pSpacing = elements?.p?.spacing || body.spacing;
|
||||
// When template spacing is 0, CSS generator skips the margin, so browser default 1em applies
|
||||
const pAfter = (pSpacing?.after || body.size) * 20;
|
||||
results.push(new Paragraph({
|
||||
children: runs.length > 0 ? runs : [new TextRun({ text: el.textContent || '' })],
|
||||
alignment: isImageOnly ? AlignmentType.CENTER : mapAlignment(body.align),
|
||||
alignment: isImageOnly ? AlignmentType.CENTER : mapAlignment(elements?.p?.align || body.align),
|
||||
indent: isImageOnly ? undefined : (elements?.p?.indent ? { firstLine: elements.p.indent * 20 } : undefined),
|
||||
spacing: {
|
||||
before: (body.spacing?.before || 0) * 20,
|
||||
after: (body.spacing?.after || 0) * 20,
|
||||
line: Math.round((body.spacing?.line || 1.2) * 240)
|
||||
before: (pSpacing?.before || 0) * 20,
|
||||
after: pAfter,
|
||||
line: Math.round((pSpacing?.line || 1.2) * 240)
|
||||
},
|
||||
shading: bgMatch ? { fill: formatColor(resolveColorToHex(bgMatch[1])), type: ShadingType.CLEAR } : undefined
|
||||
}));
|
||||
return results;
|
||||
}
|
||||
|
||||
// Blockquotes
|
||||
// Blockquotes - process each inner <p> as a separate paragraph with blockquote styling
|
||||
// Word groups adjacent paragraphs with identical borders, showing top/bottom only on outer edges
|
||||
if (tag === 'blockquote') {
|
||||
const bqConfig = elements?.blockquote;
|
||||
const runs = processTextRuns(el, {
|
||||
font: bqConfig?.font ? resolveFont(bqConfig.font, fonts || {}) : body.font,
|
||||
size: pt(bqConfig?.size || body.size),
|
||||
color: formatColor(resolveColorToHex(bqConfig?.color || body.color)),
|
||||
italics: true
|
||||
const bqFont = bqConfig?.font ? resolveFont(bqConfig.font, fonts || {}) : body.font;
|
||||
const bqSize = pt(bqConfig?.size || body.size);
|
||||
const bqColor = formatColor(resolveColorToHex(bqConfig?.color || body.color));
|
||||
const bqFmt = { font: bqFont, size: bqSize, color: bqColor, italics: bqConfig?.italic !== false };
|
||||
|
||||
console.log('DOCX BLOCKQUOTE:', {
|
||||
font: bqFont, size: bqSize, color: bqColor, childCount: el.children.length
|
||||
});
|
||||
|
||||
const borderColor = resolveColorToHex(bqConfig?.borderLeft?.color) || accentColor;
|
||||
const borderWidth = bqConfig?.borderLeft?.width || 3;
|
||||
|
||||
const debugKey = 'blockquote-debug';
|
||||
if (!visitedTags.has(debugKey)) {
|
||||
visitedTags.add(debugKey);
|
||||
console.log('DOCX BLOCKQUOTE CONFIG:', {
|
||||
font: bqConfig?.font,
|
||||
size: bqConfig?.size,
|
||||
color: formatColor(resolveColorToHex(bqConfig?.color)),
|
||||
border: { color: borderColor, width: borderWidth },
|
||||
background: bqConfig?.background
|
||||
});
|
||||
const bqBorder: any = {};
|
||||
if (bqConfig?.border) {
|
||||
const b = { color: resolveColorToHex(bqConfig.border.color) || accentColor, space: 6, style: mapBorderStyle(bqConfig.border.style), size: (bqConfig.border.width || 1) * 8 };
|
||||
bqBorder.top = b; bqBorder.bottom = b; bqBorder.left = b; bqBorder.right = b;
|
||||
}
|
||||
if (bqConfig?.borderTop) bqBorder.top = { color: resolveColorToHex(bqConfig.borderTop.color) || accentColor, space: 6, style: mapBorderStyle(bqConfig.borderTop.style), size: (bqConfig.borderTop.width || 1) * 8 };
|
||||
if (bqConfig?.borderBottom) bqBorder.bottom = { color: resolveColorToHex(bqConfig.borderBottom.color) || accentColor, space: 6, style: mapBorderStyle(bqConfig.borderBottom.style), size: (bqConfig.borderBottom.width || 1) * 8 };
|
||||
if (bqConfig?.borderLeft) bqBorder.left = { color: resolveColorToHex(bqConfig.borderLeft.color) || accentColor, space: 10, style: mapBorderStyle(bqConfig.borderLeft.style), size: (bqConfig.borderLeft.width || 1) * 8 };
|
||||
if (bqConfig?.borderRight) bqBorder.right = { color: resolveColorToHex(bqConfig.borderRight.color) || accentColor, space: 6, style: mapBorderStyle(bqConfig.borderRight.style), size: (bqConfig.borderRight.width || 1) * 8 };
|
||||
|
||||
results.push(new Paragraph({
|
||||
const bqLine = Math.max(bqConfig?.spacing?.line || body.spacing?.line || 1.2, 1.5);
|
||||
const bqBorderObj = Object.keys(bqBorder).length > 0 ? bqBorder : undefined;
|
||||
const bqShading = bqConfig?.background ? { fill: resolveColorToHex(bqConfig.background), type: ShadingType.CLEAR } : undefined;
|
||||
const bqSpacing = {
|
||||
before: 0,
|
||||
after: body.size * 20, // 1em gap between inner paragraphs (matches browser default)
|
||||
line: Math.round(bqLine * 240)
|
||||
};
|
||||
|
||||
// Process children - each <p> becomes its own paragraph with blockquote styling
|
||||
const childEls = Array.from(el.children);
|
||||
const makeBqParagraph = (runs: any[], isFirst: boolean, isLast: boolean, align?: any) => new Paragraph({
|
||||
children: runs,
|
||||
indent: { left: 720 },
|
||||
border: { left: { color: borderColor, space: 10, style: BorderStyle.SINGLE, size: borderWidth * 8 } },
|
||||
shading: bqConfig?.background ? { fill: resolveColorToHex(bqConfig.background), type: ShadingType.CLEAR } : (isDark ? undefined : { fill: 'F8F8F8', type: ShadingType.CLEAR }),
|
||||
alignment: align || mapAlignment(bqConfig?.align),
|
||||
indent: bqConfig?.indent ? { left: bqConfig.indent * 20 } : undefined,
|
||||
border: bqBorderObj,
|
||||
shading: bqShading,
|
||||
spacing: {
|
||||
before: (bqConfig?.spacing?.before || 12) * 20,
|
||||
after: (bqConfig?.spacing?.after || 12) * 20,
|
||||
line: Math.round((bqConfig?.spacing?.line || body.spacing?.line || 1.2) * 240)
|
||||
...bqSpacing,
|
||||
before: isFirst ? (bqConfig?.spacing?.before || 12) * 20 : bqSpacing.before,
|
||||
after: isLast ? (bqConfig?.spacing?.after || 12) * 20 : bqSpacing.after,
|
||||
}
|
||||
});
|
||||
|
||||
if (childEls.length === 0) {
|
||||
// No child elements - process as single paragraph with full blockquote styling
|
||||
const runs = processTextRuns(el, bqFmt);
|
||||
results.push(makeBqParagraph(runs, true, true, mapAlignment(bqConfig?.align)));
|
||||
} else {
|
||||
childEls.forEach((child, i) => {
|
||||
const childEl = child as HTMLElement;
|
||||
const childTagName = childEl.tagName.toLowerCase();
|
||||
// Nested blockquotes - recurse
|
||||
if (childTagName === 'blockquote') {
|
||||
results.push(...processNode(childEl));
|
||||
return;
|
||||
}
|
||||
const isP = childTagName === 'p';
|
||||
// CSS specificity: <p> rules override inherited blockquote styles
|
||||
// Only italic inherits since .page p doesn't set font-style
|
||||
const childFmt = isP ? {
|
||||
font: body.font,
|
||||
size: pt(body.size),
|
||||
color: formatColor(resolveColorToHex(body.color)),
|
||||
italics: bqConfig?.italic !== false
|
||||
} : bqFmt;
|
||||
const childAlign = isP ? mapAlignment(elements?.p?.align || body.align) : mapAlignment(bqConfig?.align);
|
||||
const runs = processTextRuns(childEl, childFmt);
|
||||
if (runs.length > 0) {
|
||||
results.push(makeBqParagraph(runs, i === 0, i === childEls.length - 1, childAlign));
|
||||
}
|
||||
});
|
||||
}
|
||||
}));
|
||||
return results;
|
||||
}
|
||||
|
||||
// Lists
|
||||
// Lists - with spacing before/after the list container
|
||||
if (tag === 'ul' || tag === 'ol') {
|
||||
const listCfg = tag === 'ul' ? elements?.ul : elements?.ol;
|
||||
const listBefore = (listCfg?.spacing?.before ?? 12) * 20;
|
||||
const listAfter = (listCfg?.spacing?.after ?? 12) * 20;
|
||||
if (listBefore) results.push(new Paragraph({ spacing: { before: listBefore, after: 0 }, children: [] }));
|
||||
results.push(...processList(el, tag === 'ol', 0));
|
||||
if (listAfter) results.push(new Paragraph({ spacing: { before: 0, after: listAfter }, children: [] }));
|
||||
return results;
|
||||
}
|
||||
|
||||
@@ -1335,7 +1411,8 @@ export const generateDocxDocument = async (
|
||||
if (imgRun) {
|
||||
results.push(new Paragraph({
|
||||
children: [imgRun],
|
||||
spacing: { before: 120, after: 120 },
|
||||
alignment: AlignmentType.CENTER,
|
||||
spacing: { before: (elements?.img?.spacing?.before ?? 18) * 20, after: (elements?.img?.spacing?.after ?? 18) * 20 },
|
||||
}));
|
||||
}
|
||||
} else {
|
||||
@@ -1369,12 +1446,82 @@ export const generateDocxDocument = async (
|
||||
},
|
||||
spacing: {
|
||||
before: (hrConfig?.spacing?.before || 12) * 20,
|
||||
after: (hrConfig?.spacing?.after || 12) * 20
|
||||
after: 0
|
||||
}
|
||||
}));
|
||||
return results;
|
||||
}
|
||||
|
||||
// Divs - split into paragraphs for text/inline content, recurse for nested block elements
|
||||
if (tag === 'div') {
|
||||
const style = el.getAttribute('style') || '';
|
||||
const alignMatch = style.match(/text-align:\s*(left|center|right|justify)/i);
|
||||
const divAlign = alignMatch ? alignMatch[1].toLowerCase() : undefined;
|
||||
const divSpacing = elements?.p?.spacing || body.spacing;
|
||||
const blockTags = new Set(['div', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'blockquote', 'ul', 'ol', 'table', 'pre', 'hr']);
|
||||
|
||||
// Check if div has nested block elements
|
||||
const hasBlockChildren = Array.from(el.children).some(c => blockTags.has(c.tagName.toLowerCase()));
|
||||
|
||||
if (hasBlockChildren) {
|
||||
// Split: group consecutive inline/text nodes into paragraphs, recurse block elements
|
||||
let inlineNodes: Node[] = [];
|
||||
|
||||
const flushInline = () => {
|
||||
if (inlineNodes.length === 0) return;
|
||||
// Create a temp container in the parsed document to process inline nodes
|
||||
const temp = doc.createElement('span');
|
||||
inlineNodes.forEach(n => temp.appendChild(n.cloneNode(true)));
|
||||
const text = temp.textContent?.trim();
|
||||
if (text) {
|
||||
const runs = processTextRuns(temp as any, {
|
||||
font: body.font, size: pt(body.size), color: formatColor(resolveColorToHex(body.color))
|
||||
});
|
||||
if (runs.length > 0) {
|
||||
results.push(new Paragraph({
|
||||
children: runs,
|
||||
alignment: divAlign ? mapAlignment(divAlign === 'justify' ? 'both' : divAlign) : mapAlignment(body.align),
|
||||
spacing: { before: 0, after: (divSpacing?.after || body.size) * 20, line: Math.round((divSpacing?.line || 1.2) * 240) }
|
||||
}));
|
||||
}
|
||||
}
|
||||
inlineNodes = [];
|
||||
};
|
||||
|
||||
for (const child of Array.from(el.childNodes)) {
|
||||
if (child.nodeType === Node.ELEMENT_NODE && blockTags.has((child as HTMLElement).tagName.toLowerCase())) {
|
||||
flushInline();
|
||||
results.push(...processNode(child));
|
||||
} else {
|
||||
inlineNodes.push(child);
|
||||
}
|
||||
}
|
||||
flushInline();
|
||||
} else {
|
||||
// No nested blocks - treat entire div as one paragraph
|
||||
const hasContent = el.textContent?.trim();
|
||||
if (hasContent) {
|
||||
const runs = processTextRuns(el, {
|
||||
font: body.font, size: pt(body.size), color: formatColor(resolveColorToHex(body.color))
|
||||
});
|
||||
if (runs.length > 0) {
|
||||
const isImgOnly = el.querySelector('img') !== null && !hasContent;
|
||||
results.push(new Paragraph({
|
||||
children: runs,
|
||||
alignment: isImgOnly ? AlignmentType.CENTER : (divAlign ? mapAlignment(divAlign === 'justify' ? 'both' : divAlign) : mapAlignment(body.align)),
|
||||
spacing: { before: 0, after: (divSpacing?.after || body.size) * 20, line: Math.round((divSpacing?.line || 1.2) * 240) }
|
||||
}));
|
||||
return results;
|
||||
}
|
||||
}
|
||||
// No text - process children for images etc.
|
||||
for (const child of Array.from(el.childNodes)) {
|
||||
results.push(...processNode(child));
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
// Default: process children
|
||||
for (const child of Array.from(el.childNodes)) {
|
||||
results.push(...processNode(child));
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import TurndownService from 'turndown';
|
||||
// @ts-ignore
|
||||
import { gfm } from 'turndown-plugin-gfm';
|
||||
|
||||
export function htmlToMarkdown(html: string): string {
|
||||
export function htmlToMarkdown(html: string, preserveAlignment = false): string {
|
||||
const turndown = new TurndownService({
|
||||
headingStyle: 'atx',
|
||||
hr: '---',
|
||||
@@ -10,6 +12,39 @@ export function htmlToMarkdown(html: string): string {
|
||||
strongDelimiter: '**',
|
||||
});
|
||||
|
||||
// Enable GFM tables
|
||||
turndown.use(gfm);
|
||||
|
||||
// Convert Blogger caption tables (image + caption) to image + italic caption
|
||||
// These are layout tables, not data tables - without this rule, GFM converts them
|
||||
// to markdown tables which then get data table borders in the export
|
||||
turndown.addRule('bloggerCaptionTable', {
|
||||
filter: (node) => {
|
||||
return node.nodeName === 'TABLE' &&
|
||||
(node as HTMLElement).classList.contains('tr-caption-container');
|
||||
},
|
||||
replacement: (_content, node) => {
|
||||
const el = node as HTMLElement;
|
||||
const img = el.querySelector('img');
|
||||
const caption = el.querySelector('.tr-caption');
|
||||
let result = '\n\n';
|
||||
if (img) {
|
||||
const src = img.getAttribute('src') || '';
|
||||
const alt = img.getAttribute('alt') || '';
|
||||
const link = img.closest('a');
|
||||
if (link) {
|
||||
result += `[](${link.getAttribute('href')})\n`;
|
||||
} else {
|
||||
result += `\n`;
|
||||
}
|
||||
}
|
||||
if (caption && caption.textContent?.trim()) {
|
||||
result += `\n*${caption.textContent.trim()}*\n`;
|
||||
}
|
||||
return result + '\n';
|
||||
},
|
||||
});
|
||||
|
||||
// Strip meaningless wrapper spans (Blogger, Google Docs, Word paste, etc.)
|
||||
turndown.addRule('stripDecorativeSpans', {
|
||||
filter: (node) => {
|
||||
@@ -23,6 +58,21 @@ export function htmlToMarkdown(html: string): string {
|
||||
replacement: (content) => content,
|
||||
});
|
||||
|
||||
// For HTML content: preserve divs with text-align as raw HTML pass-through
|
||||
if (preserveAlignment) {
|
||||
turndown.addRule('preserveAlignment', {
|
||||
filter: (node) => {
|
||||
if (node.nodeName !== 'DIV') return false;
|
||||
const style = node.getAttribute('style') || '';
|
||||
return /text-align:\s*(right|center)/i.test(style);
|
||||
},
|
||||
replacement: (_content, node) => {
|
||||
const el = node as HTMLElement;
|
||||
return '\n\n' + el.outerHTML + '\n\n';
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
let markdown = turndown.turndown(html);
|
||||
|
||||
// Clean up excessive blank lines
|
||||
@@ -31,8 +81,9 @@ export function htmlToMarkdown(html: string): string {
|
||||
// Regular spaces would trigger markdown code block detection at 4+ indent
|
||||
// &nbsp; entities pass through Marked.js as HTML and render as visible spaces
|
||||
markdown = markdown.replace(/\u00A0/g, ' ');
|
||||
// Clean up trailing whitespace on lines
|
||||
markdown = markdown.replace(/[ \t]+$/gm, '');
|
||||
// Clean up whitespace-only lines but DON'T strip trailing spaces on content lines
|
||||
// Turndown uses two trailing spaces for <br> line breaks - stripping them breaks line breaks
|
||||
markdown = markdown.replace(/^[ \t]+$/gm, '');
|
||||
|
||||
return markdown.trim();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user