improved html conversion fidelity and docx export accuracy
This commit is contained in:
@@ -228,7 +228,7 @@ This means all 165+ styles automatically meet WCAG AAA contrast requirements reg
|
|||||||
**TypoGenie is fully portable** - no installation, no registry entries, no files scattered across your system.
|
**TypoGenie is fully portable** - no installation, no registry entries, no files scattered across your system.
|
||||||
|
|
||||||
Just download and run:
|
Just download and run:
|
||||||
- 🪟 **Windows**: [`TypoGenie-v1.2.0-Portable.zip`](https://git.lashman.live/lashman/typogenie/releases) - Extract and run, nothing else needed
|
- 🪟 **Windows**: [`TypoGenie-v1.2.1-Portable.zip`](https://git.lashman.live/lashman/typogenie/releases) - Extract and run, nothing else needed
|
||||||
|
|
||||||
**How it works:**
|
**How it works:**
|
||||||
```
|
```
|
||||||
|
|||||||
13
package-lock.json
generated
13
package-lock.json
generated
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "typogenie",
|
"name": "typogenie",
|
||||||
"version": "1.0.0",
|
"version": "1.2.0",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "typogenie",
|
"name": "typogenie",
|
||||||
"version": "1.0.0",
|
"version": "1.2.0",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@tauri-apps/api": "^2.0.0",
|
"@tauri-apps/api": "^2.0.0",
|
||||||
"@tauri-apps/plugin-dialog": "^2.0.0",
|
"@tauri-apps/plugin-dialog": "^2.0.0",
|
||||||
@@ -22,7 +22,8 @@
|
|||||||
"motion": "^12.29.2",
|
"motion": "^12.29.2",
|
||||||
"react": "^19.2.4",
|
"react": "^19.2.4",
|
||||||
"react-dom": "^19.2.4",
|
"react-dom": "^19.2.4",
|
||||||
"turndown": "^7.2.2"
|
"turndown": "^7.2.2",
|
||||||
|
"turndown-plugin-gfm": "^1.0.2"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@tailwindcss/postcss": "^4.1.18",
|
"@tailwindcss/postcss": "^4.1.18",
|
||||||
@@ -2914,6 +2915,12 @@
|
|||||||
"@mixmark-io/domino": "^2.2.0"
|
"@mixmark-io/domino": "^2.2.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/turndown-plugin-gfm": {
|
||||||
|
"version": "1.0.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/turndown-plugin-gfm/-/turndown-plugin-gfm-1.0.2.tgz",
|
||||||
|
"integrity": "sha512-vwz9tfvF7XN/jE0dGoBei3FXWuvll78ohzCZQuOb+ZjWrs3a0XhQVomJEb2Qh4VHTPNRO4GPZh0V7VRbiWwkRg==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/typescript": {
|
"node_modules/typescript": {
|
||||||
"version": "5.8.3",
|
"version": "5.8.3",
|
||||||
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.8.3.tgz",
|
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.8.3.tgz",
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"name": "typogenie",
|
"name": "typogenie",
|
||||||
"private": true,
|
"private": true,
|
||||||
"version": "1.2.0",
|
"version": "1.2.1",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"dev": "vite",
|
"dev": "vite",
|
||||||
@@ -28,7 +28,8 @@
|
|||||||
"motion": "^12.29.2",
|
"motion": "^12.29.2",
|
||||||
"react": "^19.2.4",
|
"react": "^19.2.4",
|
||||||
"react-dom": "^19.2.4",
|
"react-dom": "^19.2.4",
|
||||||
"turndown": "^7.2.2"
|
"turndown": "^7.2.2",
|
||||||
|
"turndown-plugin-gfm": "^1.0.2"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@tailwindcss/postcss": "^4.1.18",
|
"@tailwindcss/postcss": "^4.1.18",
|
||||||
|
|||||||
2
src-tauri/Cargo.lock
generated
2
src-tauri/Cargo.lock
generated
@@ -4918,7 +4918,7 @@ checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "typogenie"
|
name = "typogenie"
|
||||||
version = "1.0.0"
|
version = "1.2.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"log",
|
"log",
|
||||||
"opener",
|
"opener",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "typogenie"
|
name = "typogenie"
|
||||||
version = "1.2.0"
|
version = "1.2.1"
|
||||||
description = "TypoGenie - Portable Markdown to Word document converter"
|
description = "TypoGenie - Portable Markdown to Word document converter"
|
||||||
authors = ["TypoGenie Contributors"]
|
authors = ["TypoGenie Contributors"]
|
||||||
license = "CC0-1.0"
|
license = "CC0-1.0"
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"$schema": "../node_modules/@tauri-apps/cli/config.schema.json",
|
"$schema": "../node_modules/@tauri-apps/cli/config.schema.json",
|
||||||
"productName": "TypoGenie",
|
"productName": "TypoGenie",
|
||||||
"version": "1.2.0",
|
"version": "1.2.1",
|
||||||
"identifier": "live.lashman.typogenie",
|
"identifier": "live.lashman.typogenie",
|
||||||
"build": {
|
"build": {
|
||||||
"frontendDist": "../dist",
|
"frontendDist": "../dist",
|
||||||
|
|||||||
@@ -156,7 +156,7 @@ const App: React.FC = () => {
|
|||||||
let processedContent = text;
|
let processedContent = text;
|
||||||
if (detection.type === 'html') {
|
if (detection.type === 'html') {
|
||||||
try {
|
try {
|
||||||
processedContent = htmlToMarkdown(text);
|
processedContent = htmlToMarkdown(text, true);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error('HTML conversion failed:', err);
|
console.error('HTML conversion failed:', err);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -273,7 +273,6 @@ export const Preview: React.FC<PreviewProps> = ({
|
|||||||
|
|
||||||
// Track blob URL for cleanup
|
// Track blob URL for cleanup
|
||||||
const blobUrlRef = useRef<string | null>(null);
|
const blobUrlRef = useRef<string | null>(null);
|
||||||
|
|
||||||
// Render preview whenever dependencies change
|
// Render preview whenever dependencies change
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (!iframeRef.current || !style) return;
|
if (!iframeRef.current || !style) return;
|
||||||
@@ -322,7 +321,7 @@ export const Preview: React.FC<PreviewProps> = ({
|
|||||||
`.page {`,
|
`.page {`,
|
||||||
` width: ${paperSize === 'A4' ? '210mm' : '8.5in'};`,
|
` width: ${paperSize === 'A4' ? '210mm' : '8.5in'};`,
|
||||||
` min-height: ${paperSize === 'A4' ? '297mm' : '11in'};`,
|
` min-height: ${paperSize === 'A4' ? '297mm' : '11in'};`,
|
||||||
` padding: 25mm;`,
|
` padding: ${style.page?.margins ? `${style.page.margins.top}pt ${style.page.margins.right}pt ${style.page.margins.bottom}pt ${style.page.margins.left}pt` : '25mm'};`,
|
||||||
` box-shadow: 0 20px 25px -5px rgba(0, 0, 0, 0.4);`,
|
` box-shadow: 0 20px 25px -5px rgba(0, 0, 0, 0.4);`,
|
||||||
` box-sizing: border-box;`,
|
` box-sizing: border-box;`,
|
||||||
` margin: 0 auto;`,
|
` margin: 0 auto;`,
|
||||||
|
|||||||
@@ -193,9 +193,9 @@ export const generateDocxDocument = async (
|
|||||||
const src = img.getAttribute('src');
|
const src = img.getAttribute('src');
|
||||||
if (!src || src.startsWith('data:')) continue;
|
if (!src || src.startsWith('data:')) continue;
|
||||||
|
|
||||||
// Get dimensions from HTML attributes first
|
// Get display dimensions from HTML attributes (prefer width/height over data-original-*)
|
||||||
const htmlW = parseInt(img.getAttribute('data-original-width') || img.getAttribute('width') || '0');
|
const htmlW = parseInt(img.getAttribute('width') || '0');
|
||||||
const htmlH = parseInt(img.getAttribute('data-original-height') || img.getAttribute('height') || '0');
|
const htmlH = parseInt(img.getAttribute('height') || '0');
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Step 1: Fetch the image bytes
|
// Step 1: Fetch the image bytes
|
||||||
@@ -447,6 +447,10 @@ export const generateDocxDocument = async (
|
|||||||
} else if (node.nodeType === Node.ELEMENT_NODE) {
|
} else if (node.nodeType === Node.ELEMENT_NODE) {
|
||||||
const childEl = node as HTMLElement;
|
const childEl = node as HTMLElement;
|
||||||
const childTag = childEl.tagName.toLowerCase();
|
const childTag = childEl.tagName.toLowerCase();
|
||||||
|
if (childTag === 'br') {
|
||||||
|
linkRuns.push(new TextRun({ break: 1 }) as any);
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (childTag === 'img') {
|
if (childTag === 'img') {
|
||||||
const imgSrc = childEl.getAttribute('src');
|
const imgSrc = childEl.getAttribute('src');
|
||||||
if (imgSrc) {
|
if (imgSrc) {
|
||||||
@@ -566,9 +570,15 @@ export const generateDocxDocument = async (
|
|||||||
if (tag === 's' || tag === 'strike') fmt.strike = true;
|
if (tag === 's' || tag === 'strike') fmt.strike = true;
|
||||||
if (tag === 'sub') fmt.subScript = true;
|
if (tag === 'sub') fmt.subScript = true;
|
||||||
if (tag === 'sup') fmt.superScript = true;
|
if (tag === 'sup') fmt.superScript = true;
|
||||||
|
if (tag === 'br') {
|
||||||
|
runs.push(new TextRun({ break: 1 }) as any);
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (tag === 'code') {
|
if (tag === 'code') {
|
||||||
fmt.font = codeFontResolved;
|
fmt.font = codeFontResolved;
|
||||||
fmt.color = codeTextColor;
|
fmt.color = codeTextColor;
|
||||||
|
if (elements?.code?.size) fmt.size = pt(elements.code.size);
|
||||||
|
if (codeBgColor) fmt.shading = { fill: codeBgColor, type: ShadingType.CLEAR };
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle links
|
// Handle links
|
||||||
@@ -593,6 +603,10 @@ export const generateDocxDocument = async (
|
|||||||
} else if (node.nodeType === Node.ELEMENT_NODE) {
|
} else if (node.nodeType === Node.ELEMENT_NODE) {
|
||||||
const childEl = node as HTMLElement;
|
const childEl = node as HTMLElement;
|
||||||
const childTag = childEl.tagName.toLowerCase();
|
const childTag = childEl.tagName.toLowerCase();
|
||||||
|
if (childTag === 'br') {
|
||||||
|
linkRuns.push(new TextRun({ break: 1 }) as any);
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (childTag === 'img') {
|
if (childTag === 'img') {
|
||||||
const imgSrc = childEl.getAttribute('src');
|
const imgSrc = childEl.getAttribute('src');
|
||||||
if (imgSrc) {
|
if (imgSrc) {
|
||||||
@@ -669,7 +683,7 @@ export const generateDocxDocument = async (
|
|||||||
spacing: {
|
spacing: {
|
||||||
before: 0,
|
before: 0,
|
||||||
after: 0,
|
after: 0,
|
||||||
line: Math.round((cfg?.spacing?.line || 1.2) * 240),
|
line: Math.round(Math.max(cfg?.spacing?.line || 1.2, 1.5) * 240),
|
||||||
}
|
}
|
||||||
}));
|
}));
|
||||||
|
|
||||||
@@ -814,20 +828,27 @@ export const generateDocxDocument = async (
|
|||||||
|
|
||||||
console.log('TABLE DOCX: Processing table');
|
console.log('TABLE DOCX: Processing table');
|
||||||
|
|
||||||
// Get table-level border config
|
// Get table-level border config - check both generic and per-side borders
|
||||||
const tableBorderConfig = elements?.table?.border;
|
const tblCfg = elements?.table;
|
||||||
const tableBorderColor = resolveColorToHex(tableBorderConfig?.color) || (isDark ? '444444' : 'CCCCCC');
|
const defaultBorderColor = isDark ? '444444' : 'CCCCCC';
|
||||||
const tableBorderWidth = tableBorderConfig?.width || 1;
|
const makeBorder = (cfg: any, fallbackColor: string) => ({
|
||||||
const tableBorderStyle = mapBorderStyle(tableBorderConfig?.style || 'single');
|
color: resolveColorToHex(cfg?.color) || fallbackColor,
|
||||||
|
size: (cfg?.width || 1) * 8,
|
||||||
|
style: mapBorderStyle(cfg?.style || 'single')
|
||||||
|
});
|
||||||
|
|
||||||
// Create table-level borders (outer border only by default)
|
const noBorder = { style: BorderStyle.NONE, size: 0, color: 'auto' };
|
||||||
|
const htmlBorderAttr = tableEl.getAttribute('border');
|
||||||
|
const hasHtmlBorder = htmlBorderAttr && parseInt(htmlBorderAttr) > 0;
|
||||||
|
const genericBorder = tblCfg?.border ? makeBorder(tblCfg.border, defaultBorderColor) : null;
|
||||||
const tableBorders = {
|
const tableBorders = {
|
||||||
top: { color: tableBorderColor, size: tableBorderWidth * 8, style: tableBorderStyle },
|
top: tblCfg?.borderTop ? makeBorder(tblCfg.borderTop, defaultBorderColor) : (genericBorder || (hasHtmlBorder ? { color: defaultBorderColor, size: 4, style: BorderStyle.SINGLE } : noBorder)),
|
||||||
bottom: { color: tableBorderColor, size: tableBorderWidth * 8, style: tableBorderStyle },
|
bottom: tblCfg?.borderBottom ? makeBorder(tblCfg.borderBottom, defaultBorderColor) : (genericBorder || (hasHtmlBorder ? { color: defaultBorderColor, size: 4, style: BorderStyle.SINGLE } : noBorder)),
|
||||||
left: { color: tableBorderColor, size: tableBorderWidth * 8, style: tableBorderStyle },
|
left: tblCfg?.borderLeft ? makeBorder(tblCfg.borderLeft, defaultBorderColor) : (genericBorder || (hasHtmlBorder ? { color: defaultBorderColor, size: 4, style: BorderStyle.SINGLE } : noBorder)),
|
||||||
right: { color: tableBorderColor, size: tableBorderWidth * 8, style: tableBorderStyle },
|
right: tblCfg?.borderRight ? makeBorder(tblCfg.borderRight, defaultBorderColor) : (genericBorder || (hasHtmlBorder ? { color: defaultBorderColor, size: 4, style: BorderStyle.SINGLE } : noBorder)),
|
||||||
insideHorizontal: { style: BorderStyle.NIL, size: 0 },
|
insideHorizontal: elements?.th?.borderBottom ? makeBorder(elements.th.borderBottom, defaultBorderColor) :
|
||||||
insideVertical: { style: BorderStyle.NIL, size: 0 }
|
(hasHtmlBorder ? { color: defaultBorderColor, size: 4, style: BorderStyle.SINGLE } : noBorder),
|
||||||
|
insideVertical: hasHtmlBorder ? { color: defaultBorderColor, size: 4, style: BorderStyle.SINGLE } : noBorder
|
||||||
};
|
};
|
||||||
|
|
||||||
for (const rowEl of Array.from(tableEl.querySelectorAll('tr'))) {
|
for (const rowEl of Array.from(tableEl.querySelectorAll('tr'))) {
|
||||||
@@ -855,8 +876,11 @@ export const generateDocxDocument = async (
|
|||||||
bold: isHeader || undefined
|
bold: isHeader || undefined
|
||||||
});
|
});
|
||||||
|
|
||||||
// Get background from config
|
// Get background: HTML bgcolor attribute takes priority, then template config
|
||||||
const cellBg = resolveColorToHex(cellConfig?.background);
|
const htmlBgColor = cell.getAttribute('bgcolor');
|
||||||
|
const cellBg = htmlBgColor
|
||||||
|
? formatColor(htmlBgColor.replace('#', ''))
|
||||||
|
: resolveColorToHex(cellConfig?.background);
|
||||||
|
|
||||||
console.log(`TABLE CELL DOCX [${isHeader ? 'TH' : 'TD'}]:`, {
|
console.log(`TABLE CELL DOCX [${isHeader ? 'TH' : 'TD'}]:`, {
|
||||||
text: cell.textContent?.substring(0, 30) + (cell.textContent && cell.textContent.length > 30 ? '...' : ''),
|
text: cell.textContent?.substring(0, 30) + (cell.textContent && cell.textContent.length > 30 ? '...' : ''),
|
||||||
@@ -867,7 +891,8 @@ export const generateDocxDocument = async (
|
|||||||
bold: isHeader || undefined
|
bold: isHeader || undefined
|
||||||
});
|
});
|
||||||
|
|
||||||
// Resolve cell-specific borders from template
|
// Resolve cell-specific borders from template config only
|
||||||
|
// (HTML border is handled at table level via insideH/insideV to avoid overriding thick outer borders)
|
||||||
const cellBorders: any = {};
|
const cellBorders: any = {};
|
||||||
if (cellConfig?.border) {
|
if (cellConfig?.border) {
|
||||||
const b = { color: resolveColorToHex(cellConfig.border.color) || '000000', style: mapBorderStyle(cellConfig.border.style), size: (cellConfig.border.width || 1) * 8 };
|
const b = { color: resolveColorToHex(cellConfig.border.color) || '000000', style: mapBorderStyle(cellConfig.border.style), size: (cellConfig.border.width || 1) * 8 };
|
||||||
@@ -884,10 +909,10 @@ export const generateDocxDocument = async (
|
|||||||
cells.push(new TableCell({
|
cells.push(new TableCell({
|
||||||
children: [new Paragraph({
|
children: [new Paragraph({
|
||||||
children: cellRuns.length > 0 ? cellRuns : [new TextRun({ text: cell.textContent || '' })],
|
children: cellRuns.length > 0 ? cellRuns : [new TextRun({ text: cell.textContent || '' })],
|
||||||
alignment: isHeader ? AlignmentType.CENTER : mapAlignment(cellConfig?.align),
|
alignment: mapAlignment(cellConfig?.align || cell.getAttribute('align') || (cell.getAttribute('style')?.match(/text-align:\s*(\w+)/)?.[1]) || undefined),
|
||||||
spacing: {
|
spacing: {
|
||||||
after: 0,
|
after: 0,
|
||||||
line: Math.round((body.spacing?.line || 1.2) * 240)
|
line: Math.round(Math.max(elements?.table?.spacing?.line || body.spacing?.line || 1.2, 1.5) * 240)
|
||||||
}
|
}
|
||||||
})],
|
})],
|
||||||
shading: cellBg ? { fill: cellBg, type: ShadingType.CLEAR } : undefined,
|
shading: cellBg ? { fill: cellBg, type: ShadingType.CLEAR } : undefined,
|
||||||
@@ -1029,8 +1054,8 @@ export const generateDocxDocument = async (
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const liSpacingBefore = (elements?.li?.spacing?.before || 4) * 20;
|
const liSpacingBefore = (elements?.li?.spacing?.before ?? 4) * 20;
|
||||||
const liSpacingAfter = (elements?.li?.spacing?.after || 4) * 20;
|
const liSpacingAfter = (elements?.li?.spacing?.after ?? 4) * 20;
|
||||||
const liLineHeight = (elements?.li?.spacing?.line || body.spacing?.line || 1.2) * 240;
|
const liLineHeight = (elements?.li?.spacing?.line || body.spacing?.line || 1.2) * 240;
|
||||||
|
|
||||||
// Log the actual text runs and their styling
|
// Log the actual text runs and their styling
|
||||||
@@ -1233,9 +1258,12 @@ export const generateDocxDocument = async (
|
|||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tables
|
// Tables - with spacing paragraphs before/after
|
||||||
if (tag === 'table') {
|
if (tag === 'table') {
|
||||||
|
const tblSpacing = elements?.table?.spacing;
|
||||||
|
results.push(new Paragraph({ spacing: { before: (tblSpacing?.before || 18) * 20, after: 0 }, children: [] }));
|
||||||
results.push(processTable(el));
|
results.push(processTable(el));
|
||||||
|
results.push(new Paragraph({ spacing: { before: 0, after: (tblSpacing?.after || 18) * 20 }, children: [] }));
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1266,62 +1294,110 @@ export const generateDocxDocument = async (
|
|||||||
// Center paragraphs that only contain an image
|
// Center paragraphs that only contain an image
|
||||||
const isImageOnly = el.querySelector('img') !== null && !el.textContent?.trim();
|
const isImageOnly = el.querySelector('img') !== null && !el.textContent?.trim();
|
||||||
|
|
||||||
|
const pSpacing = elements?.p?.spacing || body.spacing;
|
||||||
|
// When template spacing is 0, CSS generator skips the margin, so browser default 1em applies
|
||||||
|
const pAfter = (pSpacing?.after || body.size) * 20;
|
||||||
results.push(new Paragraph({
|
results.push(new Paragraph({
|
||||||
children: runs.length > 0 ? runs : [new TextRun({ text: el.textContent || '' })],
|
children: runs.length > 0 ? runs : [new TextRun({ text: el.textContent || '' })],
|
||||||
alignment: isImageOnly ? AlignmentType.CENTER : mapAlignment(body.align),
|
alignment: isImageOnly ? AlignmentType.CENTER : mapAlignment(elements?.p?.align || body.align),
|
||||||
indent: isImageOnly ? undefined : (elements?.p?.indent ? { firstLine: elements.p.indent * 20 } : undefined),
|
indent: isImageOnly ? undefined : (elements?.p?.indent ? { firstLine: elements.p.indent * 20 } : undefined),
|
||||||
spacing: {
|
spacing: {
|
||||||
before: (body.spacing?.before || 0) * 20,
|
before: (pSpacing?.before || 0) * 20,
|
||||||
after: (body.spacing?.after || 0) * 20,
|
after: pAfter,
|
||||||
line: Math.round((body.spacing?.line || 1.2) * 240)
|
line: Math.round((pSpacing?.line || 1.2) * 240)
|
||||||
},
|
},
|
||||||
shading: bgMatch ? { fill: formatColor(resolveColorToHex(bgMatch[1])), type: ShadingType.CLEAR } : undefined
|
shading: bgMatch ? { fill: formatColor(resolveColorToHex(bgMatch[1])), type: ShadingType.CLEAR } : undefined
|
||||||
}));
|
}));
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Blockquotes
|
// Blockquotes - process each inner <p> as a separate paragraph with blockquote styling
|
||||||
|
// Word groups adjacent paragraphs with identical borders, showing top/bottom only on outer edges
|
||||||
if (tag === 'blockquote') {
|
if (tag === 'blockquote') {
|
||||||
const bqConfig = elements?.blockquote;
|
const bqConfig = elements?.blockquote;
|
||||||
const runs = processTextRuns(el, {
|
const bqFont = bqConfig?.font ? resolveFont(bqConfig.font, fonts || {}) : body.font;
|
||||||
font: bqConfig?.font ? resolveFont(bqConfig.font, fonts || {}) : body.font,
|
const bqSize = pt(bqConfig?.size || body.size);
|
||||||
size: pt(bqConfig?.size || body.size),
|
const bqColor = formatColor(resolveColorToHex(bqConfig?.color || body.color));
|
||||||
color: formatColor(resolveColorToHex(bqConfig?.color || body.color)),
|
const bqFmt = { font: bqFont, size: bqSize, color: bqColor, italics: bqConfig?.italic !== false };
|
||||||
italics: true
|
|
||||||
|
console.log('DOCX BLOCKQUOTE:', {
|
||||||
|
font: bqFont, size: bqSize, color: bqColor, childCount: el.children.length
|
||||||
});
|
});
|
||||||
|
|
||||||
const borderColor = resolveColorToHex(bqConfig?.borderLeft?.color) || accentColor;
|
const bqBorder: any = {};
|
||||||
const borderWidth = bqConfig?.borderLeft?.width || 3;
|
if (bqConfig?.border) {
|
||||||
|
const b = { color: resolveColorToHex(bqConfig.border.color) || accentColor, space: 6, style: mapBorderStyle(bqConfig.border.style), size: (bqConfig.border.width || 1) * 8 };
|
||||||
|
bqBorder.top = b; bqBorder.bottom = b; bqBorder.left = b; bqBorder.right = b;
|
||||||
|
}
|
||||||
|
if (bqConfig?.borderTop) bqBorder.top = { color: resolveColorToHex(bqConfig.borderTop.color) || accentColor, space: 6, style: mapBorderStyle(bqConfig.borderTop.style), size: (bqConfig.borderTop.width || 1) * 8 };
|
||||||
|
if (bqConfig?.borderBottom) bqBorder.bottom = { color: resolveColorToHex(bqConfig.borderBottom.color) || accentColor, space: 6, style: mapBorderStyle(bqConfig.borderBottom.style), size: (bqConfig.borderBottom.width || 1) * 8 };
|
||||||
|
if (bqConfig?.borderLeft) bqBorder.left = { color: resolveColorToHex(bqConfig.borderLeft.color) || accentColor, space: 10, style: mapBorderStyle(bqConfig.borderLeft.style), size: (bqConfig.borderLeft.width || 1) * 8 };
|
||||||
|
if (bqConfig?.borderRight) bqBorder.right = { color: resolveColorToHex(bqConfig.borderRight.color) || accentColor, space: 6, style: mapBorderStyle(bqConfig.borderRight.style), size: (bqConfig.borderRight.width || 1) * 8 };
|
||||||
|
|
||||||
const debugKey = 'blockquote-debug';
|
const bqLine = Math.max(bqConfig?.spacing?.line || body.spacing?.line || 1.2, 1.5);
|
||||||
if (!visitedTags.has(debugKey)) {
|
const bqBorderObj = Object.keys(bqBorder).length > 0 ? bqBorder : undefined;
|
||||||
visitedTags.add(debugKey);
|
const bqShading = bqConfig?.background ? { fill: resolveColorToHex(bqConfig.background), type: ShadingType.CLEAR } : undefined;
|
||||||
console.log('DOCX BLOCKQUOTE CONFIG:', {
|
const bqSpacing = {
|
||||||
font: bqConfig?.font,
|
before: 0,
|
||||||
size: bqConfig?.size,
|
after: body.size * 20, // 1em gap between inner paragraphs (matches browser default)
|
||||||
color: formatColor(resolveColorToHex(bqConfig?.color)),
|
line: Math.round(bqLine * 240)
|
||||||
border: { color: borderColor, width: borderWidth },
|
};
|
||||||
background: bqConfig?.background
|
|
||||||
|
// Process children - each <p> becomes its own paragraph with blockquote styling
|
||||||
|
const childEls = Array.from(el.children);
|
||||||
|
const makeBqParagraph = (runs: any[], isFirst: boolean, isLast: boolean, align?: any) => new Paragraph({
|
||||||
|
children: runs,
|
||||||
|
alignment: align || mapAlignment(bqConfig?.align),
|
||||||
|
indent: bqConfig?.indent ? { left: bqConfig.indent * 20 } : undefined,
|
||||||
|
border: bqBorderObj,
|
||||||
|
shading: bqShading,
|
||||||
|
spacing: {
|
||||||
|
...bqSpacing,
|
||||||
|
before: isFirst ? (bqConfig?.spacing?.before || 12) * 20 : bqSpacing.before,
|
||||||
|
after: isLast ? (bqConfig?.spacing?.after || 12) * 20 : bqSpacing.after,
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
if (childEls.length === 0) {
|
||||||
|
// No child elements - process as single paragraph with full blockquote styling
|
||||||
|
const runs = processTextRuns(el, bqFmt);
|
||||||
|
results.push(makeBqParagraph(runs, true, true, mapAlignment(bqConfig?.align)));
|
||||||
|
} else {
|
||||||
|
childEls.forEach((child, i) => {
|
||||||
|
const childEl = child as HTMLElement;
|
||||||
|
const childTagName = childEl.tagName.toLowerCase();
|
||||||
|
// Nested blockquotes - recurse
|
||||||
|
if (childTagName === 'blockquote') {
|
||||||
|
results.push(...processNode(childEl));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const isP = childTagName === 'p';
|
||||||
|
// CSS specificity: <p> rules override inherited blockquote styles
|
||||||
|
// Only italic inherits since .page p doesn't set font-style
|
||||||
|
const childFmt = isP ? {
|
||||||
|
font: body.font,
|
||||||
|
size: pt(body.size),
|
||||||
|
color: formatColor(resolveColorToHex(body.color)),
|
||||||
|
italics: bqConfig?.italic !== false
|
||||||
|
} : bqFmt;
|
||||||
|
const childAlign = isP ? mapAlignment(elements?.p?.align || body.align) : mapAlignment(bqConfig?.align);
|
||||||
|
const runs = processTextRuns(childEl, childFmt);
|
||||||
|
if (runs.length > 0) {
|
||||||
|
results.push(makeBqParagraph(runs, i === 0, i === childEls.length - 1, childAlign));
|
||||||
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
results.push(new Paragraph({
|
|
||||||
children: runs,
|
|
||||||
indent: { left: 720 },
|
|
||||||
border: { left: { color: borderColor, space: 10, style: BorderStyle.SINGLE, size: borderWidth * 8 } },
|
|
||||||
shading: bqConfig?.background ? { fill: resolveColorToHex(bqConfig.background), type: ShadingType.CLEAR } : (isDark ? undefined : { fill: 'F8F8F8', type: ShadingType.CLEAR }),
|
|
||||||
spacing: {
|
|
||||||
before: (bqConfig?.spacing?.before || 12) * 20,
|
|
||||||
after: (bqConfig?.spacing?.after || 12) * 20,
|
|
||||||
line: Math.round((bqConfig?.spacing?.line || body.spacing?.line || 1.2) * 240)
|
|
||||||
}
|
|
||||||
}));
|
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Lists
|
// Lists - with spacing before/after the list container
|
||||||
if (tag === 'ul' || tag === 'ol') {
|
if (tag === 'ul' || tag === 'ol') {
|
||||||
|
const listCfg = tag === 'ul' ? elements?.ul : elements?.ol;
|
||||||
|
const listBefore = (listCfg?.spacing?.before ?? 12) * 20;
|
||||||
|
const listAfter = (listCfg?.spacing?.after ?? 12) * 20;
|
||||||
|
if (listBefore) results.push(new Paragraph({ spacing: { before: listBefore, after: 0 }, children: [] }));
|
||||||
results.push(...processList(el, tag === 'ol', 0));
|
results.push(...processList(el, tag === 'ol', 0));
|
||||||
|
if (listAfter) results.push(new Paragraph({ spacing: { before: 0, after: listAfter }, children: [] }));
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1335,7 +1411,8 @@ export const generateDocxDocument = async (
|
|||||||
if (imgRun) {
|
if (imgRun) {
|
||||||
results.push(new Paragraph({
|
results.push(new Paragraph({
|
||||||
children: [imgRun],
|
children: [imgRun],
|
||||||
spacing: { before: 120, after: 120 },
|
alignment: AlignmentType.CENTER,
|
||||||
|
spacing: { before: (elements?.img?.spacing?.before ?? 18) * 20, after: (elements?.img?.spacing?.after ?? 18) * 20 },
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -1369,12 +1446,82 @@ export const generateDocxDocument = async (
|
|||||||
},
|
},
|
||||||
spacing: {
|
spacing: {
|
||||||
before: (hrConfig?.spacing?.before || 12) * 20,
|
before: (hrConfig?.spacing?.before || 12) * 20,
|
||||||
after: (hrConfig?.spacing?.after || 12) * 20
|
after: 0
|
||||||
}
|
}
|
||||||
}));
|
}));
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Divs - split into paragraphs for text/inline content, recurse for nested block elements
|
||||||
|
if (tag === 'div') {
|
||||||
|
const style = el.getAttribute('style') || '';
|
||||||
|
const alignMatch = style.match(/text-align:\s*(left|center|right|justify)/i);
|
||||||
|
const divAlign = alignMatch ? alignMatch[1].toLowerCase() : undefined;
|
||||||
|
const divSpacing = elements?.p?.spacing || body.spacing;
|
||||||
|
const blockTags = new Set(['div', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'blockquote', 'ul', 'ol', 'table', 'pre', 'hr']);
|
||||||
|
|
||||||
|
// Check if div has nested block elements
|
||||||
|
const hasBlockChildren = Array.from(el.children).some(c => blockTags.has(c.tagName.toLowerCase()));
|
||||||
|
|
||||||
|
if (hasBlockChildren) {
|
||||||
|
// Split: group consecutive inline/text nodes into paragraphs, recurse block elements
|
||||||
|
let inlineNodes: Node[] = [];
|
||||||
|
|
||||||
|
const flushInline = () => {
|
||||||
|
if (inlineNodes.length === 0) return;
|
||||||
|
// Create a temp container in the parsed document to process inline nodes
|
||||||
|
const temp = doc.createElement('span');
|
||||||
|
inlineNodes.forEach(n => temp.appendChild(n.cloneNode(true)));
|
||||||
|
const text = temp.textContent?.trim();
|
||||||
|
if (text) {
|
||||||
|
const runs = processTextRuns(temp as any, {
|
||||||
|
font: body.font, size: pt(body.size), color: formatColor(resolveColorToHex(body.color))
|
||||||
|
});
|
||||||
|
if (runs.length > 0) {
|
||||||
|
results.push(new Paragraph({
|
||||||
|
children: runs,
|
||||||
|
alignment: divAlign ? mapAlignment(divAlign === 'justify' ? 'both' : divAlign) : mapAlignment(body.align),
|
||||||
|
spacing: { before: 0, after: (divSpacing?.after || body.size) * 20, line: Math.round((divSpacing?.line || 1.2) * 240) }
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
inlineNodes = [];
|
||||||
|
};
|
||||||
|
|
||||||
|
for (const child of Array.from(el.childNodes)) {
|
||||||
|
if (child.nodeType === Node.ELEMENT_NODE && blockTags.has((child as HTMLElement).tagName.toLowerCase())) {
|
||||||
|
flushInline();
|
||||||
|
results.push(...processNode(child));
|
||||||
|
} else {
|
||||||
|
inlineNodes.push(child);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
flushInline();
|
||||||
|
} else {
|
||||||
|
// No nested blocks - treat entire div as one paragraph
|
||||||
|
const hasContent = el.textContent?.trim();
|
||||||
|
if (hasContent) {
|
||||||
|
const runs = processTextRuns(el, {
|
||||||
|
font: body.font, size: pt(body.size), color: formatColor(resolveColorToHex(body.color))
|
||||||
|
});
|
||||||
|
if (runs.length > 0) {
|
||||||
|
const isImgOnly = el.querySelector('img') !== null && !hasContent;
|
||||||
|
results.push(new Paragraph({
|
||||||
|
children: runs,
|
||||||
|
alignment: isImgOnly ? AlignmentType.CENTER : (divAlign ? mapAlignment(divAlign === 'justify' ? 'both' : divAlign) : mapAlignment(body.align)),
|
||||||
|
spacing: { before: 0, after: (divSpacing?.after || body.size) * 20, line: Math.round((divSpacing?.line || 1.2) * 240) }
|
||||||
|
}));
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// No text - process children for images etc.
|
||||||
|
for (const child of Array.from(el.childNodes)) {
|
||||||
|
results.push(...processNode(child));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
// Default: process children
|
// Default: process children
|
||||||
for (const child of Array.from(el.childNodes)) {
|
for (const child of Array.from(el.childNodes)) {
|
||||||
results.push(...processNode(child));
|
results.push(...processNode(child));
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
import TurndownService from 'turndown';
|
import TurndownService from 'turndown';
|
||||||
|
// @ts-ignore
|
||||||
|
import { gfm } from 'turndown-plugin-gfm';
|
||||||
|
|
||||||
export function htmlToMarkdown(html: string): string {
|
export function htmlToMarkdown(html: string, preserveAlignment = false): string {
|
||||||
const turndown = new TurndownService({
|
const turndown = new TurndownService({
|
||||||
headingStyle: 'atx',
|
headingStyle: 'atx',
|
||||||
hr: '---',
|
hr: '---',
|
||||||
@@ -10,6 +12,39 @@ export function htmlToMarkdown(html: string): string {
|
|||||||
strongDelimiter: '**',
|
strongDelimiter: '**',
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Enable GFM tables
|
||||||
|
turndown.use(gfm);
|
||||||
|
|
||||||
|
// Convert Blogger caption tables (image + caption) to image + italic caption
|
||||||
|
// These are layout tables, not data tables - without this rule, GFM converts them
|
||||||
|
// to markdown tables which then get data table borders in the export
|
||||||
|
turndown.addRule('bloggerCaptionTable', {
|
||||||
|
filter: (node) => {
|
||||||
|
return node.nodeName === 'TABLE' &&
|
||||||
|
(node as HTMLElement).classList.contains('tr-caption-container');
|
||||||
|
},
|
||||||
|
replacement: (_content, node) => {
|
||||||
|
const el = node as HTMLElement;
|
||||||
|
const img = el.querySelector('img');
|
||||||
|
const caption = el.querySelector('.tr-caption');
|
||||||
|
let result = '\n\n';
|
||||||
|
if (img) {
|
||||||
|
const src = img.getAttribute('src') || '';
|
||||||
|
const alt = img.getAttribute('alt') || '';
|
||||||
|
const link = img.closest('a');
|
||||||
|
if (link) {
|
||||||
|
result += `[](${link.getAttribute('href')})\n`;
|
||||||
|
} else {
|
||||||
|
result += `\n`;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (caption && caption.textContent?.trim()) {
|
||||||
|
result += `\n*${caption.textContent.trim()}*\n`;
|
||||||
|
}
|
||||||
|
return result + '\n';
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
// Strip meaningless wrapper spans (Blogger, Google Docs, Word paste, etc.)
|
// Strip meaningless wrapper spans (Blogger, Google Docs, Word paste, etc.)
|
||||||
turndown.addRule('stripDecorativeSpans', {
|
turndown.addRule('stripDecorativeSpans', {
|
||||||
filter: (node) => {
|
filter: (node) => {
|
||||||
@@ -23,6 +58,21 @@ export function htmlToMarkdown(html: string): string {
|
|||||||
replacement: (content) => content,
|
replacement: (content) => content,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// For HTML content: preserve divs with text-align as raw HTML pass-through
|
||||||
|
if (preserveAlignment) {
|
||||||
|
turndown.addRule('preserveAlignment', {
|
||||||
|
filter: (node) => {
|
||||||
|
if (node.nodeName !== 'DIV') return false;
|
||||||
|
const style = node.getAttribute('style') || '';
|
||||||
|
return /text-align:\s*(right|center)/i.test(style);
|
||||||
|
},
|
||||||
|
replacement: (_content, node) => {
|
||||||
|
const el = node as HTMLElement;
|
||||||
|
return '\n\n' + el.outerHTML + '\n\n';
|
||||||
|
},
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
let markdown = turndown.turndown(html);
|
let markdown = turndown.turndown(html);
|
||||||
|
|
||||||
// Clean up excessive blank lines
|
// Clean up excessive blank lines
|
||||||
@@ -31,8 +81,9 @@ export function htmlToMarkdown(html: string): string {
|
|||||||
// Regular spaces would trigger markdown code block detection at 4+ indent
|
// Regular spaces would trigger markdown code block detection at 4+ indent
|
||||||
// entities pass through Marked.js as HTML and render as visible spaces
|
// entities pass through Marked.js as HTML and render as visible spaces
|
||||||
markdown = markdown.replace(/\u00A0/g, ' ');
|
markdown = markdown.replace(/\u00A0/g, ' ');
|
||||||
// Clean up trailing whitespace on lines
|
// Clean up whitespace-only lines but DON'T strip trailing spaces on content lines
|
||||||
markdown = markdown.replace(/[ \t]+$/gm, '');
|
// Turndown uses two trailing spaces for <br> line breaks - stripping them breaks line breaks
|
||||||
|
markdown = markdown.replace(/^[ \t]+$/gm, '');
|
||||||
|
|
||||||
return markdown.trim();
|
return markdown.trim();
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user