added html import, image embedding, and font checks
This commit is contained in:
46
src/App.tsx
46
src/App.tsx
@@ -10,7 +10,9 @@ import { useTemplates } from './hooks/useTemplates';
|
||||
import { useDialog } from './hooks/useDialog';
|
||||
// @ts-ignore
|
||||
import { parse } from 'marked';
|
||||
import { Sparkles, Loader2, FileType, Keyboard, X, RefreshCw } from 'lucide-react';
|
||||
import { Sparkles, Loader2, FileType, Keyboard, X, RefreshCw, AlertCircle } from 'lucide-react';
|
||||
import { detectContentType } from './utils/contentDetector';
|
||||
import { htmlToMarkdown } from './utils/htmlToMarkdown';
|
||||
|
||||
import { useKeyboardNavigation } from './hooks/useKeyboardNavigation';
|
||||
|
||||
@@ -92,6 +94,7 @@ const App: React.FC = () => {
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [showShortcuts, setShowShortcuts] = useState(false);
|
||||
const [statusMessage, setStatusMessage] = useState('');
|
||||
const [uploadError, setUploadError] = useState<string | null>(null);
|
||||
|
||||
const { uiZoom, setUiZoom, isLoaded } = useSettings();
|
||||
const { templates, categories, isLoading: templatesLoading, error: templatesError, refresh, openFolder } = useTemplates();
|
||||
@@ -135,9 +138,32 @@ const App: React.FC = () => {
|
||||
|
||||
|
||||
|
||||
const handleFileLoaded = (text: string, fileName: string = '') => {
|
||||
setContent(text);
|
||||
setInputFileName(fileName);
|
||||
/**
 * Handles a file delivered by the upload component.
 *
 * Classifies the text with `detectContentType`; when detection reports an
 * error the message is shown as an upload error and nothing else changes.
 * Content detected as HTML is converted to Markdown first. The resulting
 * content and the file name (extension removed) are stored and the app
 * moves to the CONFIG state.
 *
 * @param text - Full text content of the uploaded file.
 * @param fullFileName - File name including its extension.
 */
const handleFileLoaded = (text: string, fullFileName: string = '') => {
  // Clear any error left over from a previous upload attempt.
  setUploadError(null);

  // Lower-cased text after the last dot, or '' when the name contains no dot.
  let extension = '';
  if (fullFileName.includes('.')) {
    extension = fullFileName.split('.').pop()?.toLowerCase() || '';
  }

  const { error: detectionError, type: detectedType } = detectContentType(text, extension);
  if (detectionError) {
    setUploadError(detectionError);
    return;
  }

  // Convert HTML to Markdown; if conversion throws, keep the raw text.
  const resolveContent = (): string => {
    if (detectedType !== 'html') {
      return text;
    }
    try {
      return htmlToMarkdown(text);
    } catch (err) {
      console.error('HTML conversion failed:', err);
      return text;
    }
  };

  // Name without its final extension; falls back to the full name when
  // stripping would leave an empty string.
  const nameWithoutExtension = fullFileName.replace(/\.[^/.]+$/, '');

  setContent(resolveContent());
  setInputFileName(nameWithoutExtension || fullFileName);
  setAppState(AppState.CONFIG);
};
|
||||
|
||||
@@ -174,6 +200,7 @@ const App: React.FC = () => {
|
||||
setGeneratedHtml('');
|
||||
setSelectedStyle(null);
|
||||
setInputFileName('');
|
||||
setUploadError(null);
|
||||
};
|
||||
|
||||
const handleBackToConfig = () => {
|
||||
@@ -371,6 +398,17 @@ const App: React.FC = () => {
|
||||
</motion.p>
|
||||
</motion.div>
|
||||
<FileUpload onFileLoaded={handleFileLoaded} />
|
||||
{uploadError && (
|
||||
<motion.div
|
||||
initial={{ opacity: 0, y: -10 }}
|
||||
animate={{ opacity: 1, y: 0 }}
|
||||
className="mt-4 max-w-xl mx-auto p-4 bg-red-900/20 border border-red-800 rounded-lg flex items-center gap-3 text-red-200"
|
||||
role="alert"
|
||||
>
|
||||
<AlertCircle size={20} className="flex-shrink-0" aria-hidden="true" />
|
||||
<span>{uploadError}</span>
|
||||
</motion.div>
|
||||
)}
|
||||
</motion.div>
|
||||
)}
|
||||
|
||||
|
||||
@@ -23,7 +23,7 @@ export default function ExportOptionsModal({ isOpen, onClose, onExport }: Export
|
||||
ref={dialogRef}
|
||||
onClick={handleBackdropClick}
|
||||
aria-labelledby="export-title"
|
||||
className="fixed inset-0 z-50 p-4"
|
||||
className="fixed inset-0 z-50 p-4 m-0 w-full h-full border-none bg-black/50 flex items-center justify-center"
|
||||
>
|
||||
<div
|
||||
className="relative w-full max-w-2xl bg-zinc-900 rounded-2xl shadow-2xl overflow-hidden border border-zinc-700"
|
||||
|
||||
@@ -17,8 +17,9 @@ export const FileUpload: React.FC<FileUploadProps> = ({ onFileLoaded }) => {
|
||||
|
||||
const handleFile = (file: File) => {
|
||||
setError(null);
|
||||
if (!file.name.endsWith('.md') && !file.name.endsWith('.txt') && !file.name.endsWith('.markdown')) {
|
||||
setError('Please upload a Markdown (.md) or Text (.txt) file.');
|
||||
const ext = file.name.split('.').pop()?.toLowerCase() || '';
|
||||
if (!['md', 'txt', 'markdown', 'html', 'htm'].includes(ext)) {
|
||||
setError('Please upload a Markdown (.md), HTML (.html), or Text (.txt) file.');
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -26,9 +27,7 @@ export const FileUpload: React.FC<FileUploadProps> = ({ onFileLoaded }) => {
|
||||
reader.onload = (e) => {
|
||||
const text = e.target?.result;
|
||||
if (typeof text === 'string') {
|
||||
// Extract filename without extension
|
||||
const fileName = file.name.replace(/\.[^/.]+$/, '');
|
||||
onFileLoaded(text, fileName);
|
||||
onFileLoaded(text, file.name);
|
||||
}
|
||||
};
|
||||
reader.onerror = () => setError('Error reading file.');
|
||||
@@ -132,7 +131,7 @@ export const FileUpload: React.FC<FileUploadProps> = ({ onFileLoaded }) => {
|
||||
type="file"
|
||||
className="hidden"
|
||||
onChange={handleChange}
|
||||
accept=".md,.txt,.markdown"
|
||||
accept=".md,.txt,.markdown,.html,.htm"
|
||||
aria-label="Select file"
|
||||
/>
|
||||
|
||||
@@ -172,7 +171,7 @@ export const FileUpload: React.FC<FileUploadProps> = ({ onFileLoaded }) => {
|
||||
animate={{ opacity: 1 }}
|
||||
transition={{ delay: 0.7 }}
|
||||
>
|
||||
Markdown or Plain Text files
|
||||
Markdown, HTML, or Plain Text files
|
||||
</motion.p>
|
||||
<motion.p
|
||||
className="text-xs text-zinc-400 mt-2"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import React, { useEffect, useRef, useState } from 'react';
|
||||
import { motion, AnimatePresence } from 'motion/react';
|
||||
import { ArrowLeft, Download, FileText, CheckCircle2, ExternalLink, Loader2, ZoomIn, ZoomOut } from 'lucide-react';
|
||||
import { ArrowLeft, Download, FileText, CheckCircle2, ExternalLink, Loader2, ZoomIn, ZoomOut, AlertTriangle } from 'lucide-react';
|
||||
import { PaperSize } from '../types';
|
||||
import { StyleOption } from '../types';
|
||||
import { getPreviewCss } from '../services/templateRenderer';
|
||||
@@ -142,14 +142,17 @@ export const Preview: React.FC<PreviewProps> = ({
|
||||
const [showExportModal, setShowExportModal] = useState(false);
|
||||
const [focusedElement, setFocusedElement] = useState<'back' | 'fonts' | 'save'>('save');
|
||||
const [exportError, setExportError] = useState<string | null>(null);
|
||||
const [missingFonts, setMissingFonts] = useState<string[]>([]);
|
||||
const [showFontWarning, setShowFontWarning] = useState(false);
|
||||
|
||||
// Get current style from templates
|
||||
const style = templates.find(s => s.id === selectedStyleId) || templates[0] || null;
|
||||
|
||||
// Extract used fonts for display
|
||||
// Extract used fonts for display (heading, body, and code)
|
||||
const usedFonts = style ? Array.from(new Set([
|
||||
style.typography?.fonts?.heading || style.wordConfig?.heading1?.font || 'Arial',
|
||||
style.typography?.fonts?.body || style.wordConfig?.body?.font || 'Arial'
|
||||
style.typography?.fonts?.body || style.wordConfig?.body?.font || 'Arial',
|
||||
style.typography?.fonts?.code || 'JetBrains Mono'
|
||||
])).filter(Boolean) : [];
|
||||
|
||||
useKeyboardNavigation({
|
||||
@@ -165,6 +168,24 @@ export const Preview: React.FC<PreviewProps> = ({
|
||||
}, []);
|
||||
|
||||
/**
 * Save handler: checks the fonts used by the current style against the
 * locally installed fonts before opening the export modal.
 *
 * When the Local Font Access API (`window.queryLocalFonts`) is present and
 * reports that some used fonts are missing, the font warning is shown
 * instead of the export modal. If the API is absent or the query fails,
 * the check is skipped and the export modal opens.
 */
const handleSave = async () => {
  // Returns the used fonts that are not installed; [] when the check cannot run.
  const findMissingFonts = async (): Promise<string[]> => {
    if (!('queryLocalFonts' in window)) {
      return [];
    }
    try {
      const localFonts = await (window as any).queryLocalFonts();
      const installedFamilies = new Set(localFonts.map((f: any) => f.family));
      return usedFonts.filter(font => !installedFamilies.has(font));
    } catch {
      // Permission denied or API unavailable - skip check
      return [];
    }
  };

  const missing = await findMissingFonts();

  if (missing.length === 0) {
    setShowExportModal(true);
    return;
  }

  setMissingFonts(missing);
  setShowFontWarning(true);
};
|
||||
|
||||
@@ -440,6 +461,48 @@ export const Preview: React.FC<PreviewProps> = ({
|
||||
<button onClick={() => setExportError(null)} className="ml-3 text-red-400 hover:text-white" aria-label="Dismiss error">✕</button>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{showFontWarning && (
|
||||
<div className="fixed inset-0 z-[60] flex items-center justify-center bg-black/50 backdrop-blur-sm">
|
||||
<motion.div
|
||||
initial={{ scale: 0.9, opacity: 0 }}
|
||||
animate={{ scale: 1, opacity: 1 }}
|
||||
className="bg-zinc-900 border border-zinc-700 rounded-2xl p-6 max-w-md w-full shadow-2xl mx-4"
|
||||
>
|
||||
<div className="flex items-center gap-3 mb-4">
|
||||
<div className="p-2 bg-amber-500/10 rounded-lg text-amber-400">
|
||||
<AlertTriangle size={20} />
|
||||
</div>
|
||||
<h3 className="text-lg font-bold text-white">Missing Fonts</h3>
|
||||
</div>
|
||||
<p className="text-zinc-300 text-sm mb-3">
|
||||
The following fonts are not installed on your system:
|
||||
</p>
|
||||
<ul className="space-y-1 mb-4">
|
||||
{missingFonts.map(font => (
|
||||
<li key={font} className="text-amber-300 text-sm font-medium">- {font}</li>
|
||||
))}
|
||||
</ul>
|
||||
<p className="text-zinc-400 text-xs mb-6">
|
||||
Download and install them using the font buttons at the top of the page before opening the exported document.
|
||||
</p>
|
||||
<div className="flex gap-3">
|
||||
<button
|
||||
onClick={() => { setShowFontWarning(false); setShowExportModal(true); }}
|
||||
className="flex-1 px-4 py-2.5 text-sm font-medium text-zinc-300 bg-zinc-800 border border-zinc-700 rounded-lg hover:bg-zinc-700 transition-colors"
|
||||
>
|
||||
Export Anyway
|
||||
</button>
|
||||
<button
|
||||
onClick={() => setShowFontWarning(false)}
|
||||
className="flex-1 px-4 py-2.5 text-sm font-medium text-white bg-indigo-600 rounded-lg hover:bg-indigo-500 transition-colors"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
</div>
|
||||
</motion.div>
|
||||
</div>
|
||||
)}
|
||||
</motion.div>
|
||||
);
|
||||
};
|
||||
|
||||
@@ -229,6 +229,22 @@ export const StyleSelector: React.FC<StyleSelectorProps> = ({
|
||||
<div class="page">
|
||||
${SAMPLE_CONTENT}
|
||||
</div>
|
||||
<script>
|
||||
function fitPage() {
|
||||
var page = document.querySelector('.page');
|
||||
if (!page) return;
|
||||
var available = window.innerWidth;
|
||||
var needed = page.offsetWidth + 80;
|
||||
if (needed > available) {
|
||||
document.body.style.zoom = (available / needed).toFixed(3);
|
||||
} else {
|
||||
document.body.style.zoom = '1';
|
||||
}
|
||||
}
|
||||
window.addEventListener('load', fitPage);
|
||||
window.addEventListener('resize', fitPage);
|
||||
setTimeout(fitPage, 100);
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
`;
|
||||
@@ -384,6 +400,7 @@ export const StyleSelector: React.FC<StyleSelectorProps> = ({
|
||||
role="listbox"
|
||||
aria-label="Typography styles"
|
||||
aria-activedescendant={selectedStyle ? `style-${selectedStyle}` : undefined}
|
||||
className="space-y-2"
|
||||
>
|
||||
{filteredStyles.length === 0 ? (
|
||||
<div className="text-center py-8 text-zinc-400">
|
||||
|
||||
136
src/utils/contentDetector.ts
Normal file
136
src/utils/contentDetector.ts
Normal file
@@ -0,0 +1,136 @@
|
||||
/** The content categories that detection can report. */
export type ContentType = 'html' | 'markdown' | 'text';
|
||||
|
||||
/** Result returned by `detectContentType`. */
export interface DetectionResult {
|
||||
/** Detected content category. */
type: ContentType;
|
||||
/** User-facing message; `detectContentType` sets it when the content looks binary. */
error?: string;
|
||||
/** Name of the binary format that was recognised (for example 'PDF document'). */
detectedFormat?: string;
|
||||
}
|
||||
|
||||
/** Leading "magic" strings of common binary formats, paired with a display name. */
const BINARY_SIGNATURES: [string, string][] = [
  ['%PDF', 'PDF document'],
  ['PK', 'Word document or ZIP archive'],
  ['\x89PNG', 'PNG image'],
  ['\xFF\xD8', 'JPEG image'],
  ['GIF8', 'GIF image'],
  ['RIFF', 'media file'],
  ['Rar!', 'RAR archive'],
];

/**
 * Heuristically decides whether text content is really binary data.
 *
 * If the content contains a NUL character it is treated as binary, and the
 * leading signature is used to name the format when it is a known one.
 * Otherwise the first 512 characters are sampled and the content counts as
 * binary when more than 10% of them are control characters other than tab,
 * line feed and carriage return.
 *
 * @param content - File content as a string.
 * @returns A human-readable format name, or `null` when the content looks like text.
 */
function detectBinaryFormat(content: string): string | null {
  if (content.includes('\0')) {
    const known = BINARY_SIGNATURES.find(([signature]) => content.startsWith(signature));
    return known ? known[1] : 'binary file';
  }

  const sampleLength = Math.min(content.length, 512);
  const sample = content.slice(0, sampleLength);
  // Code units below 32, excluding tab (9), LF (10) and CR (13).
  const controlChars = sample.match(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g) || [];

  if (controlChars.length / sampleLength > 0.1) {
    return 'binary file';
  }
  return null;
}
|
||||
|
||||
/**
 * Removes every triple-backtick fenced section (fences included) from the
 * content. An opening fence with no closing fence is left untouched.
 *
 * @param content - Text that may contain fenced code blocks.
 * @returns The text with all closed fenced sections removed.
 */
function stripCodeBlocks(content: string): string {
  const fencedBlock = /```[\s\S]*?```/g;
  // Splitting on the fences and re-joining drops each matched section.
  return content.split(fencedBlock).join('');
}
|
||||
|
||||
/** Matches opening tags of block-level / structural HTML elements (case-insensitive). */
const STRUCTURAL_TAG_RE = /<(div|p|table|tr|td|th|thead|tbody|tfoot|ul|ol|li|h[1-6]|section|article|header|footer|nav|main|aside|form|blockquote|pre|dl|dt|dd|figure|figcaption|hr)\b[^>]*>/gi;
/** Matches opening tags of inline HTML elements (case-insensitive). */
const INLINE_TAG_RE = /<(span|b|i|u|strong|em|a|img|br|code|sub|sup|small|mark|del|ins|s|abbr)\b[^>]*>/gi;

/**
 * Counts opening structural tags (div, p, table, headings, lists, ...) in the content.
 * Closing tags are not counted.
 */
function countStructuralTags(content: string): number {
  let total = 0;
  for (const _tag of content.matchAll(STRUCTURAL_TAG_RE)) {
    total += 1;
  }
  return total;
}

/**
 * Counts opening inline tags (span, b, i, a, img, br, ...) in the content.
 * Closing tags are not counted.
 */
function countInlineTags(content: string): number {
  let total = 0;
  for (const _tag of content.matchAll(INLINE_TAG_RE)) {
    total += 1;
  }
  return total;
}
|
||||
|
||||
/**
 * Scores how strongly the content looks like Markdown.
 *
 * Every line is trimmed and tested against line-level patterns (headings,
 * list items, block quotes, horizontal rules, code fences), each adding its
 * weight. Inline constructs (bold, links, images) are then counted in the
 * first 5000 characters only. An image also matches the link pattern, so it
 * contributes to both counts.
 *
 * @param content - Text to score.
 * @returns The accumulated score; 0 means no Markdown syntax was found.
 */
function countMarkdownSyntax(content: string): number {
  // [pattern, weight] pairs applied to each trimmed line.
  const lineRules: [RegExp, number][] = [
    [/^#{1,6}\s/, 3],
    [/^[-*+]\s/, 2],
    [/^\d+\.\s/, 2],
    [/^>\s/, 2],
    [/^(---|\*\*\*|___)$/, 2],
    [/^```/, 3],
  ];

  const lineScore = content.split('\n').reduce((sum, rawLine) => {
    const trimmedLine = rawLine.trim();
    let lineTotal = 0;
    for (const [pattern, weight] of lineRules) {
      if (pattern.test(trimmedLine)) {
        lineTotal += weight;
      }
    }
    return sum + lineTotal;
  }, 0);

  // Inline syntax is only counted within the leading part of the content.
  const head = content.substring(0, 5000);
  const occurrences = (pattern: RegExp): number => (head.match(pattern) || []).length;

  const boldCount = occurrences(/\*\*[^*]+\*\*/g);
  const linkCount = occurrences(/\[([^\]]+)\]\(([^)]+)\)/g);
  const imageCount = occurrences(/!\[([^\]]*)\]\(([^)]+)\)/g);

  return lineScore + boldCount + linkCount * 2 + imageCount * 2;
}
|
||||
|
||||
/**
 * Classifies uploaded text as HTML, Markdown or plain text.
 *
 * Decision order:
 *  1. Empty or whitespace-only content is plain text.
 *  2. Content that looks binary yields type 'text' with an `error` message
 *     and the detected format name.
 *  3. Content starting with `<!doctype` or `<html` (any case) is HTML.
 *  4. Otherwise the Markdown score is weighed against the number of
 *     structural and inline HTML tags found outside fenced code blocks.
 *  5. When those signals are inconclusive, the file extension decides.
 *
 * @param content - Text content of the file.
 * @param extension - File extension without the dot; compared against lower-case values.
 * @returns The detection result.
 */
export function detectContentType(content: string, extension: string): DetectionResult {
  if (!content || !content.trim()) {
    return { type: 'text' };
  }

  const binaryFormat = detectBinaryFormat(content);
  if (binaryFormat) {
    return {
      type: 'text',
      error: `This appears to be a ${binaryFormat}. TypoGenie accepts Markdown, HTML, and plain text files.`,
      detectedFormat: binaryFormat,
    };
  }

  // Full HTML document detection
  const leading = content.trimStart().toLowerCase();
  const isFullDocument = leading.startsWith('<!doctype') || leading.startsWith('<html');
  if (isFullDocument) {
    return { type: 'html' };
  }

  const mdScore = countMarkdownSyntax(content);

  // HTML tags are counted with fenced code blocks removed, so HTML shown as
  // a code example does not count as markup.
  const withoutCode = stripCodeBlocks(content);
  const structural = countStructuralTags(withoutCode);
  const inline = countInlineTags(withoutCode);

  if (structural >= 3) {
    // Many structural tags: HTML, unless Markdown syntax is also strong
    // (likely Markdown that contains HTML examples).
    return { type: mdScore >= 5 ? 'markdown' : 'html' };
  }

  // Few structural tags but heavy inline markup (Blogger/Google Docs style).
  if (structural >= 1 && inline >= 10) {
    return { type: 'html' };
  }

  // Strong markdown signal
  if (mdScore >= 3) {
    return { type: 'markdown' };
  }

  // Weak HTML with no markdown at all
  if (structural >= 1 && mdScore === 0) {
    return { type: 'html' };
  }

  // Extension as tiebreaker
  switch (extension) {
    case 'html':
    case 'htm':
      return { type: 'html' };
    case 'md':
    case 'markdown':
      return { type: 'markdown' };
    default:
      return { type: 'text' };
  }
}
|
||||
@@ -3,10 +3,11 @@ import {
|
||||
Document, Paragraph, TextRun, AlignmentType, HeadingLevel, BorderStyle,
|
||||
UnderlineType, ShadingType, LevelFormat,
|
||||
Packer, Table, TableCell, TableRow, WidthType, VerticalAlign,
|
||||
ExternalHyperlink, TableBorders
|
||||
ExternalHyperlink, TableBorders, ImageRun
|
||||
} from 'docx';
|
||||
import { DocxStyleConfig, PaperSize, TemplateElementStyle } from '../types';
|
||||
import { resolveColor, resolveFont } from '../services/templateRenderer';
|
||||
import { fetch as tauriFetch } from '@tauri-apps/plugin-http';
|
||||
|
||||
const pt = (points: number) => points * 2;
|
||||
const inchesToTwips = (inches: number) => Math.round(inches * 1440);
|
||||
@@ -185,6 +186,72 @@ export const generateDocxDocument = async (
|
||||
const parser = new DOMParser();
|
||||
const doc = parser.parseFromString(htmlContent, 'text/html');
|
||||
|
||||
// Pre-fetch all images for embedding.
// Every <img> with a non-data: src is downloaded once up front so later
// synchronous code can look the bytes up in imageCache by its src string.
// Images that cannot be fetched are simply left out of the cache.
const imageCache = new Map<string, { data: Uint8Array; width: number; height: number }>();
const imgElements = doc.querySelectorAll('img');
for (const img of Array.from(imgElements)) {
  const src = img.getAttribute('src');
  if (!src || src.startsWith('data:')) continue;

  // Get dimensions from HTML attributes first (explicit radix so the values
  // are always read as decimal).
  const htmlW = parseInt(img.getAttribute('data-original-width') || img.getAttribute('width') || '0', 10);
  const htmlH = parseInt(img.getAttribute('data-original-height') || img.getAttribute('height') || '0', 10);

  try {
    // Step 1: Fetch the image bytes
    let data: Uint8Array | null = null;

    // Try Tauri HTTP plugin
    try {
      const resp = await tauriFetch(src, { method: 'GET' });
      if (resp.ok) {
        data = new Uint8Array(await resp.arrayBuffer());
      }
    } catch (e1) {
      console.warn('tauriFetch failed, trying standard fetch:', e1);
    }

    // Fallback to standard fetch. A 'no-cors' request is deliberately not
    // made: it yields an opaque response whose body cannot be read, so it
    // would only add a second, wasted network round trip.
    if (!data) {
      try {
        const resp = await globalThis.fetch(src);
        if (resp.ok) {
          data = new Uint8Array(await resp.arrayBuffer());
        }
      } catch (e2) {
        console.warn('Standard fetch also failed:', e2);
      }
    }

    if (!data || data.length === 0) {
      console.warn('No image data received for:', src);
      continue;
    }

    // Step 2: Determine dimensions. Prefer the HTML attributes; when either
    // is missing, decode the image to read its intrinsic size.
    let width = htmlW;
    let height = htmlH;
    if (!width || !height) {
      try {
        const bitmap = await createImageBitmap(new Blob([data]));
        width = bitmap.width;
        height = bitmap.height;
        bitmap.close();
      } catch {
        // Decoding failed: keep whatever attribute value exists, else a default size.
        width = width || 600;
        height = height || 400;
      }
    }

    imageCache.set(src, { data, width, height });
    console.log('Image cached:', src.substring(0, 60), width, 'x', height, data.length, 'bytes');
  } catch (err) {
    console.warn('Image embed failed for:', src, err);
  }
}
|
||||
|
||||
const children: (Paragraph | Table)[] = [];
|
||||
|
||||
// Track separate ordered lists for independent numbering
|
||||
@@ -284,6 +351,26 @@ export const generateDocxDocument = async (
|
||||
return elementConfig?.allCaps || false;
|
||||
};
|
||||
|
||||
/**
 * Builds an ImageRun for a pre-fetched image, shrinking it proportionally
 * when it is wider than the printable page width.
 *
 * Sizes passed to `transformation` are in pixels (see the original note:
 * the docx library converts to EMU internally).
 *
 * @param src - The image's src attribute, used as the imageCache key.
 * @returns The ImageRun, or null when the image is not in the cache.
 */
const createInlineImageRun = (src: string): ImageRun | null => {
  const entry = imageCache.get(src);
  if (!entry) {
    return null;
  }

  // Printable width = page width minus left/right margins, in twips.
  // Margins are multiplied by 20 to reach twips; 72 is used when unset.
  const pageWidthTwips = paperSize === 'A4' ? mmToTwips(210) : inchesToTwips(8.5);
  const leftMargin = (options.page?.margins?.left || 72) * 20;
  const rightMargin = (options.page?.margins?.right || 72) * 20;

  // Max width in pixels at 96 DPI (1440 twips per inch).
  const maxWidthPx = ((pageWidthTwips - leftMargin - rightMargin) / 1440) * 96;

  const fitsPage = !(entry.width > maxWidthPx);
  const scale = maxWidthPx / entry.width;
  const transformation = fitsPage
    ? { width: entry.width, height: entry.height }
    : { width: Math.round(maxWidthPx), height: Math.round(entry.height * scale) };

  return new ImageRun({ data: entry.data, transformation });
};
|
||||
|
||||
// Process text runs with support for links and formatting
|
||||
const processTextRuns = (element: HTMLElement, baseFormatting: any = {}, elementType?: string): (TextRun | ExternalHyperlink)[] => {
|
||||
const runs: (TextRun | ExternalHyperlink)[] = [];
|
||||
@@ -360,6 +447,14 @@ export const generateDocxDocument = async (
|
||||
} else if (node.nodeType === Node.ELEMENT_NODE) {
|
||||
const childEl = node as HTMLElement;
|
||||
const childTag = childEl.tagName.toLowerCase();
|
||||
if (childTag === 'img') {
|
||||
const imgSrc = childEl.getAttribute('src');
|
||||
if (imgSrc) {
|
||||
const imgRun = createInlineImageRun(imgSrc);
|
||||
if (imgRun) linkRuns.push(imgRun as any);
|
||||
}
|
||||
return;
|
||||
}
|
||||
const childFmt = { ...fmt };
|
||||
if (childTag === 'strong' || childTag === 'b') childFmt.bold = true;
|
||||
if (childTag === 'em' || childTag === 'i') childFmt.italics = true;
|
||||
@@ -498,6 +593,14 @@ export const generateDocxDocument = async (
|
||||
} else if (node.nodeType === Node.ELEMENT_NODE) {
|
||||
const childEl = node as HTMLElement;
|
||||
const childTag = childEl.tagName.toLowerCase();
|
||||
if (childTag === 'img') {
|
||||
const imgSrc = childEl.getAttribute('src');
|
||||
if (imgSrc) {
|
||||
const imgRun = createInlineImageRun(imgSrc);
|
||||
if (imgRun) linkRuns.push(imgRun as any);
|
||||
}
|
||||
return;
|
||||
}
|
||||
const childFmt = { ...fmt };
|
||||
if (childTag === 'strong' || childTag === 'b') childFmt.bold = true;
|
||||
if (childTag === 'em' || childTag === 'i') childFmt.italics = true;
|
||||
@@ -522,6 +625,16 @@ export const generateDocxDocument = async (
|
||||
}
|
||||
}
|
||||
|
||||
// Handle standalone images in text runs
|
||||
if (tag === 'img') {
|
||||
const imgSrc = el.getAttribute('src');
|
||||
if (imgSrc) {
|
||||
const imgRun = createInlineImageRun(imgSrc);
|
||||
if (imgRun) runs.push(imgRun as any);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const style = el.getAttribute('style') || '';
|
||||
const colorMatch = style.match(/color:\s*#?([a-fA-F0-9]{6})/);
|
||||
if (colorMatch) fmt.color = colorMatch[1];
|
||||
@@ -560,7 +673,12 @@ export const generateDocxDocument = async (
|
||||
}
|
||||
}));
|
||||
|
||||
const cellBorders: any = {};
|
||||
const cellBorders: any = {
|
||||
top: { style: BorderStyle.NIL, size: 0, color: '000000' },
|
||||
bottom: { style: BorderStyle.NIL, size: 0, color: '000000' },
|
||||
left: { style: BorderStyle.NIL, size: 0, color: '000000' },
|
||||
right: { style: BorderStyle.NIL, size: 0, color: '000000' }
|
||||
};
|
||||
if (cfg?.border) {
|
||||
const b = { color: resolveColorToHex(cfg.border.color) || '000000', style: mapBorderStyle(cfg.border.style), size: cfg.border.width * 8 };
|
||||
cellBorders.top = b;
|
||||
@@ -1145,9 +1263,13 @@ export const generateDocxDocument = async (
|
||||
});
|
||||
}
|
||||
|
||||
// Center paragraphs that only contain an image
|
||||
const isImageOnly = el.querySelector('img') !== null && !el.textContent?.trim();
|
||||
|
||||
results.push(new Paragraph({
|
||||
children: runs.length > 0 ? runs : [new TextRun({ text: el.textContent || '' })],
|
||||
alignment: mapAlignment(body.align),
|
||||
alignment: isImageOnly ? AlignmentType.CENTER : mapAlignment(body.align),
|
||||
indent: isImageOnly ? undefined : (elements?.p?.indent ? { firstLine: elements.p.indent * 20 } : undefined),
|
||||
spacing: {
|
||||
before: (body.spacing?.before || 0) * 20,
|
||||
after: (body.spacing?.after || 0) * 20,
|
||||
@@ -1203,20 +1325,33 @@ export const generateDocxDocument = async (
|
||||
return results;
|
||||
}
|
||||
|
||||
// Images - produce accessible placeholder text
|
||||
// Images - embed if fetched, otherwise placeholder
|
||||
if (tag === 'img') {
|
||||
const alt = el.getAttribute('alt') || '';
|
||||
const placeholderText = alt ? `[Image: ${alt}]` : '[Image]';
|
||||
results.push(new Paragraph({
|
||||
children: [new TextRun({
|
||||
text: placeholderText,
|
||||
font: body.font,
|
||||
size: pt(body.size),
|
||||
color: formatColor(resolveColorToHex(body.color) || '666666'),
|
||||
italics: true,
|
||||
})],
|
||||
spacing: { before: 120, after: 120 },
|
||||
}));
|
||||
const src = el.getAttribute('src');
|
||||
const cached = src ? imageCache.get(src) : null;
|
||||
|
||||
if (cached) {
|
||||
const imgRun = createInlineImageRun(src);
|
||||
if (imgRun) {
|
||||
results.push(new Paragraph({
|
||||
children: [imgRun],
|
||||
spacing: { before: 120, after: 120 },
|
||||
}));
|
||||
}
|
||||
} else {
|
||||
const alt = el.getAttribute('alt') || '';
|
||||
const placeholderText = alt ? `[Image: ${alt}]` : '[Image]';
|
||||
results.push(new Paragraph({
|
||||
children: [new TextRun({
|
||||
text: placeholderText,
|
||||
font: body.font,
|
||||
size: pt(body.size),
|
||||
color: formatColor(resolveColorToHex(body.color) || '666666'),
|
||||
italics: true,
|
||||
})],
|
||||
spacing: { before: 120, after: 120 },
|
||||
}));
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
|
||||
38
src/utils/htmlToMarkdown.ts
Normal file
38
src/utils/htmlToMarkdown.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
import TurndownService from 'turndown';
|
||||
|
||||
/**
 * Converts an HTML string to Markdown with Turndown, then normalises the
 * result.
 *
 * Post-processing steps, in order:
 *  - runs of three or more newlines are collapsed to one blank line;
 *  - non-breaking spaces (U+00A0) become `&nbsp;` entities;
 *  - trailing spaces and tabs are removed from every line;
 *  - the whole result is trimmed.
 *
 * @param html - HTML markup to convert.
 * @returns The Markdown text.
 */
export function htmlToMarkdown(html: string): string {
  const turndown = new TurndownService({
    headingStyle: 'atx',
    hr: '---',
    bulletListMarker: '-',
    codeBlockStyle: 'fenced',
    emDelimiter: '*',
    strongDelimiter: '**',
  });

  // Style declarations that carry no real formatting.
  const meaningless = /font-family:\s*inherit|font-size:\s*(medium|inherit)|font-weight:\s*normal|color:\s*(black|inherit)/i;

  // Strip meaningless wrapper spans (Blogger, Google Docs, Word paste, etc.):
  // a span with no style, or whose every declaration is meaningless, is
  // replaced by its content.
  turndown.addRule('stripDecorativeSpans', {
    filter: (node) => {
      if (node.nodeName !== 'SPAN') return false;
      const style = node.getAttribute('style') || '';
      if (!style) return true;
      const props = style.split(';').map(p => p.trim()).filter(Boolean);
      return props.length > 0 && props.every(p => meaningless.test(p));
    },
    replacement: (content) => content,
  });

  let markdown = turndown.turndown(html);

  // Clean up excessive blank lines
  markdown = markdown.replace(/\n{3,}/g, '\n\n');
  // Convert non-breaking spaces to &nbsp; entities (NOT regular spaces).
  // Regular spaces would trigger markdown code block detection at 4+ indent;
  // &nbsp; entities pass through Marked.js as HTML and render as visible spaces.
  markdown = markdown.replace(/\u00A0/g, '&nbsp;');
  // Clean up trailing whitespace on lines
  markdown = markdown.replace(/[ \t]+$/gm, '');

  return markdown.trim();
}
|
||||
Reference in New Issue
Block a user