Drop your PDF here
or click to choose a file
Extract text into clean editable HTML. Best for text-based PDFs — for scanned PDFs, run OCR first.
\n\n`; return html; } function groupIntoLines(items) { // PDF.js text items have transform[5] = y (bottom-left). Items on the same line have ~same y. const sorted = items.slice().sort((a, b) => { const dy = b.transform[5] - a.transform[5]; if (Math.abs(dy) > 2) return dy; // higher y first (top of page) return a.transform[4] - b.transform[4]; // then left to right }); const lines = []; let cur = []; let curY = null; for (const it of sorted) { const y = it.transform[5]; if (curY === null || Math.abs(curY - y) <= 2) { cur.push(it); curY = curY === null ? y : curY; } else { if (cur.length) lines.push(cur); cur = [it]; curY = y; } } if (cur.length) lines.push(cur); return lines; } // Positioned HTML: absolute-positioned divs, preserves layout closely. function buildPositionedHtml(pages) { const docTitle = state.fileName.replace(/\.pdf$/i, ''); let html = `\n\n\n \n ${escapeHtml(docTitle)}\n \n\n\n`; for (const p of pages) { const w = p.viewport.width; const h = p.viewport.height; html += `
\n`; for (const it of p.items) { if (!it.str || !it.str.trim()) continue; const tr = it.transform; const fontSize = Math.abs(tr[0]) || 12; const x = tr[4]; const y = h - tr[5]; // flip to top-left origin html += `
${escapeHtml(it.str)}
\n`; } html += `
\n`; } html += ` \n\n`; return html; } async function copyHtml() { if (!state.html) { toast('Convert first.', 'warning'); return; } try { await navigator.clipboard.writeText(state.html); toast('Copied to clipboard.', 'success'); } catch (_) { toast('Copy failed; select and copy manually.', 'warning'); } } function downloadHtml() { if (!state.html) { toast('Convert first.', 'warning'); return; } const blob = new Blob([state.html], { type: 'text/html' }); const url = URL.createObjectURL(blob); const a = document.createElement('a'); a.href = url; a.download = state.fileName.replace(/\.pdf$/i, '') + '.html'; document.body.appendChild(a); a.click(); a.remove(); URL.revokeObjectURL(url); } function toast(msg, type = 'success', duration = 3500) { const el = document.createElement('div'); el.className = `toast ${type}`; el.textContent = msg; dom.toastContainer.appendChild(el); setTimeout(() => { el.classList.add('exiting'); el.addEventListener('animationend', () => el.remove()); }, duration); } })();