// modules/key-card/src/extractKeycardFromPDF.ts
// Requires the "pdfjs-dist" (^4.0.0) dependency declared in modules/key-card/package.json.
import { buildLinesFromPDFNodes, KeycardEntry, parseKeycardFromLines, PDFTextNode } from './parseKeycard';

/**
 * Extracts keycard entries from a PDF file (browser only).
 *
 * Before calling this function, configure the pdfjs worker:
 *   import { GlobalWorkerOptions } from 'pdfjs-dist';
 *   GlobalWorkerOptions.workerSrc = '<URL of the pdf.js worker script>';
 *
 * pdfjs-dist is loaded via dynamic import so this module can be safely
 * imported in Node.js environments without triggering browser-only globals.
 *
 * @param file - The PDF to read; its full contents are loaded into memory.
 * @returns The reconstructed text lines (in reading order) and the keycard
 *   entries parsed from those lines.
 * @throws Whatever pdf.js rejects with when the document or one of its pages
 *   cannot be loaded. The loading task is destroyed before the error propagates.
 */
export async function extractKeycardEntriesFromPDF(file: File): Promise<{
  lines: string[];
  entries: KeycardEntry[];
}> {
  const pdfjsLib = await import('pdfjs-dist');
  const arrayBuffer = await file.arrayBuffer();
  const loadingTask = pdfjsLib.getDocument({ data: arrayBuffer });
  const nodes: PDFTextNode[] = [];

  try {
    const pdfDocument = await loadingTask.promise;

    for (let pageNumber = 1; pageNumber <= pdfDocument.numPages; pageNumber++) {
      const page = await pdfDocument.getPage(pageNumber);
      const textContent = await page.getTextContent();

      for (const item of textContent.items) {
        // Skip items that carry no text payload or no positioning matrix.
        if (!('str' in item) || !Array.isArray(item.transform)) {
          continue;
        }

        const text = item.str.replace(/\s+/g, ' ').trim();
        if (!text) {
          continue;
        }

        // Indices 4 and 5 of the transform are used as the fragment's x / y position.
        const x = Number(item.transform[4] ?? 0);
        const y = Number(item.transform[5] ?? 0);
        const width = 'width' in item ? Number(item.width ?? 0) : 0;

        nodes.push({ text, x, y, page: pageNumber, width });
      }
    }
  } finally {
    // Release the document and its worker-side resources on success and on
    // failure alike; otherwise every parsed PDF stays resident until page unload.
    await loadingTask.destroy();
  }

  const lines = buildLinesFromPDFNodes(nodes);
  return {
    lines,
    entries: parseKeycardFromLines(lines),
  };
}
// modules/key-card/src/parseKeycard.ts

/** A single positioned text fragment taken from one page of a PDF. */
export type PDFTextNode = {
  text: string;
  x: number;
  y: number;
  page: number;
  width: number;
};

/** One parsed keycard section: `label` is "<letter>: <title>", `value` is its data. */
export type KeycardEntry = {
  label: string;
  value: string;
};

const sectionHeaderRegex = /^([A-D])\s*[:.)-]\s*(.+?)\s*$/i;
const dataLineRegex = /^data\s*:\s*(.*)$/i;
const faqHeaderRegex = /^BitGo\s+KeyCard\s+FAQ$/i;

// PDF coordinate tolerance in points. Nodes within this distance on the Y-axis
// are treated as belonging to the same line; nodes further apart are separate lines.
const PDF_LINE_Y_TOLERANCE = 2;
// Horizontal gap in points above which a space is inserted between adjacent nodes.
const PDF_NODE_GAP_THRESHOLD = 2;

/** Collapses every whitespace run to a single space and trims both ends. */
function sanitizeText(input: string): string {
  return input.replace(/\s+/g, ' ').trim();
}

/**
 * Joins the captured lines of a section into one continuous value and strips
 * "Part N" page-continuation labels.
 */
function normalizeSectionValue(rawValue: string): string {
  // Two-pass removal of "Part N" page-continuation labels:
  //   1. Line filter: removes labels that appear as standalone lines.
  //   2. Regex replace: removes labels embedded mid-line when
  //      buildLinesFromPDFNodes merges them with adjacent content at the same
  //      y-coordinate (e.g. "...X88bPart 2 lFPMd...").
  // join('') intentionally uses no separator — section values are continuous
  // strings (base64 / xpub) that wrap across PDF lines without spaces.
  return rawValue
    .split('\n')
    .filter((line) => !/^Part\s+\d+$/i.test(line.trim()))
    .join('')
    .replace(/\s*Part\s+\d+\s*/gi, '')
    .trim();
}

/** Counts the occurrences of `char` in `input`. */
function countChar(input: string, char: string): number {
  return input.split(char).length - 1;
}

/** True when the section title names the encrypted wallet password box. */
function isEncryptedWalletPasswordSectionTitle(title: string): boolean {
  return title.toLowerCase().includes('encrypted wallet password');
}

/**
 * Reconstructs logical text lines from an unordered set of PDF text nodes.
 *
 * PDF text extraction returns individual positioned fragments. This function
 * sorts them by page, then Y-coordinate (descending, i.e. top-to-bottom), then
 * X-coordinate; groups fragments whose Y lies within PDF_LINE_Y_TOLERANCE
 * points of the first fragment of the current line; and inserts a space
 * between fragments that are separated by more than PDF_NODE_GAP_THRESHOLD
 * points horizontally.
 *
 * @param nodes - Text fragments in any order; the array is not mutated.
 * @returns Non-empty, trimmed lines in reading order.
 */
export function buildLinesFromPDFNodes(nodes: PDFTextNode[]): string[] {
  // Strict total order. A tolerance-based comparator ("close in y => compare by
  // x") is not transitive, which makes Array.prototype.sort's result
  // implementation-defined. Tolerance is applied only in the grouping pass
  // below, where it is well-defined.
  const sortedNodes = [...nodes].sort((a, b) => a.page - b.page || b.y - a.y || a.x - b.x);

  const lines: string[] = [];
  let currentLineNodes: PDFTextNode[] = [];
  let currentPage = -1;
  let currentY = Number.NaN;

  // Emits the accumulated fragments as one line (left to right) if non-empty.
  function flushLine(): void {
    if (currentLineNodes.length === 0) {
      return;
    }

    const sortedLineNodes = [...currentLineNodes].sort((a, b) => a.x - b.x);
    let line = '';
    let previousRightEdge: number | null = null;
    for (const node of sortedLineNodes) {
      const piece = sanitizeText(node.text);
      if (!piece) {
        continue;
      }

      if (previousRightEdge !== null && node.x - previousRightEdge > PDF_NODE_GAP_THRESHOLD) {
        line += ' ';
      }
      line += piece;
      previousRightEdge = node.x + node.width;
    }

    const normalizedLine = line.trim();
    if (normalizedLine) {
      lines.push(normalizedLine);
    }
  }

  for (const node of sortedNodes) {
    const pageChanged = node.page !== currentPage;
    // currentY is the Y of the first (top-most) fragment of the line in progress.
    const lineChanged = Number.isNaN(currentY) || Math.abs(node.y - currentY) > PDF_LINE_Y_TOLERANCE;
    if (pageChanged || lineChanged) {
      flushLine();
      currentLineNodes = [node];
      currentPage = node.page;
      currentY = node.y;
      continue;
    }

    currentLineNodes.push(node);
  }

  flushLine();
  return lines;
}

/**
 * Parses keycard sections (A–D) out of reconstructed text lines.
 *
 * A section starts at a header line such as "A: <title>". Its value starts at
 * the following "data: ..." line and continues over subsequent lines until the
 * next section header or the "BitGo KeyCard FAQ" header. For the encrypted
 * wallet password section, capture also stops as soon as the curly braces seen
 * so far are balanced, so that trailing footer/FAQ text is not appended.
 *
 * @param lines - Text lines in reading order.
 * @returns One entry per section that had at least one captured value line.
 */
export function parseKeycardFromLines(lines: string[]): KeycardEntry[] {
  const sections: Array<{
    section: string;
    title: string;
    values: string[];
    isCapturingData: boolean;
    openCurlyCount: number;
  }> = [];
  let currentSectionIndex = -1;

  for (const line of lines) {
    const labelMatch = line.match(sectionHeaderRegex);
    if (labelMatch) {
      const section = labelMatch[1]?.toUpperCase();
      const title = sanitizeText(labelMatch[2] ?? '');
      if (section && title) {
        sections.push({
          section,
          title,
          values: [],
          isCapturingData: false,
          openCurlyCount: 0,
        });
        currentSectionIndex = sections.length - 1;
        continue;
      }
    }

    if (currentSectionIndex < 0) {
      continue;
    }

    const currentSection = sections[currentSectionIndex];
    if (!currentSection) {
      continue;
    }

    const isEncryptedPassword = isEncryptedWalletPasswordSectionTitle(currentSection.title);

    const dataLineMatch = line.match(dataLineRegex);
    if (dataLineMatch) {
      currentSection.isCapturingData = true;
      const inlineValue = sanitizeText(dataLineMatch[1] ?? '');
      if (inlineValue) {
        currentSection.values.push(inlineValue);
        currentSection.openCurlyCount += countChar(inlineValue, '{') - countChar(inlineValue, '}');
        // The JSON object may open and close on the "data:" line itself. Stop
        // here in that case; otherwise the next (footer) line would be pushed
        // before the balance check below ever runs.
        if (isEncryptedPassword && currentSection.openCurlyCount <= 0) {
          currentSection.isCapturingData = false;
        }
      }
      continue;
    }

    if (currentSection.isCapturingData) {
      if (faqHeaderRegex.test(line)) {
        currentSection.isCapturingData = false;
        continue;
      }

      currentSection.values.push(line);

      // For encrypted wallet password, data is a single JSON object. Stop as
      // soon as the object closes so footer/FAQ content is not appended.
      if (isEncryptedPassword) {
        currentSection.openCurlyCount += countChar(line, '{') - countChar(line, '}');
        if (currentSection.openCurlyCount <= 0) {
          currentSection.isCapturingData = false;
        }
      }
    }
  }

  return sections
    .filter(({ section, values }) => ['A', 'B', 'C', 'D'].includes(section) && values.length > 0)
    .map(({ section, title, values }) => ({
      label: `${section}: ${title}`,
      value: normalizeSectionValue(values.join('\n')),
    }));
}
xpub661MyMwAqRbcG...', + 'C: Box C – BitGo Key', + 'data: xpub661MyMwAqRbcH...', + 'D: Box D – Encrypted Wallet Password', + `data: ${part1}`, + 'Part 2', + part2, + ]; + + const entries = parseKeycardFromLines(lines); + const sectionD = entries.find((e) => e.label.startsWith('D:')); + assert.ok(sectionD, 'section D should be present'); + assert.strictEqual(sectionD.value, `${part1}${part2}`); + }); + + it('multiple embedded Part N labels – strips all page-break labels across a long ct value', function () { + const seg1 = 'AAABBBCCC'; + const seg2 = 'DDDEEEFFF'; + const seg3 = 'GGGHHH'; + const mergedLine = `{"iv":"abc123","v":1,"iter":10000,"ks":256,"ts":64,"mode":"ccm","adata":"","cipher":"aes","salt":"xyz789","ct":"${seg1}Part 2 ${seg2}Part 3 ${seg3}"}`; + const lines = [ + 'A: Box A – User Key', + 'data: xpub661MyMwAqRbcF...', + 'D: Box D – Encrypted Wallet Password', + `data: ${mergedLine}`, + ]; + + const entries = parseKeycardFromLines(lines); + const sectionD = entries.find((e) => e.label.startsWith('D:')); + assert.ok(sectionD, 'section D should be present'); + const expected = `{"iv":"abc123","v":1,"iter":10000,"ks":256,"ts":64,"mode":"ccm","adata":"","cipher":"aes","salt":"xyz789","ct":"${seg1}${seg2}${seg3}"}`; + assert.strictEqual(sectionD.value, expected); + }); + + it('Part N embedded mid-line – strips label fused into base64 content', function () { + const ctPrefix = 'AAABBBCCC'; + const ctSuffix = 'DDDEEEFFF'; + const mergedLine = `{"iv":"abc123","v":1,"iter":10000,"ks":256,"ts":64,"mode":"ccm","adata":"","cipher":"aes","salt":"xyz789","ct":"${ctPrefix}Part 2 ${ctSuffix}"}`; + const lines = [ + 'A: Box A – User Key', + 'data: xpub661MyMwAqRbcF...', + 'B: Box B – Backup Key', + 'data: xpub661MyMwAqRbcG...', + 'C: Box C – BitGo Key', + 'data: xpub661MyMwAqRbcH...', + 'D: Box D – Encrypted Wallet Password', + `data: ${mergedLine}`, + ]; + + const entries = parseKeycardFromLines(lines); + const sectionD = entries.find((e) => e.label.startsWith('D:')); 
+ assert.ok(sectionD, 'section D should be present'); + const expected = `{"iv":"abc123","v":1,"iter":10000,"ks":256,"ts":64,"mode":"ccm","adata":"","cipher":"aes","salt":"xyz789","ct":"${ctPrefix}${ctSuffix}"}`; + assert.strictEqual(sectionD.value, expected); + }); +}); diff --git a/modules/web-demo/package.json b/modules/web-demo/package.json index 6ccbe77cb8..c401445b6e 100644 --- a/modules/web-demo/package.json +++ b/modules/web-demo/package.json @@ -68,6 +68,7 @@ "@bitgo/statics": "^58.43.0", "bitgo": "^51.2.0", "lodash": "^4.18.0", + "pdfjs-dist": "^4.0.0", "react": "^18.0.0", "react-dom": "^18.0.0", "react-json-view": "^1.21.3", diff --git a/modules/web-demo/src/components/KeyCard/index.tsx b/modules/web-demo/src/components/KeyCard/index.tsx index 21f42ac0a7..d8b480bbb4 100644 --- a/modules/web-demo/src/components/KeyCard/index.tsx +++ b/modules/web-demo/src/components/KeyCard/index.tsx @@ -1,4 +1,6 @@ -import React from 'react'; +import React, { useRef, useState } from 'react'; +import { GlobalWorkerOptions } from 'pdfjs-dist'; +import { extractKeycardEntriesFromPDF, KeycardEntry } from '@bitgo/key-card'; import { downloadKeycardForHotEthTSSWallet, downloadKeycardForHotLtcWallet, @@ -8,7 +10,43 @@ import { downloadKeycardForDKLsTSS, } from '@components/KeyCard/fixtures'; +// Configure pdfjs worker for webpack (must be set before calling extractKeycardEntriesFromPDF) +GlobalWorkerOptions.workerSrc = new URL( + 'pdfjs-dist/build/pdf.worker.min.mjs', + import.meta.url, +).toString(); + const KeyCard = () => { + const fileInputRef = useRef(null); + const [isLoading, setIsLoading] = useState(false); + const [error, setError] = useState(null); + const [entries, setEntries] = useState(null); + + const handleFileChange = async (e: React.ChangeEvent) => { + const file = e.target.files?.[0]; + if (!file) return; + + setIsLoading(true); + setError(null); + setEntries(null); + + try { + const result = await extractKeycardEntriesFromPDF(file); + 
setEntries(result.entries); + if (result.entries.length === 0) { + setError('No keycard sections (A–D) found in this PDF.'); + } + } catch (err) { + setError(err instanceof Error ? err.message : String(err)); + } finally { + setIsLoading(false); + // Reset so the same file can be re-uploaded + if (fileInputRef.current) { + fileInputRef.current.value = ''; + } + } + }; + return (

Key Card

@@ -36,6 +74,77 @@ const KeyCard = () => { > Download for Self Managed Cold Eddsa Key with Derived Keys + +
+

Parse Keycard from PDF

+

+ Upload a BitGo keycard PDF to extract and inspect its sections (A–D). +

+ + {isLoading &&

Parsing PDF…

} + {error &&

Error: {error}

} + {entries && entries.length > 0 && ( + + + + + + + + + {entries.map((entry) => ( + + + + + ))} + +
+ Section + + Value +
+ {entry.label} + + {entry.value} +
+ )}
); }; diff --git a/modules/web-demo/webpack/dev.config.js b/modules/web-demo/webpack/dev.config.js index ee88c42d7c..65205b096d 100644 --- a/modules/web-demo/webpack/dev.config.js +++ b/modules/web-demo/webpack/dev.config.js @@ -27,7 +27,7 @@ module.exports = (env, options) => { rules: devRules, }, resolve: { - extensions: ['.tsx', '.ts', '.js'], + extensions: ['.tsx', '.ts', '.js', '.mjs'], alias: aliasItems, fallback: resolveFallback, }, diff --git a/modules/web-demo/webpack/prod.config.js b/modules/web-demo/webpack/prod.config.js index c935af66f0..7f91d60d3a 100644 --- a/modules/web-demo/webpack/prod.config.js +++ b/modules/web-demo/webpack/prod.config.js @@ -23,7 +23,7 @@ module.exports = (env, options) => { rules: prodRules, }, resolve: { - extensions: ['.tsx', '.ts', '.js'], + extensions: ['.tsx', '.ts', '.js', '.mjs'], alias: aliasItems, fallback: resolveFallback, },