BitGo · mohammadalfaiyazbitgo · Jun 5, 2026 · Jun 4, 2026 · Jun 4, 2026 · Jun 4, 2026
@@ -37,6 +37,7 @@
     "@bitgo/sdk-core": "^37.3.0",
     "@bitgo/statics": "^58.43.0",
     "jspdf": ">=4.2.0",
+    "pdfjs-dist": "^4.0.0",
     "qrcode": "^1.5.1"
   },
   "devDependencies": {

@@ -0,0 +1,50 @@
+import { buildLinesFromPDFNodes, KeycardEntry, parseKeycardFromLines, PDFTextNode } from './parseKeycard';
+
+/**
+ * Extracts keycard entries from a PDF file (browser only).
+ *
+ * Before calling this function, configure the pdfjs worker:
+ *   import { GlobalWorkerOptions } from 'pdfjs-dist';
+ *   GlobalWorkerOptions.workerSrc = '<url to pdf.worker.min.js>';
+ *
+ * pdfjs-dist is loaded via dynamic import so this module can be safely
+ * imported in Node.js environments without triggering browser-only globals.
+ *
+ * Every text item of every page is turned into a positioned PDFTextNode
+ * (x / y are read from indices 4 and 5 of the item's transform array), the
+ * nodes are assembled into text lines with buildLinesFromPDFNodes, and those
+ * lines are parsed with parseKeycardFromLines.
+ *
+ * The pdfjs loading task is always destroyed before this function settles,
+ * so the loaded document is released whether extraction succeeds or fails.
+ *
+ * @param file - The keycard PDF to read.
+ * @returns The reconstructed text lines and the keycard entries parsed from them.
+ * @throws Rejects with the underlying pdfjs error if the document or one of
+ *   its pages cannot be loaded or read.
+ */
+export async function extractKeycardEntriesFromPDF(file: File): Promise<{
+  lines: string[];
+  entries: KeycardEntry[];
+}> {
+  const pdfjsLib = await import('pdfjs-dist');
+  const arrayBuffer = await file.arrayBuffer();
+  const loadingTask = pdfjsLib.getDocument({ data: arrayBuffer });
+  const nodes: PDFTextNode[] = [];
+
+  try {
+    const pdfDocument = await loadingTask.promise;
+
+    // pdfjs page numbers are 1-based.
+    for (let pageNumber = 1; pageNumber <= pdfDocument.numPages; pageNumber++) {
+      const page = await pdfDocument.getPage(pageNumber);
+      const textContent = await page.getTextContent();
+
+      for (const item of textContent.items) {
+        // Skip items that carry no text or no position (non-text content items).
+        if (!('str' in item) || !Array.isArray(item.transform)) {
+          continue;
+        }
+
+        const text = item.str.replace(/\s+/g, ' ').trim();
+        if (!text) {
+          continue;
+        }
+
+        const x = Number(item.transform[4] ?? 0);
+        const y = Number(item.transform[5] ?? 0);
+        const width = 'width' in item ? Number(item.width ?? 0) : 0;
+
+        nodes.push({ text, x, y, page: pageNumber, width });
+      }
+    }
+  } finally {
+    // Release the document (and its worker-side state) even when loading or
+    // text extraction throws; keycard contents should not outlive this call.
+    await loadingTask.destroy();
+  }
+
+  const lines = buildLinesFromPDFNodes(nodes);
+  return {
+    lines,
+    entries: parseKeycardFromLines(lines),
+  };
+}
@@ -5,8 +5,10 @@ import { generateParamsForKeyCreation } from './generateParamsForKeyCreation';
 import { GenerateKeycardParams, GenerateLightningQrDataParams, GenerateQrDataBaseParams } from './types';
 
 export * from './drawKeycard';
+export * from './extractKeycardFromPDF';
 export * from './faq';
 export * from './generateQrData';
+export * from './parseKeycard';
 export * from './utils';
 export * from './types';
 

@@ -0,0 +1,196 @@
+/**
+ * A single positioned text fragment taken from a PDF page; the input unit of
+ * buildLinesFromPDFNodes.
+ */
+export type PDFTextNode = {
+  // Text content of the fragment.
+  text: string;
+  // Horizontal position of the fragment's left edge.
+  x: number;
+  // Vertical position; larger values are ordered first when lines are built.
+  y: number;
+  // Page the fragment belongs to; fragments on different pages never share a line.
+  page: number;
+  // Horizontal extent; x + width is used as the fragment's right edge.
+  width: number;
+};
+
+/**
+ * One parsed keycard section as returned by parseKeycardFromLines.
+ */
+export type KeycardEntry = {
+  // "<section letter>: <section title>", e.g. built as `${section}: ${title}`.
+  label: string;
+  // The section's captured data with "Part N" labels removed and lines joined.
+  value: string;
+};
+
+// Matches a section header line: a letter A-D (case-insensitive), then one of
+// the separators ":", ".", ")" or "-", then a non-empty title.
+// Group 1 is the section letter, group 2 is the title.
+const sectionHeaderRegex = /^([A-D])\s*[:.)-]\s*(.+?)\s*$/i;
+// Matches a line starting with "data:" (case-insensitive); group 1 is whatever
+// follows the colon on the same line (possibly empty).
+const dataLineRegex = /^data\s*:\s*(.*)$/i;
+// Matches a line consisting solely of "BitGo KeyCard FAQ" (case-insensitive);
+// parseKeycardFromLines stops capturing section data when it sees this line.
+const faqHeaderRegex = /^BitGo\s+KeyCard\s+FAQ$/i;
+
+// PDF coordinate tolerance in points. Nodes within this distance on the Y-axis
+// are treated as belonging to the same line; nodes further apart are separate lines.
+const PDF_LINE_Y_TOLERANCE = 2;
+// Horizontal gap in points above which a space is inserted between adjacent nodes.
+const PDF_NODE_GAP_THRESHOLD = 2;
+
+/**
+ * Collapses every run of whitespace into a single space and strips leading
+ * and trailing whitespace.
+ */
+function sanitizeText(input: string): string {
+  const collapsed = input.replace(/\s+/g, ' ');
+  return collapsed.trim();
+}
+
+/**
+ * Turns the newline-joined lines captured for a section into one continuous
+ * value with all "Part N" page-continuation labels removed.
+ *
+ * Labels are removed in two steps:
+ *  - lines that consist only of "Part N" are dropped entirely;
+ *  - labels fused into a line (e.g. "...X88bPart 2 lFPMd...") are cut out of
+ *    the joined text together with the whitespace around them.
+ *
+ * Lines are concatenated without any separator because section values are
+ * continuous strings (base64 / xpub) that merely wrap across PDF lines.
+ */
+function normalizeSectionValue(rawValue: string): string {
+  const standalonePartLabel = /^Part\s+\d+$/i;
+  const embeddedPartLabel = /\s*Part\s+\d+\s*/gi;
+
+  let joined = '';
+  for (const line of rawValue.split('\n')) {
+    if (standalonePartLabel.test(line.trim())) {
+      continue;
+    }
+    joined += line;
+  }
+
+  const withoutEmbeddedLabels = joined.replace(embeddedPartLabel, '');
+  return withoutEmbeddedLabels.trim();
+}
+
+/**
+ * Counts how many times `char` occurs in `input`, computed as the number of
+ * pieces produced by splitting on `char`, minus one.
+ */
+function countChar(input: string, char: string): number {
+  const pieces = input.split(char);
+  return pieces.length - 1;
+}
+
+/**
+ * Tells whether a section title refers to the encrypted wallet password
+ * section, using a case-insensitive substring match.
+ */
+function isEncryptedWalletPasswordSectionTitle(title: string): boolean {
+  const lowered = title.toLowerCase();
+  return lowered.indexOf('encrypted wallet password') !== -1;
+}
+
+/**
+ * Reconstructs logical text lines from an unordered set of PDF text nodes.
+ *
+ * PDF text extraction returns individual positioned fragments. This function
+ * sorts them by page, then Y-coordinate (larger Y first, i.e. top-to-bottom),
+ * then X-coordinate. It then groups consecutive fragments whose Y lies within
+ * PDF_LINE_Y_TOLERANCE points of the first fragment of the current line, and
+ * inserts a space between fragments that are separated by more than
+ * PDF_NODE_GAP_THRESHOLD points horizontally.
+ *
+ * The tolerance is applied only while grouping, never inside the sort
+ * comparator: "within tolerance" is not transitive, so a comparator built on
+ * it is inconsistent and leaves the sort order implementation-defined.
+ *
+ * @param nodes - Text fragments in any order; the array is not mutated.
+ * @returns The non-empty reconstructed lines, in page order and top-to-bottom
+ *   within each page.
+ */
+export function buildLinesFromPDFNodes(nodes: PDFTextNode[]): string[] {
+  // Strict total order, so the result does not depend on the sort algorithm.
+  const sortedNodes = [...nodes].sort((a, b) => {
+    if (a.page !== b.page) {
+      return a.page - b.page;
+    }
+    if (a.y !== b.y) {
+      return b.y - a.y;
+    }
+    return a.x - b.x;
+  });
+
+  const lines: string[] = [];
+  let currentLineNodes: PDFTextNode[] = [];
+  let currentPage = -1;
+  // Y of the first (top-most) node of the line currently being collected.
+  let currentY = Number.NaN;
+
+  // Renders the collected nodes left-to-right into one line and resets nothing;
+  // the caller replaces currentLineNodes when it starts the next line.
+  function flushLine() {
+    if (currentLineNodes.length === 0) {
+      return;
+    }
+
+    const sortedLineNodes = [...currentLineNodes].sort((a, b) => a.x - b.x);
+    let line = '';
+    let previousRightEdge: number | null = null;
+    for (const node of sortedLineNodes) {
+      const piece = sanitizeText(node.text);
+      if (!piece) {
+        continue;
+      }
+
+      // Only a visible horizontal gap becomes a space; touching fragments are
+      // glued together so wrapped key material is not broken up.
+      if (previousRightEdge !== null && node.x - previousRightEdge > PDF_NODE_GAP_THRESHOLD) {
+        line += ' ';
+      }
+      line += piece;
+      previousRightEdge = node.x + node.width;
+    }
+
+    const normalizedLine = line.trim();
+    if (normalizedLine) {
+      lines.push(normalizedLine);
+    }
+  }
+
+  for (const node of sortedNodes) {
+    const pageChanged = node.page !== currentPage;
+    const lineChanged = Number.isNaN(currentY) || Math.abs(node.y - currentY) > PDF_LINE_Y_TOLERANCE;
+    if (pageChanged || lineChanged) {
+      flushLine();
+      currentLineNodes = [node];
+      currentPage = node.page;
+      currentY = node.y;
+      continue;
+    }
+
+    currentLineNodes.push(node);
+  }
+
+  flushLine();
+  return lines;
+}
+
+/**
+ * Parses reconstructed keycard text lines into labelled section entries.
+ *
+ * A line matching sectionHeaderRegex opens a new section; lines seen before
+ * the first header are ignored. Inside a section, a "data:" line starts value
+ * capture (any text after the colon is the first value line) and subsequent
+ * lines are appended until one of the following happens:
+ *  - another section header is encountered,
+ *  - the "BitGo KeyCard FAQ" heading is encountered, or
+ *  - for the encrypted wallet password section, the JSON object's braces
+ *    balance. This is also checked on the "data:" line itself, so a JSON
+ *    object that fits on one line does not swallow the line that follows it.
+ *
+ * @param lines - Text lines in reading order, e.g. from buildLinesFromPDFNodes.
+ * @returns One entry per section header that captured at least one value
+ *   line, in order of appearance. `label` is "<letter>: <title>" and `value`
+ *   is the captured text passed through normalizeSectionValue.
+ */
+export function parseKeycardFromLines(lines: string[]): KeycardEntry[] {
+  const sections: Array<{
+    section: string;
+    title: string;
+    values: string[];
+    isCapturingData: boolean;
+    openCurlyCount: number;
+  }> = [];
+  let currentSectionIndex = -1;
+
+  for (const line of lines) {
+    const labelMatch = line.match(sectionHeaderRegex);
+    if (labelMatch) {
+      const section = labelMatch[1]?.toUpperCase();
+      const title = sanitizeText(labelMatch[2] ?? '');
+      if (section && title) {
+        sections.push({
+          section,
+          title,
+          values: [],
+          isCapturingData: false,
+          openCurlyCount: 0,
+        });
+        currentSectionIndex = sections.length - 1;
+        continue;
+      }
+    }
+
+    if (currentSectionIndex < 0) {
+      continue;
+    }
+
+    const currentSection = sections[currentSectionIndex];
+    if (!currentSection) {
+      continue;
+    }
+
+    const dataLineMatch = line.match(dataLineRegex);
+    if (dataLineMatch) {
+      currentSection.isCapturingData = true;
+      const inlineValue = sanitizeText(dataLineMatch[1] ?? '');
+      if (inlineValue) {
+        currentSection.values.push(inlineValue);
+        currentSection.openCurlyCount += countChar(inlineValue, '{') - countChar(inlineValue, '}');
+
+        // The whole JSON object may already be complete on the "data:" line.
+        // Stop here in that case; otherwise the next line (footer text) would
+        // be appended before the brace balance is checked again.
+        if (
+          isEncryptedWalletPasswordSectionTitle(currentSection.title) &&
+          inlineValue.includes('{') &&
+          currentSection.openCurlyCount <= 0
+        ) {
+          currentSection.isCapturingData = false;
+        }
+      }
+      continue;
+    }
+
+    if (currentSection.isCapturingData) {
+      if (faqHeaderRegex.test(line)) {
+        currentSection.isCapturingData = false;
+        continue;
+      }
+
+      currentSection.values.push(line);
+
+      // For encrypted wallet password, data is a single JSON object. Stop as
+      // soon as the object closes so footer/FAQ content is not appended.
+      if (isEncryptedWalletPasswordSectionTitle(currentSection.title)) {
+        currentSection.openCurlyCount += countChar(line, '{') - countChar(line, '}');
+        if (currentSection.openCurlyCount <= 0) {
+          currentSection.isCapturingData = false;
+        }
+      }
+    }
+  }
+
+  return sections
+    .filter(({ section, values }) => ['A', 'B', 'C', 'D'].includes(section) && values.length > 0)
+    .map(({ section, title, values }) => ({
+      label: `${section}: ${title}`,
+      value: normalizeSectionValue(values.join('\n')),
+    }));
+}
@@ -0,0 +1,88 @@
+import * as assert from 'assert';
+import { parseKeycardFromLines } from '../../src/parseKeycard';
+
+// Unit tests for parseKeycardFromLines. Each case feeds already-reconstructed
+// text lines and checks the value extracted for section D (the encrypted
+// wallet password JSON), focusing on removal of "Part N" continuation labels.
+describe('parseKeycardFromLines', function () {
+  // The whole JSON object sits on the "data:" line and must come back unchanged.
+  it('happy path – clean JSON in one line', function () {
+    const encryptedJson =
+      '{"iv":"abc123","v":1,"iter":10000,"ks":256,"ts":64,"mode":"ccm","adata":"","cipher":"aes","salt":"xyz789","ct":"AAABBBCCC"}';
+    const lines = [
+      'A: Box A – User Key',
+      'data: xpub661MyMwAqRbcF...',
+      'B: Box B – Backup Key',
+      'data: xpub661MyMwAqRbcG...',
+      'C: Box C – BitGo Key',
+      'data: xpub661MyMwAqRbcH...',
+      'D: Box D – Encrypted Wallet Password',
+      `data: ${encryptedJson}`,
+    ];
+
+    const entries = parseKeycardFromLines(lines);
+    const sectionD = entries.find((e) => e.label.startsWith('D:'));
+    assert.ok(sectionD, 'section D should be present');
+    assert.strictEqual(sectionD.value, encryptedJson);
+  });
+
+  // The JSON is split over two lines with a standalone "Part 2" line between
+  // them; the label line is dropped and the halves are joined without a separator.
+  it('Part N on its own line – strips standalone label', function () {
+    const part1 =
+      '{"iv":"abc123","v":1,"iter":10000,"ks":256,"ts":64,"mode":"ccm","adata":"","cipher":"aes","salt":"xyz789","ct":"AAABBB';
+    const part2 = 'CCCDDDEEE"}';
+    const lines = [
+      'A: Box A – User Key',
+      'data: xpub661MyMwAqRbcF...',
+      'B: Box B – Backup Key',
+      'data: xpub661MyMwAqRbcG...',
+      'C: Box C – BitGo Key',
+      'data: xpub661MyMwAqRbcH...',
+      'D: Box D – Encrypted Wallet Password',
+      `data: ${part1}`,
+      'Part 2',
+      part2,
+    ];
+
+    const entries = parseKeycardFromLines(lines);
+    const sectionD = entries.find((e) => e.label.startsWith('D:'));
+    assert.ok(sectionD, 'section D should be present');
+    assert.strictEqual(sectionD.value, `${part1}${part2}`);
+  });
+
+  // Two labels ("Part 2" and "Part 3") are fused into the same line; both must be removed.
+  it('multiple embedded Part N labels – strips all page-break labels across a long ct value', function () {
+    const seg1 = 'AAABBBCCC';
+    const seg2 = 'DDDEEEFFF';
+    const seg3 = 'GGGHHH';
+    const mergedLine = `{"iv":"abc123","v":1,"iter":10000,"ks":256,"ts":64,"mode":"ccm","adata":"","cipher":"aes","salt":"xyz789","ct":"${seg1}Part 2 ${seg2}Part 3 ${seg3}"}`;
+    const lines = [
+      'A: Box A – User Key',
+      'data: xpub661MyMwAqRbcF...',
+      'D: Box D – Encrypted Wallet Password',
+      `data: ${mergedLine}`,
+    ];
+
+    const entries = parseKeycardFromLines(lines);
+    const sectionD = entries.find((e) => e.label.startsWith('D:'));
+    assert.ok(sectionD, 'section D should be present');
+    const expected = `{"iv":"abc123","v":1,"iter":10000,"ks":256,"ts":64,"mode":"ccm","adata":"","cipher":"aes","salt":"xyz789","ct":"${seg1}${seg2}${seg3}"}`;
+    assert.strictEqual(sectionD.value, expected);
+  });
+
+  // A single label is fused into the middle of the ciphertext on one line.
+  it('Part N embedded mid-line – strips label fused into base64 content', function () {
+    const ctPrefix = 'AAABBBCCC';
+    const ctSuffix = 'DDDEEEFFF';
+    const mergedLine = `{"iv":"abc123","v":1,"iter":10000,"ks":256,"ts":64,"mode":"ccm","adata":"","cipher":"aes","salt":"xyz789","ct":"${ctPrefix}Part 2 ${ctSuffix}"}`;
+    const lines = [
+      'A: Box A – User Key',
+      'data: xpub661MyMwAqRbcF...',
+      'B: Box B – Backup Key',
+      'data: xpub661MyMwAqRbcG...',
+      'C: Box C – BitGo Key',
+      'data: xpub661MyMwAqRbcH...',
+      'D: Box D – Encrypted Wallet Password',
+      `data: ${mergedLine}`,
+    ];
+
+    const entries = parseKeycardFromLines(lines);
+    const sectionD = entries.find((e) => e.label.startsWith('D:'));
+    assert.ok(sectionD, 'section D should be present');
+    const expected = `{"iv":"abc123","v":1,"iter":10000,"ks":256,"ts":64,"mode":"ccm","adata":"","cipher":"aes","salt":"xyz789","ct":"${ctPrefix}${ctSuffix}"}`;
+    assert.strictEqual(sectionD.value, expected);
+  });
+});
@@ -68,6 +68,7 @@
     "@bitgo/statics": "^58.43.0",
     "bitgo": "^51.2.0",
     "lodash": "^4.18.0",
+    "pdfjs-dist": "^4.0.0",
     "react": "^18.0.0",
     "react-dom": "^18.0.0",
     "react-json-view": "^1.21.3",