Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions modules/key-card/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
"@bitgo/sdk-core": "^37.3.0",
"@bitgo/statics": "^58.43.0",
"jspdf": ">=4.2.0",
"pdfjs-dist": "^4.0.0",
"qrcode": "^1.5.1"
},
"devDependencies": {
Expand Down
50 changes: 50 additions & 0 deletions modules/key-card/src/extractKeycardFromPDF.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import { buildLinesFromPDFNodes, KeycardEntry, parseKeycardFromLines, PDFTextNode } from './parseKeycard';

/**
 * Extracts keycard entries from a PDF file (browser only).
 *
 * Before calling this function, configure the pdfjs worker:
 *   import { GlobalWorkerOptions } from 'pdfjs-dist';
 *   GlobalWorkerOptions.workerSrc = '<url to pdf.worker.min.js>';
 *
 * pdfjs-dist is loaded via dynamic import so this module can be safely
 * imported in Node.js environments without triggering browser-only globals.
 *
 * The pdfjs loading task is destroyed before the function settles (on success
 * and on failure) so the parsed document and its worker-side resources are
 * released instead of lingering after every call.
 *
 * @param file - PDF file whose text content is read page by page.
 * @returns The reconstructed text lines (`lines`) and the keycard sections
 *   parsed from those lines (`entries`).
 * @throws Any rejection raised while loading the document or reading a page's
 *   text propagates to the caller.
 */
export async function extractKeycardEntriesFromPDF(file: File): Promise<{
  lines: string[];
  entries: KeycardEntry[];
}> {
  const pdfjsLib = await import('pdfjs-dist');
  const arrayBuffer = await file.arrayBuffer();
  const loadingTask = pdfjsLib.getDocument({ data: arrayBuffer });
  const nodes: PDFTextNode[] = [];

  try {
    const pdfDocument = await loadingTask.promise;

    // pdfjs pages are numbered starting at 1.
    for (let pageNumber = 1; pageNumber <= pdfDocument.numPages; pageNumber++) {
      const page = await pdfDocument.getPage(pageNumber);
      const textContent = await page.getTextContent();

      for (const item of textContent.items) {
        // Skip items that carry no text or no positioning matrix.
        if (!('str' in item) || !Array.isArray(item.transform)) {
          continue;
        }

        const text = item.str.replace(/\s+/g, ' ').trim();
        if (!text) {
          continue;
        }

        // Entries 4 and 5 of the transform are used as the fragment's x / y position.
        const x = Number(item.transform[4] ?? 0);
        const y = Number(item.transform[5] ?? 0);
        const width = 'width' in item ? Number(item.width ?? 0) : 0;

        nodes.push({ text, x, y, page: pageNumber, width });
      }
    }
  } finally {
    // Always release the document, including when loading or text extraction failed.
    await loadingTask.destroy();
  }

  const lines = buildLinesFromPDFNodes(nodes);
  return {
    lines,
    entries: parseKeycardFromLines(lines),
  };
}
2 changes: 2 additions & 0 deletions modules/key-card/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ import { generateParamsForKeyCreation } from './generateParamsForKeyCreation';
import { GenerateKeycardParams, GenerateLightningQrDataParams, GenerateQrDataBaseParams } from './types';

export * from './drawKeycard';
export * from './extractKeycardFromPDF';
export * from './faq';
export * from './generateQrData';
export * from './parseKeycard';
export * from './utils';
export * from './types';

Expand Down
196 changes: 196 additions & 0 deletions modules/key-card/src/parseKeycard.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
/** A single positioned text fragment extracted from a PDF page. */
export type PDFTextNode = {
// Text content of the fragment.
text: string;
// Horizontal position of the fragment's left edge; fragments on a line are ordered by ascending x.
x: number;
// Vertical position of the fragment; within a page, larger y values are emitted first.
y: number;
// Page the fragment belongs to; fragments from different pages never share a line.
page: number;
// Horizontal extent of the fragment; x + width is treated as its right edge when deciding on spacing.
width: number;
};

/** One parsed keycard section as returned by parseKeycardFromLines. */
export type KeycardEntry = {
// Section letter and title, formatted as "<letter>: <title>".
label: string;
// Section data joined into one string with "Part N" continuation labels removed.
value: string;
};

// Matches a section header line: a letter A-D (case-insensitive), one of ":", ".", ")" or "-",
// then a non-empty title. Group 1 is the letter, group 2 the title.
const sectionHeaderRegex = /^([A-D])\s*[:.)-]\s*(.+?)\s*$/i;
// Matches a line that starts with "data:" (case-insensitive); group 1 is whatever follows the colon.
const dataLineRegex = /^data\s*:\s*(.*)$/i;
// Matches a line consisting solely of the "BitGo KeyCard FAQ" heading (case-insensitive).
const faqHeaderRegex = /^BitGo\s+KeyCard\s+FAQ$/i;

// PDF coordinate tolerance in points. Nodes within this distance on the Y-axis
// are treated as belonging to the same line; nodes further apart are separate lines.
const PDF_LINE_Y_TOLERANCE = 2;
// Horizontal gap in points above which a space is inserted between adjacent nodes.
const PDF_NODE_GAP_THRESHOLD = 2;

/**
 * Collapses every run of whitespace into a single space and drops leading and
 * trailing whitespace.
 */
function sanitizeText(input: string): string {
  // Splitting on whitespace runs and discarding empty edge tokens is equivalent
  // to "replace runs with one space, then trim".
  const words = input.split(/\s+/).filter((word) => word.length > 0);
  return words.join(' ');
}

/**
 * Turns the newline-joined lines of a section into one continuous value,
 * removing "Part N" page-continuation labels along the way.
 */
function normalizeSectionValue(rawValue: string): string {
  const standalonePartLabel = /^Part\s+\d+$/i;

  // Pass 1: drop labels that occupy a whole line. The remaining lines are glued
  // together with no separator because section values are continuous strings
  // (base64 / xpub) that wrap across PDF lines without spaces.
  let joined = '';
  for (const line of rawValue.split('\n')) {
    if (standalonePartLabel.test(line.trim())) {
      continue;
    }
    joined += line;
  }

  // Pass 2: drop labels embedded mid-line, which happens when
  // buildLinesFromPDFNodes merges a label with adjacent content at the same
  // y-coordinate (e.g. "...X88bPart 2 lFPMd...").
  const withoutEmbeddedLabels = joined.replace(/\s*Part\s+\d+\s*/gi, '');
  return withoutEmbeddedLabels.trim();
}

/**
 * Counts the non-overlapping occurrences of `char` in `input`, scanning left
 * to right.
 */
function countChar(input: string, char: string): number {
  // An empty needle separates every UTF-16 code unit, giving length - 1.
  if (char.length === 0) {
    return input.length - 1;
  }

  let occurrences = 0;
  let position = input.indexOf(char);
  while (position !== -1) {
    occurrences += 1;
    position = input.indexOf(char, position + char.length);
  }
  return occurrences;
}

/**
 * Reports whether a section title mentions "encrypted wallet password",
 * ignoring letter case.
 */
function isEncryptedWalletPasswordSectionTitle(title: string): boolean {
  const lowered = title.toLowerCase();
  return lowered.indexOf('encrypted wallet password') !== -1;
}

/**
 * Reconstructs logical text lines from an unordered set of PDF text nodes.
 *
 * PDF text extraction returns individual positioned fragments. This function
 * orders them by page, then by Y-coordinate (larger y first, i.e.
 * top-to-bottom), then by X-coordinate. It then walks that order and starts a
 * new line whenever the page changes or a fragment lies more than
 * PDF_LINE_Y_TOLERANCE points away from the first fragment of the current
 * line. Within a line, fragments are emitted left to right and a space is
 * inserted between fragments separated by more than PDF_NODE_GAP_THRESHOLD
 * points horizontally.
 *
 * The ordering step deliberately uses exact coordinates rather than the
 * tolerance: a tolerance-based comparator is not transitive, which makes the
 * result of Array.prototype.sort implementation-defined. The tolerance is
 * applied only in the grouping pass.
 *
 * @param nodes - Positioned text fragments, in any order. The array is not mutated.
 * @returns The non-empty reconstructed lines in reading order.
 */
export function buildLinesFromPDFNodes(nodes: PDFTextNode[]): string[] {
  // Strict total order: page ascending, then y descending, then x ascending.
  const sortedNodes = [...nodes].sort((a, b) => a.page - b.page || b.y - a.y || a.x - b.x);

  const lines: string[] = [];
  let currentLineNodes: PDFTextNode[] = [];
  let currentPage = -1;
  // Y-coordinate of the first fragment of the line being collected.
  let currentY = Number.NaN;

  /** Emits the fragments collected so far as a single line, if any text remains. */
  function flushLine(): void {
    if (currentLineNodes.length === 0) {
      return;
    }

    // Fragments that share a line may differ slightly in y, so re-order them by x.
    const sortedLineNodes = [...currentLineNodes].sort((a, b) => a.x - b.x);
    let line = '';
    let previousRightEdge: number | null = null;
    for (const node of sortedLineNodes) {
      const piece = sanitizeText(node.text);
      if (!piece) {
        continue;
      }

      // Only a visible horizontal gap becomes a space; touching fragments are fused.
      if (previousRightEdge !== null && node.x - previousRightEdge > PDF_NODE_GAP_THRESHOLD) {
        line += ' ';
      }
      line += piece;
      previousRightEdge = node.x + node.width;
    }

    const normalizedLine = line.trim();
    if (normalizedLine) {
      lines.push(normalizedLine);
    }
  }

  for (const node of sortedNodes) {
    const pageChanged = node.page !== currentPage;
    const lineChanged = Number.isNaN(currentY) || Math.abs(node.y - currentY) > PDF_LINE_Y_TOLERANCE;
    if (pageChanged || lineChanged) {
      flushLine();
      currentLineNodes = [node];
      currentPage = node.page;
      currentY = node.y;
      continue;
    }

    currentLineNodes.push(node);
  }

  flushLine();
  return lines;
}

/**
 * Parses keycard sections out of reconstructed text lines.
 *
 * A line matching the section header pattern (letter A-D, separator, title)
 * opens a new section. Inside a section, a `data:` line starts value capture:
 * any text after the colon is the first value line and following lines are
 * appended until the FAQ heading is reached or a new section header appears.
 *
 * For sections whose title contains "encrypted wallet password" the data is a
 * single JSON object, so capture stops as soon as the curly braces balance.
 * That check is applied to the text on the `data:` line as well as to
 * continuation lines, so a JSON object that fits entirely on the `data:` line
 * does not pick up the footer line that follows it.
 *
 * @param lines - Text lines in reading order, e.g. from buildLinesFromPDFNodes.
 * @returns One entry per section that produced at least one value line, in the
 *   order the section headers were encountered.
 */
export function parseKeycardFromLines(lines: string[]): KeycardEntry[] {
  const sections: Array<{
    section: string;
    title: string;
    values: string[];
    isCapturingData: boolean;
    openCurlyCount: number;
  }> = [];
  let currentSectionIndex = -1;

  for (const line of lines) {
    const labelMatch = line.match(sectionHeaderRegex);
    if (labelMatch) {
      const section = labelMatch[1]?.toUpperCase();
      const title = sanitizeText(labelMatch[2] ?? '');
      if (section && title) {
        sections.push({
          section,
          title,
          values: [],
          isCapturingData: false,
          openCurlyCount: 0,
        });
        currentSectionIndex = sections.length - 1;
        continue;
      }
    }

    // Ignore everything that precedes the first section header.
    if (currentSectionIndex < 0) {
      continue;
    }

    const currentSection = sections[currentSectionIndex];
    if (!currentSection) {
      continue;
    }

    const dataLineMatch = line.match(dataLineRegex);
    if (dataLineMatch) {
      currentSection.isCapturingData = true;
      const inlineValue = sanitizeText(dataLineMatch[1] ?? '');
      if (inlineValue) {
        currentSection.values.push(inlineValue);
        const openedOnDataLine = countChar(inlineValue, '{');
        currentSection.openCurlyCount += openedOnDataLine - countChar(inlineValue, '}');

        // The JSON object may open and close on the data line itself. Stop
        // right away in that case; otherwise the next line (footer text) would
        // be appended before the balance is re-checked.
        if (
          openedOnDataLine > 0 &&
          currentSection.openCurlyCount <= 0 &&
          isEncryptedWalletPasswordSectionTitle(currentSection.title)
        ) {
          currentSection.isCapturingData = false;
        }
      }
      continue;
    }

    if (currentSection.isCapturingData) {
      if (faqHeaderRegex.test(line)) {
        currentSection.isCapturingData = false;
        continue;
      }

      currentSection.values.push(line);

      // For encrypted wallet password, data is a single JSON object. Stop as
      // soon as the object closes so footer/FAQ content is not appended.
      if (isEncryptedWalletPasswordSectionTitle(currentSection.title)) {
        currentSection.openCurlyCount += countChar(line, '{') - countChar(line, '}');
        if (currentSection.values.length > 0 && currentSection.openCurlyCount <= 0) {
          currentSection.isCapturingData = false;
        }
      }
    }
  }

  return sections
    .filter(({ section, values }) => ['A', 'B', 'C', 'D'].includes(section) && values.length > 0)
    .map(({ section, title, values }) => ({
      label: `${section}: ${title}`,
      value: normalizeSectionValue(values.join('\n')),
    }));
}
88 changes: 88 additions & 0 deletions modules/key-card/test/unit/parseKeycard.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import * as assert from 'assert';
import { parseKeycardFromLines } from '../../src/parseKeycard';

describe('parseKeycardFromLines', function () {
  // Header / data line pairs for the user (A), backup (B) and BitGo (C) key sections.
  const sectionA = ['A: Box A – User Key', 'data: xpub661MyMwAqRbcF...'];
  const sectionB = ['B: Box B – Backup Key', 'data: xpub661MyMwAqRbcG...'];
  const sectionC = ['C: Box C – BitGo Key', 'data: xpub661MyMwAqRbcH...'];
  const sectionDHeader = 'D: Box D – Encrypted Wallet Password';

  /** Parses the lines, asserts that section D was found and returns its value. */
  function parseSectionDValue(lines: string[]): string {
    const sectionD = parseKeycardFromLines(lines).find((entry) => entry.label.startsWith('D:'));
    assert.ok(sectionD, 'section D should be present');
    return sectionD.value;
  }

  it('happy path – clean JSON in one line', function () {
    const encryptedJson =
      '{"iv":"abc123","v":1,"iter":10000,"ks":256,"ts":64,"mode":"ccm","adata":"","cipher":"aes","salt":"xyz789","ct":"AAABBBCCC"}';
    const lines = [...sectionA, ...sectionB, ...sectionC, sectionDHeader, `data: ${encryptedJson}`];

    assert.strictEqual(parseSectionDValue(lines), encryptedJson);
  });

  it('Part N on its own line – strips standalone label', function () {
    const part1 =
      '{"iv":"abc123","v":1,"iter":10000,"ks":256,"ts":64,"mode":"ccm","adata":"","cipher":"aes","salt":"xyz789","ct":"AAABBB';
    const part2 = 'CCCDDDEEE"}';
    const lines = [...sectionA, ...sectionB, ...sectionC, sectionDHeader, `data: ${part1}`, 'Part 2', part2];

    assert.strictEqual(parseSectionDValue(lines), `${part1}${part2}`);
  });

  it('multiple embedded Part N labels – strips all page-break labels across a long ct value', function () {
    const seg1 = 'AAABBBCCC';
    const seg2 = 'DDDEEEFFF';
    const seg3 = 'GGGHHH';
    const mergedLine = `{"iv":"abc123","v":1,"iter":10000,"ks":256,"ts":64,"mode":"ccm","adata":"","cipher":"aes","salt":"xyz789","ct":"${seg1}Part 2 ${seg2}Part 3 ${seg3}"}`;
    const expected = `{"iv":"abc123","v":1,"iter":10000,"ks":256,"ts":64,"mode":"ccm","adata":"","cipher":"aes","salt":"xyz789","ct":"${seg1}${seg2}${seg3}"}`;
    // Only sections A and D are present in this case.
    const lines = [...sectionA, sectionDHeader, `data: ${mergedLine}`];

    assert.strictEqual(parseSectionDValue(lines), expected);
  });

  it('Part N embedded mid-line – strips label fused into base64 content', function () {
    const ctPrefix = 'AAABBBCCC';
    const ctSuffix = 'DDDEEEFFF';
    const mergedLine = `{"iv":"abc123","v":1,"iter":10000,"ks":256,"ts":64,"mode":"ccm","adata":"","cipher":"aes","salt":"xyz789","ct":"${ctPrefix}Part 2 ${ctSuffix}"}`;
    const expected = `{"iv":"abc123","v":1,"iter":10000,"ks":256,"ts":64,"mode":"ccm","adata":"","cipher":"aes","salt":"xyz789","ct":"${ctPrefix}${ctSuffix}"}`;
    const lines = [...sectionA, ...sectionB, ...sectionC, sectionDHeader, `data: ${mergedLine}`];

    assert.strictEqual(parseSectionDValue(lines), expected);
  });
});
1 change: 1 addition & 0 deletions modules/web-demo/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
"@bitgo/statics": "^58.43.0",
"bitgo": "^51.2.0",
"lodash": "^4.18.0",
"pdfjs-dist": "^4.0.0",
"react": "^18.0.0",
"react-dom": "^18.0.0",
"react-json-view": "^1.21.3",
Expand Down
Loading
Loading