Skip to content

Commit 6e03ce2

Browse files
committed
fix(file-parsers): fail closed on unverifiable ZIP-shaped OOXML archives
Address review: the guard previously no-opped (fell through to the decompression library) whenever the central directory could not be parsed, and findEocdOffset accepted the first backward EOCD signature without checking it sat at the buffer tail. A crafted archive with a decoy EOCD or an unsupported directory layout could bypass the size limits. - findEocdOffset now requires the EOCD comment length to place the record exactly at the buffer tail, defeating decoy signatures planted in the trailing region. - assertOoxmlArchiveWithinLimits now fails closed: a ZIP-shaped buffer (local file header / EOCD magic) whose central directory cannot be parsed is rejected rather than passed through. Genuine non-ZIP inputs (legacy OLE .xls/.doc, plaintext) still no-op and defer to the downstream parser.
1 parent 5dbe929 commit 6e03ce2

2 files changed

Lines changed: 72 additions & 6 deletions

File tree

apps/sim/lib/file-parsers/zip-guard.test.ts

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,19 @@ const HIGH_LIMITS: OoxmlSizeLimits = {
1515
ratioCheckFloorBytes: 1024 * 1024 * 1024,
1616
}
1717

18-
async function buildZip(entries: Record<string, string>): Promise<Buffer> {
18+
async function buildZip(
19+
entries: Record<string, string>,
20+
options: { comment?: string } = {}
21+
): Promise<Buffer> {
1922
const zip = new JSZip()
2023
for (const [name, content] of Object.entries(entries)) {
2124
zip.file(name, content)
2225
}
23-
return zip.generateAsync({ type: 'nodebuffer', compression: 'DEFLATE' })
26+
return zip.generateAsync({
27+
type: 'nodebuffer',
28+
compression: 'DEFLATE',
29+
comment: options.comment,
30+
})
2431
}
2532

2633
describe('assertOoxmlArchiveWithinLimits', () => {
@@ -76,6 +83,31 @@ describe('assertOoxmlArchiveWithinLimits', () => {
7683
).toThrow(ZipBombError)
7784
})
7885

86+
it('accepts a well-formed archive that carries a trailing comment', async () => {
87+
const buffer = await buildZip(
88+
{ 'word/document.xml': '<xml>hello</xml>' },
89+
{ comment: 'generated by test' }
90+
)
91+
expect(() => assertOoxmlArchiveWithinLimits(buffer, HIGH_LIMITS)).not.toThrow()
92+
})
93+
94+
it('fails closed for a ZIP-shaped buffer whose central directory is unparseable', () => {
95+
const buffer = Buffer.alloc(64)
96+
buffer.writeUInt32LE(0x04034b50, 0) // local file header signature, no valid EOCD
97+
expect(() => assertOoxmlArchiveWithinLimits(buffer)).toThrow(ZipBombError)
98+
})
99+
100+
it('rejects a decoy EOCD signature that does not validate against the buffer tail', async () => {
101+
const realZip = await buildZip({ 'xl/worksheets/sheet1.xml': 'A'.repeat(200_000) })
102+
// A decoy EOCD (zeroed central directory) appended after the genuine archive
103+
// would, without tail validation, redirect the guard to an empty directory
104+
// and undercount the real entries.
105+
const decoy = Buffer.alloc(64)
106+
decoy.writeUInt32LE(0x06054b50, 0)
107+
const tampered = Buffer.concat([realZip, decoy])
108+
expect(() => assertOoxmlArchiveWithinLimits(tampered)).toThrow(ZipBombError)
109+
})
110+
79111
it('no-ops for buffers that are not ZIP archives', () => {
80112
const plaintext = Buffer.from('this is just plain text, not a zip archive at all')
81113
expect(() => assertOoxmlArchiveWithinLimits(plaintext)).not.toThrow()

apps/sim/lib/file-parsers/zip-guard.ts

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ const logger = createLogger('ZipBombGuard')
1515
* compression ratio exceeds a safe threshold — without decompressing anything.
1616
*/
1717

18+
const LOCAL_FILE_HEADER_SIGNATURE = 0x04034b50
1819
const EOCD_SIGNATURE = 0x06054b50
1920
const ZIP64_EOCD_LOCATOR_SIGNATURE = 0x07064b50
2021
const ZIP64_EOCD_SIGNATURE = 0x06064b50
@@ -58,14 +59,36 @@ export class ZipBombError extends Error {
5859
}
5960
}
6061

62+
/**
63+
* Whether the buffer is shaped like a ZIP archive — i.e. begins with a local
64+
* file header (the leading signature of every non-empty ZIP, and thus every
65+
* OOXML document) or with the EOCD signature of an empty archive. Used to fail
66+
* closed: a ZIP-shaped buffer the guard cannot parse must be rejected rather
67+
* than handed to a decompression library.
68+
*/
69+
function isZipShaped(buffer: Buffer): boolean {
70+
if (buffer.length < 4) {
71+
return false
72+
}
73+
const signature = buffer.readUInt32LE(0)
74+
return signature === LOCAL_FILE_HEADER_SIGNATURE || signature === EOCD_SIGNATURE
75+
}
76+
6177
/**
6278
* Locate the End Of Central Directory record by scanning backwards from the end
63-
* of the buffer (it sits within the trailing 22 + comment bytes).
79+
* of the buffer (it sits within the trailing 22 + comment bytes). A candidate
80+
* is only accepted when its declared comment length places the record exactly
81+
* at the buffer tail, so a decoy EOCD signature planted in the comment region
82+
* cannot redirect the guard to a smaller, attacker-chosen central directory.
6483
*/
6584
function findEocdOffset(buffer: Buffer): number {
6685
const minStart = Math.max(0, buffer.length - EOCD_MIN_SIZE - MAX_EOCD_COMMENT_SIZE)
6786
for (let offset = buffer.length - EOCD_MIN_SIZE; offset >= minStart; offset--) {
68-
if (buffer.readUInt32LE(offset) === EOCD_SIGNATURE) {
87+
if (buffer.readUInt32LE(offset) !== EOCD_SIGNATURE) {
88+
continue
89+
}
90+
const commentLength = buffer.readUInt16LE(offset + 20)
91+
if (offset + EOCD_MIN_SIZE + commentLength === buffer.length) {
6992
return offset
7093
}
7194
}
@@ -198,15 +221,26 @@ function sumDeclaredUncompressedSize(buffer: Buffer, abortAboveBytes: number): n
198221
* Reject an OOXML archive whose declared expanded size or compression ratio
199222
* exceeds safe bounds, before any decompression library materializes it.
200223
*
201-
* No-op for inputs that are not parseable ZIP archives — those are handled by
202-
* the downstream parser's own validation and fallbacks.
224+
* Fails closed: a ZIP-shaped buffer whose central directory cannot be parsed is
225+
* rejected rather than passed through, so a malformed archive that a downstream
226+
* library still inflates cannot bypass the guard. Genuinely non-ZIP inputs
227+
* (legacy OLE `.xls`/`.doc`, misidentified plaintext) no-op and defer to the
228+
* downstream parser's own validation and fallbacks.
203229
*/
204230
export function assertOoxmlArchiveWithinLimits(
205231
buffer: Buffer,
206232
limits: OoxmlSizeLimits = DEFAULT_OOXML_SIZE_LIMITS
207233
): void {
208234
const totalUncompressed = sumDeclaredUncompressedSize(buffer, limits.maxTotalUncompressedBytes)
209235
if (totalUncompressed === null) {
236+
if (isZipShaped(buffer)) {
237+
logger.warn('Rejected ZIP-shaped archive: central directory could not be parsed', {
238+
compressedBytes: buffer.length,
239+
})
240+
throw new ZipBombError(
241+
'Unable to inspect ZIP central directory; refusing to parse an unverifiable ZIP-shaped archive'
242+
)
243+
}
210244
return
211245
}
212246

0 commit comments

Comments
 (0)