From 40af28f4d2ca80a2fc1d6d386a39638272e18a7e Mon Sep 17 00:00:00 2001 From: Edouard Schweisguth Date: Tue, 2 Jun 2026 12:27:33 +0000 Subject: [PATCH 1/5] fix: bound DWARF .eh_frame_hdr parsing to the section --- ddprof-lib/src/main/cpp/dwarf.cpp | 69 ++++++++++++++++---- ddprof-lib/src/main/cpp/dwarf.h | 79 +++++++++++++++++------ ddprof-lib/src/main/cpp/symbols_linux.cpp | 3 +- ddprof-lib/src/test/cpp/dwarf_ut.cpp | 22 +++++++ ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp | 17 +++-- 5 files changed, 150 insertions(+), 40 deletions(-) diff --git a/ddprof-lib/src/main/cpp/dwarf.cpp b/ddprof-lib/src/main/cpp/dwarf.cpp index 15d092369..bf320eef1 100644 --- a/ddprof-lib/src/main/cpp/dwarf.cpp +++ b/ddprof-lib/src/main/cpp/dwarf.cpp @@ -96,6 +96,8 @@ FrameDesc FrameDesc::no_dwarf_frame = {0, DW_REG_INVALID, DW_REG_INVALID, DW_REG void DwarfParser::init(const char *name, const char *image_base) { _name = name; _image_base = image_base; + _section_start = NULL; + _section_end = reinterpret_cast(~(size_t)0); _capacity = 128; _count = 0; @@ -109,9 +111,10 @@ void DwarfParser::init(const char *name, const char *image_base) { } DwarfParser::DwarfParser(const char *name, const char *image_base, - const char *eh_frame_hdr) { + const char *eh_frame_hdr, size_t eh_frame_hdr_size, + EhFrameHdrTag) { init(name, image_base); - parse(eh_frame_hdr); + parse(eh_frame_hdr, eh_frame_hdr_size); } DwarfParser::DwarfParser(const char *name, const char *image_base, @@ -130,7 +133,16 @@ static constexpr u8 omit_sign_bit_mask_low(u8 value) { return value & 0x7; } -void DwarfParser::parse(const char *eh_frame_hdr) { +void DwarfParser::parse(const char *eh_frame_hdr, size_t size) { + // Fixed .eh_frame_hdr header: version (1) + 3 encoding bytes + eh_frame_ptr (4) + // + fde_count at offset 8 (4), binary-search table starting at offset 16. + // Refuse anything too small. Note: the table entries resolve into the adjacent + // .eh_frame section, not into .eh_frame_hdr itself, so we do NOT bound FDE + // reads to [eh_frame_hdr, eh_frame_hdr+size). + if (eh_frame_hdr == NULL || size < 16) { + return; + } + u8 version = eh_frame_hdr[0]; u8 eh_frame_ptr_enc = eh_frame_hdr[1]; u8 fde_count_enc = eh_frame_hdr[2]; @@ -149,6 +161,15 @@ void DwarfParser::parse(const char *eh_frame_hdr) { int fde_count = *(int *)(eh_frame_hdr + 8); int *table = (int *)(eh_frame_hdr + 16); + // Each table entry is a pair of 4-byte values (8 bytes) after the 16-byte + // header; this loop reads the first value of each pair. Reject a count that + // would make the index walk read past the section. A negative count cannot + // be valid either. + if (fde_count < 0 || (size_t)fde_count > (size - 16) / 8) { + Log::warn("Truncated or invalid .eh_frame_hdr (fde_count=%d, size=%lu) in %s", + fde_count, (unsigned long)size, _name); + return; + } for (int i = 0; i < fde_count; i++) { _ptr = eh_frame_hdr + table[i * 2]; parseFde(); @@ -161,10 +182,12 @@ void DwarfParser::parseEhFrame(const char *eh_frame, size_t size) { if (eh_frame == NULL || size < 4) { return; } - const char *section_end = eh_frame + size; + // Publish the read window so the get*/skip* helpers clamp to it. + _section_start = eh_frame; + _section_end = eh_frame + size; _ptr = eh_frame; - while (_ptr + 4 <= section_end) { + while (_ptr + 4 <= _section_end) { const char *record_start = _ptr; u32 length = get32(); if (length == 0) { @@ -174,7 +197,7 @@ void DwarfParser::parseEhFrame(const char *eh_frame, size_t size) { break; // 64-bit DWARF not supported } - if (length > (size_t)(section_end - record_start) - 4) { + if (length > (size_t)(_section_end - record_start) - 4) { break; } const char *record_end = record_start + 4 + length; @@ -250,7 +273,7 @@ void DwarfParser::parseCie() { const char *cie_start = _ptr; _ptr += 5; - while (*_ptr++) { + while (_ptr < _section_end && *_ptr++) { } _code_align = getLeb(); _data_align = getSLeb(); @@ -266,6 +289,9 @@ void DwarfParser::parseFde() { const char *fde_start = _ptr; u32 cie_offset = get32(); if (_count == 0) { + if (_section_start != NULL && cie_offset > (size_t)(fde_start - _section_start)) { + return; + } _ptr = fde_start - cie_offset; parseCie(); _ptr = fde_start + 4; @@ -274,12 +300,19 @@ void DwarfParser::parseFde() { u32 range_start = getPtr() - _image_base; u32 range_len = get32(); _ptr += getLeb(); + if (_ptr > _section_end) _ptr = _section_end; parseInstructions(range_start, fde_start + fde_len); addRecord(range_start + range_len, DW_REG_FP, LINKED_FRAME_SIZE, -LINKED_FRAME_SIZE, -LINKED_FRAME_SIZE + DW_STACK_SLOT); } void DwarfParser::parseInstructions(u32 loc, const char *end) { + // `end` is derived from an untrusted record length; never let it run past + // the section. Reads inside the loop are clamped by the get*/skip* helpers, + // but clamping here keeps the loop bound honest and _ptr in range. + if (end > _section_end) { + end = _section_end; + } const u32 code_align = _code_align; const int data_align = _data_align; @@ -364,11 +397,13 @@ void DwarfParser::parseInstructions(u32 loc, const char *end) { cfa_reg = len == 11 ? DW_REG_PLT : DW_REG_INVALID; cfa_off = DW_STACK_SLOT; _ptr += len; + if (_ptr > _section_end) _ptr = _section_end; break; } case DW_CFA_expression: skipLeb(); _ptr += getLeb(); + if (_ptr > _section_end) _ptr = _section_end; break; case DW_CFA_offset_extended_sf: switch (getLeb()) { @@ -402,6 +437,7 @@ void DwarfParser::parseInstructions(u32 loc, const char *end) { } } else { _ptr += getLeb(); + if (_ptr > _section_end) _ptr = _section_end; } break; #ifdef __aarch64__ @@ -452,6 +488,9 @@ int DwarfParser::parseExpression() { u32 len = getLeb(); const char *end = _ptr + len; + if (end > _section_end) { + end = _section_end; + } while (_ptr < end) { u8 op = get8(); @@ -499,13 +538,15 @@ int DwarfParser::parseExpression() { void DwarfParser::addRecord(u32 loc, u32 cfa_reg, int cfa_off, int fp_off, int pc_off) { - // Sanity asserts to be able to pack those two values into one u32 vq - // Assert that the cfa_reg fits in 8 bits (0 to 255) - assert(cfa_reg <= 0xFF); - - // Assert that the cfa_off fits in a 24-bit signed range. - // Signed 24-bit integer range: -2^23 (-8,388,608) to 2^23 - 1 (8,388,607) - assert(cfa_off >= -8388608 && cfa_off <= 8388607); + // cfa_reg and cfa_off are packed into a single u32 (cfa_off << 8 | cfa_reg), + // so cfa_reg must fit in 8 bits (0..255) and cfa_off in a signed 24-bit range + // (-2^23 .. 2^23-1). Well-formed compiler-generated DWARF always satisfies + // this, but a malformed or corrupt .eh_frame from an untrusted ELF may not. + // Rather than pack a truncated (wrong) descriptor, drop the record: the stack + // walker treats a missing entry for a PC the same as any other unwind miss. + if (cfa_reg > 0xFF || cfa_off < -8388608 || cfa_off > 8388607) { + return; + } // cfa_reg and cfa_off can be encoded to a single 32 bit value, considering the existing and supported systems u32 cfa = static_cast(cfa_off) << 8 | static_cast(cfa_reg & 0xff); diff --git a/ddprof-lib/src/main/cpp/dwarf.h b/ddprof-lib/src/main/cpp/dwarf.h index 016ead404..cf37b3fa5 100644 --- a/ddprof-lib/src/main/cpp/dwarf.h +++ b/ddprof-lib/src/main/cpp/dwarf.h @@ -107,6 +107,13 @@ class DwarfParser { const char* _name; const char* _image_base; const char* _ptr; + // Read window [_section_start, _section_end). Set by parseEhFrame() to the + // actual .eh_frame bounds so every helper read is clamped to the section. + // When parsing via .eh_frame_hdr (parse()), FDEs live in the adjacent + // .eh_frame whose bounds are unknown here, so _section_start stays NULL and + // _section_end stays at max-pointer (unbounded mode — same as original). + const char* _section_start; + const char* _section_end; int _capacity; int _count; @@ -118,25 +125,41 @@ class DwarfParser { int _linked_frame_size; // detected from FP-based DWARF entries; -1 = undetected bool _has_z_augmentation; - const char* add(size_t size) { - const char* ptr = _ptr; - _ptr = ptr + size; - return ptr; + // True if `size` bytes can be read at _ptr without leaving the section. + // Guards against both over-reads (past _section_end) and under-reads + // (_ptr moved before _section_start by an untrusted offset). + bool canRead(size_t size) const { + return _ptr >= _section_start && _ptr <= _section_end && + size <= (size_t)(_section_end - _ptr); } u8 get8() { + if (!canRead(1)) { + _ptr = _section_end; + return 0; + } return *_ptr++; } u16 get16() { - const char* ptr = add(2); + if (!canRead(2)) { + _ptr = _section_end; + return 0; + } + const char* ptr = _ptr; + _ptr += 2; u16 result; memcpy(&result, ptr, sizeof(u16)); return result; } u32 get32() { - const char* ptr = add(4); + if (!canRead(4)) { + _ptr = _section_end; + return 0; + } + const char* ptr = _ptr; + _ptr += 4; u32 result; memcpy(&result, ptr, sizeof(u32)); return result; @@ -144,13 +167,14 @@ class DwarfParser { u32 getLeb() { u32 result = 0; - for (u32 shift = 0; ; shift += 7) { + for (u32 shift = 0; canRead(1) && shift < 32; shift += 7) { u8 b = *_ptr++; - result |= (b & 0x7f) << shift; + result |= (u32)(b & 0x7f) << shift; if ((b & 0x80) == 0) { - return result; + break; } } + return result; } u32 getLeb(const char* end) { @@ -167,26 +191,31 @@ class DwarfParser { int getSLeb() { int result = 0; - for (u32 shift = 0; ; shift += 7) { + for (u32 shift = 0; canRead(1) && shift < 32; shift += 7) { u8 b = *_ptr++; - result |= (b & 0x7f) << shift; + // Compute in unsigned to avoid signed left-shift overflow (UB) for + // large shift values; the result is reinterpreted as signed below. + result |= (int)((u32)(b & 0x7f) << shift); if ((b & 0x80) == 0) { if ((b & 0x40) != 0 && (shift += 7) < 32) { - result |= ~0U << shift; + result |= (int)(~0U << shift); } - return result; + break; } } + return result; } int getSLeb(const char* end) { int result = 0; - for (u32 shift = 0; _ptr < end; shift += 7) { + for (u32 shift = 0; _ptr < end && shift < 32; shift += 7) { u8 b = *_ptr++; - result |= (b & 0x7f) << shift; + // Compute in unsigned to avoid signed left-shift overflow (UB) for + // large shift values; the result is reinterpreted as signed below. + result |= (int)((u32)(b & 0x7f) << shift); if ((b & 0x80) == 0) { if ((b & 0x40) != 0 && (shift += 7) < 32) { - result |= ~0U << shift; + result |= (int)(~0U << shift); } return result; } @@ -195,19 +224,23 @@ class DwarfParser { } void skipLeb() { - while (*_ptr++ & 0x80) {} + while (canRead(1) && (*_ptr++ & 0x80)) {} } const char* getPtr() { const char* ptr = _ptr; - const char* offset_ptr = add(4); + if (!canRead(4)) { + _ptr = _section_end; + return ptr; + } int offset; - memcpy(&offset, offset_ptr, sizeof(int)); + memcpy(&offset, _ptr, sizeof(int)); + _ptr += 4; return ptr + offset; } void init(const char* name, const char* image_base); - void parse(const char* eh_frame_hdr); + void parse(const char* eh_frame_hdr, size_t size); void parseEhFrame(const char* eh_frame, size_t size); void parseCie(); void parseFde(); @@ -218,7 +251,11 @@ class DwarfParser { FrameDesc* addRecordRaw(u32 loc, int cfa, int fp_off, int pc_off); public: - DwarfParser(const char* name, const char* image_base, const char* eh_frame_hdr); + // Tag to disambiguate the .eh_frame_hdr (binary-search index) constructor + // from the raw .eh_frame constructor below: with a size added, the two + // would otherwise share a signature. + struct EhFrameHdrTag {}; + DwarfParser(const char* name, const char* image_base, const char* eh_frame_hdr, size_t eh_frame_hdr_size, EhFrameHdrTag); DwarfParser(const char* name, const char* image_base, const char* eh_frame, size_t eh_frame_size); // Ownership of the returned pointer transfers to the caller. diff --git a/ddprof-lib/src/main/cpp/symbols_linux.cpp b/ddprof-lib/src/main/cpp/symbols_linux.cpp index 3193cff54..f3a508838 100644 --- a/ddprof-lib/src/main/cpp/symbols_linux.cpp +++ b/ddprof-lib/src/main/cpp/symbols_linux.cpp @@ -600,7 +600,8 @@ void ElfParser::parseDwarfInfo() { // Parse per-PC frame descriptions and detect per-library default frame layout. // On aarch64 this distinguishes GCC (LINKED_FRAME_SIZE=0) from clang // (LINKED_FRAME_CLANG_SIZE=16) conventions for each shared library. - DwarfParser dwarf(_cc->name(), _base, at(eh_frame_hdr)); + DwarfParser dwarf(_cc->name(), _base, at(eh_frame_hdr), eh_frame_hdr->p_memsz, + DwarfParser::EhFrameHdrTag{}); _cc->setDwarfTable(dwarf.table(), dwarf.count(), dwarf.detectedDefaultFrame()); } else if (strcmp(_cc->name(), "[vdso]") == 0) { FrameDesc* table = (FrameDesc*)malloc(sizeof(FrameDesc)); diff --git a/ddprof-lib/src/test/cpp/dwarf_ut.cpp b/ddprof-lib/src/test/cpp/dwarf_ut.cpp index f6877e148..e60f5ae2e 100644 --- a/ddprof-lib/src/test/cpp/dwarf_ut.cpp +++ b/ddprof-lib/src/test/cpp/dwarf_ut.cpp @@ -190,4 +190,26 @@ TEST(DwarfEhFrame, FdeAugDataOverrun) { delete dwarf; } +// Regression test for the .eh_frame_hdr hardening (found by fuzz_dwarf). +// A hostile .eh_frame_hdr can claim a large fde_count while providing no +// binary-search table; pre-hardening, parse() walked `table[i*2]` off the end +// of the section. The bounded parser rejects a fde_count that cannot fit in the +// section. The header is a heap buffer sized to exactly 16 bytes (header only, +// no table entries), so ASan's redzone catches any over-read deterministically. +TEST(DwarfEhFrameHdr, FdeCountOverrun) { + std::vector hdr(16, 0); // 16-byte header; the table would start at 16 + hdr[0] = 1; // version + hdr[1] = 0x03; // eh_frame_ptr_enc = DW_EH_PE_udata4 + hdr[2] = 0x03; // fde_count_enc = DW_EH_PE_udata4 + hdr[3] = 0x33; // table_enc = DW_EH_PE_datarel | DW_EH_PE_udata4 + // fde_count at offset 8 (little-endian): claim 1024 entries that aren't there. + hdr[8] = 0x00; + hdr[9] = 0x04; + + const char* base = reinterpret_cast(hdr.data()); + DwarfParser dwarf("test", base, base, hdr.size(), DwarfParser::EhFrameHdrTag{}); + EXPECT_EQ(dwarf.count(), 0); // rejected: no records, no crash + free(dwarf.table()); +} + #endif // DWARF_SUPPORTED diff --git a/ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp b/ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp index 3606d8209..55659f6e7 100644 --- a/ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp +++ b/ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp @@ -19,6 +19,7 @@ #include #include +#include #include // Include the DWARF parser @@ -89,14 +90,22 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { // The DwarfParser constructor calls parse() internally, which is where // most of the interesting parsing happens. We wrap this in a try-catch // because the parser may throw on invalid data. + // + // DwarfParser has no destructor: it malloc()s its FrameDesc table in + // init() and transfers ownership to the caller via table() (see dwarf.h). + // Production callers hand that pointer to CodeCache::setDwarfTable(), which + // later free()s it. We must do the same here, otherwise the table leaks on + // every iteration. The pointer is hoisted out of the try so it is freed on + // the throw path too (init() allocates the table before parse() can throw). + FrameDesc *table = nullptr; try { - DwarfParser parser(name, image_base, eh_frame_hdr); - // If we get here, the parser successfully built an unwind table. - // The parser allocates a FrameDesc table internally; destructor - // will clean it up. + DwarfParser parser(name, image_base, eh_frame_hdr, size, + DwarfParser::EhFrameHdrTag{}); + table = parser.table(); } catch (...) { // Expected for malformed input - the parser may throw on invalid data } + free(table); // free(), not delete[], to match the malloc() in init() delete[] eh_frame_hdr; return 0; From b7dae001bc2be5deceea2b4802bd2815fd53afc8 Mon Sep 17 00:00:00 2001 From: Edouard Schweisguth Date: Tue, 2 Jun 2026 12:45:04 +0000 Subject: [PATCH 2/5] fix pr comment --- ddprof-lib/src/main/cpp/dwarf.cpp | 20 ++++++++++---------- ddprof-lib/src/main/cpp/dwarf.h | 2 ++ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/ddprof-lib/src/main/cpp/dwarf.cpp b/ddprof-lib/src/main/cpp/dwarf.cpp index bf320eef1..5c14fb52a 100644 --- a/ddprof-lib/src/main/cpp/dwarf.cpp +++ b/ddprof-lib/src/main/cpp/dwarf.cpp @@ -159,19 +159,18 @@ void DwarfParser::parse(const char *eh_frame_hdr, size_t size) { return; } - int fde_count = *(int *)(eh_frame_hdr + 8); - int *table = (int *)(eh_frame_hdr + 16); - // Each table entry is a pair of 4-byte values (8 bytes) after the 16-byte - // header; this loop reads the first value of each pair. Reject a count that - // would make the index walk read past the section. A negative count cannot - // be valid either. - if (fde_count < 0 || (size_t)fde_count > (size - 16) / 8) { - Log::warn("Truncated or invalid .eh_frame_hdr (fde_count=%d, size=%lu) in %s", + u32 fde_count = *(u32 *)(eh_frame_hdr + 8); + u32 *table = (u32 *)(eh_frame_hdr + 16); + // Each entry is a (initial_loc, fde_ptr) pair of 4-byte section-relative + // offsets (DW_EH_PE_datarel | DW_EH_PE_udata4). Reject a count that would + // make the table walk read past the section. + if (fde_count > (size - 16) / 8) { + Log::warn("Truncated or invalid .eh_frame_hdr (fde_count=%u, size=%lu) in %s", fde_count, (unsigned long)size, _name); return; } - for (int i = 0; i < fde_count; i++) { - _ptr = eh_frame_hdr + table[i * 2]; + for (u32 i = 0; i < fde_count; i++) { + _ptr = eh_frame_hdr + table[i * 2 + 1]; // [i*2] is initial_loc; [i*2+1] is fde_ptr parseFde(); } } @@ -272,6 +271,7 @@ void DwarfParser::parseCie() { } const char *cie_start = _ptr; + if (!canRead(5)) { _ptr = _section_end; return; } _ptr += 5; while (_ptr < _section_end && *_ptr++) { } diff --git a/ddprof-lib/src/main/cpp/dwarf.h b/ddprof-lib/src/main/cpp/dwarf.h index cf37b3fa5..11f9a1674 100644 --- a/ddprof-lib/src/main/cpp/dwarf.h +++ b/ddprof-lib/src/main/cpp/dwarf.h @@ -178,6 +178,7 @@ class DwarfParser { } u32 getLeb(const char* end) { + if (end > _section_end) end = _section_end; u32 result = 0; for (u32 shift = 0; _ptr < end && shift < 32; shift += 7) { u8 b = *_ptr++; @@ -207,6 +208,7 @@ class DwarfParser { } int getSLeb(const char* end) { + if (end > _section_end) end = _section_end; int result = 0; for (u32 shift = 0; _ptr < end && shift < 32; shift += 7) { u8 b = *_ptr++; From 61adef89f2c903c82edd52b5895351ede4cf6628 Mon Sep 17 00:00:00 2001 From: Edouard Schweisguth Date: Tue, 2 Jun 2026 13:02:30 +0000 Subject: [PATCH 3/5] fix pr comment --- ddprof-lib/src/main/cpp/dwarf.cpp | 21 +++++++++++++-------- ddprof-lib/src/main/cpp/dwarf.h | 13 ++++++------- ddprof-lib/src/main/cpp/symbols_linux.cpp | 15 ++++++++++++++- ddprof-lib/src/test/cpp/dwarf_ut.cpp | 2 +- ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp | 2 +- 5 files changed, 35 insertions(+), 18 deletions(-) diff --git a/ddprof-lib/src/main/cpp/dwarf.cpp b/ddprof-lib/src/main/cpp/dwarf.cpp index 5c14fb52a..ac59689ec 100644 --- a/ddprof-lib/src/main/cpp/dwarf.cpp +++ b/ddprof-lib/src/main/cpp/dwarf.cpp @@ -112,9 +112,9 @@ void DwarfParser::init(const char *name, const char *image_base) { DwarfParser::DwarfParser(const char *name, const char *image_base, const char *eh_frame_hdr, size_t eh_frame_hdr_size, - EhFrameHdrTag) { + EhFrameHdrTag, const char *image_end) { init(name, image_base); - parse(eh_frame_hdr, eh_frame_hdr_size); + parse(eh_frame_hdr, eh_frame_hdr_size, image_end); } DwarfParser::DwarfParser(const char *name, const char *image_base, @@ -133,15 +133,18 @@ static constexpr u8 omit_sign_bit_mask_low(u8 value) { return value & 0x7; } -void DwarfParser::parse(const char *eh_frame_hdr, size_t size) { +void DwarfParser::parse(const char *eh_frame_hdr, size_t size, const char *image_end) { // Fixed .eh_frame_hdr header: version (1) + 3 encoding bytes + eh_frame_ptr (4) // + fde_count at offset 8 (4), binary-search table starting at offset 16. - // Refuse anything too small. Note: the table entries resolve into the adjacent - // .eh_frame section, not into .eh_frame_hdr itself, so we do NOT bound FDE - // reads to [eh_frame_hdr, eh_frame_hdr+size). + // Refuse anything too small. if (eh_frame_hdr == NULL || size < 16) { return; } + // Bound FDE reads to the full ELF image [image_base, image_end) so that + // pointers into the adjacent .eh_frame section are validated against mapped + // memory, not left unbounded. + _section_start = _image_base; + _section_end = image_end; u8 version = eh_frame_hdr[0]; u8 eh_frame_ptr_enc = eh_frame_hdr[1]; @@ -170,7 +173,9 @@ void DwarfParser::parse(const char *eh_frame_hdr, size_t size) { return; } for (u32 i = 0; i < fde_count; i++) { - _ptr = eh_frame_hdr + table[i * 2 + 1]; // [i*2] is initial_loc; [i*2+1] is fde_ptr + // table[i*2+1] is the FDE pointer (datarel sdata4); table[i*2] is initial_loc. + // Cast to int32_t to correctly handle negative offsets (FDE before the header). + _ptr = eh_frame_hdr + (int32_t)table[i * 2 + 1]; parseFde(); } } @@ -289,7 +294,7 @@ void DwarfParser::parseFde() { const char *fde_start = _ptr; u32 cie_offset = get32(); if (_count == 0) { - if (_section_start != NULL && cie_offset > (size_t)(fde_start - _section_start)) { + if (cie_offset > (size_t)(fde_start - _section_start)) { return; } _ptr = fde_start - cie_offset; diff --git a/ddprof-lib/src/main/cpp/dwarf.h b/ddprof-lib/src/main/cpp/dwarf.h index 11f9a1674..4277c0e29 100644 --- a/ddprof-lib/src/main/cpp/dwarf.h +++ b/ddprof-lib/src/main/cpp/dwarf.h @@ -107,11 +107,10 @@ class DwarfParser { const char* _name; const char* _image_base; const char* _ptr; - // Read window [_section_start, _section_end). Set by parseEhFrame() to the - // actual .eh_frame bounds so every helper read is clamped to the section. - // When parsing via .eh_frame_hdr (parse()), FDEs live in the adjacent - // .eh_frame whose bounds are unknown here, so _section_start stays NULL and - // _section_end stays at max-pointer (unbounded mode — same as original). + // Read window [_section_start, _section_end). Both paths set this window: + // - parseEhFrame(): set to the .eh_frame section bounds. + // - parse(): set to the full ELF image bounds [image_base, image_end) so + // that FDE reads into the adjacent .eh_frame are bounded to mapped memory. const char* _section_start; const char* _section_end; @@ -242,7 +241,7 @@ class DwarfParser { } void init(const char* name, const char* image_base); - void parse(const char* eh_frame_hdr, size_t size); + void parse(const char* eh_frame_hdr, size_t size, const char* image_end); void parseEhFrame(const char* eh_frame, size_t size); void parseCie(); void parseFde(); @@ -257,7 +256,7 @@ class DwarfParser { // from the raw .eh_frame constructor below: with a size added, the two // would otherwise share a signature. struct EhFrameHdrTag {}; - DwarfParser(const char* name, const char* image_base, const char* eh_frame_hdr, size_t eh_frame_hdr_size, EhFrameHdrTag); + DwarfParser(const char* name, const char* image_base, const char* eh_frame_hdr, size_t eh_frame_hdr_size, EhFrameHdrTag, const char* image_end); DwarfParser(const char* name, const char* image_base, const char* eh_frame, size_t eh_frame_size); // Ownership of the returned pointer transfers to the caller. diff --git a/ddprof-lib/src/main/cpp/symbols_linux.cpp b/ddprof-lib/src/main/cpp/symbols_linux.cpp index f3a508838..7a5568ce2 100644 --- a/ddprof-lib/src/main/cpp/symbols_linux.cpp +++ b/ddprof-lib/src/main/cpp/symbols_linux.cpp @@ -600,8 +600,21 @@ void ElfParser::parseDwarfInfo() { // Parse per-PC frame descriptions and detect per-library default frame layout. // On aarch64 this distinguishes GCC (LINKED_FRAME_SIZE=0) from clang // (LINKED_FRAME_CLANG_SIZE=16) conventions for each shared library. + // Compute image_end from the highest end address of all LOAD segments so + // the DWARF parser can validate FDE pointers against mapped memory. + const char* image_end = _base; + { + const char* pheaders = (const char*)_header + _header->e_phoff; + for (int i = 0; i < _header->e_phnum; i++) { + ElfProgramHeader* ph = (ElfProgramHeader*)(pheaders + i * _header->e_phentsize); + if (ph->p_type == PT_LOAD) { + const char* seg_end = at(ph) + ph->p_memsz; + if (seg_end > image_end) image_end = seg_end; + } + } + } DwarfParser dwarf(_cc->name(), _base, at(eh_frame_hdr), eh_frame_hdr->p_memsz, - DwarfParser::EhFrameHdrTag{}); + DwarfParser::EhFrameHdrTag{}, image_end); _cc->setDwarfTable(dwarf.table(), dwarf.count(), dwarf.detectedDefaultFrame()); } else if (strcmp(_cc->name(), "[vdso]") == 0) { FrameDesc* table = (FrameDesc*)malloc(sizeof(FrameDesc)); diff --git a/ddprof-lib/src/test/cpp/dwarf_ut.cpp b/ddprof-lib/src/test/cpp/dwarf_ut.cpp index e60f5ae2e..421c0191f 100644 --- a/ddprof-lib/src/test/cpp/dwarf_ut.cpp +++ b/ddprof-lib/src/test/cpp/dwarf_ut.cpp @@ -207,7 +207,7 @@ TEST(DwarfEhFrameHdr, FdeCountOverrun) { hdr[9] = 0x04; const char* base = reinterpret_cast(hdr.data()); - DwarfParser dwarf("test", base, base, hdr.size(), DwarfParser::EhFrameHdrTag{}); + DwarfParser dwarf("test", base, base, hdr.size(), DwarfParser::EhFrameHdrTag{}, base + hdr.size()); EXPECT_EQ(dwarf.count(), 0); // rejected: no records, no crash free(dwarf.table()); } diff --git a/ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp b/ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp index 55659f6e7..76cc3943f 100644 --- a/ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp +++ b/ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp @@ -100,7 +100,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { FrameDesc *table = nullptr; try { DwarfParser parser(name, image_base, eh_frame_hdr, size, - DwarfParser::EhFrameHdrTag{}); + DwarfParser::EhFrameHdrTag{}, image_base + size); table = parser.table(); } catch (...) { // Expected for malformed input - the parser may throw on invalid data From ed9c3553a265ccc8cc9ed89f2a18dc95f6020e39 Mon Sep 17 00:00:00 2001 From: Edouard Schweisguth Date: Tue, 2 Jun 2026 17:49:07 +0000 Subject: [PATCH 4/5] fix: tighten parseFde augmentation skip and add image_end header guard --- ddprof-lib/src/main/cpp/dwarf.cpp | 7 ++++++- ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp | 19 +++++-------------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/ddprof-lib/src/main/cpp/dwarf.cpp b/ddprof-lib/src/main/cpp/dwarf.cpp index ac59689ec..4bf866089 100644 --- a/ddprof-lib/src/main/cpp/dwarf.cpp +++ b/ddprof-lib/src/main/cpp/dwarf.cpp @@ -140,6 +140,11 @@ void DwarfParser::parse(const char *eh_frame_hdr, size_t size, const char *image if (eh_frame_hdr == NULL || size < 16) { return; } + // The version/encoding bytes [0..3] and fde_count [8..11] are read directly + // (not via canRead), so reject if image_end does not cover the 12-byte prefix. + if (image_end < eh_frame_hdr + 12) { + return; + } // Bound FDE reads to the full ELF image [image_base, image_end) so that // pointers into the adjacent .eh_frame section are validated against mapped // memory, not left unbounded. @@ -305,7 +310,7 @@ void DwarfParser::parseFde() { u32 range_start = getPtr() - _image_base; u32 range_len = get32(); _ptr += getLeb(); - if (_ptr > _section_end) _ptr = _section_end; + if (_ptr > fde_start + fde_len) return; parseInstructions(range_start, fde_start + fde_len); addRecord(range_start + range_len, DW_REG_FP, LINKED_FRAME_SIZE, -LINKED_FRAME_SIZE, -LINKED_FRAME_SIZE + DW_STACK_SLOT); diff --git a/ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp b/ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp index 76cc3943f..fa6dc2c81 100644 --- a/ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp +++ b/ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp @@ -88,24 +88,15 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { const char *image_base = eh_frame_hdr; // Relative addresses are within fuzzed data // The DwarfParser constructor calls parse() internally, which is where - // most of the interesting parsing happens. We wrap this in a try-catch - // because the parser may throw on invalid data. + // most of the interesting parsing happens. // // DwarfParser has no destructor: it malloc()s its FrameDesc table in // init() and transfers ownership to the caller via table() (see dwarf.h). // Production callers hand that pointer to CodeCache::setDwarfTable(), which - // later free()s it. We must do the same here, otherwise the table leaks on - // every iteration. The pointer is hoisted out of the try so it is freed on - // the throw path too (init() allocates the table before parse() can throw). - FrameDesc *table = nullptr; - try { - DwarfParser parser(name, image_base, eh_frame_hdr, size, - DwarfParser::EhFrameHdrTag{}, image_base + size); - table = parser.table(); - } catch (...) { - // Expected for malformed input - the parser may throw on invalid data - } - free(table); // free(), not delete[], to match the malloc() in init() + // later free()s it. We must do the same here, otherwise the table leaks. + DwarfParser parser(name, image_base, eh_frame_hdr, size, + DwarfParser::EhFrameHdrTag{}, image_base + size); + free(parser.table()); // free(), not delete[], to match the malloc() in init() delete[] eh_frame_hdr; return 0; From e504121dec80ecb0c8b3336c04f64a77f63cf8bc Mon Sep 17 00:00:00 2001 From: Edouard Schweisguth Date: Wed, 3 Jun 2026 13:31:56 +0000 Subject: [PATCH 5/5] fix: correct .eh_frame_hdr table offset from 16 to 12 --- ddprof-lib/src/main/cpp/dwarf.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/ddprof-lib/src/main/cpp/dwarf.cpp b/ddprof-lib/src/main/cpp/dwarf.cpp index 4bf866089..0cf6bbb7a 100644 --- a/ddprof-lib/src/main/cpp/dwarf.cpp +++ b/ddprof-lib/src/main/cpp/dwarf.cpp @@ -135,7 +135,7 @@ static constexpr u8 omit_sign_bit_mask_low(u8 value) { void DwarfParser::parse(const char *eh_frame_hdr, size_t size, const char *image_end) { // Fixed .eh_frame_hdr header: version (1) + 3 encoding bytes + eh_frame_ptr (4) - // + fde_count at offset 8 (4), binary-search table starting at offset 16. + // + fde_count at offset 8 (4), binary-search table starting at offset 12. // Refuse anything too small. if (eh_frame_hdr == NULL || size < 16) { return; @@ -168,19 +168,20 @@ void DwarfParser::parse(const char *eh_frame_hdr, size_t size, const char *image } u32 fde_count = *(u32 *)(eh_frame_hdr + 8); - u32 *table = (u32 *)(eh_frame_hdr + 16); + // Table starts at offset 12 (4-byte header + 4-byte eh_frame_ptr + 4-byte fde_count). // Each entry is a (initial_loc, fde_ptr) pair of 4-byte section-relative // offsets (DW_EH_PE_datarel | DW_EH_PE_udata4). Reject a count that would // make the table walk read past the section. - if (fde_count > (size - 16) / 8) { + u32 *table = (u32 *)(eh_frame_hdr + 12); + if (fde_count > (size - 12) / 8) { Log::warn("Truncated or invalid .eh_frame_hdr (fde_count=%u, size=%lu) in %s", fde_count, (unsigned long)size, _name); return; } for (u32 i = 0; i < fde_count; i++) { - // table[i*2+1] is the FDE pointer (datarel sdata4); table[i*2] is initial_loc. - // Cast to int32_t to correctly handle negative offsets (FDE before the header). - _ptr = eh_frame_hdr + (int32_t)table[i * 2 + 1]; + // table[i*2] is initial_loc; table[i*2+1] is the FDE pointer (datarel sdata4). + // Cast to int to correctly handle negative offsets (FDE before the header). + _ptr = eh_frame_hdr + (int)table[i * 2 + 1]; parseFde(); } }