diff --git a/ddprof-lib/src/main/cpp/dwarf.cpp b/ddprof-lib/src/main/cpp/dwarf.cpp index 15d092369..0cf6bbb7a 100644 --- a/ddprof-lib/src/main/cpp/dwarf.cpp +++ b/ddprof-lib/src/main/cpp/dwarf.cpp @@ -96,6 +96,8 @@ FrameDesc FrameDesc::no_dwarf_frame = {0, DW_REG_INVALID, DW_REG_INVALID, DW_REG void DwarfParser::init(const char *name, const char *image_base) { _name = name; _image_base = image_base; + _section_start = NULL; + _section_end = reinterpret_cast(~(size_t)0); _capacity = 128; _count = 0; @@ -109,9 +111,10 @@ void DwarfParser::init(const char *name, const char *image_base) { } DwarfParser::DwarfParser(const char *name, const char *image_base, - const char *eh_frame_hdr) { + const char *eh_frame_hdr, size_t eh_frame_hdr_size, + EhFrameHdrTag, const char *image_end) { init(name, image_base); - parse(eh_frame_hdr); + parse(eh_frame_hdr, eh_frame_hdr_size, image_end); } DwarfParser::DwarfParser(const char *name, const char *image_base, @@ -130,7 +133,24 @@ static constexpr u8 omit_sign_bit_mask_low(u8 value) { return value & 0x7; } -void DwarfParser::parse(const char *eh_frame_hdr) { +void DwarfParser::parse(const char *eh_frame_hdr, size_t size, const char *image_end) { + // Fixed .eh_frame_hdr header: version (1) + 3 encoding bytes + eh_frame_ptr (4) + // + fde_count at offset 8 (4), binary-search table starting at offset 12. + // Refuse anything too small. + if (eh_frame_hdr == NULL || size < 16) { + return; + } + // The version/encoding bytes [0..3] and fde_count [8..11] are read directly + // (not via canRead), so reject if image_end does not cover the 12-byte prefix. + if (image_end < eh_frame_hdr + 12) { + return; + } + // Bound FDE reads to the full ELF image [image_base, image_end) so that + // pointers into the adjacent .eh_frame section are validated against mapped + // memory, not left unbounded. + _section_start = _image_base; + _section_end = image_end; + u8 version = eh_frame_hdr[0]; u8 eh_frame_ptr_enc = eh_frame_hdr[1]; u8 fde_count_enc = eh_frame_hdr[2]; @@ -147,10 +167,21 @@ void DwarfParser::parse(const char *eh_frame_hdr) { return; } - int fde_count = *(int *)(eh_frame_hdr + 8); - int *table = (int *)(eh_frame_hdr + 16); - for (int i = 0; i < fde_count; i++) { - _ptr = eh_frame_hdr + table[i * 2]; + u32 fde_count = *(u32 *)(eh_frame_hdr + 8); + // Table starts at offset 12 (4-byte header + 4-byte eh_frame_ptr + 4-byte fde_count). + // Each entry is a (initial_loc, fde_ptr) pair of 4-byte section-relative + // offsets (DW_EH_PE_datarel | DW_EH_PE_udata4). Reject a count that would + // make the table walk read past the section. + u32 *table = (u32 *)(eh_frame_hdr + 12); + if (fde_count > (size - 12) / 8) { + Log::warn("Truncated or invalid .eh_frame_hdr (fde_count=%u, size=%lu) in %s", + fde_count, (unsigned long)size, _name); + return; + } + for (u32 i = 0; i < fde_count; i++) { + // table[i*2] is initial_loc; table[i*2+1] is the FDE pointer (datarel sdata4). + // Cast to int to correctly handle negative offsets (FDE before the header). + _ptr = eh_frame_hdr + (int)table[i * 2 + 1]; parseFde(); } } @@ -161,10 +192,12 @@ void DwarfParser::parseEhFrame(const char *eh_frame, size_t size) { if (eh_frame == NULL || size < 4) { return; } - const char *section_end = eh_frame + size; + // Publish the read window so the get*/skip* helpers clamp to it. + _section_start = eh_frame; + _section_end = eh_frame + size; _ptr = eh_frame; - while (_ptr + 4 <= section_end) { + while (_ptr + 4 <= _section_end) { const char *record_start = _ptr; u32 length = get32(); if (length == 0) { @@ -174,7 +207,7 @@ void DwarfParser::parseEhFrame(const char *eh_frame, size_t size) { break; // 64-bit DWARF not supported } - if (length > (size_t)(section_end - record_start) - 4) { + if (length > (size_t)(_section_end - record_start) - 4) { break; } const char *record_end = record_start + 4 + length; @@ -249,8 +282,9 @@ void DwarfParser::parseCie() { } const char *cie_start = _ptr; + if (!canRead(5)) { _ptr = _section_end; return; } _ptr += 5; - while (*_ptr++) { + while (_ptr < _section_end && *_ptr++) { } _code_align = getLeb(); _data_align = getSLeb(); @@ -266,6 +300,9 @@ void DwarfParser::parseFde() { const char *fde_start = _ptr; u32 cie_offset = get32(); if (_count == 0) { + if (cie_offset > (size_t)(fde_start - _section_start)) { + return; + } _ptr = fde_start - cie_offset; parseCie(); _ptr = fde_start + 4; @@ -274,12 +311,19 @@ void DwarfParser::parseFde() { u32 range_start = getPtr() - _image_base; u32 range_len = get32(); _ptr += getLeb(); + if (_ptr > fde_start + fde_len) return; parseInstructions(range_start, fde_start + fde_len); addRecord(range_start + range_len, DW_REG_FP, LINKED_FRAME_SIZE, -LINKED_FRAME_SIZE, -LINKED_FRAME_SIZE + DW_STACK_SLOT); } void DwarfParser::parseInstructions(u32 loc, const char *end) { + // `end` is derived from an untrusted record length; never let it run past + // the section. Reads inside the loop are clamped by the get*/skip* helpers, + // but clamping here keeps the loop bound honest and _ptr in range. + if (end > _section_end) { + end = _section_end; + } const u32 code_align = _code_align; const int data_align = _data_align; @@ -364,11 +408,13 @@ void DwarfParser::parseInstructions(u32 loc, const char *end) { cfa_reg = len == 11 ? DW_REG_PLT : DW_REG_INVALID; cfa_off = DW_STACK_SLOT; _ptr += len; + if (_ptr > _section_end) _ptr = _section_end; break; } case DW_CFA_expression: skipLeb(); _ptr += getLeb(); + if (_ptr > _section_end) _ptr = _section_end; break; case DW_CFA_offset_extended_sf: switch (getLeb()) { @@ -402,6 +448,7 @@ void DwarfParser::parseInstructions(u32 loc, const char *end) { } } else { _ptr += getLeb(); + if (_ptr > _section_end) _ptr = _section_end; } break; #ifdef __aarch64__ @@ -452,6 +499,9 @@ int DwarfParser::parseExpression() { u32 len = getLeb(); const char *end = _ptr + len; + if (end > _section_end) { + end = _section_end; + } while (_ptr < end) { u8 op = get8(); @@ -499,13 +549,15 @@ int DwarfParser::parseExpression() { void DwarfParser::addRecord(u32 loc, u32 cfa_reg, int cfa_off, int fp_off, int pc_off) { - // Sanity asserts to be able to pack those two values into one u32 vq - // Assert that the cfa_reg fits in 8 bits (0 to 255) - assert(cfa_reg <= 0xFF); - - // Assert that the cfa_off fits in a 24-bit signed range. - // Signed 24-bit integer range: -2^23 (-8,388,608) to 2^23 - 1 (8,388,607) - assert(cfa_off >= -8388608 && cfa_off <= 8388607); + // cfa_reg and cfa_off are packed into a single u32 (cfa_off << 8 | cfa_reg), + // so cfa_reg must fit in 8 bits (0..255) and cfa_off in a signed 24-bit range + // (-2^23 .. 2^23-1). Well-formed compiler-generated DWARF always satisfies + // this, but a malformed or corrupt .eh_frame from an untrusted ELF may not. + // Rather than pack a truncated (wrong) descriptor, drop the record: the stack + // walker treats a missing entry for a PC the same as any other unwind miss. + if (cfa_reg > 0xFF || cfa_off < -8388608 || cfa_off > 8388607) { + return; + } // cfa_reg and cfa_off can be encoded to a single 32 bit value, considering the existing and supported systems u32 cfa = static_cast(cfa_off) << 8 | static_cast(cfa_reg & 0xff); diff --git a/ddprof-lib/src/main/cpp/dwarf.h b/ddprof-lib/src/main/cpp/dwarf.h index 016ead404..4277c0e29 100644 --- a/ddprof-lib/src/main/cpp/dwarf.h +++ b/ddprof-lib/src/main/cpp/dwarf.h @@ -107,6 +107,12 @@ class DwarfParser { const char* _name; const char* _image_base; const char* _ptr; + // Read window [_section_start, _section_end). Both paths set this window: + // - parseEhFrame(): set to the .eh_frame section bounds. + // - parse(): set to the full ELF image bounds [image_base, image_end) so + // that FDE reads into the adjacent .eh_frame are bounded to mapped memory. + const char* _section_start; + const char* _section_end; int _capacity; int _count; @@ -118,25 +124,41 @@ class DwarfParser { int _linked_frame_size; // detected from FP-based DWARF entries; -1 = undetected bool _has_z_augmentation; - const char* add(size_t size) { - const char* ptr = _ptr; - _ptr = ptr + size; - return ptr; + // True if `size` bytes can be read at _ptr without leaving the section. + // Guards against both over-reads (past _section_end) and under-reads + // (_ptr moved before _section_start by an untrusted offset). + bool canRead(size_t size) const { + return _ptr >= _section_start && _ptr <= _section_end && + size <= (size_t)(_section_end - _ptr); } u8 get8() { + if (!canRead(1)) { + _ptr = _section_end; + return 0; + } return *_ptr++; } u16 get16() { - const char* ptr = add(2); + if (!canRead(2)) { + _ptr = _section_end; + return 0; + } + const char* ptr = _ptr; + _ptr += 2; u16 result; memcpy(&result, ptr, sizeof(u16)); return result; } u32 get32() { - const char* ptr = add(4); + if (!canRead(4)) { + _ptr = _section_end; + return 0; + } + const char* ptr = _ptr; + _ptr += 4; u32 result; memcpy(&result, ptr, sizeof(u32)); return result; @@ -144,16 +166,18 @@ class DwarfParser { u32 getLeb() { u32 result = 0; - for (u32 shift = 0; ; shift += 7) { + for (u32 shift = 0; canRead(1) && shift < 32; shift += 7) { u8 b = *_ptr++; - result |= (b & 0x7f) << shift; + result |= (u32)(b & 0x7f) << shift; if ((b & 0x80) == 0) { - return result; + break; } } + return result; } u32 getLeb(const char* end) { + if (end > _section_end) end = _section_end; u32 result = 0; for (u32 shift = 0; _ptr < end && shift < 32; shift += 7) { u8 b = *_ptr++; @@ -167,26 +191,32 @@ class DwarfParser { int getSLeb() { int result = 0; - for (u32 shift = 0; ; shift += 7) { + for (u32 shift = 0; canRead(1) && shift < 32; shift += 7) { u8 b = *_ptr++; - result |= (b & 0x7f) << shift; + // Compute in unsigned to avoid signed left-shift overflow (UB) for + // large shift values; the result is reinterpreted as signed below. + result |= (int)((u32)(b & 0x7f) << shift); if ((b & 0x80) == 0) { if ((b & 0x40) != 0 && (shift += 7) < 32) { - result |= ~0U << shift; + result |= (int)(~0U << shift); } - return result; + break; } } + return result; } int getSLeb(const char* end) { + if (end > _section_end) end = _section_end; int result = 0; - for (u32 shift = 0; _ptr < end; shift += 7) { + for (u32 shift = 0; _ptr < end && shift < 32; shift += 7) { u8 b = *_ptr++; - result |= (b & 0x7f) << shift; + // Compute in unsigned to avoid signed left-shift overflow (UB) for + // large shift values; the result is reinterpreted as signed below. + result |= (int)((u32)(b & 0x7f) << shift); if ((b & 0x80) == 0) { if ((b & 0x40) != 0 && (shift += 7) < 32) { - result |= ~0U << shift; + result |= (int)(~0U << shift); } return result; } @@ -195,19 +225,23 @@ class DwarfParser { } void skipLeb() { - while (*_ptr++ & 0x80) {} + while (canRead(1) && (*_ptr++ & 0x80)) {} } const char* getPtr() { const char* ptr = _ptr; - const char* offset_ptr = add(4); + if (!canRead(4)) { + _ptr = _section_end; + return ptr; + } int offset; - memcpy(&offset, offset_ptr, sizeof(int)); + memcpy(&offset, _ptr, sizeof(int)); + _ptr += 4; return ptr + offset; } void init(const char* name, const char* image_base); - void parse(const char* eh_frame_hdr); + void parse(const char* eh_frame_hdr, size_t size, const char* image_end); void parseEhFrame(const char* eh_frame, size_t size); void parseCie(); void parseFde(); @@ -218,7 +252,11 @@ class DwarfParser { FrameDesc* addRecordRaw(u32 loc, int cfa, int fp_off, int pc_off); public: - DwarfParser(const char* name, const char* image_base, const char* eh_frame_hdr); + // Tag to disambiguate the .eh_frame_hdr (binary-search index) constructor + // from the raw .eh_frame constructor below: with a size added, the two + // would otherwise share a signature. + struct EhFrameHdrTag {}; + DwarfParser(const char* name, const char* image_base, const char* eh_frame_hdr, size_t eh_frame_hdr_size, EhFrameHdrTag, const char* image_end); DwarfParser(const char* name, const char* image_base, const char* eh_frame, size_t eh_frame_size); // Ownership of the returned pointer transfers to the caller. diff --git a/ddprof-lib/src/main/cpp/symbols_linux.cpp b/ddprof-lib/src/main/cpp/symbols_linux.cpp index 3193cff54..7a5568ce2 100644 --- a/ddprof-lib/src/main/cpp/symbols_linux.cpp +++ b/ddprof-lib/src/main/cpp/symbols_linux.cpp @@ -600,7 +600,21 @@ void ElfParser::parseDwarfInfo() { // Parse per-PC frame descriptions and detect per-library default frame layout. // On aarch64 this distinguishes GCC (LINKED_FRAME_SIZE=0) from clang // (LINKED_FRAME_CLANG_SIZE=16) conventions for each shared library. - DwarfParser dwarf(_cc->name(), _base, at(eh_frame_hdr)); + // Compute image_end from the highest end address of all LOAD segments so + // the DWARF parser can validate FDE pointers against mapped memory. + const char* image_end = _base; + { + const char* pheaders = (const char*)_header + _header->e_phoff; + for (int i = 0; i < _header->e_phnum; i++) { + ElfProgramHeader* ph = (ElfProgramHeader*)(pheaders + i * _header->e_phentsize); + if (ph->p_type == PT_LOAD) { + const char* seg_end = at(ph) + ph->p_memsz; + if (seg_end > image_end) image_end = seg_end; + } + } + } + DwarfParser dwarf(_cc->name(), _base, at(eh_frame_hdr), eh_frame_hdr->p_memsz, + DwarfParser::EhFrameHdrTag{}, image_end); _cc->setDwarfTable(dwarf.table(), dwarf.count(), dwarf.detectedDefaultFrame()); } else if (strcmp(_cc->name(), "[vdso]") == 0) { FrameDesc* table = (FrameDesc*)malloc(sizeof(FrameDesc)); diff --git a/ddprof-lib/src/test/cpp/dwarf_ut.cpp b/ddprof-lib/src/test/cpp/dwarf_ut.cpp index f6877e148..421c0191f 100644 --- a/ddprof-lib/src/test/cpp/dwarf_ut.cpp +++ b/ddprof-lib/src/test/cpp/dwarf_ut.cpp @@ -190,4 +190,26 @@ TEST(DwarfEhFrame, FdeAugDataOverrun) { delete dwarf; } +// Regression test for the .eh_frame_hdr hardening (found by fuzz_dwarf). +// A hostile .eh_frame_hdr can claim a large fde_count while providing no +// binary-search table; pre-hardening, parse() walked `table[i*2]` off the end +// of the section. The bounded parser rejects a fde_count that cannot fit in the +// section. The header is a heap buffer sized to exactly 16 bytes (header only, +// no table entries), so ASan's redzone catches any over-read deterministically. +TEST(DwarfEhFrameHdr, FdeCountOverrun) { + std::vector hdr(16, 0); // 16-byte header; the table would start at 16 + hdr[0] = 1; // version + hdr[1] = 0x03; // eh_frame_ptr_enc = DW_EH_PE_udata4 + hdr[2] = 0x03; // fde_count_enc = DW_EH_PE_udata4 + hdr[3] = 0x33; // table_enc = DW_EH_PE_datarel | DW_EH_PE_udata4 + // fde_count at offset 8 (little-endian): claim 1024 entries that aren't there. + hdr[8] = 0x00; + hdr[9] = 0x04; + + const char* base = reinterpret_cast(hdr.data()); + DwarfParser dwarf("test", base, base, hdr.size(), DwarfParser::EhFrameHdrTag{}, base + hdr.size()); + EXPECT_EQ(dwarf.count(), 0); // rejected: no records, no crash + free(dwarf.table()); +} + #endif // DWARF_SUPPORTED diff --git a/ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp b/ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp index 3606d8209..fa6dc2c81 100644 --- a/ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp +++ b/ddprof-lib/src/test/fuzz/fuzz_dwarf.cpp @@ -19,6 +19,7 @@ #include #include +#include #include // Include the DWARF parser @@ -87,16 +88,15 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { const char *image_base = eh_frame_hdr; // Relative addresses are within fuzzed data // The DwarfParser constructor calls parse() internally, which is where - // most of the interesting parsing happens. We wrap this in a try-catch - // because the parser may throw on invalid data. - try { - DwarfParser parser(name, image_base, eh_frame_hdr); - // If we get here, the parser successfully built an unwind table. - // The parser allocates a FrameDesc table internally; destructor - // will clean it up. - } catch (...) { - // Expected for malformed input - the parser may throw on invalid data - } + // most of the interesting parsing happens. + // + // DwarfParser has no destructor: it malloc()s its FrameDesc table in + // init() and transfers ownership to the caller via table() (see dwarf.h). + // Production callers hand that pointer to CodeCache::setDwarfTable(), which + // later free()s it. We must do the same here, otherwise the table leaks. + DwarfParser parser(name, image_base, eh_frame_hdr, size, + DwarfParser::EhFrameHdrTag{}, image_base + size); + free(parser.table()); // free(), not delete[], to match the malloc() in init() delete[] eh_frame_hdr; return 0;