diff --git a/ddprof-lib/src/main/cpp/dwarf.cpp b/ddprof-lib/src/main/cpp/dwarf.cpp index 0cf6bbb7a..b4dee8877 100644 --- a/ddprof-lib/src/main/cpp/dwarf.cpp +++ b/ddprof-lib/src/main/cpp/dwarf.cpp @@ -93,11 +93,12 @@ FrameDesc FrameDesc::default_frame = {0, DW_REG_FP | LINKED_FRAME_SIZE << 8, FrameDesc FrameDesc::default_clang_frame = {0, DW_REG_FP | LINKED_FRAME_CLANG_SIZE << 8, -LINKED_FRAME_CLANG_SIZE, -LINKED_FRAME_CLANG_SIZE + DW_STACK_SLOT}; FrameDesc FrameDesc::no_dwarf_frame = {0, DW_REG_INVALID, DW_REG_INVALID, DW_REG_INVALID}; -void DwarfParser::init(const char *name, const char *image_base) { +void DwarfParser::init(const char *name, const char *image_base, const char *image_end) { _name = name; _image_base = image_base; _section_start = NULL; _section_end = reinterpret_cast(~(size_t)0); + _image_end = image_end; _capacity = 128; _count = 0; @@ -113,13 +114,13 @@ void DwarfParser::init(const char *name, const char *image_base) { DwarfParser::DwarfParser(const char *name, const char *image_base, const char *eh_frame_hdr, size_t eh_frame_hdr_size, EhFrameHdrTag, const char *image_end) { - init(name, image_base); + init(name, image_base, image_end); parse(eh_frame_hdr, eh_frame_hdr_size, image_end); } DwarfParser::DwarfParser(const char *name, const char *image_base, const char *eh_frame, size_t eh_frame_size) { - init(name, image_base); + init(name, image_base, eh_frame + eh_frame_size); parseEhFrame(eh_frame, eh_frame_size); } @@ -151,6 +152,7 @@ void DwarfParser::parse(const char *eh_frame_hdr, size_t size, const char *image _section_start = _image_base; _section_end = image_end; + u8 version = eh_frame_hdr[0]; u8 eh_frame_ptr_enc = eh_frame_hdr[1]; u8 fde_count_enc = eh_frame_hdr[2]; @@ -276,28 +278,37 @@ void DwarfParser::parseEhFrame(const char *eh_frame, size_t size) { } void DwarfParser::parseCie() { + if (_ptr + 4 > _image_end) return; u32 cie_len = get32(); if (cie_len == 0 || cie_len == 0xffffffff) { return; } const char *cie_start = _ptr; + 
const char *cie_end = cie_start + cie_len; + if (cie_end > _section_end) return; + if (!canRead(5)) { _ptr = _section_end; return; } _ptr += 5; - while (_ptr < _section_end && *_ptr++) { + while (_ptr < cie_end && *_ptr++) { } - _code_align = getLeb(); - _data_align = getSLeb(); - _ptr = cie_start + cie_len; + _code_align = getLeb(cie_end); + _data_align = getSLeb(cie_end); + _ptr = cie_end; } void DwarfParser::parseFde() { + if (_ptr + 4 > _image_end) return; u32 fde_len = get32(); if (fde_len == 0 || fde_len == 0xffffffff) { return; } const char *fde_start = _ptr; + const char *fde_end = fde_start + fde_len; + if (fde_end > _image_end) return; + + if (_ptr + 4 > fde_end) return; u32 cie_offset = get32(); if (_count == 0) { if (cie_offset > (size_t)(fde_start - _section_start)) { @@ -308,11 +319,12 @@ void DwarfParser::parseFde() { _ptr = fde_start + 4; } + if (_ptr + 8 > fde_end) return; u32 range_start = getPtr() - _image_base; u32 range_len = get32(); - _ptr += getLeb(); - if (_ptr > fde_start + fde_len) return; - parseInstructions(range_start, fde_start + fde_len); + _ptr += getLeb(fde_end); + if (_ptr > fde_end) return; + parseInstructions(range_start, fde_end); addRecord(range_start + range_len, DW_REG_FP, LINKED_FRAME_SIZE, -LINKED_FRAME_SIZE, -LINKED_FRAME_SIZE + DW_STACK_SLOT); } diff --git a/ddprof-lib/src/main/cpp/dwarf.h b/ddprof-lib/src/main/cpp/dwarf.h index 4277c0e29..d591383e8 100644 --- a/ddprof-lib/src/main/cpp/dwarf.h +++ b/ddprof-lib/src/main/cpp/dwarf.h @@ -106,6 +106,7 @@ class DwarfParser { private: const char* _name; const char* _image_base; + const char* _image_end; const char* _ptr; // Read window [_section_start, _section_end). Both paths set this window: // - parseEhFrame(): set to the .eh_frame section bounds. 
@@ -229,6 +230,7 @@ class DwarfParser { } const char* getPtr() { + if (_ptr + 4 > _image_end) { _ptr = _image_end; return _image_base; } const char* ptr = _ptr; if (!canRead(4)) { _ptr = _section_end; @@ -240,7 +242,7 @@ class DwarfParser { return ptr + offset; } - void init(const char* name, const char* image_base); + void init(const char* name, const char* image_base, const char* image_end); void parse(const char* eh_frame_hdr, size_t size, const char* image_end); void parseEhFrame(const char* eh_frame, size_t size); void parseCie(); diff --git a/ddprof-lib/src/main/cpp/symbols_linux.cpp b/ddprof-lib/src/main/cpp/symbols_linux.cpp index 495f0486e..d6dd12ee2 100644 --- a/ddprof-lib/src/main/cpp/symbols_linux.cpp +++ b/ddprof-lib/src/main/cpp/symbols_linux.cpp @@ -348,29 +348,105 @@ class ElfParser { ElfHeader* _header; const char* _sections; const char* _vaddr_diff; + const char* _image_end; // one-past-the-end of the mapped ELF image; bounds file-relative reads - ElfParser(CodeCache* cc, const char* base, const void* addr, const char* file_name, bool relocate_dyn) { + ElfParser(CodeCache* cc, const char* base, const void* addr, size_t image_size, const char* file_name, bool relocate_dyn) { _cc = cc; _base = base; _file_name = file_name; _relocate_dyn = relocate_dyn; _header = (ElfHeader*)addr; - _sections = (const char*)addr + _header->e_shoff; + _image_end = (const char*)addr + image_size; + // e_shoff sits at a fixed offset inside the header; only compute the pointer + // when the image is at least header-sized AND e_shoff is within the image, + // so the addition cannot overflow and sectionAt()/inImage() can reject it + // cleanly without UB. + _sections = (image_size >= sizeof(ElfHeader) && _header->e_shoff < image_size) + ? (const char*)addr + _header->e_shoff + : NULL; } bool validHeader() { + // A valid ELF image is at least a full header; this also makes the + // e_ident / e_shstrndx reads below in-bounds for tiny inputs. 
+ if (_image_end < (const char*)_header + sizeof(ElfHeader)) { + return false; + } unsigned char* ident = _header->e_ident; return ident[0] == 0x7f && ident[1] == 'E' && ident[2] == 'L' && ident[3] == 'F' && ident[4] == ELFCLASS_SUPPORTED && ident[5] == ELFDATA2LSB && ident[6] == EV_CURRENT && _header->e_shstrndx != SHN_UNDEF; } - ElfSection* section(int index) { - return (ElfSection*)(_sections + index * _header->e_shentsize); + // --- Bounds-checked accessors for the file/section path ----------------- + // These guard parsing of section headers, symbol tables and string tables, + // all of which use file-offset-relative pointers that must lie inside the + // mapped image [_header, _image_end). The dynamic-section path uses + // virtual-address-relative pointers into live memory and is intentionally + // NOT routed through inImage(). + + // True when [ptr, ptr+len) lies entirely within the mapped image. + bool inImage(const void* ptr, size_t len) const { + const char* p = (const char*)ptr; + return p >= (const char*)_header + && p <= _image_end + && len <= (size_t)(_image_end - p); + } + + // Section header at `index`, or NULL when the index or entry is out of bounds. + ElfSection* sectionAt(int index) { + if (_sections == NULL || index < 0 || index >= _header->e_shnum + || _header->e_shentsize < sizeof(ElfSection)) { + return NULL; + } + ElfSection* s = (ElfSection*)(_sections + (size_t)index * _header->e_shentsize); + return inImage(s, sizeof(ElfSection)) ? s : NULL; + } + + // Start of a section's first `want` content bytes, or NULL if not fully mapped. + const char* contentAt(ElfSection* s, size_t want) { + if (s == NULL) { + return NULL; + } + // Validate sh_offset in integer space before forming the pointer so that + // a large attacker-controlled offset cannot cause pointer-overflow UB + // (the project builds with -fsanitize=pointer-overflow -fno-sanitize-recover). 
+ size_t img_size = (size_t)(_image_end - (const char*)_header); + if (s->sh_offset > img_size || want > img_size - s->sh_offset) { + return NULL; + } + return (const char*)_header + s->sh_offset; } - const char* at(ElfSection* section) { - return (const char*)_header + section->sh_offset; + // NUL-terminated string at `off` within a [strtab, strtab+size) string table, + // or NULL if the offset is out of range or the string is not terminated in it. + static const char* strAt(const char* strtab, size_t size, uint32_t off) { + if (strtab == NULL || off >= size) { + return NULL; + } + if (memchr(strtab + off, '\0', size - off) == NULL) { + return NULL; + } + return strtab + off; + } + + // Program-header entry at `index`, or NULL when the index or entry is out of bounds. + ElfProgramHeader* phdrAt(int index) { + if (index < 0 || index >= _header->e_phnum + || _header->e_phentsize < sizeof(ElfProgramHeader)) { + return NULL; + } + // Validate entirely in integer space before forming any pointer. + // Both e_phoff and index*e_phentsize are attacker-controlled; either + // can be large enough to wrap a pointer under -fsanitize=pointer-overflow. + size_t img_size = (size_t)(_image_end - (const char*)_header); + size_t phoff = _header->e_phoff; + size_t stride = (size_t)index * _header->e_phentsize; + if (phoff > img_size || stride > img_size - phoff) { + return NULL; + } + ElfProgramHeader* ph = (ElfProgramHeader*)((const char*)_header + phoff + stride); + return inImage(ph, sizeof(ElfProgramHeader)) ? 
ph : NULL; } const char* at(ElfProgramHeader* pheader) { @@ -406,7 +482,7 @@ class ElfParser { bool loadSymbolsFromDebuginfodCache(const char* build_id, const int build_id_len); bool loadSymbolsUsingBuildId(); bool loadSymbolsUsingDebugLink(); - void loadSymbolTable(const char* symbols, size_t total_size, size_t ent_size, const char* strings); + void loadSymbolTable(const char* symbols, size_t total_size, size_t ent_size, const char* strings, size_t strings_size); void addRelocationSymbols(ElfSection* reltab, const char* plt); const char* getDebuginfodCache(); @@ -417,12 +493,27 @@ class ElfParser { ElfSection* ElfParser::findSection(uint32_t type, const char* name) { - const char* strtab = at(section(_header->e_shstrndx)); + // The section-header string table must be present and fully mapped before + // any section name can be resolved. Untrusted e_shoff/e_shentsize/e_shstrndx + // and sh_offset values are all validated here. + ElfSection* shstr = sectionAt(_header->e_shstrndx); + if (shstr == NULL) { + return NULL; + } + size_t strtab_size = shstr->sh_size; + const char* strtab = contentAt(shstr, strtab_size); + if (strtab == NULL) { + return NULL; + } for (int i = 0; i < _header->e_shnum; i++) { - ElfSection* section = this->section(i); + ElfSection* section = sectionAt(i); + if (section == NULL) { + continue; + } if (section->sh_type == type && section->sh_name != 0) { - if (strcmp(strtab + section->sh_name, name) == 0) { + const char* sname = strAt(strtab, strtab_size, section->sh_name); + if (sname != NULL && strcmp(sname, name) == 0) { return section; } } @@ -432,15 +523,12 @@ ElfSection* ElfParser::findSection(uint32_t type, const char* name) { } ElfProgramHeader* ElfParser::findProgramHeader(uint32_t type) { - const char* pheaders = (const char*)_header + _header->e_phoff; - for (int i = 0; i < _header->e_phnum; i++) { - ElfProgramHeader* pheader = (ElfProgramHeader*)(pheaders + i * _header->e_phentsize); - if (pheader->p_type == type) { + 
ElfProgramHeader* pheader = phdrAt(i); + if (pheader != NULL && pheader->p_type == type) { return pheader; } } - return NULL; } @@ -457,7 +545,7 @@ bool ElfParser::parseFile(CodeCache* cc, const char* base, const char* file_name if (addr == MAP_FAILED) { Log::warn("Could not parse symbols from %s: %s", file_name, strerror(errno)); } else { - ElfParser elf(cc, base, addr, file_name, false); + ElfParser elf(cc, base, addr, length, file_name, false); if (elf.validHeader()) { elf.calcVirtualLoadAddress(); elf.loadSymbols(use_debug); @@ -468,7 +556,7 @@ bool ElfParser::parseFile(CodeCache* cc, const char* base, const char* file_name } void ElfParser::parseProgramHeaders(CodeCache* cc, const char* base, const char* end, bool relocate_dyn) { - ElfParser elf(cc, base, base, NULL, relocate_dyn); + ElfParser elf(cc, base, base, (size_t)(end - base), NULL, relocate_dyn); if (elf.validHeader() && base + elf._header->e_phoff < end) { cc->setTextBase(base); elf.calcVirtualLoadAddress(); @@ -483,10 +571,9 @@ void ElfParser::calcVirtualLoadAddress() { _vaddr_diff = NULL; return; } - const char* pheaders = (const char*)_header + _header->e_phoff; for (int i = 0; i < _header->e_phnum; i++) { - ElfProgramHeader* pheader = (ElfProgramHeader*)(pheaders + i * _header->e_phentsize); - if (pheader->p_type == PT_LOAD) { + ElfProgramHeader* pheader = phdrAt(i); + if (pheader != NULL && pheader->p_type == PT_LOAD) { _vaddr_diff = _base - pheader->p_vaddr; return; } @@ -506,6 +593,7 @@ void ElfParser::parseDynamicSection() { size_t relent = 0; size_t relcount = 0; size_t syment = 0; + size_t strsz = 0; uint32_t nsyms = 0; const char* dyn_start = at(dynamic); @@ -521,6 +609,9 @@ void ElfParser::parseDynamicSection() { case DT_SYMENT: syment = dyn->d_un.d_val; break; + case DT_STRSZ: + strsz = dyn->d_un.d_val; + break; case DT_HASH: nsyms = ((uint32_t*)dyn_ptr(dyn))[1]; break; @@ -558,8 +649,19 @@ void ElfParser::parseDynamicSection() { return; } + // DT_STRSZ is required by the ELF spec 
whenever DT_STRTAB is present. + // When it is absent (strsz == 0) all string lookups via strAt() would + // be rejected, silently dropping every symbol. Cap to 1 MB: real dynamic + // string tables are well under that, and live linker memory guarantees + // NUL termination, so memchr will always find a terminator before the cap. + if (strsz == 0) { + Log::warn("DT_STRSZ absent from dynamic section in %s; capping string-table scan to 1 MB", + _file_name != NULL ? _file_name : "unknown"); + strsz = 1u << 20; + } + if (!_cc->hasDebugSymbols() && nsyms > 0) { - loadSymbolTable(symtab, syment * nsyms, syment, strtab); + loadSymbolTable(symtab, syment * nsyms, syment, strtab, strsz); } const char* base = this->base(); @@ -569,7 +671,10 @@ void ElfParser::parseDynamicSection() { ElfRelocation* r = (ElfRelocation*)(jmprel + offs); ElfSymbol* sym = (ElfSymbol*)(symtab + ELF_R_SYM(r->r_info) * syment); if (sym->st_name != 0) { - _cc->addImport((void**)(base + r->r_offset), strtab + sym->st_name); + const char* sym_name = strAt(strtab, strsz, sym->st_name); + if (sym_name != NULL) { + _cc->addImport((void**)(base + r->r_offset), sym_name); + } } } } @@ -583,7 +688,10 @@ void ElfParser::parseDynamicSection() { if (ELF_R_TYPE(r->r_info) == R_GLOB_DAT || ELF_R_TYPE(r->r_info) == R_ABS64) { ElfSymbol* sym = (ElfSymbol*)(symtab + ELF_R_SYM(r->r_info) * syment); if (sym->st_name != 0) { - _cc->addImport((void**)(base + r->r_offset), strtab + sym->st_name); + const char* sym_name = strAt(strtab, strsz, sym->st_name); + if (sym_name != NULL) { + _cc->addImport((void**)(base + r->r_offset), sym_name); + } } } } @@ -603,14 +711,11 @@ void ElfParser::parseDwarfInfo() { // Compute image_end from the highest end address of all LOAD segments so // the DWARF parser can validate FDE pointers against mapped memory. 
const char* image_end = _base; - { - const char* pheaders = (const char*)_header + _header->e_phoff; - for (int i = 0; i < _header->e_phnum; i++) { - ElfProgramHeader* ph = (ElfProgramHeader*)(pheaders + i * _header->e_phentsize); - if (ph->p_type == PT_LOAD) { - const char* seg_end = at(ph) + ph->p_memsz; - if (seg_end > image_end) image_end = seg_end; - } + for (int i = 0; i < _header->e_phnum; i++) { + ElfProgramHeader* ph = phdrAt(i); + if (ph != NULL && ph->p_type == PT_LOAD) { + const char* seg_end = at(ph) + ph->p_memsz; + if (seg_end > image_end) image_end = seg_end; } } DwarfParser dwarf(_cc->name(), _base, at(eh_frame_hdr), eh_frame_hdr->p_memsz, @@ -643,10 +748,16 @@ uint32_t ElfParser::getSymbolCount(uint32_t* gnu_hash) { void ElfParser::loadSymbols(bool use_debug) { ElfSection* symtab = findSection(SHT_SYMTAB, ".symtab"); if (symtab != NULL) { - // Parse debug symbols from the original .so - ElfSection* strtab = section(symtab->sh_link); - loadSymbolTable(at(symtab), symtab->sh_size, symtab->sh_entsize, at(strtab)); - _cc->setDebugSymbols(true); + // Parse debug symbols from the original .so. The symbol table and its + // linked string table are file-offset-relative, so every range is + // validated against the mapped image before it is read. + ElfSection* strtab = sectionAt(symtab->sh_link); + const char* symbols = contentAt(symtab, symtab->sh_size); + const char* strings = strtab != NULL ? contentAt(strtab, strtab->sh_size) : NULL; + if (symbols != NULL && strings != NULL) { + loadSymbolTable(symbols, symtab->sh_size, symtab->sh_entsize, strings, strtab->sh_size); + _cc->setDebugSymbols(true); + } } else if (use_debug) { // Try to load symbols from an external debuginfo library loadSymbolsUsingBuildId() || loadSymbolsUsingDebugLink(); @@ -733,12 +844,23 @@ bool ElfParser::loadSymbolsUsingBuildId() { return false; } - ElfNote* note = (ElfNote*)at(section); + // The whole note section must be mapped before reading the note header. 
+ const char* note_base = contentAt(section, section->sh_size); + if (note_base == NULL || section->sh_size < sizeof(ElfNote)) { + return false; + } + ElfNote* note = (ElfNote*)note_base; if (note->n_namesz != 4 || note->n_descsz < 2 || note->n_descsz > 64) { return false; } - const char* build_id = (const char*)note + sizeof(*note) + 4; + // The descriptor (build-id bytes) follows the header and a 4-byte aligned + // "GNU\0" name; ensure it lies inside the note section. + size_t desc_off = sizeof(ElfNote) + 4; + if (desc_off + note->n_descsz > section->sh_size) { + return false; + } + const char* build_id = note_base + desc_off; int build_id_len = note->n_descsz; return loadSymbolsFromDebug(build_id, build_id_len) @@ -752,6 +874,13 @@ bool ElfParser::loadSymbolsUsingDebugLink() { return false; } + // The debuglink is a NUL-terminated filename at the start of the section; + // validate it is mapped and terminated before it feeds strcmp()/snprintf(). + const char* debuglink = contentAt(section, section->sh_size); + if (debuglink == NULL || memchr(debuglink, '\0', section->sh_size) == NULL) { + return false; + } + const char* basename = strrchr(_file_name, '/'); if (basename == NULL) { return false; @@ -762,7 +891,6 @@ bool ElfParser::loadSymbolsUsingDebugLink() { return false; } - const char* debuglink = at(section); char path[PATH_MAX]; bool result = false; @@ -786,13 +914,29 @@ bool ElfParser::loadSymbolsUsingDebugLink() { return result; } -void ElfParser::loadSymbolTable(const char* symbols, size_t total_size, size_t ent_size, const char* strings) { +void ElfParser::loadSymbolTable(const char* symbols, size_t total_size, size_t ent_size, const char* strings, size_t strings_size) { + // A stride smaller than one symbol entry would never advance past (or would + // re-read) an entry; reject it to avoid an infinite loop / over-read. 
+ if (ent_size < sizeof(ElfSymbol)) { + return; + } const char* base = this->base(); - for (const char* symbols_end = symbols + total_size; symbols < symbols_end; symbols += ent_size) { - ElfSymbol* sym = (ElfSymbol*)symbols; + // Iterate by a size_t offset rather than incrementing the pointer: a huge + // attacker-controlled ent_size would otherwise overflow `symbols + ent_size` + // to a small pointer that still compares <= end, walking off the image. The + // `ent_size <= total_size - off` form keeps off <= total_size with no overflow. + for (size_t off = 0; ent_size <= total_size - off; off += ent_size) { + ElfSymbol* sym = (ElfSymbol*)(symbols + off); if (sym->st_name != 0 && sym->st_value != 0) { + // Resolve the name through the bounded string table; a bad st_name + // offset (or unterminated string) drops the symbol instead of reading + // out of bounds. + const char* sym_name = strAt(strings, strings_size, sym->st_name); + if (sym_name == NULL) { + continue; + } // Skip special AArch64 mapping symbols: $x and $d - if (sym->st_size != 0 || sym->st_info != 0 || strings[sym->st_name] != '$') { + if (sym->st_size != 0 || sym->st_info != 0 || sym_name[0] != '$') { const char* addr; if (base != NULL) { // Check for overflow when adding sym->st_value to base @@ -814,36 +958,65 @@ void ElfParser::loadSymbolTable(const char* symbols, size_t total_size, size_t e } else { addr = (const char*)sym->st_value; } - _cc->add(addr, (int)sym->st_size, strings + sym->st_name); + _cc->add(addr, (int)sym->st_size, sym_name); } } } } void ElfParser::addRelocationSymbols(ElfSection* reltab, const char* plt) { - ElfSection* symtab = section(reltab->sh_link); - const char* symbols = at(symtab); + // Resolve and bounds-check the linked symbol and string tables. Any missing + // or out-of-image section aborts relocation naming rather than reading wild + // pointers built from attacker-controlled sh_link / r_info / sh_entsize. 
+ ElfSection* symtab = sectionAt(reltab->sh_link); + ElfSection* strtab = symtab != NULL ? sectionAt(symtab->sh_link) : NULL; + if (symtab == NULL || strtab == NULL) { + return; + } + size_t sym_region = symtab->sh_size; + size_t strings_size = strtab->sh_size; + size_t sym_ent = symtab->sh_entsize; + size_t rel_ent = reltab->sh_entsize; + const char* symbols = contentAt(symtab, sym_region); + const char* strings = contentAt(strtab, strings_size); + size_t reltab_size = reltab->sh_size; + const char* relocations = contentAt(reltab, reltab_size); + if (symbols == NULL || strings == NULL || relocations == NULL + || rel_ent < sizeof(ElfRelocation) + || sym_ent < sizeof(ElfSymbol) + || sym_region < sizeof(ElfSymbol)) { + return; + } - ElfSection* strtab = section(symtab->sh_link); - const char* strings = at(strtab); + // Largest symbol index whose full ElfSymbol still fits in the table. Written + // as a division so the index * sym_ent product can never overflow. + size_t max_sym_index = (sym_region - sizeof(ElfSymbol)) / sym_ent; - const char* relocations = at(reltab); - const char* relocations_end = relocations + reltab->sh_size; - for (; relocations < relocations_end; relocations += reltab->sh_entsize) { - ElfRelocation* r = (ElfRelocation*)relocations; - ElfSymbol* sym = (ElfSymbol*)(symbols + ELF_R_SYM(r->r_info) * symtab->sh_entsize); + // Offset-based iteration (see loadSymbolTable) so a huge rel_ent cannot + // overflow the relocation pointer past the section end. 
+ for (size_t off = 0; rel_ent <= reltab_size - off; off += rel_ent, plt += PLT_ENTRY_SIZE) { + ElfRelocation* r = (ElfRelocation*)(relocations + off); + if (ELF_R_SYM(r->r_info) > max_sym_index) { + continue; + } + ElfSymbol* sym = (ElfSymbol*)(symbols + (size_t)ELF_R_SYM(r->r_info) * sym_ent); char name[256]; if (sym->st_name == 0) { strcpy(name, "@plt"); } else { - const char* sym_name = strings + sym->st_name; - snprintf(name, sizeof(name), "%s%cplt", sym_name, sym_name[0] == '_' && sym_name[1] == 'Z' ? '.' : '@'); + const char* sym_name = strAt(strings, strings_size, sym->st_name); + if (sym_name == NULL) { + continue; // plt advances via the for-increment + } + // sym_name is NUL-terminated within the string table, so sym_name[1] + // is safe to read (it is at worst the terminator). + char sep = sym_name[0] == '_' && sym_name[1] == 'Z' ? '.' : '@'; + snprintf(name, sizeof(name), "%s%cplt", sym_name, sep); name[sizeof(name) - 1] = 0; } _cc->add(plt, PLT_ENTRY_SIZE, name); - plt += PLT_ENTRY_SIZE; } } diff --git a/ddprof-lib/src/test/cpp/dwarf_ut.cpp b/ddprof-lib/src/test/cpp/dwarf_ut.cpp index 421c0191f..e45983696 100644 --- a/ddprof-lib/src/test/cpp/dwarf_ut.cpp +++ b/ddprof-lib/src/test/cpp/dwarf_ut.cpp @@ -190,6 +190,47 @@ TEST(DwarfEhFrame, FdeAugDataOverrun) { delete dwarf; } +// CIE + FDE whose body ends at exactly the last byte of the section (no +// terminator appended). Verifies that _image_end-bounded reads are not +// spuriously rejected when the FDE occupies the full section. +TEST(DwarfEhFrame, FdeAtExactImageBoundary) { + std::vector buf; + appendCie(buf); // 15 bytes + appendFde(buf, 0, 256); // 17 bytes; FDE ends at offset 32 == image_end + ASSERT_EQ(buf.size(), static_cast(32)); + DwarfParser* dwarf = parseBuf(buf); + EXPECT_EQ(dwarf->count(), 2); // normal result; boundary must not be spuriously rejected + free(dwarf->table()); + delete dwarf; +} + +// An FDE where fde_len makes fde_end > _image_end. 
+// parseFde()'s `fde_end > _image_end` guard must reject it without reading past +// the buffer. Uses the .eh_frame_hdr constructor path (parse → parseFde). +// +// Buffer layout (24 bytes): +// [0-3] .eh_frame_hdr header (version + 3 encoding bytes) +// [4-7] eh_frame_ptr = 0 +// [8-11] fde_count = 1 +// [12-15] table[0].initial_loc = 0 +// [16-19] table[0].fde_ptr = 20 (offset from hdr start to fde_len field below) +// [20-23] fde_len = 100 (fde_end = hdr+24+100 = hdr+124 > image_end=hdr+24) +TEST(DwarfEhFrameHdr, FdeExceedsImageEnd) { + std::vector hdr(24, 0); + hdr[0] = 1; // version + hdr[1] = 0x03; // eh_frame_ptr_enc = DW_EH_PE_udata4 + hdr[2] = 0x03; // fde_count_enc = DW_EH_PE_udata4 + hdr[3] = 0x33; // table_enc = DW_EH_PE_datarel | DW_EH_PE_udata4 + hdr[8] = 1; // fde_count = 1 + hdr[16] = 20; // table[0].fde_ptr: points to the fde_len field below + hdr[20] = 100; // fde_len = 100 → fde_end = hdr+124 > image_end = hdr+24 + + const char* base = reinterpret_cast(hdr.data()); + DwarfParser dwarf("test", base, base, hdr.size(), DwarfParser::EhFrameHdrTag{}, base + hdr.size()); + EXPECT_EQ(dwarf.count(), 0); // rejected: fde_end > image_end, no crash + free(dwarf.table()); +} + // Regression test for the .eh_frame_hdr hardening (found by fuzz_dwarf). // A hostile .eh_frame_hdr can claim a large fde_count while providing no // binary-search table; pre-hardening, parse() walked `table[i*2]` off the end diff --git a/ddprof-lib/src/test/cpp/elfparser_ut.cpp b/ddprof-lib/src/test/cpp/elfparser_ut.cpp index 7fea05090..c0e7cf584 100644 --- a/ddprof-lib/src/test/cpp/elfparser_ut.cpp +++ b/ddprof-lib/src/test/cpp/elfparser_ut.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -362,6 +363,17 @@ INSTANTIATE_TEST_SUITE_P( ); #endif +// ===================================================================== +// Regression tests for the ELF parser hardening (found by the fuzz_elf +// harness). 
Each builds a minimal ELF whose single malformed field made the +// pre-hardening parser read out of bounds. On the hardened parser they must +// return cleanly: the global crash handler installed above turns any wild or +// out-of-bounds access back into a gtest failure, so a regression fails CI. +// +// The exact byte layouts were confirmed to crash the pre-fix parser +// (ASan SEGV / heap-buffer-overflow) and to pass after the fix. +// ===================================================================== + namespace { // Minimal valid ELF64 header; callers set the malformed field afterwards. @@ -379,10 +391,28 @@ Elf64_Ehdr validEhdr() { e.e_machine = EM_X86_64; e.e_version = EV_CURRENT; e.e_ehsize = sizeof(Elf64_Ehdr); - e.e_shstrndx = 1; + e.e_shstrndx = 1; // non-zero so validHeader() accepts the image return e; } +// Write the bytes to a unique temp file and run ElfParser::parseFile over it, +// mirroring how Symbols::parseLibraries() parses an on-disk library. +void parseElfBytes(const std::vector& bytes) { + char path[] = "/tmp/elf_regress_XXXXXX"; + int fd = mkstemp(path); + ASSERT_NE(fd, -1); + ssize_t written = write(fd, bytes.data(), bytes.size()); + close(fd); + if (written != (ssize_t)bytes.size()) { + unlink(path); + FAIL() << "short write to " << path; + return; + } + CodeCache cc("regress"); + ElfParser::parseFile(&cc, nullptr, path, /*use_debug=*/false); + unlink(path); +} + } // namespace // Regression test for the build-id parser hardening (found by fuzz_elf). @@ -413,4 +443,134 @@ TEST(ElfBuildId, noteOffsetOverflow) { delete[] buf; } +// e_shoff pointing far outside the image made findSection() dereference a wild +// section-header pointer (ElfParser::at). 16 TB is reliably unmapped. 
+TEST_F(ElfTest, sectionHeaderOffsetOutOfBounds) { + Elf64_Ehdr e = validEhdr(); + e.e_shoff = 0x100000000000ULL; // 16 TB past a 64-byte file + e.e_shentsize = sizeof(Elf64_Shdr); + e.e_shnum = 3; + e.e_shstrndx = 1; + std::vector bytes(reinterpret_cast(&e), + reinterpret_cast(&e) + sizeof(e)); + parseElfBytes(bytes); // must not crash +} + +// A .symtab whose sh_size claims 256 MB in a tiny file made loadSymbolTable() +// walk the symbol table off the end of the mapping. +TEST_F(ElfTest, symbolTableSizeOutOfBounds) { + const uint16_t NSEC = 4; + const uint64_t shoff = sizeof(Elf64_Ehdr); + const uint64_t shstr_off = shoff + NSEC * sizeof(Elf64_Shdr); + // Section-header string table: names at offsets 1, 9, 17. + const char shstrtab[] = "\0.symtab\0.strtab\0.shstrtab"; + const uint64_t sym_off = shstr_off + sizeof(shstrtab); + Elf64_Sym sym; + memset(&sym, 0, sizeof(sym)); + sym.st_name = 1; + sym.st_value = 0x1000; + const uint64_t str_off = sym_off + sizeof(sym); + const char strtab[] = "\0main"; + + Elf64_Ehdr e = validEhdr(); + e.e_shoff = shoff; + e.e_shentsize = sizeof(Elf64_Shdr); + e.e_shnum = NSEC; + e.e_shstrndx = 3; + + Elf64_Shdr sh[4]; + memset(sh, 0, sizeof(sh)); + sh[1].sh_name = 1; // ".symtab" + sh[1].sh_type = SHT_SYMTAB; + sh[1].sh_offset = sym_off; + sh[1].sh_size = 0x10000000; // 256 MB: far past the file + sh[1].sh_link = 2; + sh[1].sh_entsize = sizeof(Elf64_Sym); + sh[2].sh_name = 9; // ".strtab" + sh[2].sh_type = SHT_STRTAB; + sh[2].sh_offset = str_off; + sh[2].sh_size = sizeof(strtab); + sh[3].sh_name = 17; // ".shstrtab" + sh[3].sh_type = SHT_STRTAB; + sh[3].sh_offset = shstr_off; + sh[3].sh_size = sizeof(shstrtab); + + std::vector b; + auto app = [&](const void* p, size_t n) { + const char* c = static_cast(p); + b.insert(b.end(), c, c + n); + }; + app(&e, sizeof(e)); + app(sh, sizeof(sh)); + app(shstrtab, sizeof(shstrtab)); + app(&sym, sizeof(sym)); + app(strtab, sizeof(strtab)); + parseElfBytes(b); // must not crash +} + +// A large 
e_phoff causes phdrAt() to try forming a pointer past the image. +// The bounds check must reject it before any dereference. +TEST_F(ElfTest, programHeaderOffsetOutOfBounds) { + Elf64_Ehdr e = validEhdr(); + e.e_phoff = 0x100000000000ULL; // 16 TB: reliably unmapped + e.e_phentsize = sizeof(Elf64_Phdr); + e.e_phnum = 1; + std::vector bytes(reinterpret_cast(&e), + reinterpret_cast(&e) + sizeof(e)); + parseElfBytes(bytes); // must not crash +} + +// strAt() bounds check: a symbol whose st_name equals strtab_size (one past +// the end) must be skipped without reading out of bounds. +TEST_F(ElfTest, symbolNameOffsetOutOfBounds) { + const uint16_t NSEC = 4; + const uint64_t shoff = sizeof(Elf64_Ehdr); + const uint64_t shstr_off = shoff + NSEC * sizeof(Elf64_Shdr); + const char shstrtab[] = "\0.symtab\0.strtab\0.shstrtab"; + const uint64_t sym_off = shstr_off + sizeof(shstrtab); + Elf64_Sym sym; + memset(&sym, 0, sizeof(sym)); + sym.st_name = 6; // == sizeof(strtab) below: one past the end + sym.st_value = 0x1000; + sym.st_size = 4; + const uint64_t str_off = sym_off + sizeof(sym); + const char strtab[] = "\0main\0"; // 6 bytes; index 6 is out of bounds + + Elf64_Ehdr e = validEhdr(); + e.e_shoff = shoff; + e.e_shentsize = sizeof(Elf64_Shdr); + e.e_shnum = NSEC; + e.e_shstrndx = 3; + + Elf64_Shdr sh[4]; + memset(sh, 0, sizeof(sh)); + sh[1].sh_name = 1; // ".symtab" + sh[1].sh_type = SHT_SYMTAB; + sh[1].sh_offset = sym_off; + sh[1].sh_size = sizeof(sym); // exactly one entry, within image + sh[1].sh_link = 2; + sh[1].sh_entsize = sizeof(Elf64_Sym); + sh[2].sh_name = 9; // ".strtab" + sh[2].sh_type = SHT_STRTAB; + sh[2].sh_offset = str_off; + sh[2].sh_size = sizeof(strtab); + sh[3].sh_name = 17; // ".shstrtab" + sh[3].sh_type = SHT_STRTAB; + sh[3].sh_offset = shstr_off; + sh[3].sh_size = sizeof(shstrtab); + + std::vector b; + auto app = [&](const void* p, size_t n) { + const char* c = static_cast(p); + b.insert(b.end(), c, c + n); + }; + app(&e, sizeof(e)); + app(sh, 
sizeof(sh)); + app(shstrtab, sizeof(shstrtab)); + app(&sym, sizeof(sym)); + app(strtab, sizeof(strtab)); + // strtab ends at image_size: also exercises inImage() equality case. + parseElfBytes(b); // must not crash: strAt() rejects st_name == strtab_size +} + #endif //__linux__ diff --git a/ddprof-lib/src/test/fuzz/corpus/fuzz_elf/bug1_section_header_oob_1 b/ddprof-lib/src/test/fuzz/corpus/fuzz_elf/bug1_section_header_oob_1 new file mode 100644 index 000000000..44f43e459 Binary files /dev/null and b/ddprof-lib/src/test/fuzz/corpus/fuzz_elf/bug1_section_header_oob_1 differ diff --git a/ddprof-lib/src/test/fuzz/corpus/fuzz_elf/bug1_section_header_oob_2 b/ddprof-lib/src/test/fuzz/corpus/fuzz_elf/bug1_section_header_oob_2 new file mode 100644 index 000000000..6aaa68de4 Binary files /dev/null and b/ddprof-lib/src/test/fuzz/corpus/fuzz_elf/bug1_section_header_oob_2 differ diff --git a/ddprof-lib/src/test/fuzz/corpus/fuzz_elf/bug2_symtab_size_oob b/ddprof-lib/src/test/fuzz/corpus/fuzz_elf/bug2_symtab_size_oob new file mode 100644 index 000000000..db44f53f9 Binary files /dev/null and b/ddprof-lib/src/test/fuzz/corpus/fuzz_elf/bug2_symtab_size_oob differ diff --git a/ddprof-lib/src/test/fuzz/fuzz_elf.cpp b/ddprof-lib/src/test/fuzz/fuzz_elf.cpp new file mode 100644 index 000000000..92f920262 --- /dev/null +++ b/ddprof-lib/src/test/fuzz/fuzz_elf.cpp @@ -0,0 +1,133 @@ +/* + * Copyright 2026, Datadog, Inc. + * SPDX-License-Identifier: Apache-2.0 + * + * libFuzzer fuzz target for the Linux ELF parsing code in symbols_linux.cpp. + * + * The profiler parses ELF images for every shared library loaded into the JVM + * (see Symbols::parseLibraries). A corrupt or malicious .so therefore feeds + * fully attacker-controlled bytes into two distinct parsers, both exercised + * here from a single ELF-blob corpus: + * + * 1. 
SymbolsLinux::extractBuildIdFromMemory(base, size, &len)
+ *      An in-memory parser that walks the program-header table and PT_NOTE
+ *      segments to recover the GNU build-id. It guards its reads with manual
+ *      bounds checks such as `e_phoff + e_phnum * sizeof(Phdr) > elf_size`
+ *      and `p_offset + p_filesz > elf_size` — both u64 additions that can
+ *      wrap and defeat the check. Driven against a tight heap buffer so ASan
+ *      catches any over-read precisely.
+ *
+ *   2. ElfParser::parseFile (called directly via the local ElfParser declaration)
+ *      The core symbol/section/relocation loader: validHeader(), findSection()
+ *      (indexes the section header table by attacker-controlled e_shoff /
+ *      e_shentsize / e_shstrndx), loadSymbolTable() (iterates sh_size bytes in
+ *      sh_entsize strides, indexes the string table by st_name) and, with
+ *      use_debug, addRelocationSymbols(). parseFile() mmaps a real file, so the
+ *      harness materialises the input as a temp file to mirror production
+ *      exactly.
+ *
+ * Expected bug classes:
+ *   - Integer overflow in the build-id bounds checks -> heap over-read
+ *   - Out-of-bounds section/symbol/string-table reads from bad offsets
+ *   - Infinite loop / DoS from a zero sh_entsize stride
+ *   - Memory corruption in the malloc'd hex build-id string
+ *
+ * use_debug is enabled to reach the .plt/.rela.plt relocation path. The
+ * external debuginfo lookups (build-id / debuglink) it can also trigger are
+ * neutralised by clearing the relevant environment variables, so the fuzzer
+ * never fans out to unrelated files on disk.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "codeCache.h"
+#include "symbols_linux.h" // SymbolsLinux::extractBuildIdFromMemory
+
+// ElfParser is a translation-unit-local class in symbols_linux.cpp; forward-
+// declare its public static parseFile() so the harness can drive it directly,
+// mirroring how elfparser_ut.cpp reaches it (no production-side hook needed).
+class ElfParser {
+ public:
+  static bool parseFile(CodeCache* cc, const char* base, const char* file_name, bool use_debug);
+};
+
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+namespace {
+
+// Process-wide fixture, built once when the anonymous-namespace global below
+// is initialised. It clears the environment variables that feed the external
+// debuginfo lookups ElfParser::loadSymbols() makes for a use_debug image with
+// no .symtab (getDebuginfodCache() then yields nothing and the /usr/lib/debug
+// build-id path fails to open), keeping the fuzzer on in-image parsing.
+struct ElfFuzzSetup {
+  int fd = -1;
+  char path[64];
+
+  ElfFuzzSetup() {
+    const char *const lookup_env[] = {"DEBUGINFOD_CACHE_PATH", "XDG_CACHE_HOME", "HOME"};
+    for (const char *name : lookup_env) {
+      unsetenv(name);
+    }
+    // The parseFile() path reuses this one temp file, truncating and rewriting
+    // it per iteration instead of paying for mkstemp() every time.
+    strcpy(path, "/tmp/fuzz_elf_XXXXXX");
+    fd = mkstemp(path);
+  }
+
+  ~ElfFuzzSetup() {
+    if (fd == -1) {
+      return;
+    }
+    close(fd);
+    unlink(path);
+  }
+};
+
+ElfFuzzSetup g_setup;
+
+} // namespace
+#endif
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  // Cap at 4 MiB to bound memory/time; real shared libraries are well below it.
+  const size_t max_input = 4 * 1024 * 1024;
+  if (size == 0) {
+    return 0;
+  }
+  if (size > max_input) {
+    size = max_input;
+  }
+
+  // --- Parser 1: in-memory build-id extraction ---
+  // The bytes are duplicated into a malloc'd block of exactly `size` bytes so
+  // that an over-read is an immediate ASan heap-buffer-overflow, not a silent
+  // read into slack.
+  uint8_t *exact = static_cast<uint8_t *>(malloc(size));
+  if (exact != nullptr) {
+    memcpy(exact, data, size);
+    size_t hex_len = 0;
+    // The returned hex string is malloc'd (or NULL), hence the free().
+    free(SymbolsLinux::extractBuildIdFromMemory(exact, size, &hex_len));
+    free(exact);
+  }
+
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+  // --- Parser 2: symbol/section/relocation loader on the mmap'd temp file ---
+  const bool staged = g_setup.fd != -1 && ftruncate(g_setup.fd, 0) == 0 &&
+                      pwrite(g_setup.fd, data, size, 0) == (ssize_t)size;
+  if (staged) {
+    // With base==NULL symbol addresses stay raw st_value (never dereferenced)
+    // and calcVirtualLoadAddress() is a no-op, so every read is a file-offset
+    // access into the mmap'd image: the untrusted surface under test.
+    CodeCache cache("fuzz_elf");
+    ElfParser::parseFile(&cache, /*base=*/NULL, g_setup.path, /*use_debug=*/true);
+  }
+#endif
+
+  return 0;
+}