diff --git a/patchelf.1 b/patchelf.1 index 3d46f1fb..6a8a94e1 100644 --- a/patchelf.1 +++ b/patchelf.1 @@ -123,6 +123,14 @@ Clears the executable flag of the GNU_STACK program header, or adds a new header .IP "--set-execstack" Sets the executable flag of the GNU_STACK program header, or adds a new header. +.IP "--rename-dynamic-symbols NAME_MAP_FILE" +Renames dynamic symbols. The name map file should contain lines +with the old and the new name separated by spaces like this: + +old_name new_name + +Symbol names must not contain the version specifiers that are also shown in the output of the nm -D command from binutils. So instead of write@GLIBC_2.2.5 the name is just write. + .IP "--output FILE" Set the output file name. If not specified, the input will be modified in place. diff --git a/src/patchelf.cc b/src/patchelf.cc index 126cada8..db18837b 100644 --- a/src/patchelf.cc +++ b/src/patchelf.cc @@ -17,17 +17,19 @@ */ #include +#include #include #include #include +#include #include #include #include #include #include #include +#include #include -#include #include #include @@ -599,6 +601,27 @@ std::optional> ElfFile return {}; } +template +template +span ElfFile::getSectionSpan(const Elf_Shdr & shdr) const +{ + return span((T*)(fileContents->data() + rdi(shdr.sh_offset)), rdi(shdr.sh_size)/sizeof(T)); +} + +template +template +span ElfFile::getSectionSpan(const SectionName & sectionName) +{ + return getSectionSpan(findSectionHeader(sectionName)); +} + +template +template +span ElfFile::tryGetSectionSpan(const SectionName & sectionName) +{ + auto shdrOpt = tryFindSectionHeader(sectionName); + return shdrOpt ? 
getSectionSpan(*shdrOpt) : span(); +} template unsigned int ElfFile::getSectionIndex(const SectionName & sectionName) const @@ -1910,6 +1933,220 @@ void ElfFile::addDebugTag() changed = true; } +static uint32_t gnuHash(std::string_view name) { + uint32_t h = 5381; + for (uint8_t c : name) + h = ((h << 5) + h) + c; + return h; +} + +template +auto ElfFile::parseGnuHashTable(span sectionData) -> GnuHashTable +{ + auto hdr = (typename GnuHashTable::Header*)sectionData.begin(); + auto bloomFilters = span((typename GnuHashTable::BloomWord*)(hdr+1), rdi(hdr->maskwords)); + auto buckets = span((uint32_t*)bloomFilters.end(), rdi(hdr->numBuckets)); + auto table = span(buckets.end(), ((uint32_t*)sectionData.end()) - buckets.end()); + return GnuHashTable{*hdr, bloomFilters, buckets, table}; +} + +template +void ElfFile::rebuildGnuHashTable(span strTab, span dynsyms) +{ + auto sectionData = tryGetSectionSpan(".gnu.hash"); + if (!sectionData) + return; + + auto ght = parseGnuHashTable(sectionData); + + // We can't trust the value of symndx when the hash table is empty + if (ght.m_table.size() == 0) + return; + + // The hash table includes only a subset of dynsyms + auto firstSymIdx = rdi(ght.m_hdr.symndx); + dynsyms = span(&dynsyms[firstSymIdx], dynsyms.end()); + + // Only use the range of symbol versions that will be changed + auto versyms = tryGetSectionSpan(".gnu.version"); + if (versyms) + versyms = span(&versyms[firstSymIdx], versyms.end()); + + struct Entry + { + uint32_t hash, bucketIdx, originalPos; + }; + + std::vector entries; + entries.reserve(dynsyms.size()); + + uint32_t pos = 0; // Track the original position of the symbol in the table + for (auto& sym : dynsyms) + { + Entry e; + e.hash = gnuHash(&strTab[rdi(sym.st_name)]); + e.bucketIdx = e.hash % ght.m_buckets.size(); + e.originalPos = pos++; + entries.push_back(e); + } + + // Sort the entries based on the buckets. 
This is a requirement for gnu hash table to work + std::sort(entries.begin(), entries.end(), [&] (auto& l, auto& r) { + return l.bucketIdx < r.bucketIdx; + }); + + // Create a map of old positions to new positions after sorting + std::vector old2new(entries.size()); + for (size_t i = 0; i < entries.size(); ++i) + old2new[entries[i].originalPos] = i; + + // Update the symbol table with the new order and + // all tables that refer to symbols through indexes in the symbol table + auto reorderSpan = [] (auto dst, auto& old2new) + { + std::vector tmp(dst.begin(), dst.end()); + for (size_t i = 0; i < tmp.size(); ++i) + dst[old2new[i]] = tmp[i]; + }; + + reorderSpan(dynsyms, old2new); + if (versyms) + reorderSpan(versyms, old2new); + + auto remapSymbolId = [&old2new, firstSymIdx] (auto& oldSymIdx) + { + return oldSymIdx >= firstSymIdx ? old2new[oldSymIdx - firstSymIdx] + firstSymIdx + : oldSymIdx; + }; + + for (unsigned int i = 1; i < rdi(hdr()->e_shnum); ++i) + { + auto& shdr = shdrs.at(i); + auto shtype = rdi(shdr.sh_type); + if (shtype == SHT_REL) + changeRelocTableSymIds(shdr, remapSymbolId); + else if (shtype == SHT_RELA) + changeRelocTableSymIds(shdr, remapSymbolId); + } + + // Update bloom filters + std::fill(ght.m_bloomFilters.begin(), ght.m_bloomFilters.end(), 0); + for (size_t i = 0; i < entries.size(); ++i) + { + auto h = entries[i].hash; + size_t idx = (h / ElfClass) % ght.m_bloomFilters.size(); + auto val = rdi(ght.m_bloomFilters[idx]); + val |= uint64_t(1) << (h % ElfClass); + val |= uint64_t(1) << ((h >> rdi(ght.m_hdr.shift2)) % ElfClass); + wri(ght.m_bloomFilters[idx], val); + } + + // Fill buckets + std::fill(ght.m_buckets.begin(), ght.m_buckets.end(), 0); + for (size_t i = 0; i < entries.size(); ++i) + { + auto symBucketIdx = entries[i].bucketIdx; + if (!ght.m_buckets[symBucketIdx]) + wri(ght.m_buckets[symBucketIdx], i + firstSymIdx); + } + + // Fill hash table + for (size_t i = 0; i < entries.size(); ++i) + { + auto& n = entries[i]; + bool isLast = (i 
== entries.size() - 1) || (n.bucketIdx != entries[i+1].bucketIdx); + // Add hash with first bit indicating end of chain + wri(ght.m_table[i], isLast ? (n.hash | 1) : (n.hash & ~1)); + } +} + +static uint32_t sysvHash(std::string_view name) { + uint32_t h = 0; + for (uint8_t c : name) + { + h = (h << 4) + c; + uint32_t g = h & 0xf0000000; + if (g != 0) + h ^= g >> 24; + h &= ~g; + } + return h; +} + +template +auto ElfFile::parseHashTable(span sectionData) -> HashTable +{ + auto hdr = (typename HashTable::Header*)sectionData.begin(); + auto buckets = span((uint32_t*)(hdr+1), rdi(hdr->numBuckets)); + auto table = span(buckets.end(), ((uint32_t*)sectionData.end()) - buckets.end()); + return HashTable{*hdr, buckets, table}; +} + +template +void ElfFile::rebuildHashTable(span strTab, span dynsyms) +{ + auto sectionData = tryGetSectionSpan(".hash"); + if (!sectionData) + return; + + auto ht = parseHashTable(sectionData); + + std::fill(ht.m_buckets.begin(), ht.m_buckets.end(), 0); + std::fill(ht.m_chain.begin(), ht.m_chain.end(), 0); + + // The hash table includes only a subset of dynsyms + auto firstSymIdx = dynsyms.size() - ht.m_chain.size(); + dynsyms = span(&dynsyms[firstSymIdx], dynsyms.end()); + + for (auto& sym : dynsyms) + { + auto name = &strTab[rdi(sym.st_name)]; + uint32_t i = &sym - dynsyms.begin(); + uint32_t hash = sysvHash(name) % ht.m_buckets.size(); + wri(ht.m_chain[i], rdi(ht.m_buckets[hash])); + wri(ht.m_buckets[hash], i); + } +} + +template +void ElfFile::renameDynamicSymbols(const std::unordered_map& remap) +{ + auto dynsyms = getSectionSpan(".dynsym"); + auto strTab = getSectionSpan(".dynstr"); + + std::vector extraStrings; + extraStrings.reserve(remap.size() * 30); // Just an estimate + for (auto& dynsym : dynsyms) + { + std::string_view name = &strTab[rdi(dynsym.st_name)]; + auto it = remap.find(name); + if (it != remap.end()) + { + wri(dynsym.st_name, strTab.size() + extraStrings.size()); + auto& newName = it->second; + debug("renaming dynamic 
symbol %s to %s\n", name.data(), it->second.c_str()); + extraStrings.insert(extraStrings.end(), newName.begin(), newName.end() + 1); + changed = true; + } else { + debug("skip renaming dynamic symbol %s\n", name.data()); + } + } + + if (changed) + { + auto newStrTabSize = strTab.size() + extraStrings.size(); + auto& newSec = replaceSection(".dynstr", newStrTabSize); + auto newStrTabSpan = span(newSec.data(), newStrTabSize); + + std::copy(extraStrings.begin(), extraStrings.end(), &newStrTabSpan[strTab.size()]); + + rebuildGnuHashTable(newStrTabSpan, dynsyms); + rebuildHashTable(newStrTabSpan, dynsyms); + } + + this->rewriteSections(); +} + template void ElfFile::clearSymbolVersions(const std::set & syms) { @@ -2032,12 +2269,15 @@ static bool removeRPath = false; static bool setRPath = false; static bool addRPath = false; static bool addDebugTag = false; +static bool renameDynamicSymbols = false; static bool printRPath = false; static std::string newRPath; static std::set neededLibsToRemove; static std::map neededLibsToReplace; static std::set neededLibsToAdd; static std::set symbolsToClearVersion; +static std::unordered_map symbolsToRename; +static std::unordered_set symbolsToRenameKeys; static bool printNeeded = false; static bool noDefaultLib = false; static bool printExecstack = false; @@ -2097,6 +2337,9 @@ static void patchElf2(ElfFile && elfFile, const FileContents & fileContents, con if (addDebugTag) elfFile.addDebugTag(); + if (renameDynamicSymbols) + elfFile.renameDynamicSymbols(symbolsToRename); + if (elfFile.isChanged()){ writeFile(fileName, elfFile.fileContents); } else if (alwaysWrite) { @@ -2116,9 +2359,9 @@ static void patchElf() const std::string & outputFileName2 = outputFileName.empty() ? 
fileName : outputFileName; if (getElfType(fileContents).is32Bit) - patchElf2(ElfFile(fileContents), fileContents, outputFileName2); + patchElf2(ElfFile(fileContents), fileContents, outputFileName2); else - patchElf2(ElfFile(fileContents), fileContents, outputFileName2); + patchElf2(ElfFile(fileContents), fileContents, outputFileName2); } } @@ -2160,6 +2403,7 @@ static void showHelp(const std::string & progName) [--print-execstack]\t\tPrints whether the object requests an executable stack\n\ [--clear-execstack]\n\ [--set-execstack]\n\ + [--rename-dynamic-symbols NAME_MAP_FILE]\tRenames dynamic symbols. The map file should contain two symbols (old_name new_name) per line\n\ [--output FILE]\n\ [--debug]\n\ [--version]\n\ @@ -2291,6 +2535,31 @@ static int mainWrapped(int argc, char * * argv) else if (arg == "--add-debug-tag") { addDebugTag = true; } + else if (arg == "--rename-dynamic-symbols") { + renameDynamicSymbols = true; + if (++i == argc) error("missing argument"); + + const char* fname = argv[i]; + std::ifstream infile(fname); + if (!infile) error(fmt("Cannot open map file ", fname)); + + std::string line, from, to; + size_t lineCount = 1; + while (std::getline(infile, line)) + { + std::istringstream iss(line); + if (!(iss >> from)) + break; + if (!(iss >> to)) + error(fmt(fname, ":", lineCount, ": Map file line is missing the second element")); + if (symbolsToRenameKeys.count(from)) + error(fmt(fname, ":", lineCount, ": Name '", from, "' appears twice in the map file")); + if (from.find('@') != std::string_view::npos || to.find('@') != std::string_view::npos) + error(fmt(fname, ":", lineCount, ": Name pair contains version tag: ", from, " ", to)); + lineCount++; + symbolsToRename[*symbolsToRenameKeys.insert(from).first] = to; + } + } else if (arg == "--help" || arg == "-h" ) { showHelp(argv[0]); return 0; diff --git a/src/patchelf.h b/src/patchelf.h index 677462d6..9fab18c0 100644 --- a/src/patchelf.h +++ b/src/patchelf.h @@ -10,8 +10,28 @@ using FileContents 
= std::shared_ptr>; -#define ElfFileParams class Elf_Ehdr, class Elf_Phdr, class Elf_Shdr, class Elf_Addr, class Elf_Off, class Elf_Dyn, class Elf_Sym, class Elf_Verneed, class Elf_Versym -#define ElfFileParamNames Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Addr, Elf_Off, Elf_Dyn, Elf_Sym, Elf_Verneed, Elf_Versym +#define ElfFileParams class Elf_Ehdr, class Elf_Phdr, class Elf_Shdr, class Elf_Addr, class Elf_Off, class Elf_Dyn, class Elf_Sym, class Elf_Verneed, class Elf_Versym, class Elf_Rel, class Elf_Rela, unsigned ElfClass +#define ElfFileParamNames Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Addr, Elf_Off, Elf_Dyn, Elf_Sym, Elf_Verneed, Elf_Versym, Elf_Rel, Elf_Rela, ElfClass + +template +struct span +{ + explicit span(T* d = {}, size_t l = {}) : data(d), len(l) {} + span(T* from, T* to) : data(from), len(to-from) { assert(from <= to); } + T& operator[](std::size_t i) { checkRange(i); return data[i]; } + T* begin() { return data; } + T* end() { return data + len; } + auto size() const { return len; } + explicit operator bool() const { return size() > 0; } + +private: + void checkRange(std::size_t i) { + if (i >= size()) throw std::out_of_range("error: Span access out of range."); + } + + T* data; + size_t len; +}; template class ElfFile @@ -91,6 +111,10 @@ class ElfFile [[nodiscard]] std::optional> tryFindSectionHeader(const SectionName & sectionName) const; + template span getSectionSpan(const Elf_Shdr & shdr) const; + template span getSectionSpan(const SectionName & sectionName); + template span tryGetSectionSpan(const SectionName & sectionName); + [[nodiscard]] unsigned int getSectionIndex(const SectionName & sectionName) const; std::string & replaceSection(const SectionName & sectionName, @@ -143,6 +167,8 @@ class ElfFile void addDebugTag(); + void renameDynamicSymbols(const std::unordered_map&); + void clearSymbolVersions(const std::set & syms); enum class ExecstackMode { print, set, clear }; @@ -150,6 +176,66 @@ class ElfFile void modifyExecstack(ExecstackMode op); 
private: + struct GnuHashTable { + using BloomWord = Elf_Addr; + struct Header { + uint32_t numBuckets, symndx, maskwords, shift2; + } m_hdr; + span m_bloomFilters; + span m_buckets, m_table; + }; + GnuHashTable parseGnuHashTable(span gh); + + struct HashTable { + struct Header { + uint32_t numBuckets, nchain; + } m_hdr; + span m_buckets, m_chain; + }; + HashTable parseHashTable(span gh); + + void rebuildGnuHashTable(span strTab, span dynsyms); + void rebuildHashTable(span strTab, span dynsyms); + + using Elf_Rel_Info = decltype(Elf_Rel::r_info); + + uint32_t rel_getSymId(const Elf_Rel_Info& info) const + { + if constexpr (std::is_same_v) + return ELF64_R_SYM(info); + else + return ELF32_R_SYM(info); + } + + Elf_Rel_Info rel_setSymId(Elf_Rel_Info info, uint32_t id) const + { + if constexpr (std::is_same_v) + { + constexpr Elf_Rel_Info idmask = (~Elf_Rel_Info()) << 32; + info = (info & ~idmask) | (Elf_Rel_Info(id) << 32); + } + else + { + constexpr Elf_Rel_Info idmask = (~Elf_Rel_Info()) << 8; + info = (info & ~idmask) | (Elf_Rel_Info(id) << 8); + } + return info; + } + + template + void changeRelocTableSymIds(const Elf_Shdr& shdr, RemapFn&& old2newSymId) + { + static_assert(std::is_same_v || std::is_same_v); + + for (auto& r : getSectionSpan(shdr)) + { + auto info = rdi(r.r_info); + auto oldSymIdx = rel_getSymId(info); + auto newSymIdx = old2newSymId(oldSymIdx); + if (newSymIdx != oldSymIdx) + wri(r.r_info, rel_setSymId(info, newSymIdx)); + } + } /* Convert an integer in big or little endian representation (as specified by the ELF header) to this platform's integer diff --git a/tests/Makefile.am b/tests/Makefile.am index 9d366456..4a08c145 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -45,7 +45,9 @@ src_TESTS = \ add-debug-tag.sh \ empty-note.sh \ print-execstack.sh \ - modify-execstack.sh + modify-execstack.sh \ + rename-dynamic-symbols.sh \ + empty-note.sh build_TESTS = \ $(no_rpath_arch_TESTS) @@ -116,7 +118,7 @@ check_DATA = libbig-dynstr.debug # - 
with libtool, it is difficult to control options # - with libtool, it is not possible to compile convenience *dynamic* libraries :-( check_PROGRAMS += libfoo.so libfoo-scoped.so libbar.so libbar-scoped.so libsimple.so libsimple-execstack.so libbuildid.so libtoomanystrtab.so \ - phdr-corruption.so + phdr-corruption.so many-syms-main libmany-syms.so libbuildid_so_SOURCES = simple.c libbuildid_so_LDFLAGS = $(LDFLAGS_sharedlib) -Wl,--build-id @@ -147,6 +149,14 @@ too_many_strtab_SOURCES = too-many-strtab.c too-many-strtab2.s libtoomanystrtab_so_SOURCES = too-many-strtab.c too-many-strtab2.s libtoomanystrtab_so_LDFLAGS = $(LDFLAGS_sharedlib) +many_syms_main_SOURCES = many-syms-main.c +many_syms_main_LDFLAGS = $(LDFLAGS_local) +many_syms_main_LDADD = -lmany-syms $(AM_LDADD) +many_syms_main_DEPENDENCIES = libmany-syms.so +many_syms_main_CFLAGS = -pie +libmany_syms_so_SOURCES = many-syms.c +libmany_syms_so_LDFLAGS = $(LDFLAGS_sharedlib) + no_rpath_SOURCES = no-rpath.c # no -fpic for no-rpath.o no_rpath_CFLAGS = @@ -158,3 +168,12 @@ contiguous_note_sections_CFLAGS = -pie phdr_corruption_so_SOURCES = void.c phdr-corruption.ld phdr_corruption_so_LDFLAGS = -nostdlib -shared -Wl,-T$(srcdir)/phdr-corruption.ld phdr_corruption_so_CFLAGS = + +many-syms.c: + i=1; while [ $$i -le 2000 ]; do echo "void f$$i() {};"; i=$$(($$i + 1)); done > $@ + +many-syms-main.c: + echo "int main() {" > $@ + i=1; while [ $$i -le 2000 ]; do echo "void f$$i(); f$$i();"; i=$$(($$i + 1)); done >> $@ + echo "}" >> $@ + diff --git a/tests/rename-dynamic-symbols.sh b/tests/rename-dynamic-symbols.sh new file mode 100755 index 00000000..6fabb334 --- /dev/null +++ b/tests/rename-dynamic-symbols.sh @@ -0,0 +1,84 @@ +#!/bin/sh -e +SCRATCH=scratch/$(basename $0 .sh) +PATCHELF=$(readlink -f "../src/patchelf") + +rm -rf ${SCRATCH} +mkdir -p ${SCRATCH} + +full_main_name="${PWD}/many-syms-main" +full_lib_name="${PWD}/libmany-syms.so" +chmod -w $full_lib_name $full_main_name + +suffix="_special_suffix" + +cd ${SCRATCH} 
+ +############################################################################### +# Test that all symbols in the dynamic symbol table will have the expected +# names after renaming. +# Also test that if we rename all symbols back, the symbols are as expected +############################################################################### + +list_symbols() { + nm -D $@ | awk '{ print $NF }' | sed '/^ *$/d' +} + +list_symbols $full_lib_name | cut -d@ -f1 | sort -u | awk "{printf \"%s %s${suffix}\n\",\$1,\$1}" > map +list_symbols $full_lib_name | cut -d@ -f1 | sort -u | awk "{printf \"%s${suffix} %s\n\",\$1,\$1}" > rmap + +${PATCHELF} --rename-dynamic-symbols map --output libmapped.so $full_lib_name +${PATCHELF} --rename-dynamic-symbols rmap --output libreversed.so libmapped.so + +list_symbols $full_lib_name | sort > orig_syms +list_symbols libmapped.so | sort > map_syms +list_symbols libreversed.so | sort > rev_syms + +diff orig_syms rev_syms > diff_orig_syms_rev_syms || exit 1 + +# Renamed symbols that match version numbers will be printed with the version instead of it being omitted +# CXXABI10 is printed as CXXABI10 +# but CXXABI10_renamed is printed as CXXABI10_renamed@@CXXABI10 +# awk is used to remove these cases so that we can match the "mapped" symbols to original symbols +sed "s/${suffix}//" map_syms | awk -F @ '{ if ($1 == $2 || $1 == $3) { print $1; } else { print $0; }}' | sort > map_syms_r +diff orig_syms map_syms_r > diff_orig_syms_map_syms_r || exit 1 + +############################################################################### +# Check the relocation tables after renaming +############################################################################### + +print_relocation_table() { + readelf -W -r $1 | awk '{ printf "%s\n",$5 }' | cut -f1 -d@ +} + +print_relocation_table $full_lib_name > orig_rel +print_relocation_table libmapped.so > map_rel +print_relocation_table libreversed.so > rev_rel + +diff orig_rel rev_rel > diff_orig_rel_rev_rel || exit 
1 +sed "s/${suffix}//" map_rel > map_rel_r +diff orig_rel map_rel_r > diff_orig_rel_map_rel_r || exit 1 + +############################################################################### +# Test that the hash table is correctly updated. +# For this to work, we need to rename symbols and actually use the library +# Here we: +# 1. Create a map from all symbols in libstdc++.so as "sym sym_special_suffix" +# 2. Copy Patchelf and all of its transitive library dependencies into a new directory +# 3. Rename symbols in Patchelf and all dependencies according to the map +# 4. Run patchelf with the modified dependencies +############################################################################### + +echo "# Create the map" +list_symbols --defined-only $full_lib_name | cut -d@ -f1 | sort -u | awk "{printf \"%s %s${suffix}\n\",\$1,\$1}" > map + +echo "# Copy all dependencies" +mkdir env +cd env +cp $full_lib_name $full_main_name . + +echo "# Apply renaming" +chmod +w * +${PATCHELF} --rename-dynamic-symbols ../map * + +echo "# Run the patched tool and libraries" +env LD_BIND_NOW=1 LD_LIBRARY_PATH=${PWD} ./many-syms-main