From cd20ee982e95313f5279085bde2eecf5e331c236 Mon Sep 17 00:00:00 2001 From: Breno Rodrigues Guimaraes Date: Thu, 16 Feb 2023 13:58:36 -0300 Subject: [PATCH] Add support for symbol name remapping --- src/patchelf.cc | 270 +++++++++++++++++++++++++++++++- src/patchelf.h | 78 ++++++++- tests/Makefile.am | 1 + tests/rename-dynamic-symbols.sh | 75 +++++++++ 4 files changed, 419 insertions(+), 5 deletions(-) create mode 100755 tests/rename-dynamic-symbols.sh diff --git a/src/patchelf.cc b/src/patchelf.cc index 2bb84eb7..be7f6d61 100644 --- a/src/patchelf.cc +++ b/src/patchelf.cc @@ -17,16 +17,18 @@ */ #include +#include #include #include #include +#include #include #include #include #include #include +#include #include -#include #include #include @@ -553,6 +555,27 @@ std::optional> ElfFile::tryF return {}; } +template +template +span ElfFile::getSectionSpan(const Elf_Shdr & shdr) const +{ + return span((T*)(fileContents->data() + rdi(shdr.sh_offset)), rdi(shdr.sh_size)/sizeof(T)); +} + +template +template +span ElfFile::getSectionSpan(const SectionName & sectionName) +{ + return getSectionSpan(findSectionHeader(sectionName)); +} + +template +template +span ElfFile::tryGetSectionSpan(const SectionName & sectionName) +{ + auto shdrOpt = tryFindSectionHeader(sectionName); + return shdrOpt ? 
getSectionSpan(*shdrOpt) : span(); +} template unsigned int ElfFile::getSectionIndex(const SectionName & sectionName) @@ -1861,6 +1884,221 @@ void ElfFile::addDebugTag() changed = true; } +static uint32_t gnuHash(std::string_view name) { + uint32_t h = 5381; + for (uint8_t c : name) + h = ((h << 5) + h) + c; + return h; +} + +template +auto ElfFile::GnuHashTable::parse(span sectionData) -> GnuHashTable +{ + auto hdr = (Header*)sectionData.begin(); + auto bloomFilters = span((BloomWord*)(hdr+1), hdr->maskwords); + auto buckets = span((uint32_t*)bloomFilters.end(), hdr->numBuckets); + auto table = span(buckets.end(), ((uint32_t*)sectionData.end()) - buckets.end()); + return GnuHashTable{*hdr, bloomFilters, buckets, table}; +} + +template +void ElfFile::rebuildGnuHashTable(const char* strTab, span dynsyms) +{ + auto sectionData = tryGetSectionSpan(".gnu.hash"); + if (!sectionData) + return; + + auto ght = GnuHashTable::parse(sectionData); + + // Only work with the last "m_table.size()" symbols from dynsyms which are the + // symbols that belong to the hash table + auto firstSymIdx = dynsyms.size() - ght.m_table.size(); + auto symsToInsert = span(dynsyms.begin() + firstSymIdx, dynsyms.end()); + + // Only use the range of symbol versions that will be changed + auto versyms = tryGetSectionSpan(".gnu.version"); + if (versyms) + versyms = span(versyms.begin() + firstSymIdx, versyms.end()); + + struct Entry + { + uint32_t hash, bucketIdx, originalPos; + }; + + std::vector entries; + entries.reserve(symsToInsert.size()); + + uint32_t pos = 0; // Track the original position of the symbol in the table + for (auto& sym : symsToInsert) + { + Entry e; + e.hash = gnuHash(strTab + rdi(sym.st_name)); + e.bucketIdx = e.hash % ght.m_hdr.numBuckets; + e.originalPos = pos++; + entries.push_back(e); + } + + // Sort the entries based on the buckets. 
This is a requirement for gnu hash table to work + std::sort(entries.begin(), entries.end(), [&] (auto& l, auto& r) { + return l.bucketIdx < r.bucketIdx; + }); + + // Create a map of old positions to new positions after sorting + std::vector old2new(entries.size()); + for (size_t i = 0; i < entries.size(); ++i) + old2new[entries[i].originalPos] = i; + + // Update the symbol table with the new order and + // all tables that refer to symbols through indexes in the symbol table + auto reorderSpan = [] (auto dst, auto& old2new) + { + std::vector tmp(dst.begin(), dst.end()); + for (size_t i = 0; i < tmp.size(); ++i) + dst[old2new[i]] = tmp[i]; + }; + + reorderSpan(symsToInsert, old2new); + if (versyms) + reorderSpan(versyms, old2new); + + auto fixRelocationTable = [&old2new, firstSymIdx, this] (auto& hdr) + { + auto rela = getSectionSpan(hdr); + for (auto& r : rela) + { + auto info = rdi(r.r_info); + auto oldSymIdx = rel_getSymId(info); + if (oldSymIdx >= firstSymIdx) + { + auto newSymIdx = old2new[oldSymIdx - firstSymIdx] + firstSymIdx; + if (newSymIdx != oldSymIdx) { + wri(r.r_info, rel_setSymId(info, newSymIdx)); + } + } + } + }; + + for (unsigned int i = 1; i < rdi(hdr()->e_shnum); ++i) + { + auto& shdr = shdrs.at(i); + auto shtype = rdi(shdr.sh_type); + if (shtype == SHT_REL) + fixRelocationTable.template operator()(shdr); + else if (shtype == SHT_RELA) + fixRelocationTable.template operator()(shdr); + } + + // Update bloom filters + std::fill(ght.m_bloomFilters.begin(), ght.m_bloomFilters.end(), 0); + for (size_t i = 0; i < entries.size(); ++i) + { + auto h = entries[i].hash; + size_t idx = (h / ElfWordSize) % ght.m_bloomFilters.size(); + auto val = rdi(ght.m_bloomFilters[idx]); + val |= uint64_t(1) << (h % ElfWordSize); + val |= uint64_t(1) << ((h >> ght.m_hdr.shift2) % ElfWordSize); + wri(ght.m_bloomFilters[idx], val); + } + + // Fill buckets + std::fill(ght.m_buckets.begin(), ght.m_buckets.end(), 0); + for (size_t i = 0; i < entries.size(); ++i) + { + auto 
symBucketIdx = entries[i].bucketIdx; + if (!ght.m_buckets[symBucketIdx]) + ght.m_buckets[symBucketIdx] = i + ght.m_hdr.symndx; + } + + // Fill hash table + for (size_t i = 0; i < entries.size(); ++i) + { + auto& n = entries[i]; + bool isLast = (i == entries.size() - 1) || (n.bucketIdx != entries[i+1].bucketIdx); + // Add hash with first bit indicating end of chain + ght.m_table[i] = isLast ? (n.hash | 1) : (n.hash & ~1); + } +} + +static uint32_t sysvHash(std::string_view name) { + uint32_t h = 0; + for (uint8_t c : name) + { + h = (h << 4) + c; + uint32_t g = h & 0xf0000000; + if (g != 0) + h ^= g >> 24; + h &= ~g; + } + return h; +} + +template +auto ElfFile::HashTable::parse(span sectionData) -> HashTable +{ + auto hdr = (Header*)sectionData.begin(); + auto buckets = span((uint32_t*)(hdr+1), hdr->numBuckets); + auto table = span(buckets.end(), ((uint32_t*)sectionData.end()) - buckets.end()); + return HashTable{*hdr, buckets, table}; +} + +template +void ElfFile::rebuildHashTable(const char* strTab, span dynsyms) +{ + auto sectionData = tryGetSectionSpan(".hash"); + if (!sectionData) + return; + + auto ht = HashTable::parse(sectionData); + + std::fill(ht.m_buckets.begin(), ht.m_buckets.end(), 0); + std::fill(ht.m_chain.begin(), ht.m_chain.end(), 0); + + auto symsToInsert = span(dynsyms.end() - ht.m_chain.size(), dynsyms.end()); + + for (auto& sym : symsToInsert) + { + auto name = strTab + rdi(sym.st_name); + uint32_t i = &sym - dynsyms.begin(); + uint32_t hash = sysvHash(name) % ht.m_buckets.size(); + ht.m_chain[i] = ht.m_buckets[hash]; + ht.m_buckets[hash] = i; + } +} + +template +void ElfFile::renameDynamicSymbols(const std::unordered_map& remap) +{ + auto dynsyms = getSectionSpan(".dynsym"); + auto strTab = getSectionSpan(".dynstr"); + + std::vector extraStrings; + extraStrings.reserve(remap.size() * 30); // Just an estimate + for (size_t i = 0; i < dynsyms.size(); i++) + { + auto& dynsym = dynsyms[i]; + std::string_view name = &strTab[rdi(dynsym.st_name)]; + 
auto it = remap.find(name); + if (it != remap.end()) + { + wri(dynsym.st_name, strTab.size() + extraStrings.size()); + auto& newName = it->second; + extraStrings.insert(extraStrings.end(), newName.data(), newName.data() + newName.size() + 1); + changed = true; + } + } + + if (changed) + { + auto& newSec = replaceSection(".dynstr", strTab.size() + extraStrings.size()); + std::copy(extraStrings.begin(), extraStrings.end(), newSec.begin() + strTab.size()); + + rebuildGnuHashTable(newSec.data(), dynsyms); + rebuildHashTable(newSec.data(), dynsyms); + } + + this->rewriteSections(); +} + template void ElfFile::clearSymbolVersions(const std::set & syms) { @@ -1904,12 +2142,15 @@ static bool removeRPath = false; static bool setRPath = false; static bool addRPath = false; static bool addDebugTag = false; +static bool renameDynamicSymbols = false; static bool printRPath = false; static std::string newRPath; static std::set neededLibsToRemove; static std::map neededLibsToReplace; static std::set neededLibsToAdd; static std::set symbolsToClearVersion; +static std::unordered_map symbolsToRename; +static std::unordered_set symbolsToRenameKeys; static bool printNeeded = false; static bool noDefaultLib = false; @@ -1959,6 +2200,9 @@ static void patchElf2(ElfFile && elfFile, const FileContents & fileContents, con if (addDebugTag) elfFile.addDebugTag(); + if (renameDynamicSymbols) + elfFile.renameDynamicSymbols(symbolsToRename); + if (elfFile.isChanged()){ writeFile(fileName, elfFile.fileContents); } else if (alwaysWrite) { @@ -1978,9 +2222,9 @@ static void patchElf() const std::string & outputFileName2 = outputFileName.empty() ? 
fileName : outputFileName; if (getElfType(fileContents).is32Bit) - patchElf2(ElfFile(fileContents), fileContents, outputFileName2); + patchElf2(ElfFile(fileContents), fileContents, outputFileName2); else - patchElf2(ElfFile(fileContents), fileContents, outputFileName2); + patchElf2(ElfFile(fileContents), fileContents, outputFileName2); } } @@ -2019,6 +2263,7 @@ void showHelp(const std::string & progName) [--no-sort]\t\tDo not sort program+section headers; useful for debugging patchelf.\n\ [--clear-symbol-version SYMBOL]\n\ [--add-debug-tag]\n\ + [--rename-dynamic-symbols NAME_MAP_FILE]\tRenames dynamic symbols. The name map file should contain two symbols (old_name new_name) per line\n\ [--output FILE]\n\ [--debug]\n\ [--version]\n\ @@ -2141,6 +2386,25 @@ int mainWrapped(int argc, char * * argv) else if (arg == "--add-debug-tag") { addDebugTag = true; } + else if (arg == "--rename-dynamic-symbols") { + renameDynamicSymbols = true; + if (++i == argc) error("missing argument"); + + std::ifstream infile(argv[i]); + if (!infile) error(fmt("Cannot open map file ", argv[i])); + + std::string from, to; + while (true) + { + if (!(infile >> from)) + break; + if (!(infile >> to)) + error("Odd number of symbols in map file"); + if (symbolsToRenameKeys.count(from)) + error(fmt("Symbol appears twice in the map file: ", from.c_str())); + symbolsToRename[*symbolsToRenameKeys.insert(from).first] = to; + } + } else if (arg == "--help" || arg == "-h" ) { showHelp(argv[0]); return 0; diff --git a/src/patchelf.h b/src/patchelf.h index f4eec6f2..a234986a 100644 --- a/src/patchelf.h +++ b/src/patchelf.h @@ -1,7 +1,22 @@ using FileContents = std::shared_ptr>; -#define ElfFileParams class Elf_Ehdr, class Elf_Phdr, class Elf_Shdr, class Elf_Addr, class Elf_Off, class Elf_Dyn, class Elf_Sym, class Elf_Verneed, class Elf_Versym -#define ElfFileParamNames Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Addr, Elf_Off, Elf_Dyn, Elf_Sym, Elf_Verneed, Elf_Versym +#define ElfFileParams class Elf_Ehdr, class 
Elf_Phdr, class Elf_Shdr, class Elf_Addr, class Elf_Off, class Elf_Dyn, class Elf_Sym, class Elf_Verneed, class Elf_Versym, class Elf_Rel, class Elf_Rela +#define ElfFileParamNames Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Addr, Elf_Off, Elf_Dyn, Elf_Sym, Elf_Verneed, Elf_Versym, Elf_Rel, Elf_Rela + +template +struct span +{ + span(T* d = {}, size_t l = {}) : data(d), len(l) {} + span(T* from, T* to) : data(from), len(to-from) {} + T& operator[](std::size_t i) { return data[i]; } + T* begin() { return data; } + T* end() { return data + len; } + auto size() { return len; } + explicit operator bool() { return size() > 0; } + + T* data; + size_t len; +}; template class ElfFile @@ -85,6 +100,10 @@ class ElfFile std::optional> tryFindSectionHeader(const SectionName & sectionName); + template span getSectionSpan(const Elf_Shdr & shdr) const; + template span getSectionSpan(const SectionName & sectionName); + template span tryGetSectionSpan(const SectionName & sectionName); + unsigned int getSectionIndex(const SectionName & sectionName); std::string & replaceSection(const SectionName & sectionName, @@ -137,6 +156,61 @@ class ElfFile void addDebugTag(); + void renameDynamicSymbols(const std::unordered_map&); + + static constexpr auto ElfWordSize = std::is_same_v ? 
64 : 32; + struct GnuHashTable + { + using BloomWord = std::conditional_t; + + static GnuHashTable parse(span gh); + + struct Header { + uint32_t numBuckets, symndx, maskwords, shift2; + } m_hdr; + span m_bloomFilters; + span m_buckets, m_table; + }; + + void rebuildGnuHashTable(const char* strTab, span dynsyms); + + struct HashTable + { + static HashTable parse(span gh); + + struct Header { + uint32_t numBuckets, nchain; + } m_hdr; + span m_buckets, m_chain; + }; + void rebuildHashTable(const char* strTab, span dynsyms); + + using Elf_Rel_Info = decltype(Elf_Rel::r_info); + + uint32_t rel_getSymId(const Elf_Rel_Info& info) const + { + if constexpr (std::is_same_v) + return ELF64_R_SYM(info); + else + return ELF32_R_SYM(info); + } + + Elf_Rel_Info rel_setSymId(Elf_Rel_Info info, uint32_t id) const + { + if constexpr (std::is_same_v) + { + constexpr Elf_Rel_Info idmask = (~Elf_Rel_Info()) << 32; + info = (info & ~idmask) | (Elf_Rel_Info(id) << 32); + } + else + { + constexpr Elf_Rel_Info idmask = (~Elf_Rel_Info()) << 8; + info = (info & ~idmask) | (Elf_Rel_Info(id) << 8); + } + return info; + } + + void clearSymbolVersions(const std::set & syms); private: diff --git a/tests/Makefile.am b/tests/Makefile.am index 219f238d..5ef11d6f 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -43,6 +43,7 @@ src_TESTS = \ replace-needed.sh \ replace-add-needed.sh \ add-debug-tag.sh \ + rename-dynamic-symbols.sh \ empty-note.sh build_TESTS = \ diff --git a/tests/rename-dynamic-symbols.sh b/tests/rename-dynamic-symbols.sh new file mode 100755 index 00000000..505ab259 --- /dev/null +++ b/tests/rename-dynamic-symbols.sh @@ -0,0 +1,75 @@ +#! 
/bin/sh -e +SCRATCH=scratch/$(basename $0 .sh) +PATCHELF=$(readlink -f "../src/patchelf") + +rm -rf ${SCRATCH} +mkdir -p ${SCRATCH} + +# Use the C++ standard library used by patchelf for our testing. +# It helps exercise a large number of symbols and versioning, and validates +# the feature in a more realistic scenario +full_lib_name=$(ldd ${PATCHELF} | awk '/ => / { print $3 }' | grep "c++") +lib_name="$(basename $full_lib_name)" +suffix="_special_suffix" + +cd ${SCRATCH} + +############################################################################### +# Test that all symbols in the dynamic symbol table will have the expected +# names after renaming. +# Also test that if we rename all symbols back, the symbols are as expected +############################################################################### + +function list_symbols { + nm -D $@ | awk '{ print $NF }' | sed '/^ *$/d' +} + +list_symbols $full_lib_name | cut -d@ -f1 | sort -u | awk "{printf \"%s %s${suffix}\n\",\$1,\$1}" > map +list_symbols $full_lib_name | cut -d@ -f1 | sort -u | awk "{printf \"%s${suffix} %s\n\",\$1,\$1}" > rmap + +${PATCHELF} --rename-dynamic-symbols map --output libmapped.so $full_lib_name +${PATCHELF} --rename-dynamic-symbols rmap --output libreversed.so libmapped.so + +list_symbols $full_lib_name | sort > orig_syms +list_symbols libmapped.so | sort > map_syms +list_symbols libreversed.so | sort > rev_syms + +diff orig_syms rev_syms > diff_orig_syms_rev_syms || exit 1 + +# Renamed symbols that match version numbers will be printed with version instead of them being omitted +# CXXABI10 is printed as CXXABI10 +# but CXXABI10_renamed is printed as CXXABI10_renamed@@CXXABI10 +# awk is used to remove these cases so that we can match the "mapped" symbols to original symbols +sed "s/${suffix}//" map_syms | awk -F @ '{ if ($1 == $2 || $1 == $3) { print $1; } else { print $0; }}' | sort > map_syms_r +diff orig_syms map_syms_r > diff_orig_syms_map_syms_r || exit 1 + 
+############################################################################### +# Check the relocation tables after renaming +############################################################################### + +function print_relocation_table { + readelf -W -r $1 | awk '{ printf "%s\n",$5 }' | cut -f1 -d@ +} + +print_relocation_table $full_lib_name > orig_rel +print_relocation_table libmapped.so > map_rel +print_relocation_table libreversed.so > rev_rel + +diff orig_rel rev_rel > diff_orig_rel_rev_rel || exit 1 +sed "s/${suffix}//" map_rel > map_rel_r +diff orig_rel map_rel_r > diff_orig_rel_map_rel_r || exit 1 + +############################################################################### +# Test that the hash table is correctly updated. +# For this to work, we need to rename symbols and actually use the library +# Here we: +# 1. Take all symbols defined by libstdc++.so used by patchelf +# 2. Rename all symbols in both libstdc++.so and patchelf +# 3. Run patchelf with the modified library +############################################################################### +list_symbols --defined-only $full_lib_name | cut -d@ -f1 | sort -u | awk "{printf \"%s %s${suffix}\n\",\$1,\$1}" > map + +cp ${PATCHELF} ${full_lib_name} . +${PATCHELF} --rename-dynamic-symbols map ./patchelf $lib_name + +env LD_BIND_NOW=1 LD_LIBRARY_PATH=. ./patchelf --version