From c3cf1015fa65db38aaa08045725b3dbcc707fb28 Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Thu, 10 Oct 2019 17:10:11 +0000 Subject: [PATCH] Add GsymCreator and GsymReader. This patch adds the ability to create GSYM files with GsymCreator, and read them with GsymReader. Full testing has been added for both new classes. This patch differs from the original patch https://reviews.llvm.org/D53379 in that is uses a StringTableBuilder class from llvm instead of a custom version. Support for big and little endian files has been added. If the endianness matches the current host, we use efficient extraction for the header, address table and address info offset tables. Differential Revision: https://reviews.llvm.org/D68744 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@374381 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/DebugInfo/GSYM/FileWriter.h | 4 + include/llvm/DebugInfo/GSYM/GsymCreator.h | 229 ++++++++++++++++++ include/llvm/DebugInfo/GSYM/GsymReader.h | 228 ++++++++++++++++++ include/llvm/DebugInfo/GSYM/Header.h | 23 +- lib/DebugInfo/GSYM/CMakeLists.txt | 5 + lib/DebugInfo/GSYM/FunctionInfo.cpp | 2 +- lib/DebugInfo/GSYM/GsymCreator.cpp | 274 ++++++++++++++++++++++ lib/DebugInfo/GSYM/GsymReader.cpp | 274 ++++++++++++++++++++++ lib/DebugInfo/GSYM/Header.cpp | 24 +- unittests/DebugInfo/GSYM/CMakeLists.txt | 1 + unittests/DebugInfo/GSYM/GSYMTest.cpp | 254 ++++++++++++++++++++ 11 files changed, 1295 insertions(+), 23 deletions(-) create mode 100644 include/llvm/DebugInfo/GSYM/GsymCreator.h create mode 100644 include/llvm/DebugInfo/GSYM/GsymReader.h create mode 100644 lib/DebugInfo/GSYM/GsymCreator.cpp create mode 100644 lib/DebugInfo/GSYM/GsymReader.cpp diff --git a/include/llvm/DebugInfo/GSYM/FileWriter.h b/include/llvm/DebugInfo/GSYM/FileWriter.h index e102de5de61..cd568765a4f 100644 --- a/include/llvm/DebugInfo/GSYM/FileWriter.h +++ b/include/llvm/DebugInfo/GSYM/FileWriter.h @@ -109,6 +109,10 @@ public: /// file position. 
uint64_t tell(); + /// Get a reference to the underlying output stream (the OS member) so that + /// callers can write to it directly. + llvm::raw_pwrite_stream &get_stream() { + return OS; + } + private: FileWriter(const FileWriter &rhs) = delete; void operator=(const FileWriter &rhs) = delete; diff --git a/include/llvm/DebugInfo/GSYM/GsymCreator.h b/include/llvm/DebugInfo/GSYM/GsymCreator.h new file mode 100644 index 00000000000..2449aba034a --- /dev/null +++ b/include/llvm/DebugInfo/GSYM/GsymCreator.h @@ -0,0 +1,229 @@ +//===- GsymCreator.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H +#define LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H + +#include +#include +#include +#include +#include + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/GSYM/FileEntry.h" +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/Range.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Path.h" + +namespace llvm { + +namespace gsym { +class FileWriter; + +/// GsymCreator is used to emit GSYM data to a stand alone file or section +/// within a file. +/// +/// The GsymCreator is designed to be used in 3 stages: +/// - Create FunctionInfo objects and add them +/// - Finalize the GsymCreator object +/// - Save to file or section +/// +/// The first stage involves creating FunctionInfo objects from another source +/// of information like compiler debug info metadata, DWARF or Breakpad files. +/// Any strings in the FunctionInfo or contained information, like InlineInfo +/// or LineTable objects, should get the string table offsets by calling +/// GsymCreator::insertString(...). 
Any file indexes that are needed should be +/// obtained by calling GsymCreator::insertFile(...). All of the function calls +/// in GsymCreator are thread safe. This allows multiple threads to create and +/// add FunctionInfo objects while parsing debug information. +/// +/// Once all of the FunctionInfo objects have been added, the +/// GsymCreator::finalize(...) must be called prior to saving. This function +/// will sort the FunctionInfo objects, finalize the string table, and do any +/// other passes on the information needed to prepare the information to be +/// saved. +/// +/// Once the object has been finalized, it can be saved to a file or section. +/// +/// ENCODING +/// +/// GSYM files are designed to be memory mapped into a process as shared, read +/// only data, and used as is. +/// +/// The GSYM file format when in a stand alone file consists of: +/// - Header +/// - Address Table +/// - Function Info Offsets +/// - File Table +/// - String Table +/// - Function Info Data +/// +/// HEADER +/// +/// The header is fully described in "llvm/DebugInfo/GSYM/Header.h". +/// +/// ADDRESS TABLE +/// +/// The address table immediately follows the header in the file and consists +/// of Header.NumAddresses address offsets. These offsets are sorted and can be +/// binary searched for efficient lookups. Addresses in the address table are +/// stored as offsets from a 64 bit base address found in Header.BaseAddress. +/// This allows the address table to contain 8, 16, or 32 bit offsets. This allows +/// the address table to not require full 64 bit addresses for each address. +/// The resulting GSYM size is smaller and causes fewer pages to be touched +/// during address lookups when the address table is smaller. The size of the +/// address offsets in the address table is specified in the header in +/// Header.AddrOffSize. The first offset in the address table is aligned to +/// Header.AddrOffSize alignment to ensure efficient access when loaded into +/// memory. 
+/// +/// FUNCTION INFO OFFSETS TABLE +/// +/// The function info offsets table immediately follows the address table and +/// consists of Header.NumAddresses 32 bit file offsets: one for each address +/// in the address table. This data is aligned to a 4 byte boundary. The +/// offsets in this table are the relative offsets from the start offset of the +/// GSYM header and point to the function info data for each address in the +/// address table. Keeping this data separate from the address table helps to +/// reduce the number of pages that are touched when address lookups occur on a +/// GSYM file. +/// +/// FILE TABLE +/// +/// The file table immediately follows the function info offsets table. The +/// encoding of the FileTable is: +/// +/// struct FileTable { +/// uint32_t Count; +/// FileEntry Files[]; +/// }; +/// +/// The file table starts with a 32 bit count of the number of files that are +/// used in all of the function info, followed by that number of FileEntry +/// structures. The file table is aligned to a 4 byte boundary. Each file in +/// the file table is represented with a FileEntry structure. +/// See "llvm/DebugInfo/GSYM/FileEntry.h" for details. +/// +/// STRING TABLE +/// +/// The string table follows the file table in stand alone GSYM files and +/// contains all strings for everything contained in the GSYM file. Any string +/// data should be added to the string table and any references to strings +/// inside GSYM information must be stored as 32 bit string table offsets into +/// this string table. The string table always starts with an empty string at +/// offset zero and is followed by any strings needed by the GSYM information. +/// The start of the string table is not aligned to any boundary. +/// +/// FUNCTION INFO DATA +/// +/// The function info data is the payload that contains information about the +/// address that is being looked up. It contains all of the encoded +/// FunctionInfo objects. 
Each encoded FunctionInfo's data is pointed to by an +/// entry in the Function Info Offsets Table. For details on the exact encoding +/// of FunctionInfo objects, see "llvm/DebugInfo/GSYM/FunctionInfo.h". +class GsymCreator { + // Private member variables require Mutex protections + mutable std::recursive_mutex Mutex; + std::vector Funcs; + StringTableBuilder StrTab; + DenseMap FileEntryToIndex; + std::vector Files; + std::vector UUID; + bool Finalized = false; + +public: + + GsymCreator(); + + /// Save a GSYM file to a stand alone file. + /// + /// \param Path The file path to save the GSYM file to. + /// \param ByteOrder The endianness to use when saving the file. + /// \returns An error object that indicates success or failure of the save. + llvm::Error save(StringRef Path, llvm::support::endianness ByteOrder) const; + + /// Encode a GSYM into the file writer stream at the current position. + /// + /// \param O The stream to save the binary data to + /// \returns An error object that indicates success or failure of the save. + llvm::Error encode(FileWriter &O) const; + + /// Insert a string into the GSYM string table. + /// + /// All strings used by GSYM files must be uniqued by adding them to this + /// string pool and using the returned offset for any string values. + /// + /// \param S The string to insert into the string table. + /// \returns The unique 32 bit offset into the string table. + uint32_t insertString(StringRef S); + + /// Insert a file into this GSYM creator. + /// + /// Inserts a file by adding a FileEntry into the "Files" member variable if + /// the file has not already been added. The file path is split into + /// directory and filename which are both added to the string table. This + /// allows paths to be stored efficiently by reusing the directories that are + /// common between multiple files. + /// + /// \param Path The path to the file to insert. + /// \param Style The path style for the "Path" parameter. 
+ /// \returns The unique file index for the inserted file. + uint32_t insertFile(StringRef Path, + llvm::sys::path::Style Style = llvm::sys::path::Style::native); + + /// Add a function info to this GSYM creator. + /// + /// All information in the FunctionInfo object must use the + /// GsymCreator::insertString(...) function when creating string table + /// offsets for names and other strings. + /// + /// \param FI The function info object to emplace into our functions list. + void addFunctionInfo(FunctionInfo &&FI); + + /// Finalize the data in the GSYM creator prior to saving the data out. + /// + /// Finalize must be called after all FunctionInfo objects have been added + /// and before GsymCreator::save() is called. + /// + /// \param OS Output stream to report duplicate function infos, overlapping + /// function infos, and function infos that were merged or removed. + /// \returns An error object that indicates success or failure of the + /// finalize. + llvm::Error finalize(llvm::raw_ostream &OS); + + /// Set the UUID value. + /// + /// Takes Mutex before touching the UUID member, like the other calls that + /// modify this object, because encode() reads the UUID under the same lock. + /// + /// \param UUIDBytes The new UUID bytes. + void setUUID(llvm::ArrayRef UUIDBytes) { + std::lock_guard<std::recursive_mutex> Guard(Mutex); + UUID.assign(UUIDBytes.begin(), UUIDBytes.end()); + } + + /// Thread safe iteration over all function infos. + /// + /// \param Callback A callback function that will get called with each + /// FunctionInfo. If the callback returns false, stop iterating. + void forEachFunctionInfo( + std::function const &Callback); + + /// Thread safe const iteration over all function infos. + /// + /// \param Callback A callback function that will get called with each + /// FunctionInfo. If the callback returns false, stop iterating. 
+ void forEachFunctionInfo( + std::function const &Callback) const; + +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H diff --git a/include/llvm/DebugInfo/GSYM/GsymReader.h b/include/llvm/DebugInfo/GSYM/GsymReader.h new file mode 100644 index 00000000000..113bcee9c9a --- /dev/null +++ b/include/llvm/DebugInfo/GSYM/GsymReader.h @@ -0,0 +1,228 @@ +//===- GsymReader.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H +#define LLVM_DEBUGINFO_GSYM_GSYMREADER_H + + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/GSYM/FileEntry.h" +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/Header.h" +#include "llvm/DebugInfo/GSYM/LineEntry.h" +#include "llvm/DebugInfo/GSYM/StringTable.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorOr.h" + +#include +#include +#include +#include +#include + +namespace llvm { +class MemoryBuffer; +class raw_ostream; + +namespace gsym { + +/// GsymReader is used to read GSYM data from a file or buffer. +/// +/// This class is optimized for very quick lookups when the endianness matches +/// the host system. The Header, address table, address info offsets, and file +/// table are designed to be mmap'ed as read only into memory and used without +/// any parsing needed. If the endianness doesn't match, we swap these objects +/// and tables into GsymReader::SwappedData and then point our header and +/// ArrayRefs to this swapped internal data. +/// +/// GsymReader objects must use one of the static functions to create an +/// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...). 
+ +class GsymReader { + GsymReader(std::unique_ptr Buffer); + llvm::Error parse(); + + std::unique_ptr MemBuffer; + StringRef GsymBytes; + llvm::support::endianness Endian; + const Header *Hdr = nullptr; + ArrayRef AddrOffsets; + ArrayRef AddrInfoOffsets; + ArrayRef Files; + StringTable StrTab; + /// When the GSYM file's endianness doesn't match the host system then + /// we must decode all data structures that need to be swapped into + /// local storage and set point the ArrayRef objects above to these swapped + /// copies. + struct SwappedData { + Header Hdr; + std::vector AddrOffsets; + std::vector AddrInfoOffsets; + std::vector Files; + }; + std::unique_ptr Swap; + +public: + GsymReader(GsymReader &&RHS); + ~GsymReader(); + + /// Construct a GsymReader from a file on disk. + /// + /// \param Path The file path the GSYM file to read. + /// \returns An expected GsymReader that contains the object or an error + /// object that indicates reason for failing to read the GSYM. + static llvm::Expected openFile(StringRef Path); + + /// Construct a GsymReader from a buffer. + /// + /// \param Bytes A set of bytes that will be copied and owned by the + /// returned object on success. + /// \returns An expected GsymReader that contains the object or an error + /// object that indicates reason for failing to read the GSYM. + static llvm::Expected copyBuffer(StringRef Bytes); + + /// Access the GSYM header. + /// \returns A native endian version of the GSYM header. + const Header &getHeader() const; + + /// Get the full function info for an address. + /// + /// \param Addr A virtual address from the orignal object file to lookup. + /// \returns An expected FunctionInfo that contains the function info object + /// or an error object that indicates reason for failing to lookup the + /// address, + llvm::Expected getFunctionInfo(uint64_t Addr) const; + + /// Get a string from the string table. + /// + /// \param Offset The string table offset for the string to retrieve. 
+ /// \returns The string from the strin table. + StringRef getString(uint32_t Offset) const { return StrTab[Offset]; } + +protected: + /// Gets an address from the address table. + /// + /// Addresses are stored as offsets frrom the gsym::Header::BaseAddress. + /// + /// \param Index A index into the address table. + /// \returns A resolved virtual address for adddress in the address table + /// or llvm::None if Index is out of bounds. + Optional getAddress(size_t Index) const; + + /// Get the a file entry for the suppplied file index. + /// + /// Used to convert any file indexes in the FunctionInfo data back into + /// files. This function can be used for iteration, but is more commonly used + /// for random access when doing lookups. + /// + /// \param Index An index into the file table. + /// \returns An optional FileInfo that will be valid if the file index is + /// valid, or llvm::None if the file index is out of bounds, + Optional getFile(uint32_t Index) const { + if (Index < Files.size()) + return Files[Index]; + return llvm::None; + } + + /// Get an appropriate address info offsets array. + /// + /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8 + /// byte offsets from the The gsym::Header::BaseAddress. The table is stored + /// internally as a array of bytes that are in the correct endianness. When + /// we access this table we must get an array that matches those sizes. This + /// templatized helper function is used when accessing address offsets in the + /// AddrOffsets member variable. + /// + /// \returns An ArrayRef of an appropriate address offset size. + template ArrayRef + getAddrOffsets() const { + return ArrayRef(reinterpret_cast(AddrOffsets.data()), + AddrOffsets.size()/sizeof(T)); + } + + /// Get an appropriate address from the address table. + /// + /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8 + /// byte address offsets from the The gsym::Header::BaseAddress. 
The table is + /// stored internally as a array of bytes that are in the correct endianness. + /// In order to extract an address from the address table we must access the + /// address offset using the correct size and then add it to the BaseAddress + /// in the header. + /// + /// \param Index An index into the AddrOffsets array. + /// \returns An virtual address that matches the original object file for the + /// address as the specified index, or llvm::None if Index is out of bounds. + template Optional + addressForIndex(size_t Index) const { + ArrayRef AIO = getAddrOffsets(); + if (Index < AIO.size()) + return AIO[Index] + Hdr->BaseAddress; + return llvm::None; + } + /// Lookup an address offset in the AddrOffsets table. + /// + /// Given an address offset, look it up using a binary search of the + /// AddrOffsets table. + /// + /// \param AddrOffset An address offset, that has already been computed by + /// subtracting the gsym::Header::BaseAddress. + /// \returns The matching address offset index. This index will be used to + /// extract the FunctionInfo data's offset from the AddrInfoOffsets array. + template + uint64_t getAddressOffsetIndex(const uint64_t AddrOffset) const { + ArrayRef AIO = getAddrOffsets(); + const auto Begin = AIO.begin(); + const auto End = AIO.end(); + auto Iter = std::lower_bound(Begin, End, AddrOffset); + if (Iter == End || AddrOffset < *Iter) + --Iter; + return std::distance(Begin, Iter); + } + + /// Create a GSYM from a memory buffer. + /// + /// Called by both openFile() and copyBuffer(), this function does all of the + /// work of parsing the GSYM file and returning an error. + /// + /// \param MemBuffer A memory buffer that will transfer ownership into the + /// GsymReader. + /// \returns An expected GsymReader that contains the object or an error + /// object that indicates reason for failing to read the GSYM. + static llvm::Expected + create(std::unique_ptr &MemBuffer); + + + /// Given an address, find the address index. 
+ /// + /// Binary search the address table and find the matching address index. + /// + /// \param Addr A virtual address that matches the original object file + /// to lookup. + /// \returns An index into the address table. This index can be used to + /// extract the FunctionInfo data's offset from the AddrInfoOffsets array. + /// Returns an error if the address isn't in the GSYM with details of why. + Expected getAddressIndex(const uint64_t Addr) const; + + /// Given an address index, get the offset for the FunctionInfo. + /// + /// Looking up an address is done by finding the corresponding address + /// index for the address. This index is then used to get the offset of the + /// FunctionInfo data that we will decode using this function. + /// + /// \param Index An index into the address table. + /// \returns An optional GSYM data offset for the offset of the FunctionInfo + /// that needs to be decoded. + Optional getAddressInfoOffset(size_t Index) const; +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H diff --git a/include/llvm/DebugInfo/GSYM/Header.h b/include/llvm/DebugInfo/GSYM/Header.h index b81f68c1078..6652c59c97a 100644 --- a/include/llvm/DebugInfo/GSYM/Header.h +++ b/include/llvm/DebugInfo/GSYM/Header.h @@ -22,6 +22,7 @@ namespace gsym { class FileWriter; constexpr uint32_t GSYM_MAGIC = 0x4753594d; // 'GSYM' +constexpr uint32_t GSYM_CIGAM = 0x4d595347; // 'MYSG' constexpr uint32_t GSYM_VERSION = 1; constexpr size_t GSYM_MAX_UUID_SIZE = 20; @@ -84,16 +85,20 @@ struct Header { /// be set to zero. uint8_t UUID[GSYM_MAX_UUID_SIZE]; - /// Check if a header is valid. + /// Check if a header is valid and return an error if anything is wrong. /// - /// \returns True if the header is valid and if the version is supported. 
- bool isValid() const { - if (Magic != GSYM_MAGIC) - return false; - if (Version != GSYM_VERSION) - return false; - return true; - } + /// This function can be used prior to encoding a header to ensure it is + /// valid, or after decoding a header to ensure it is valid and supported. + /// + /// Check a correctly byte swapped header for errors: + /// - check magic value + /// - check that version number is supported + /// - check that the address offset size is supported + /// - check that the UUID size is valid + /// + /// \returns An error if anything is wrong in the header, or Error::success() + /// if there are no errors. + llvm::Error checkForError() const; /// Decode an object from a binary data stream. /// diff --git a/lib/DebugInfo/GSYM/CMakeLists.txt b/lib/DebugInfo/GSYM/CMakeLists.txt index e05b2c112ce..2369f05197a 100644 --- a/lib/DebugInfo/GSYM/CMakeLists.txt +++ b/lib/DebugInfo/GSYM/CMakeLists.txt @@ -2,6 +2,8 @@ add_llvm_library(LLVMDebugInfoGSYM Header.cpp FileWriter.cpp FunctionInfo.cpp + GsymCreator.cpp + GsymReader.cpp InlineInfo.cpp LineTable.cpp Range.cpp @@ -9,4 +11,7 @@ add_llvm_library(LLVMDebugInfoGSYM ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo/GSYM ${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo + + DEPENDS + LLVMMC ) diff --git a/lib/DebugInfo/GSYM/FunctionInfo.cpp b/lib/DebugInfo/GSYM/FunctionInfo.cpp index dff867d66eb..ad022fec9e3 100644 --- a/lib/DebugInfo/GSYM/FunctionInfo.cpp +++ b/lib/DebugInfo/GSYM/FunctionInfo.cpp @@ -1,4 +1,4 @@ -//===- FunctionInfo.cpp -----------------------------------------*- C++ -*-===// +//===- FunctionInfo.cpp ---------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/lib/DebugInfo/GSYM/GsymCreator.cpp b/lib/DebugInfo/GSYM/GsymCreator.cpp new file mode 100644 index 00000000000..71bdc744337 --- /dev/null +++ b/lib/DebugInfo/GSYM/GsymCreator.cpp @@ -0,0 +1,274 @@ +//===- GsymCreator.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/GsymCreator.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/DebugInfo/GSYM/Header.h" +#include "llvm/DebugInfo/GSYM/LineTable.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include + + +using namespace llvm; +using namespace gsym; + + +GsymCreator::GsymCreator() : StrTab(StringTableBuilder::ELF) { + insertFile(StringRef()); +} + +uint32_t GsymCreator::insertFile(StringRef Path, + llvm::sys::path::Style Style) { + llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style); + llvm::StringRef filename = llvm::sys::path::filename(Path, Style); + FileEntry FE(insertString(directory), insertString(filename)); + + std::lock_guard Guard(Mutex); + const auto NextIndex = Files.size(); + // Find FE in hash map and insert if not present. 
+ auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex)); + if (R.second) + Files.emplace_back(FE); + return R.first->second; +} + +llvm::Error GsymCreator::save(StringRef Path, + llvm::support::endianness ByteOrder) const { + std::error_code EC; + raw_fd_ostream OutStrm(Path, EC); + if (EC) + return llvm::errorCodeToError(EC); + FileWriter O(OutStrm, ByteOrder); + return encode(O); +} + +llvm::Error GsymCreator::encode(FileWriter &O) const { + std::lock_guard Guard(Mutex); + if (Funcs.empty()) + return createStringError(std::errc::invalid_argument, + "no functions to encode"); + if (!Finalized) + return createStringError(std::errc::invalid_argument, + "GsymCreator wasn't finalized prior to encoding"); + + if (Funcs.size() > UINT32_MAX) + return createStringError(std::errc::invalid_argument, + "too many FunctionInfos"); + const uint64_t MinAddr = Funcs.front().startAddress(); + const uint64_t MaxAddr = Funcs.back().startAddress(); + const uint64_t AddrDelta = MaxAddr - MinAddr; + Header Hdr; + Hdr.Magic = GSYM_MAGIC; + Hdr.Version = GSYM_VERSION; + Hdr.AddrOffSize = 0; + Hdr.UUIDSize = static_cast(UUID.size()); + Hdr.BaseAddress = MinAddr; + Hdr.NumAddresses = static_cast(Funcs.size()); + Hdr.StrtabOffset = 0; // Fixed up below once the string table is written. + Hdr.StrtabSize = 0; // Fixed up below once the string table is written. + memset(Hdr.UUID, 0, sizeof(Hdr.UUID)); + if (UUID.size() > sizeof(Hdr.UUID)) + return createStringError(std::errc::invalid_argument, + "invalid UUID size %u", (uint32_t)UUID.size()); + // Set the address offset size correctly in the GSYM header. + if (AddrDelta <= UINT8_MAX) + Hdr.AddrOffSize = 1; + else if (AddrDelta <= UINT16_MAX) + Hdr.AddrOffSize = 2; + else if (AddrDelta <= UINT32_MAX) + Hdr.AddrOffSize = 4; + else + Hdr.AddrOffSize = 8; + // Copy the UUID value if we have one. + if (UUID.size() > 0) + memcpy(Hdr.UUID, UUID.data(), UUID.size()); + // Write out the header. 
+ llvm::Error Err = Hdr.encode(O); + if (Err) + return Err; + + // Write out the address offsets. + O.alignTo(Hdr.AddrOffSize); + for (const auto &FuncInfo : Funcs) { + uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress; + switch(Hdr.AddrOffSize) { + case 1: O.writeU8(static_cast(AddrOffset)); break; + case 2: O.writeU16(static_cast(AddrOffset)); break; + case 4: O.writeU32(static_cast(AddrOffset)); break; + case 8: O.writeU64(AddrOffset); break; + } + } + + // Reserve zeroed AddrInfoOffsets slots; they are fixed up after encoding. + O.alignTo(4); + const uint64_t AddrInfoOffsetsOffset = O.tell(); + for (size_t i = 0, n = Funcs.size(); i < n; ++i) + O.writeU32(0); + + // Write out the file table + O.alignTo(4); + assert(!Files.empty()); + assert(Files[0].Dir == 0); + assert(Files[0].Base == 0); + size_t NumFiles = Files.size(); + if (NumFiles > UINT32_MAX) + return createStringError(std::errc::invalid_argument, + "too many files"); + O.writeU32(static_cast(NumFiles)); + for (auto File: Files) { + O.writeU32(File.Dir); + O.writeU32(File.Base); + } + + // Write out the string table. + const uint64_t StrtabOffset = O.tell(); + StrTab.write(O.get_stream()); + const uint64_t StrtabSize = O.tell() - StrtabOffset; + std::vector AddrInfoOffsets; + + // Write out the address infos for each function info. 
+ for (const auto &FuncInfo : Funcs) { + if (Expected OffsetOrErr = FuncInfo.encode(O)) + AddrInfoOffsets.push_back(OffsetOrErr.get()); + else + return OffsetOrErr.takeError(); + } + // Fixup the string table offset and size in the header + O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset)); + O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize)); + + // Fixup all address info offsets + uint64_t Offset = 0; + for (auto AddrInfoOffset: AddrInfoOffsets) { + O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset); + Offset += 4; + } + return Error::success(); +} + +llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) { + std::lock_guard Guard(Mutex); + if (Finalized) + return createStringError(std::errc::invalid_argument, + "already finalized"); + Finalized = true; + + // Sort function infos so we can emit sorted functions. + llvm::sort(Funcs.begin(), Funcs.end()); + + // Don't let the string table indexes change by finalizing in order. + StrTab.finalizeInOrder(); + + // Remove duplicate function infos that have both entries from debug info + // (DWARF or Breakpad) and entries from the SymbolTable. + // + // Also handle overlapping functions. Usually there shouldn't be any, but they + // can and do happen in some rare cases. + // + // (a) (b) (c) + // ^ ^ ^ ^ + // |X |Y |X ^ |X + // | | | |Y | ^ + // | | | v v |Y + // v v v v + // + // In (a) and (b), Y is ignored and X will be reported for the full range. + // In (c), both functions will be included in the result and lookups for an + // address in the intersection will return Y because of binary search. 
+ // + // Note that in case of (b), we cannot include Y in the result because then + // we wouldn't find any function for range (end of Y, end of X) + // with binary search + auto NumBefore = Funcs.size(); + auto Curr = Funcs.begin(); + auto Prev = Funcs.end(); + while (Curr != Funcs.end()) { + // Can't check for overlaps or same address ranges if we don't have a + // previous entry + if (Prev != Funcs.end()) { + if (Prev->Range.intersects(Curr->Range)) { + // Overlapping address ranges. + if (Prev->Range == Curr->Range) { + // Same address range. Check if one is from debug info and the other + // is from a symbol table. If so, then keep the one with debug info. + // Our sorting guarantees that entries with matching address ranges + // that have debug info are last in the sort. + if (*Prev == *Curr) { + // FunctionInfo entries match exactly (range, lines, inlines) + OS << "warning: duplicate function info entries, removing " + "duplicate:\n" + << *Curr << '\n'; + Curr = Funcs.erase(Prev); + } else { + if (!Prev->hasRichInfo() && Curr->hasRichInfo()) { + // Same address range, one with no debug info (symbol) and the + // next with debug info. Keep the latter. + Curr = Funcs.erase(Prev); + } else { + OS << "warning: same address range contains different debug " + << "info. 
Removing:\n" + << *Prev << "\nIn favor of this one:\n" + << *Curr << "\n"; + Curr = Funcs.erase(Prev); + } + } + } else { + // print warnings about overlaps + OS << "warning: function ranges overlap:\n" + << *Prev << "\n" + << *Curr << "\n"; + } + } else if (Prev->Range.size() == 0 && + Curr->Range.contains(Prev->Range.Start)) { + OS << "warning: removing symbol:\n" + << *Prev << "\nKeeping:\n" + << *Curr << "\n"; + Curr = Funcs.erase(Prev); + } + } + if (Curr == Funcs.end()) + break; + Prev = Curr++; + } + + OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with " + << Funcs.size() << " total\n"; + return Error::success(); +} + +uint32_t GsymCreator::insertString(StringRef S) { + std::lock_guard Guard(Mutex); + if (S.empty()) + return 0; + return StrTab.add(S); +} + +void GsymCreator::addFunctionInfo(FunctionInfo &&FI) { + std::lock_guard Guard(Mutex); + Funcs.emplace_back(std::move(FI)); +} + +void GsymCreator::forEachFunctionInfo( + std::function const &Callback) { + std::lock_guard Guard(Mutex); + for (auto &FI : Funcs) { + if (!Callback(FI)) + break; + } +} + +void GsymCreator::forEachFunctionInfo( + std::function const &Callback) const { + std::lock_guard Guard(Mutex); + for (const auto &FI : Funcs) { + if (!Callback(FI)) + break; + } +} diff --git a/lib/DebugInfo/GSYM/GsymReader.cpp b/lib/DebugInfo/GSYM/GsymReader.cpp new file mode 100644 index 00000000000..2e1b5c9cd1a --- /dev/null +++ b/lib/DebugInfo/GSYM/GsymReader.cpp @@ -0,0 +1,274 @@ +//===- GsymReader.cpp -----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/GsymReader.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "llvm/DebugInfo/GSYM/GsymCreator.h" +#include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/DebugInfo/GSYM/LineTable.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; +using namespace gsym; + +GsymReader::GsymReader(std::unique_ptr Buffer) : + MemBuffer(std::move(Buffer)), + Endian(support::endian::system_endianness()) {} + + GsymReader::GsymReader(GsymReader &&RHS) = default; + +GsymReader::~GsymReader() = default; + +llvm::Expected GsymReader::openFile(StringRef Filename) { + // Open the input file and return an appropriate error if needed. + ErrorOr> BuffOrErr = + MemoryBuffer::getFileOrSTDIN(Filename); + auto Err = BuffOrErr.getError(); + if (Err) + return llvm::errorCodeToError(Err); + return create(BuffOrErr.get()); +} + +llvm::Expected GsymReader::copyBuffer(StringRef Bytes) { + auto MemBuffer = MemoryBuffer::getMemBufferCopy(Bytes, "GSYM bytes"); + return create(MemBuffer); +} + +llvm::Expected +GsymReader::create(std::unique_ptr &MemBuffer) { + if (!MemBuffer.get()) + return createStringError(std::errc::invalid_argument, + "invalid memory buffer"); + GsymReader GR(std::move(MemBuffer)); + llvm::Error Err = GR.parse(); + if (Err) + return std::move(Err); + return std::move(GR); +} + +llvm::Error +GsymReader::parse() { + BinaryStreamReader FileData(MemBuffer->getBuffer(), + support::endian::system_endianness()); + // Check for the magic bytes. This file format is designed to be mmap'ed + // into a process and accessed as read only. This is done for performance + // and efficiency for symbolicating and parsing GSYM data. 
+ if (FileData.readObject(Hdr)) + return createStringError(std::errc::invalid_argument, + "not enough data for a GSYM header"); + + const auto HostByteOrder = support::endian::system_endianness(); + switch (Hdr->Magic) { + case GSYM_MAGIC: + Endian = HostByteOrder; + break; + case GSYM_CIGAM: + // This is a GSYM file, but not native endianness. + Endian = sys::IsBigEndianHost ? support::little : support::big; + Swap.reset(new SwappedData); + break; + default: + return createStringError(std::errc::invalid_argument, + "not a GSYM file"); + } + + bool DataIsLittleEndian = HostByteOrder != support::little; + // Read a correctly byte swapped header if we need to. + if (Swap) { + DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4); + if (auto ExpectedHdr = Header::decode(Data)) + Swap->Hdr = ExpectedHdr.get(); + else + return ExpectedHdr.takeError(); + Hdr = &Swap->Hdr; + } + + // Detect errors in the header and report any that are found. If we make it + // past this without errors, we know we have a good magic value, a supported + // version number, verified address offset size and a valid UUID size. + if (Error Err = Hdr->checkForError()) + return Err; + + if (!Swap) { + // This is the native endianness case that is most common and optimized for + // efficient lookups. Here we just grab pointers to the native data and + // use ArrayRef objects to allow efficient read only access. + + // Read the address offsets. + if (FileData.padToAlignment(Hdr->AddrOffSize) || + FileData.readArray(AddrOffsets, + Hdr->NumAddresses * Hdr->AddrOffSize)) + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + + // Read the address info offsets. + if (FileData.padToAlignment(4) || + FileData.readArray(AddrInfoOffsets, Hdr->NumAddresses)) + return createStringError(std::errc::invalid_argument, + "failed to read address info offsets table"); + + // Read the file table. 
+ uint32_t NumFiles = 0; + if (FileData.readInteger(NumFiles) || FileData.readArray(Files, NumFiles)) + return createStringError(std::errc::invalid_argument, + "failed to read file table"); + + // Get the string table. + FileData.setOffset(Hdr->StrtabOffset); + if (FileData.readFixedString(StrTab.Data, Hdr->StrtabSize)) + return createStringError(std::errc::invalid_argument, + "failed to read string table"); +} else { + // This is the non native endianness case that is not common and not + // optimized for lookups. Here we decode the important tables into local + // storage and then set the ArrayRef objects to point to these swapped + // copies of the read only data so lookups can be as efficient as possible. + DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4); + + // Read the address offsets. + uint64_t Offset = alignTo(sizeof(Header), Hdr->AddrOffSize); + Swap->AddrOffsets.resize(Hdr->NumAddresses * Hdr->AddrOffSize); + switch (Hdr->AddrOffSize) { + case 1: + if (!Data.getU8(&Offset, Swap->AddrOffsets.data(), Hdr->NumAddresses)) + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + break; + case 2: + if (!Data.getU16(&Offset, + reinterpret_cast(Swap->AddrOffsets.data()), + Hdr->NumAddresses)) + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + break; + case 4: + if (!Data.getU32(&Offset, + reinterpret_cast(Swap->AddrOffsets.data()), + Hdr->NumAddresses)) + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + break; + case 8: + if (!Data.getU64(&Offset, + reinterpret_cast(Swap->AddrOffsets.data()), + Hdr->NumAddresses)) + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + } + AddrOffsets = ArrayRef(Swap->AddrOffsets); + + // Read the address info offsets. 
+ Offset = alignTo(Offset, 4); + Swap->AddrInfoOffsets.resize(Hdr->NumAddresses); + if (Data.getU32(&Offset, Swap->AddrInfoOffsets.data(), Hdr->NumAddresses)) + AddrInfoOffsets = ArrayRef(Swap->AddrInfoOffsets); + else + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + // Read the file table. + const uint32_t NumFiles = Data.getU32(&Offset); + if (NumFiles > 0) { + Swap->Files.resize(NumFiles); + if (Data.getU32(&Offset, &Swap->Files[0].Dir, NumFiles*2)) + Files = ArrayRef(Swap->Files); + else + return createStringError(std::errc::invalid_argument, + "failed to read file table"); + } + // Get the string table. + StrTab.Data = MemBuffer->getBuffer().substr(Hdr->StrtabOffset, + Hdr->StrtabSize); + if (StrTab.Data.empty()) + return createStringError(std::errc::invalid_argument, + "failed to read string table"); + } + return Error::success(); + +} + +const Header &GsymReader::getHeader() const { + // The only way to get a GsymReader is from GsymReader::openFile(...) or + // GsymReader::copyBuffer() and the header must be valid and initialized to + // a valid pointer value, so the assert below should not trigger. 
+ assert(Hdr); + return *Hdr; +} + +Optional GsymReader::getAddress(size_t Index) const { + switch (Hdr->AddrOffSize) { + case 1: return addressForIndex(Index); + case 2: return addressForIndex(Index); + case 4: return addressForIndex(Index); + case 8: return addressForIndex(Index); + } + return llvm::None; +} + +Optional GsymReader::getAddressInfoOffset(size_t Index) const { + const auto NumAddrInfoOffsets = AddrInfoOffsets.size(); + if (Index < NumAddrInfoOffsets) + return AddrInfoOffsets[Index]; + return llvm::None; +} + +Expected +GsymReader::getAddressIndex(const uint64_t Addr) const { + if (Addr < Hdr->BaseAddress) + return createStringError(std::errc::invalid_argument, + "address 0x%" PRIx64 " not in GSYM", Addr); + const uint64_t AddrOffset = Addr - Hdr->BaseAddress; + switch (Hdr->AddrOffSize) { + case 1: return getAddressOffsetIndex(AddrOffset); + case 2: return getAddressOffsetIndex(AddrOffset); + case 4: return getAddressOffsetIndex(AddrOffset); + case 8: return getAddressOffsetIndex(AddrOffset); + default: break; + } + return createStringError(std::errc::invalid_argument, + "unsupported address offset size %u", + Hdr->AddrOffSize); +} + +llvm::Expected GsymReader::getFunctionInfo(uint64_t Addr) const { + Expected AddressIndex = getAddressIndex(Addr); + if (!AddressIndex) + return AddressIndex.takeError(); + // Address info offsets size should have been checked in parse(). 
+ assert(*AddressIndex < AddrInfoOffsets.size()); + auto AddrInfoOffset = AddrInfoOffsets[*AddressIndex]; + DataExtractor Data(MemBuffer->getBuffer().substr(AddrInfoOffset), Endian, 4); + if (Optional OptAddr = getAddress(*AddressIndex)) { + auto ExpectedFI = FunctionInfo::decode(Data, *OptAddr); + if (ExpectedFI) { + if (ExpectedFI->Range.contains(Addr) || ExpectedFI->Range.size() == 0) + return ExpectedFI; + return createStringError(std::errc::invalid_argument, + "address 0x%" PRIx64 " not in GSYM", Addr); + } + } + return createStringError(std::errc::invalid_argument, + "failed to extract address[%" PRIu64 "]", + *AddressIndex); +} diff --git a/lib/DebugInfo/GSYM/Header.cpp b/lib/DebugInfo/GSYM/Header.cpp index fbf991682e7..0b3fb9c4989 100644 --- a/lib/DebugInfo/GSYM/Header.cpp +++ b/lib/DebugInfo/GSYM/Header.cpp @@ -38,14 +38,14 @@ raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const Header &H) { } /// Check the header and detect any errors. -static llvm::Error getHeaderError(const Header &H) { - if (H.Magic != GSYM_MAGIC) +llvm::Error Header::checkForError() const { + if (Magic != GSYM_MAGIC) return createStringError(std::errc::invalid_argument, - "invalid GSYM magic 0x%8.8x", H.Magic); - if (H.Version != GSYM_VERSION) + "invalid GSYM magic 0x%8.8x", Magic); + if (Version != GSYM_VERSION) return createStringError(std::errc::invalid_argument, - "unsupported GSYM version %u", H.Version); - switch (H.AddrOffSize) { + "unsupported GSYM version %u", Version); + switch (AddrOffSize) { case 1: break; case 2: break; case 4: break; @@ -53,11 +53,11 @@ static llvm::Error getHeaderError(const Header &H) { default: return createStringError(std::errc::invalid_argument, "invalid address offset size %u", - H.AddrOffSize); + AddrOffSize); } - if (H.UUIDSize > GSYM_MAX_UUID_SIZE) + if (UUIDSize > GSYM_MAX_UUID_SIZE) return createStringError(std::errc::invalid_argument, - "invalid UUID size %u", H.UUIDSize); + "invalid UUID size %u", UUIDSize); return Error::success(); } 
@@ -77,16 +77,14 @@ llvm::Expected
Header::decode(DataExtractor &Data) { H.StrtabOffset = Data.getU32(&Offset); H.StrtabSize = Data.getU32(&Offset); Data.getU8(&Offset, H.UUID, GSYM_MAX_UUID_SIZE); - llvm::Error Err = getHeaderError(H); - if (Err) + if (llvm::Error Err = H.checkForError()) return std::move(Err); return H; } llvm::Error Header::encode(FileWriter &O) const { // Users must verify the Header is valid prior to calling this funtion. - llvm::Error Err = getHeaderError(*this); - if (Err) + if (llvm::Error Err = checkForError()) return Err; O.writeU32(Magic); O.writeU16(Version); diff --git a/unittests/DebugInfo/GSYM/CMakeLists.txt b/unittests/DebugInfo/GSYM/CMakeLists.txt index fb9d5e564b3..849b0f64b40 100644 --- a/unittests/DebugInfo/GSYM/CMakeLists.txt +++ b/unittests/DebugInfo/GSYM/CMakeLists.txt @@ -1,5 +1,6 @@ set(LLVM_LINK_COMPONENTS DebugInfoGSYM + MC Support ) diff --git a/unittests/DebugInfo/GSYM/GSYMTest.cpp b/unittests/DebugInfo/GSYM/GSYMTest.cpp index 421544ee1d4..90d8594eec6 100644 --- a/unittests/DebugInfo/GSYM/GSYMTest.cpp +++ b/unittests/DebugInfo/GSYM/GSYMTest.cpp @@ -13,6 +13,8 @@ #include "llvm/DebugInfo/GSYM/FileEntry.h" #include "llvm/DebugInfo/GSYM/FileWriter.h" #include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/GsymCreator.h" +#include "llvm/DebugInfo/GSYM/GsymReader.h" #include "llvm/DebugInfo/GSYM/InlineInfo.h" #include "llvm/DebugInfo/GSYM/Range.h" #include "llvm/DebugInfo/GSYM/StringTable.h" @@ -1046,3 +1048,255 @@ TEST(GSYMTest, TestHeaderEncodeDecode) { TestHeaderEncodeDecode(H, llvm::support::little); TestHeaderEncodeDecode(H, llvm::support::big); } + +static void TestGsymCreatorEncodeError(llvm::support::endianness ByteOrder, + const GsymCreator &GC, + std::string ExpectedErrorMsg) { + SmallString<512> Str; + raw_svector_ostream OutStrm(Str); + FileWriter FW(OutStrm, ByteOrder); + llvm::Error Err = GC.encode(FW); + ASSERT_TRUE(bool(Err)); + checkError(ExpectedErrorMsg, std::move(Err)); +} + +TEST(GSYMTest, TestGsymCreatorEncodeErrors) 
{ + const uint8_t ValidUUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16}; + const uint8_t InvalidUUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21}; + // Verify we get an error when trying to encode an GsymCreator with no + // function infos. We shouldn't be saving a GSYM file in this case since + // there is nothing inside of it. + GsymCreator GC; + TestGsymCreatorEncodeError(llvm::support::little, GC, + "no functions to encode"); + const uint64_t FuncAddr = 0x1000; + const uint64_t FuncSize = 0x100; + const uint32_t FuncName = GC.insertString("foo"); + // Verify we get an error trying to encode a GsymCreator that isn't + // finalized. + GC.addFunctionInfo(FunctionInfo(FuncAddr, FuncSize, FuncName)); + TestGsymCreatorEncodeError(llvm::support::little, GC, + "GsymCreator wasn't finalized prior to encoding"); + std::string finalizeIssues; + raw_string_ostream OS(finalizeIssues); + llvm::Error finalizeErr = GC.finalize(OS); + ASSERT_FALSE(bool(finalizeErr)); + finalizeErr = GC.finalize(OS); + ASSERT_TRUE(bool(finalizeErr)); + checkError("already finalized", std::move(finalizeErr)); + // Verify we get an error trying to encode a GsymCreator with a UUID that is + // too long. + GC.setUUID(InvalidUUID); + TestGsymCreatorEncodeError(llvm::support::little, GC, + "invalid UUID size 21"); + GC.setUUID(ValidUUID); + // Verify errors are propagated when we try to encoding an invalid line + // table. + GC.forEachFunctionInfo([](FunctionInfo &FI) -> bool { + FI.OptLineTable = LineTable(); // Invalid line table. + return false; // Stop iterating + }); + TestGsymCreatorEncodeError(llvm::support::little, GC, + "attempted to encode invalid LineTable object"); + // Verify errors are propagated when we try to encoding an invalid inline + // info. + GC.forEachFunctionInfo([](FunctionInfo &FI) -> bool { + FI.OptLineTable = llvm::None; + FI.Inline = InlineInfo(); // Invalid InlineInfo. 
+ return false; // Stop iterating + }); + TestGsymCreatorEncodeError(llvm::support::little, GC, + "attempted to encode invalid InlineInfo object"); +} + +static void Compare(const GsymCreator &GC, const GsymReader &GR) { + // Verify that all of the data in a GsymCreator is correctly decoded from + // a GsymReader. To do this, we iterator over + GC.forEachFunctionInfo([&](const FunctionInfo &FI) -> bool { + auto DecodedFI = GR.getFunctionInfo(FI.Range.Start); + EXPECT_TRUE(bool(DecodedFI)); + EXPECT_EQ(FI, *DecodedFI); + return true; // Keep iterating over all FunctionInfo objects. + }); +} + +static void TestEncodeDecode(const GsymCreator &GC, + support::endianness ByteOrder, uint16_t Version, + uint8_t AddrOffSize, uint64_t BaseAddress, + uint32_t NumAddresses, ArrayRef UUID) { + SmallString<512> Str; + raw_svector_ostream OutStrm(Str); + FileWriter FW(OutStrm, ByteOrder); + llvm::Error Err = GC.encode(FW); + ASSERT_FALSE((bool)Err); + Expected GR = GsymReader::copyBuffer(OutStrm.str()); + ASSERT_TRUE(bool(GR)); + const Header &Hdr = GR->getHeader(); + EXPECT_EQ(Hdr.Version, Version); + EXPECT_EQ(Hdr.AddrOffSize, AddrOffSize); + EXPECT_EQ(Hdr.UUIDSize, UUID.size()); + EXPECT_EQ(Hdr.BaseAddress, BaseAddress); + EXPECT_EQ(Hdr.NumAddresses, NumAddresses); + EXPECT_EQ(ArrayRef(Hdr.UUID, Hdr.UUIDSize), UUID); + Compare(GC, GR.get()); +} + +TEST(GSYMTest, TestGsymCreator1ByteAddrOffsets) { + uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + GsymCreator GC; + GC.setUUID(UUID); + constexpr uint64_t BaseAddr = 0x1000; + constexpr uint8_t AddrOffSize = 1; + const uint32_t Func1Name = GC.insertString("foo"); + const uint32_t Func2Name = GC.insertString("bar"); + GC.addFunctionInfo(FunctionInfo(BaseAddr+0x00, 0x10, Func1Name)); + GC.addFunctionInfo(FunctionInfo(BaseAddr+0x20, 0x10, Func2Name)); + Error Err = GC.finalize(llvm::nulls()); + ASSERT_FALSE(Err); + TestEncodeDecode(GC, llvm::support::little, + GSYM_VERSION, + AddrOffSize, + BaseAddr, + 2, 
// NumAddresses + ArrayRef(UUID)); + TestEncodeDecode(GC, llvm::support::big, + GSYM_VERSION, + AddrOffSize, + BaseAddr, + 2, // NumAddresses + ArrayRef(UUID)); +} + +TEST(GSYMTest, TestGsymCreator2ByteAddrOffsets) { + uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + GsymCreator GC; + GC.setUUID(UUID); + constexpr uint64_t BaseAddr = 0x1000; + constexpr uint8_t AddrOffSize = 2; + const uint32_t Func1Name = GC.insertString("foo"); + const uint32_t Func2Name = GC.insertString("bar"); + GC.addFunctionInfo(FunctionInfo(BaseAddr+0x000, 0x100, Func1Name)); + GC.addFunctionInfo(FunctionInfo(BaseAddr+0x200, 0x100, Func2Name)); + Error Err = GC.finalize(llvm::nulls()); + ASSERT_FALSE(Err); + TestEncodeDecode(GC, llvm::support::little, + GSYM_VERSION, + AddrOffSize, + BaseAddr, + 2, // NumAddresses + ArrayRef(UUID)); + TestEncodeDecode(GC, llvm::support::big, + GSYM_VERSION, + AddrOffSize, + BaseAddr, + 2, // NumAddresses + ArrayRef(UUID)); +} + +TEST(GSYMTest, TestGsymCreator4ByteAddrOffsets) { + uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + GsymCreator GC; + GC.setUUID(UUID); + constexpr uint64_t BaseAddr = 0x1000; + constexpr uint8_t AddrOffSize = 4; + const uint32_t Func1Name = GC.insertString("foo"); + const uint32_t Func2Name = GC.insertString("bar"); + GC.addFunctionInfo(FunctionInfo(BaseAddr+0x000, 0x100, Func1Name)); + GC.addFunctionInfo(FunctionInfo(BaseAddr+0x20000, 0x100, Func2Name)); + Error Err = GC.finalize(llvm::nulls()); + ASSERT_FALSE(Err); + TestEncodeDecode(GC, llvm::support::little, + GSYM_VERSION, + AddrOffSize, + BaseAddr, + 2, // NumAddresses + ArrayRef(UUID)); + TestEncodeDecode(GC, llvm::support::big, + GSYM_VERSION, + AddrOffSize, + BaseAddr, + 2, // NumAddresses + ArrayRef(UUID)); +} + +TEST(GSYMTest, TestGsymCreator8ByteAddrOffsets) { + uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + GsymCreator GC; + GC.setUUID(UUID); + constexpr uint64_t BaseAddr = 0x1000; + 
constexpr uint8_t AddrOffSize = 8; + const uint32_t Func1Name = GC.insertString("foo"); + const uint32_t Func2Name = GC.insertString("bar"); + GC.addFunctionInfo(FunctionInfo(BaseAddr+0x000, 0x100, Func1Name)); + GC.addFunctionInfo(FunctionInfo(BaseAddr+0x100000000, 0x100, Func2Name)); + Error Err = GC.finalize(llvm::nulls()); + ASSERT_FALSE(Err); + TestEncodeDecode(GC, llvm::support::little, + GSYM_VERSION, + AddrOffSize, + BaseAddr, + 2, // NumAddresses + ArrayRef(UUID)); + TestEncodeDecode(GC, llvm::support::big, + GSYM_VERSION, + AddrOffSize, + BaseAddr, + 2, // NumAddresses + ArrayRef(UUID)); +} + +static void VerifyFunctionInfo(const GsymReader &GR, uint64_t Addr, + const FunctionInfo &FI) { + auto ExpFI = GR.getFunctionInfo(Addr); + ASSERT_TRUE(bool(ExpFI)); + ASSERT_EQ(FI, ExpFI.get()); +} + +static void VerifyFunctionInfoError(const GsymReader &GR, uint64_t Addr, + std::string ErrMessage) { + auto ExpFI = GR.getFunctionInfo(Addr); + ASSERT_FALSE(bool(ExpFI)); + checkError(ErrMessage, ExpFI.takeError()); +} + +TEST(GSYMTest, TestGsymReader) { + uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + GsymCreator GC; + GC.setUUID(UUID); + constexpr uint64_t BaseAddr = 0x1000; + constexpr uint64_t Func1Addr = BaseAddr; + constexpr uint64_t Func2Addr = BaseAddr+0x20; + constexpr uint64_t FuncSize = 0x10; + const uint32_t Func1Name = GC.insertString("foo"); + const uint32_t Func2Name = GC.insertString("bar"); + const auto ByteOrder = support::endian::system_endianness(); + GC.addFunctionInfo(FunctionInfo(Func1Addr, FuncSize, Func1Name)); + GC.addFunctionInfo(FunctionInfo(Func2Addr, FuncSize, Func2Name)); + Error FinalizeErr = GC.finalize(llvm::nulls()); + ASSERT_FALSE(FinalizeErr); + SmallString<512> Str; + raw_svector_ostream OutStrm(Str); + FileWriter FW(OutStrm, ByteOrder); + llvm::Error Err = GC.encode(FW); + ASSERT_FALSE((bool)Err); + if (auto ExpectedGR = GsymReader::copyBuffer(OutStrm.str())) { + const GsymReader &GR = 
ExpectedGR.get(); + VerifyFunctionInfoError(GR, Func1Addr-1, "address 0xfff not in GSYM"); + + FunctionInfo Func1(Func1Addr, FuncSize, Func1Name); + VerifyFunctionInfo(GR, Func1Addr, Func1); + VerifyFunctionInfo(GR, Func1Addr+1, Func1); + VerifyFunctionInfo(GR, Func1Addr+FuncSize-1, Func1); + VerifyFunctionInfoError(GR, Func1Addr+FuncSize, + "address 0x1010 not in GSYM"); + VerifyFunctionInfoError(GR, Func2Addr-1, "address 0x101f not in GSYM"); + FunctionInfo Func2(Func2Addr, FuncSize, Func2Name); + VerifyFunctionInfo(GR, Func2Addr, Func2); + VerifyFunctionInfo(GR, Func2Addr+1, Func2); + VerifyFunctionInfo(GR, Func2Addr+FuncSize-1, Func2); + VerifyFunctionInfoError(GR, Func2Addr+FuncSize, + "address 0x1030 not in GSYM"); + } +} -- 2.40.0