From: Zachary Turner Date: Mon, 20 Mar 2017 23:33:18 +0000 (+0000) Subject: Add a function to MD5 a file's contents. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e40105539c1633507fcc94ed1286f62d36461494;p=llvm Add a function to MD5 a file's contents. In doing so, clean up the MD5 interface a little. Most existing users only care about the lower 8 bytes of an MD5, but for some users that care about the upper and lower, there wasn't a good interface. Furthermore, consumers of the MD5 checksum were required to handle endianness details on their own, so it seems reasonable to abstract this into a nicer interface that just gives you the right value. Differential Revision: https://reviews.llvm.org/D31105 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298322 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h index ab8cbf7567d..ebb493d30ad 100644 --- a/include/llvm/Support/FileSystem.h +++ b/include/llvm/Support/FileSystem.h @@ -33,6 +33,7 @@ #include "llvm/Support/Chrono.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MD5.h" #include #include #include @@ -399,6 +400,16 @@ std::error_code copy_file(const Twine &From, const Twine &To); /// platform-specific error_code. std::error_code resize_file(int FD, uint64_t Size); +/// @brief Compute an MD5 hash of a file's contents. +/// +/// @param FD Input file descriptor. +/// @returns An MD5Result with the hash computed, if successful, otherwise a +/// std::error_code. +ErrorOr md5_contents(int FD); + +/// @brief Version of md5_contents that doesn't require an open file descriptor. 
+ErrorOr md5_contents(const Twine &Path); + /// @} /// @name Physical Observers /// @{ diff --git a/include/llvm/Support/MD5.h b/include/llvm/Support/MD5.h index 190cf4cbc52..2c0dc76485f 100644 --- a/include/llvm/Support/MD5.h +++ b/include/llvm/Support/MD5.h @@ -52,7 +52,32 @@ class MD5 { MD5_u32plus block[16]; public: - typedef uint8_t MD5Result[16]; + struct MD5Result { + std::array Bytes; + + operator std::array() const { return Bytes; } + + const uint8_t &operator[](size_t I) const { return Bytes[I]; } + uint8_t &operator[](size_t I) { return Bytes[I]; } + + SmallString<32> digest() const; + + uint64_t low() const { + // Our MD5 implementation returns the result in little endian, so the low + // word is first. + using namespace support; + return endian::read(Bytes.data()); + } + + uint64_t high() const { + using namespace support; + return endian::read(Bytes.data() + 8); + } + std::pair words() const { + using namespace support; + return std::make_pair(high(), low()); + } + }; MD5(); @@ -76,6 +101,10 @@ private: const uint8_t *body(ArrayRef Data); }; +inline bool operator==(const MD5::MD5Result &LHS, const MD5::MD5Result &RHS) { + return LHS.Bytes == RHS.Bytes; +} + /// Helper to compute and return lower 64 bits of the given string's MD5 hash. inline uint64_t MD5Hash(StringRef Str) { using namespace support; @@ -84,9 +113,8 @@ inline uint64_t MD5Hash(StringRef Str) { Hash.update(Str); MD5::MD5Result Result; Hash.final(Result); - // Return the least significant 8 bytes. Our MD5 implementation returns the - // result in little endian, so we may need to swap bytes. - return endian::read(Result); + // Return the least significant word. 
+ return Result.low(); } } // end namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp index d8ecc7ccfb9..8e3b88d0af0 100644 --- a/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -490,9 +490,9 @@ uint64_t DIEHash::computeCUSignature(const DIE &Die) { Hash.final(Result); // ... take the least significant 8 bytes and return those. Our MD5 - // implementation always returns its results in little endian, swap bytes - // appropriately. - return support::endian::read64le(Result + 8); + // implementation always returns its results in little endian, so we actually + // need the "high" word. + return Result.high(); } /// This is based on the type signature computation given in section 7.27 of the @@ -514,7 +514,7 @@ uint64_t DIEHash::computeTypeSignature(const DIE &Die) { Hash.final(Result); // ... take the least significant 8 bytes and return those. Our MD5 - // implementation always returns its results in little endian, swap bytes - // appropriately. - return support::endian::read64le(Result + 8); + // implementation always returns its results in little endian, so we actually + // need the "high" word. + return Result.high(); } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 0b9bacd86e5..15e5be2e65d 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -39,7 +39,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/LEB128.h" @@ -1945,11 +1944,11 @@ uint64_t DwarfDebug::makeTypeSignature(StringRef Identifier) { MD5 Hash; Hash.update(Identifier); // ... take the least significant 8 bytes and return those. Our MD5 - // implementation always returns its results in little endian, swap bytes - // appropriately. 
+ // implementation always returns its results in little endian, so we actually + // need the "high" word. MD5::MD5Result Result; Hash.final(Result); - return support::endian::read64le(Result + 8); + return Result.high(); } void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, diff --git a/lib/Support/MD5.cpp b/lib/Support/MD5.cpp index 809dbbce708..bdbf1d67793 100644 --- a/lib/Support/MD5.cpp +++ b/lib/Support/MD5.cpp @@ -261,10 +261,16 @@ void MD5::final(MD5Result &Result) { support::endian::write32le(&Result[12], d); } -void MD5::stringifyResult(MD5Result &Result, SmallString<32> &Str) { +SmallString<32> MD5::MD5Result::digest() const { + SmallString<32> Str; raw_svector_ostream Res(Str); for (int i = 0; i < 16; ++i) - Res << format("%.2x", Result[i]); + Res << format("%.2x", Bytes[i]); + return Str; +} + +void MD5::stringifyResult(MD5Result &Result, SmallString<32> &Str) { + Str = Result.digest(); } std::array MD5::hash(ArrayRef Data) { @@ -273,7 +279,5 @@ std::array MD5::hash(ArrayRef Data) { MD5::MD5Result Res; Hash.final(Res); - std::array Arr; - memcpy(Arr.data(), Res, sizeof(Res)); - return Arr; + return Res; } diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp index ffb8ab22088..9fd6652ce4b 100644 --- a/lib/Support/Path.cpp +++ b/lib/Support/Path.cpp @@ -11,13 +11,14 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/Path.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/Support/COFF.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Errc.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" -#include "llvm/Support/Path.h" +#include "llvm/Support/MachO.h" #include "llvm/Support/Process.h" #include #include @@ -924,6 +925,36 @@ std::error_code copy_file(const Twine &From, const Twine &To) { return std::error_code(); } +ErrorOr md5_contents(int FD) { + MD5 Hash; + + constexpr size_t BufSize = 4096; + std::vector 
Buf(BufSize); + int BytesRead = 0; + for (;;) { + BytesRead = read(FD, Buf.data(), BufSize); + if (BytesRead <= 0) + break; + Hash.update(makeArrayRef(Buf.data(), BytesRead)); + } + + if (BytesRead < 0) + return std::error_code(errno, std::generic_category()); + MD5::MD5Result Result; + Hash.final(Result); + return Result; +} + +ErrorOr md5_contents(const Twine &Path) { + int FD; + if (auto EC = openFileForRead(Path, FD)) + return EC; + + auto Result = md5_contents(FD); + close(FD); + return Result; +} + bool exists(file_status status) { return status_known(status) && status.type() != file_type::file_not_found; } diff --git a/unittests/Support/MD5Test.cpp b/unittests/Support/MD5Test.cpp index 4d790254503..fa9372fde33 100644 --- a/unittests/Support/MD5Test.cpp +++ b/unittests/Support/MD5Test.cpp @@ -63,8 +63,10 @@ TEST(MD5HashTest, MD5) { std::array Vec = MD5::hash(Input); MD5::MD5Result MD5Res; SmallString<32> Res; - memcpy(MD5Res, Vec.data(), Vec.size()); + memcpy(MD5Res.Bytes.data(), Vec.data(), Vec.size()); MD5::stringifyResult(MD5Res, Res); EXPECT_EQ(Res, "c3fcd3d76192e4007dfb496cca67e13b"); + EXPECT_EQ(0x3be167ca6c49fb7dULL, MD5Res.high()); + EXPECT_EQ(0x00e49261d7d3fcc3ULL, MD5Res.low()); } } diff --git a/unittests/Support/Path.cpp b/unittests/Support/Path.cpp index edda1cd6d24..4883adef165 100644 --- a/unittests/Support/Path.cpp +++ b/unittests/Support/Path.cpp @@ -1011,6 +1011,20 @@ TEST_F(FileSystemTest, Resize) { ASSERT_NO_ERROR(fs::remove(TempPath)); } +TEST_F(FileSystemTest, MD5) { + int FD; + SmallString<64> TempPath; + ASSERT_NO_ERROR(fs::createTemporaryFile("prefix", "temp", FD, TempPath)); + StringRef Data("abcdefghijklmnopqrstuvwxyz"); + write(FD, Data.data(), Data.size()); + lseek(FD, 0, SEEK_SET); + auto Hash = fs::md5_contents(FD); + ::close(FD); + ASSERT_NO_ERROR(Hash.getError()); + + EXPECT_STREQ("c3fcd3d76192e4007dfb496cca67e13b", Hash->digest().c_str()); +} + TEST_F(FileSystemTest, FileMapping) { // Create a temp file. int FileDescriptor;