]> granicus.if.org Git - llvm/commitdiff
Minidump: Add support for reading/writing strings
authorPavel Labath <pavel@labath.sk>
Fri, 5 Apr 2019 08:06:26 +0000 (08:06 +0000)
committerPavel Labath <pavel@labath.sk>
Fri, 5 Apr 2019 08:06:26 +0000 (08:06 +0000)
Summary:
Strings in minidump files are stored as a 32-bit length field, giving
the length of the string in *bytes*, which is followed by the
appropriate number of UTF16 code units. The string is also supposed to
be null-terminated, and the null-terminator is not a part of the length
field. This patch:
- adds support for reading these strings out of the minidump file (this
  implementation does not depend on proper null-termination)
- adds support for writing them to a minidump file
- using the previous two pieces implements proper (de)serialization of
  the CSDVersion field of the SystemInfo stream. Previously, this was
  only read/written as hex, and no attempt was made to access the
  referenced string -- now this string is read and written correctly.

The changes are tested via yaml2obj|obj2yaml round-trip as well as a
unit test which checks the corner cases of the string deserialization
logic.

Reviewers: jhenderson, zturner, clayborg

Subscribers: llvm-commits, aprantl, markmentovai, amccarth, lldb-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D59775

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@357749 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/Object/Minidump.h
include/llvm/ObjectYAML/MinidumpYAML.h
lib/Object/Minidump.cpp
lib/ObjectYAML/MinidumpYAML.cpp
test/tools/obj2yaml/basic-minidump.yaml
unittests/Object/MinidumpTest.cpp
unittests/ObjectYAML/MinidumpYAMLTest.cpp

index bb7d95bd546df398cc1e5f79ac8db84c730c11bd..63a43d5bd88668c7901c823e35bddedcd56918cf 100644 (file)
@@ -43,6 +43,10 @@ public:
   /// file does not contain a stream of this type.
   Optional<ArrayRef<uint8_t>> getRawStream(minidump::StreamType Type) const;
 
+  /// Returns the minidump string at the given offset. An error is returned if
+  /// we fail to parse the string, or the string is invalid UTF16.
+  Expected<std::string> getString(size_t Offset) const;
+
   /// Returns the contents of the SystemInfo stream, cast to the appropriate
   /// type. An error is returned if the file does not contain this stream, or
   /// the stream is smaller than the size of the SystemInfo structure. The
index 98c918b1e37b514fe111e997df2adeb2f4729305..0965d6484dc709dddbc52f219cbcd3c93853a0c5 100644 (file)
@@ -67,10 +67,12 @@ struct RawContentStream : public Stream {
 /// SystemInfo minidump stream.
 struct SystemInfoStream : public Stream {
   minidump::SystemInfo Info;
+  std::string CSDVersion;
 
-  explicit SystemInfoStream(const minidump::SystemInfo &Info)
+  explicit SystemInfoStream(const minidump::SystemInfo &Info,
+                            std::string CSDVersion)
       : Stream(StreamKind::SystemInfo, minidump::StreamType::SystemInfo),
-        Info(Info) {}
+        Info(Info), CSDVersion(std::move(CSDVersion)) {}
 
   SystemInfoStream()
       : Stream(StreamKind::SystemInfo, minidump::StreamType::SystemInfo) {
index d0d49b4e8d33d580328f8cb82b0c02f74bd4dbe5..3e3255a2f17cb663a1ea83a63718a95b07dccc0c 100644 (file)
@@ -8,6 +8,7 @@
 
 #include "llvm/Object/Minidump.h"
 #include "llvm/Object/Error.h"
+#include "llvm/Support/ConvertUTF.h"
 
 using namespace llvm;
 using namespace llvm::object;
@@ -21,6 +22,33 @@ MinidumpFile::getRawStream(minidump::StreamType Type) const {
   return None;
 }
 
+Expected<std::string> MinidumpFile::getString(size_t Offset) const {
+  // Minidump strings consist of a 32-bit length field, which gives the size of
+  // the string in *bytes*. This is followed by the actual string encoded in
+  // UTF16.
+  auto ExpectedSize =
+      getDataSliceAs<support::ulittle32_t>(getData(), Offset, 1);
+  if (!ExpectedSize)
+    return ExpectedSize.takeError();
+  size_t Size = (*ExpectedSize)[0];
+  if (Size % 2 != 0)
+    return createError("String size not even");
+  Size /= 2;
+  if (Size == 0)
+    return "";
+
+  Offset += sizeof(support::ulittle32_t);
+  auto ExpectedData = getDataSliceAs<UTF16>(getData(), Offset, Size);
+  if (!ExpectedData)
+    return ExpectedData.takeError();
+
+  std::string Result;
+  if (!convertUTF16ToUTF8String(*ExpectedData, Result))
+    return createError("String decoding failed");
+
+  return Result;
+}
+
 Expected<ArrayRef<uint8_t>>
 MinidumpFile::getDataSlice(ArrayRef<uint8_t> Data, size_t Offset, size_t Size) {
   // Check for overflow.
index a2e0b209da50c257b11747bfbadfbd5bfc0a8cb8..38d23d9dfc65dcd2bf9a579e7532eadb1253c380 100644 (file)
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ObjectYAML/MinidumpYAML.h"
+#include "llvm/Support/ConvertUTF.h"
 
 using namespace llvm;
 using namespace llvm::MinidumpYAML;
@@ -39,6 +40,8 @@ public:
     return allocateArray(makeArrayRef(Data));
   }
 
+  size_t allocateString(StringRef Str);
+
   void writeTo(raw_ostream &OS) const;
 
 private:
@@ -48,6 +51,26 @@ private:
 };
 } // namespace
 
+size_t BlobAllocator::allocateString(StringRef Str) {
+  SmallVector<UTF16, 32> WStr;
+  bool OK = convertUTF8ToUTF16String(Str, WStr);
+  assert(OK && "Invalid UTF8 in Str?");
+  (void)OK;
+
+  SmallVector<support::ulittle16_t, 32> EndianStr(WStr.size() + 1,
+                                                  support::ulittle16_t());
+  copy(WStr, EndianStr.begin());
+  return allocateCallback(
+      sizeof(uint32_t) + EndianStr.size() * sizeof(support::ulittle16_t),
+      [EndianStr](raw_ostream &OS) {
+        // Length does not include the null-terminator.
+        support::ulittle32_t Length(2 * (EndianStr.size() - 1));
+        OS.write(reinterpret_cast<const char *>(&Length), sizeof(Length));
+        OS.write(reinterpret_cast<const char *>(EndianStr.begin()),
+                 sizeof(support::ulittle16_t) * EndianStr.size());
+      });
+}
+
 void BlobAllocator::writeTo(raw_ostream &OS) const {
   size_t BeginOffset = OS.tell();
   for (const auto &Callback : Callbacks)
@@ -269,7 +292,7 @@ static void streamMapping(yaml::IO &IO, SystemInfoStream &Stream) {
   mapOptional(IO, "Minor Version", Info.MinorVersion, 0);
   mapOptional(IO, "Build Number", Info.BuildNumber, 0);
   IO.mapRequired("Platform ID", Info.PlatformId);
-  mapOptionalHex(IO, "CSD Version RVA", Info.CSDVersionRVA, 0);
+  IO.mapOptional("CSD Version", Stream.CSDVersion, "");
   mapOptionalHex(IO, "Suite Mask", Info.SuiteMask, 0);
   mapOptionalHex(IO, "Reserved", Info.Reserved, 0);
   switch (static_cast<ProcessorArchitecture>(Info.ProcessorArch)) {
@@ -337,6 +360,7 @@ static Directory layout(BlobAllocator &File, Stream &S) {
   Directory Result;
   Result.Type = S.Type;
   Result.Location.RVA = File.tell();
+  Optional<size_t> DataEnd;
   switch (S.Kind) {
   case Stream::StreamKind::RawContent: {
     RawContentStream &Raw = cast<RawContentStream>(S);
@@ -347,14 +371,22 @@ static Directory layout(BlobAllocator &File, Stream &S) {
     });
     break;
   }
-  case Stream::StreamKind::SystemInfo:
-    File.allocateObject(cast<SystemInfoStream>(S).Info);
+  case Stream::StreamKind::SystemInfo: {
+    SystemInfoStream &SystemInfo = cast<SystemInfoStream>(S);
+    File.allocateObject(SystemInfo.Info);
+    // The CSD string is not a part of the stream.
+    DataEnd = File.tell();
+    SystemInfo.Info.CSDVersionRVA = File.allocateString(SystemInfo.CSDVersion);
     break;
+  }
   case Stream::StreamKind::TextContent:
     File.allocateArray(arrayRefFromStringRef(cast<TextContentStream>(S).Text));
     break;
   }
-  Result.Location.DataSize = File.tell() - Result.Location.RVA;
+  // If DataEnd is not set, we assume everything we generated is a part of the
+  // stream.
+  Result.Location.DataSize =
+      DataEnd.getValueOr(File.tell()) - Result.Location.RVA;
   return Result;
 }
 
@@ -395,7 +427,11 @@ Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) {
     auto ExpectedInfo = File.getSystemInfo();
     if (!ExpectedInfo)
       return ExpectedInfo.takeError();
-    return make_unique<SystemInfoStream>(*ExpectedInfo);
+    auto ExpectedCSDVersion = File.getString(ExpectedInfo->CSDVersionRVA);
+    if (!ExpectedCSDVersion)
+      return ExpectedInfo.takeError();
+    return make_unique<SystemInfoStream>(*ExpectedInfo,
+                                         std::move(*ExpectedCSDVersion));
   }
   case StreamKind::TextContent:
     return make_unique<TextContentStream>(
index ce2c1b197e10671956e436224eb67a44cd5f8b31..aebaafa88a9cb21b574ac11435a6bad963629e46 100644 (file)
@@ -5,7 +5,7 @@ Streams:
   - Type:            SystemInfo
     Processor Arch:  ARM64
     Platform ID:     Linux
-    CSD Version RVA: 0x01020304
+    CSD Version:     Linux 3.13.0-91-generic
     CPU:             
       CPUID:           0x05060708
   - Type:            LinuxAuxv
@@ -22,7 +22,7 @@ Streams:
 # CHECK-NEXT:   - Type:            SystemInfo
 # CHECK-NEXT:     Processor Arch:  ARM64
 # CHECK-NEXT:     Platform ID:     Linux
-# CHECK-NEXT:     CSD Version RVA: 0x01020304
+# CHECK-NEXT:     CSD Version:     Linux 3.13.0-91-generic
 # CHECK-NEXT:     CPU:             
 # CHECK-NEXT:       CPUID:           0x05060708
 # CHECK-NEXT:   - Type:            LinuxAuxv
index 97fde12c0f8cb47eecb7a075e458abfa02b064e7..06034e1e5adb4b30b91af1a6a1ce3fa78ef5e11b 100644 (file)
@@ -249,3 +249,38 @@ TEST(MinidumpFile, getSystemInfo) {
   EXPECT_EQ(0x08070605u, Info.CPU.X86.FeatureInfo);
   EXPECT_EQ(0x02010009u, Info.CPU.X86.AMDExtendedFeatures);
 }
+
+TEST(MinidumpFile, getString) {
+  std::vector<uint8_t> ManyStrings{
+      // Header
+      'M', 'D', 'M', 'P', 0x93, 0xa7, 0, 0, // Signature, Version
+      2, 0, 0, 0,                           // NumberOfStreams,
+      0x20, 0, 0, 0,                        // StreamDirectoryRVA
+      0, 1, 2, 3, 4, 5, 6, 7,               // Checksum, TimeDateStamp
+      8, 9, 0, 1, 2, 3, 4, 5,               // Flags
+                                            // Stream Directory
+      0, 0, 0, 0, 0, 0, 0, 0,               // Type, DataSize,
+      0x20, 0, 0, 0,                        // RVA
+      1, 0, 0, 0, 0, 0,                     // String1 - odd length
+      0, 0, 1, 0, 0, 0,                     // String2 - too long
+      2, 0, 0, 0, 0, 0xd8,                  // String3 - invalid utf16
+      0, 0, 0, 0, 0, 0,                     // String4 - ""
+      2, 0, 0, 0, 'a', 0,                   // String5 - "a"
+      0,                                    // Mis-align next string
+      2, 0, 0, 0, 'a', 0,                   // String6 - "a"
+
+  };
+  auto ExpectedFile = create(ManyStrings);
+  ASSERT_THAT_EXPECTED(ExpectedFile, Succeeded());
+  const MinidumpFile &File = **ExpectedFile;
+  EXPECT_THAT_EXPECTED(File.getString(44), Failed<BinaryError>());
+  EXPECT_THAT_EXPECTED(File.getString(50), Failed<BinaryError>());
+  EXPECT_THAT_EXPECTED(File.getString(56), Failed<BinaryError>());
+  EXPECT_THAT_EXPECTED(File.getString(62), HasValue(""));
+  EXPECT_THAT_EXPECTED(File.getString(68), HasValue("a"));
+  EXPECT_THAT_EXPECTED(File.getString(75), HasValue("a"));
+
+  // Check the case when the size field does not fit into the remaining data.
+  EXPECT_THAT_EXPECTED(File.getString(ManyStrings.size() - 2),
+                       Failed<BinaryError>());
+}
index d172ccf93011b3e9e7cce4b765f8ffac5fdaf8c7..66d44e0675964a71034ce9c08851536a50b54630 100644 (file)
@@ -33,7 +33,6 @@ Streams:
   - Type:            SystemInfo
     Processor Arch:  ARM64
     Platform ID:     Linux
-    CSD Version RVA: 0x01020304
     CPU:
       CPUID:           0x05060708
   - Type:            LinuxMaps
@@ -54,7 +53,6 @@ Streams:
   const SystemInfo &SysInfo = *ExpectedSysInfo;
   EXPECT_EQ(ProcessorArchitecture::ARM64, SysInfo.ProcessorArch);
   EXPECT_EQ(OSPlatform::Linux, SysInfo.PlatformId);
-  EXPECT_EQ(0x01020304u, SysInfo.CSDVersionRVA);
   EXPECT_EQ(0x05060708u, SysInfo.CPU.Arm.CPUID);
 
   EXPECT_EQ(StreamType::LinuxMaps, File.streams()[1].Type);