From: Kostya Serebryany Date: Wed, 21 Sep 2016 22:42:17 +0000 (+0000) Subject: [libFuzzer] add stats to the corpus; more refactoring X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e4977b2b7e12d4bcbc2e82fedd6955920f64e52b;p=llvm [libFuzzer] add stats to the corpus; more refactoring git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@282121 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Fuzzer/FuzzerCorpus.h b/lib/Fuzzer/FuzzerCorpus.h index d42e7be475b..5a8581f8200 100644 --- a/lib/Fuzzer/FuzzerCorpus.h +++ b/lib/Fuzzer/FuzzerCorpus.h @@ -13,6 +13,7 @@ #define LLVM_FUZZER_CORPUS #include +#include #include "FuzzerDefs.h" #include "FuzzerRandom.h" @@ -22,6 +23,9 @@ namespace fuzzer { struct InputInfo { Unit U; // The actual input data. uint8_t Sha1[kSHA1NumBytes]; // Checksum. + // Stats. + uintptr_t NumExecutedMutations = 0; + uintptr_t NumSuccessfullMutations = 0; }; class InputCorpus { @@ -32,11 +36,7 @@ class InputCorpus { size_t size() const { return Inputs.size(); } bool empty() const { return Inputs.empty(); } const Unit &operator[] (size_t Idx) const { return Inputs[Idx].U; } - void Append(const std::vector<Unit> &V) { - for (auto &U : V) - push_back(U); - } - void push_back(const Unit &U) { + void AddToCorpus(const Unit &U) { auto H = Hash(U); if (!Hashes.insert(H).second) return; InputInfo II; @@ -51,7 +51,7 @@ class InputCorpus { ConstIter end() const { return Inputs.end(); } bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); } - const InputInfo &ChooseUnitToMutate(Random &Rand) { + InputInfo &ChooseUnitToMutate(Random &Rand) { return Inputs[ChooseUnitIdxToMutate(Rand)]; }; @@ -59,12 +59,20 @@ class InputCorpus { // Hypothesis: units added to the corpus last are more likely to be // interesting. This function gives more weight to the more recent units.
size_t ChooseUnitIdxToMutate(Random &Rand) { - size_t Idx = - static_cast<size_t>(CorpusDistribution(Rand.Get_mt19937())); + size_t Idx = static_cast<size_t>(CorpusDistribution(Rand.Get_mt19937())); assert(Idx < Inputs.size()); return Idx; } + void PrintStats() { + for (size_t i = 0; i < Inputs.size(); i++) { + const auto &II = Inputs[i]; + Printf(" [%zd %s]\tsz: %zd\truns: %zd\tsucc: %zd\n", i, + Sha1ToString(II.Sha1).c_str(), II.U.size(), + II.NumExecutedMutations, II.NumSuccessfullMutations); + } + } + private: // Updates the probability distribution for the units in the corpus. diff --git a/lib/Fuzzer/FuzzerDefs.h b/lib/Fuzzer/FuzzerDefs.h index b871771fab2..79a6f0111cb 100644 --- a/lib/Fuzzer/FuzzerDefs.h +++ b/lib/Fuzzer/FuzzerDefs.h @@ -43,6 +43,7 @@ class DictionaryEntry; class MutationDispatcher; struct FuzzingOptions; class InputCorpus; +struct InputInfo; struct ExternalFunctions; // Global interface to functions that may or may not be available. @@ -92,7 +93,7 @@ size_t GetPeakRSSMb(); static const int kSHA1NumBytes = 20; // Computes SHA1 hash of 'Len' bytes in 'Data', writes kSHA1NumBytes to 'Out'. void ComputeSHA1(const uint8_t *Data, size_t Len, uint8_t *Out); -std::string Sha1ToString(uint8_t Sha1[kSHA1NumBytes]); +std::string Sha1ToString(const uint8_t Sha1[kSHA1NumBytes]); // Changes U to contain only ASCII (isprint+isspace) characters. // Returns true iff U has been changed. diff --git a/lib/Fuzzer/FuzzerDriver.cpp b/lib/Fuzzer/FuzzerDriver.cpp index 253aa8e7108..99cc3682464 100644 --- a/lib/Fuzzer/FuzzerDriver.cpp +++ b/lib/Fuzzer/FuzzerDriver.cpp @@ -9,6 +9,7 @@ // FuzzerDriver and flag parsing.
//===----------------------------------------------------------------------===// +#include "FuzzerCorpus.h" #include "FuzzerInterface.h" #include "FuzzerInternal.h" #include "FuzzerMutate.h" @@ -335,7 +336,7 @@ int MinimizeCrashInput(const std::vector &Args) { return 0; } -int MinimizeCrashInputInternalStep(Fuzzer *F) { +int MinimizeCrashInputInternalStep(Fuzzer *F, InputCorpus *Corpus) { assert(Inputs->size() == 1); std::string InputFilePath = Inputs->at(0); Unit U = FileToVector(InputFilePath); @@ -345,7 +346,7 @@ int MinimizeCrashInputInternalStep(Fuzzer *F) { for (size_t I = 0; I < U.size(); I++) { std::copy(U.begin(), U.begin() + I, X.begin()); std::copy(U.begin() + I + 1, U.end(), X.begin() + I); - F->AddToCorpus(X); + Corpus->AddToCorpus(X); } F->SetMaxLen(U.size() - 1); F->Loop(); @@ -427,6 +428,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { !DoPlainRun || Flags.minimize_crash_internal_step; Options.PrintNewCovPcs = Flags.print_pcs; Options.PrintFinalStats = Flags.print_final_stats; + Options.PrintCorpusStats = Flags.print_corpus_stats; Options.PrintCoverage = Flags.print_coverage; Options.PruneCorpus = Flags.prune_corpus; @@ -443,7 +445,8 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { Random Rand(Seed); MutationDispatcher MD(Rand, Options); - Fuzzer F(Callback, MD, Options); + InputCorpus Corpus; + Fuzzer F(Callback, Corpus, MD, Options); for (auto &U: Dictionary) if (U.size() <= Word::GetMaxSize()) @@ -463,7 +466,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { if (Flags.handle_term) SetSigTermHandler(); if (Flags.minimize_crash_internal_step) - return MinimizeCrashInputInternalStep(&F); + return MinimizeCrashInputInternalStep(&F, &Corpus); if (DoPlainRun) { Options.SaveArtifacts = false; diff --git a/lib/Fuzzer/FuzzerFlags.def b/lib/Fuzzer/FuzzerFlags.def index fe488228a34..ff6f1c08acc 100644 --- a/lib/Fuzzer/FuzzerFlags.def +++ b/lib/Fuzzer/FuzzerFlags.def @@ -74,6 +74,8 @@ 
FUZZER_FLAG_STRING(exact_artifact_path, FUZZER_FLAG_INT(output_csv, 0, "Enable pulse output in CSV format.") FUZZER_FLAG_INT(print_pcs, 0, "If 1, print out newly covered PCs.") FUZZER_FLAG_INT(print_final_stats, 0, "If 1, print statistics at exit.") +FUZZER_FLAG_INT(print_corpus_stats, 0, + "If 1, print statistics on corpus elements at exit.") FUZZER_FLAG_INT(print_coverage, 0, "If 1, print coverage information at exit." " Experimental, only with trace-pc-guard") FUZZER_FLAG_INT(handle_segv, 1, "If 1, try to intercept SIGSEGV.") diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h index bc3f61a6e85..3142aac1187 100644 --- a/lib/Fuzzer/FuzzerInternal.h +++ b/lib/Fuzzer/FuzzerInternal.h @@ -18,14 +18,12 @@ #include #include #include -#include #include "FuzzerDefs.h" #include "FuzzerExtFunctions.h" #include "FuzzerInterface.h" #include "FuzzerOptions.h" #include "FuzzerValueBitMap.h" -#include "FuzzerCorpus.h" namespace fuzzer { @@ -64,15 +62,13 @@ public: size_t VPMapBits; }; - Fuzzer(UserCallback CB, MutationDispatcher &MD, FuzzingOptions Options); + Fuzzer(UserCallback CB, InputCorpus &Corpus, MutationDispatcher &MD, + FuzzingOptions Options); ~Fuzzer(); - void AddToCorpus(const Unit &U) { Corpus.push_back(U); } void Loop(); void ShuffleAndMinimize(UnitVector *V); void InitializeTraceState(); void AssignTaintLabels(uint8_t *Data, size_t Size); - size_t CorpusSize() const { return Corpus.size(); } - void ReadDir(const std::string &Path, long *Epoch, size_t MaxSize); void RereadOutputCorpus(size_t MaxSize); size_t secondsSinceProcessStartUp() { @@ -113,11 +109,10 @@ private: void CrashCallback(); void InterruptCallback(); void MutateAndTestOne(); - void ReportNewCoverage(const Unit &U); + void ReportNewCoverage(InputInfo *II, const Unit &U); void PrintNewPCs(); void PrintOneNewPC(uintptr_t PC); bool RunOne(const Unit &U) { return RunOne(U.data(), U.size()); } - void RunOneAndUpdateCorpus(const uint8_t *Data, size_t Size); void 
WriteToOutputCorpus(const Unit &U); void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix); void PrintStats(const char *Where, const char *End = "\n"); @@ -158,11 +153,11 @@ private: bool HasMoreMallocsThanFrees = false; size_t NumberOfLeakDetectionAttempts = 0; - InputCorpus Corpus; - UserCallback CB; + InputCorpus &Corpus; MutationDispatcher &MD; FuzzingOptions Options; + system_clock::time_point ProcessStartTime = system_clock::now(); system_clock::time_point UnitStartTime; long TimeOfLongestUnitInSeconds = 0; diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp index 0edc21db3ac..78063085726 100644 --- a/lib/Fuzzer/FuzzerLoop.cpp +++ b/lib/Fuzzer/FuzzerLoop.cpp @@ -10,6 +10,7 @@ //===----------------------------------------------------------------------===// #include "FuzzerInternal.h" +#include "FuzzerCorpus.h" #include "FuzzerMutate.h" #include "FuzzerTracePC.h" #include "FuzzerRandom.h" @@ -157,8 +158,9 @@ void FreeHook(const volatile void *ptr) { AllocTracer.Frees++; } -Fuzzer::Fuzzer(UserCallback CB, MutationDispatcher &MD, FuzzingOptions Options) - : CB(CB), MD(MD), Options(Options) { +Fuzzer::Fuzzer(UserCallback CB, InputCorpus &Corpus, MutationDispatcher &MD, + FuzzingOptions Options) + : CB(CB), Corpus(Corpus), MD(MD), Options(Options) { SetDeathCallback(); InitializeTraceState(); assert(!F); @@ -337,6 +339,8 @@ void Fuzzer::PrintStats(const char *Where, const char *End) { void Fuzzer::PrintFinalStats() { if (Options.PrintCoverage) TPC.PrintCoverage(); + if (Options.PrintCorpusStats) + Corpus.PrintStats(); if (!Options.PrintFinalStats) return; size_t ExecPerSec = execPerSec(); Printf("stat::number_of_executed_units: %zd\n", TotalNumberOfRuns); @@ -353,14 +357,6 @@ void Fuzzer::SetMaxLen(size_t MaxLen) { Printf("INFO: -max_len is not provided, using %zd\n", Options.MaxLen); } -void Fuzzer::ReadDir(const std::string &Path, long *Epoch, size_t MaxSize) { - Printf("Loading corpus: %s\n", Path.c_str()); - std::vector<Unit> V; -
ReadDirToVectorOfUnits(Path.c_str(), &V, Epoch, MaxSize); - for (auto &U : V) - Corpus.push_back(U); -} - void Fuzzer::RereadOutputCorpus(size_t MaxSize) { if (Options.OutputCorpus.empty() || !Options.Reload) return; std::vector<Unit> AdditionalCorpus; @@ -373,7 +369,7 @@ void Fuzzer::RereadOutputCorpus(size_t MaxSize) { X.resize(MaxSize); if (!Corpus.HasUnit(X)) { if (RunOne(X)) { - Corpus.push_back(X); + Corpus.AddToCorpus(X); PrintStats("RELOAD"); } } @@ -396,7 +392,7 @@ void Fuzzer::ShuffleAndMinimize(UnitVector *InitialCorpus) { for (const auto &U : *InitialCorpus) { bool NewCoverage = RunOne(U); if (!Options.PruneCorpus || NewCoverage) { - Corpus.push_back(U); + Corpus.AddToCorpus(U); if (Options.Verbosity >= 2) Printf("NEW0: %zd L %zd\n", MaxCoverage.BlockCoverage, U.size()); } @@ -439,13 +435,6 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size) { return Res; } -void Fuzzer::RunOneAndUpdateCorpus(const uint8_t *Data, size_t Size) { - if (TotalNumberOfRuns >= Options.MaxNumberOfRuns) - return; - if (RunOne(Data, Size)) - ReportNewCoverage({Data, Data + Size}); -} - size_t Fuzzer::GetCurrentUnitInFuzzingThead(const uint8_t **Data) const { assert(InFuzzingThread()); *Data = CurrentUnitData; @@ -539,8 +528,9 @@ void Fuzzer::PrintNewPCs() { PrintOneNewPC(PCs[i]); } -void Fuzzer::ReportNewCoverage(const Unit &U) { - Corpus.push_back(U); +void Fuzzer::ReportNewCoverage(InputInfo *II, const Unit &U) { + II->NumSuccessfullMutations++; + Corpus.AddToCorpus(U); MD.RecordSuccessfulMutationSequence(); PrintStatusForNewUnit(U); WriteToOutputCorpus(U); @@ -653,7 +643,7 @@ void Fuzzer::MutateAndTestOne() { LazyAllocateCurrentUnitData(); MD.StartMutationSequence(); - const auto &II = Corpus.ChooseUnitToMutate(MD.GetRand()); + auto &II = Corpus.ChooseUnitToMutate(MD.GetRand()); const auto &U = II.U; memcpy(BaseSha1, II.Sha1, sizeof(BaseSha1)); assert(CurrentUnitData); @@ -662,6 +652,8 @@ void Fuzzer::MutateAndTestOne() { memcpy(CurrentUnitData, U.data(), Size); for (int i =
0; i < Options.MutateDepth; i++) { + if (TotalNumberOfRuns >= Options.MaxNumberOfRuns) + break; size_t NewSize = 0; NewSize = MD.Mutate(CurrentUnitData, Size, Options.MaxLen); assert(NewSize > 0 && "Mutator returned empty unit"); @@ -669,7 +661,9 @@ void Fuzzer::MutateAndTestOne() { Size = NewSize; if (i == 0) StartTraceRecording(); - RunOneAndUpdateCorpus(CurrentUnitData, Size); + II.NumExecutedMutations++; + if (RunOne(CurrentUnitData, Size)) + ReportNewCoverage(&II, {CurrentUnitData, CurrentUnitData + Size}); StopTraceRecording(); TryDetectingAMemoryLeak(CurrentUnitData, Size, /*DuringInitialCorpusExecution*/ false); diff --git a/lib/Fuzzer/FuzzerMutate.cpp b/lib/Fuzzer/FuzzerMutate.cpp index b243aa653c3..69ef9640d77 100644 --- a/lib/Fuzzer/FuzzerMutate.cpp +++ b/lib/Fuzzer/FuzzerMutate.cpp @@ -10,7 +10,6 @@ //===----------------------------------------------------------------------===// #include -#include #include "FuzzerCorpus.h" #include "FuzzerDefs.h" diff --git a/lib/Fuzzer/FuzzerOptions.h b/lib/Fuzzer/FuzzerOptions.h index fc8141caca9..2c0970647e1 100644 --- a/lib/Fuzzer/FuzzerOptions.h +++ b/lib/Fuzzer/FuzzerOptions.h @@ -45,6 +45,7 @@ struct FuzzingOptions { bool OutputCSV = false; bool PrintNewCovPcs = false; bool PrintFinalStats = false; + bool PrintCorpusStats = false; bool PrintCoverage = false; bool DetectLeaks = true; bool PruneCorpus = true; diff --git a/lib/Fuzzer/FuzzerUtil.cpp b/lib/Fuzzer/FuzzerUtil.cpp index e4d8352ee82..a21570c13fd 100644 --- a/lib/Fuzzer/FuzzerUtil.cpp +++ b/lib/Fuzzer/FuzzerUtil.cpp @@ -59,7 +59,7 @@ void PrintASCII(const Unit &U, const char *PrintAfter) { PrintASCII(U.data(), U.size(), PrintAfter); } -std::string Sha1ToString(uint8_t Sha1[kSHA1NumBytes]) { +std::string Sha1ToString(const uint8_t Sha1[kSHA1NumBytes]) { std::stringstream SS; for (int i = 0; i < kSHA1NumBytes; i++) SS << std::hex << std::setfill('0') << std::setw(2) << (unsigned)Sha1[i]; diff --git a/lib/Fuzzer/test/FuzzerUnittest.cpp 
b/lib/Fuzzer/test/FuzzerUnittest.cpp index 4bad901ac59..fdde1d3fbb9 100644 --- a/lib/Fuzzer/test/FuzzerUnittest.cpp +++ b/lib/Fuzzer/test/FuzzerUnittest.cpp @@ -5,6 +5,7 @@ // with ASan) involving C++ standard library types when using libcxx. #define _LIBCPP_HAS_NO_ASAN +#include "FuzzerCorpus.h" #include "FuzzerInternal.h" #include "FuzzerDictionary.h" #include "FuzzerMutate.h" @@ -582,7 +583,7 @@ TEST(Corpus, Distribution) { size_t N = 10; size_t TriesPerUnit = 1<<20; for (size_t i = 0; i < N; i++) - C.push_back(Unit{ static_cast<uint8_t>(i) }); + C.AddToCorpus(Unit{ static_cast<uint8_t>(i) }); std::vector<size_t> Hist(N); for (size_t i = 0; i < N * TriesPerUnit; i++) {