//===- FuzzerInternal.h - Internal header for the Fuzzer --------*- C++ -* ===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // Define the main class fuzzer::Fuzzer and most functions. //===----------------------------------------------------------------------===// #ifndef LLVM_FUZZER_INTERNAL_H #define LLVM_FUZZER_INTERNAL_H #include #include #include #include #include #include #include #include #include #include #include #include // Platform detection. #ifdef __linux__ #define LIBFUZZER_LINUX 1 #define LIBFUZZER_APPLE 0 #elif __APPLE__ #define LIBFUZZER_LINUX 0 #define LIBFUZZER_APPLE 1 #else #error "Support for your platform has not been implemented" #endif #ifdef __x86_64 #define ATTRIBUTE_TARGET_POPCNT __attribute__((target("popcnt"))) #else #define ATTRIBUTE_TARGET_POPCNT #endif #include "FuzzerExtFunctions.h" #include "FuzzerInterface.h" #include "FuzzerValueBitMap.h" namespace fuzzer { typedef int (*UserCallback)(const uint8_t *Data, size_t Size); int FuzzerDriver(int *argc, char ***argv, UserCallback Callback); using namespace std::chrono; typedef std::vector Unit; typedef std::vector UnitVector; // A simple POD sized array of bytes. template class FixedWord { public: FixedWord() {} FixedWord(const uint8_t *B, uint8_t S) { Set(B, S); } void Set(const uint8_t *B, uint8_t S) { assert(S <= kMaxSize); memcpy(Data, B, S); Size = S; } bool operator==(const FixedWord &w) const { return Size == w.Size && 0 == memcmp(Data, w.Data, Size); } bool operator<(const FixedWord &w) const { if (Size != w.Size) return Size < w.Size; return memcmp(Data, w.Data, Size) < 0; } static size_t GetMaxSize() { return kMaxSize; } const uint8_t *data() const { return Data; } uint8_t size() const { return Size; } private: uint8_t Size = 0; uint8_t Data[kMaxSize]; }; typedef FixedWord<27> Word; // 28 bytes. bool IsFile(const std::string &Path); long GetEpoch(const std::string &Path); std::string FileToString(const std::string &Path); Unit FileToVector(const std::string &Path, size_t MaxSize = 0); void ReadDirToVectorOfUnits(const char *Path, std::vector *V, long *Epoch, size_t MaxSize); void WriteToFile(const Unit &U, const std::string &Path); void CopyFileToErr(const std::string &Path); // Returns "Dir/FileName" or equivalent for the current OS. std::string DirPlusFile(const std::string &DirPath, const std::string &FileName); void DupAndCloseStderr(); void CloseStdout(); void Printf(const char *Fmt, ...); void PrintHexArray(const Unit &U, const char *PrintAfter = ""); void PrintHexArray(const uint8_t *Data, size_t Size, const char *PrintAfter = ""); void PrintASCII(const uint8_t *Data, size_t Size, const char *PrintAfter = ""); void PrintASCII(const Unit &U, const char *PrintAfter = ""); void PrintASCII(const Word &W, const char *PrintAfter = ""); void PrintPC(const char *SymbolizedFMT, const char *FallbackFMT, uintptr_t PC); std::string Hash(const Unit &U); void SetTimer(int Seconds); void SetSigSegvHandler(); void SetSigBusHandler(); void SetSigAbrtHandler(); void SetSigIllHandler(); void SetSigFpeHandler(); void SetSigIntHandler(); void SetSigTermHandler(); std::string Base64(const Unit &U); int ExecuteCommand(const std::string &Command); size_t GetPeakRSSMb(); // Private copy of SHA1 implementation. static const int kSHA1NumBytes = 20; // Computes SHA1 hash of 'Len' bytes in 'Data', writes kSHA1NumBytes to 'Out'. void ComputeSHA1(const uint8_t *Data, size_t Len, uint8_t *Out); std::string Sha1ToString(uint8_t Sha1[kSHA1NumBytes]); // Changes U to contain only ASCII (isprint+isspace) characters. // Returns true iff U has been changed. bool ToASCII(uint8_t *Data, size_t Size); bool IsASCII(const Unit &U); bool IsASCII(const uint8_t *Data, size_t Size); int NumberOfCpuCores(); int GetPid(); void SleepSeconds(int Seconds); // See FuzzerTraceState.cpp void EnableValueProfile(); size_t VPMapMergeFromCurrent(ValueBitMap &M); class Random { public: Random(unsigned int seed) : R(seed) {} size_t Rand() { return R(); } size_t RandBool() { return Rand() % 2; } size_t operator()(size_t n) { return n ? Rand() % n : 0; } std::mt19937 &Get_mt19937() { return R; } private: std::mt19937 R; }; // Dictionary. // Parses one dictionary entry. // If successfull, write the enty to Unit and returns true, // otherwise returns false. bool ParseOneDictionaryEntry(const std::string &Str, Unit *U); // Parses the dictionary file, fills Units, returns true iff all lines // were parsed succesfully. bool ParseDictionaryFile(const std::string &Text, std::vector *Units); class DictionaryEntry { public: DictionaryEntry() {} DictionaryEntry(Word W) : W(W) {} DictionaryEntry(Word W, size_t PositionHint) : W(W), PositionHint(PositionHint) {} const Word &GetW() const { return W; } bool HasPositionHint() const { return PositionHint != std::numeric_limits::max(); } size_t GetPositionHint() const { assert(HasPositionHint()); return PositionHint; } void IncUseCount() { UseCount++; } void IncSuccessCount() { SuccessCount++; } size_t GetUseCount() const { return UseCount; } size_t GetSuccessCount() const {return SuccessCount; } private: Word W; size_t PositionHint = std::numeric_limits::max(); size_t UseCount = 0; size_t SuccessCount = 0; }; class Dictionary { public: static const size_t kMaxDictSize = 1 << 14; bool ContainsWord(const Word &W) const { return std::any_of(begin(), end(), [&](const DictionaryEntry &DE) { return DE.GetW() == W; }); } const DictionaryEntry *begin() const { return &DE[0]; } const DictionaryEntry *end() const { return begin() + Size; } DictionaryEntry & operator[] (size_t Idx) { assert(Idx < Size); return DE[Idx]; } void push_back(DictionaryEntry DE) { if (Size < kMaxDictSize) this->DE[Size++] = DE; } void clear() { Size = 0; } bool empty() const { return Size == 0; } size_t size() const { return Size; } private: DictionaryEntry DE[kMaxDictSize]; size_t Size = 0; }; struct FuzzingOptions { int Verbosity = 1; size_t MaxLen = 0; int UnitTimeoutSec = 300; int TimeoutExitCode = 77; int ErrorExitCode = 77; int MaxTotalTimeSec = 0; int RssLimitMb = 0; bool DoCrossOver = true; int MutateDepth = 5; bool UseCounters = false; bool UseIndirCalls = true; bool UseMemcmp = true; bool UseMemmem = true; bool UseFullCoverageSet = false; bool Reload = true; bool ShuffleAtStartUp = true; bool PreferSmall = true; size_t MaxNumberOfRuns = ULONG_MAX; int ReportSlowUnits = 10; bool OnlyASCII = false; std::string OutputCorpus; std::string ArtifactPrefix = "./"; std::string ExactArtifactPath; bool SaveArtifacts = true; bool PrintNEW = true; // Print a status line when new units are found; bool OutputCSV = false; bool PrintNewCovPcs = false; bool PrintFinalStats = false; bool PrintCoverage = false; bool DetectLeaks = true; bool PruneCorpus = true; }; struct InputInfo { Unit U; // The actual input data. }; class InputCorpus { public: InputCorpus() { Corpus.reserve(1 << 14); // Avoid too many resizes. } size_t size() const { return Corpus.size(); } bool empty() const { return Corpus.empty(); } const Unit &operator[] (size_t Idx) const { return Corpus[Idx].U; } void Append(const std::vector &V) { for (auto &U : V) push_back(U); } void push_back(const Unit &U) { auto H = Hash(U); if (!Hashes.insert(H).second) return; InputInfo II; II.U = U; Corpus.push_back(II); } typedef const std::vector::const_iterator ConstIter; ConstIter begin() const { return Corpus.begin(); } ConstIter end() const { return Corpus.end(); } bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); } private: std::unordered_set Hashes; std::vector Corpus; }; class MutationDispatcher { public: MutationDispatcher(Random &Rand, const FuzzingOptions &Options); ~MutationDispatcher() {} /// Indicate that we are about to start a new sequence of mutations. void StartMutationSequence(); /// Print the current sequence of mutations. void PrintMutationSequence(); /// Indicate that the current sequence of mutations was successfull. void RecordSuccessfulMutationSequence(); /// Mutates data by invoking user-provided mutator. size_t Mutate_Custom(uint8_t *Data, size_t Size, size_t MaxSize); /// Mutates data by invoking user-provided crossover. size_t Mutate_CustomCrossOver(uint8_t *Data, size_t Size, size_t MaxSize); /// Mutates data by shuffling bytes. size_t Mutate_ShuffleBytes(uint8_t *Data, size_t Size, size_t MaxSize); /// Mutates data by erasing bytes. size_t Mutate_EraseBytes(uint8_t *Data, size_t Size, size_t MaxSize); /// Mutates data by inserting a byte. size_t Mutate_InsertByte(uint8_t *Data, size_t Size, size_t MaxSize); /// Mutates data by inserting several repeated bytes. size_t Mutate_InsertRepeatedBytes(uint8_t *Data, size_t Size, size_t MaxSize); /// Mutates data by chanding one byte. size_t Mutate_ChangeByte(uint8_t *Data, size_t Size, size_t MaxSize); /// Mutates data by chanding one bit. size_t Mutate_ChangeBit(uint8_t *Data, size_t Size, size_t MaxSize); /// Mutates data by copying/inserting a part of data into a different place. size_t Mutate_CopyPart(uint8_t *Data, size_t Size, size_t MaxSize); /// Mutates data by adding a word from the manual dictionary. size_t Mutate_AddWordFromManualDictionary(uint8_t *Data, size_t Size, size_t MaxSize); /// Mutates data by adding a word from the temporary automatic dictionary. size_t Mutate_AddWordFromTemporaryAutoDictionary(uint8_t *Data, size_t Size, size_t MaxSize); /// Mutates data by adding a word from the persistent automatic dictionary. size_t Mutate_AddWordFromPersistentAutoDictionary(uint8_t *Data, size_t Size, size_t MaxSize); /// Tries to find an ASCII integer in Data, changes it to another ASCII int. size_t Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, size_t MaxSize); /// Change a 1-, 2-, 4-, or 8-byte integer in interesting ways. size_t Mutate_ChangeBinaryInteger(uint8_t *Data, size_t Size, size_t MaxSize); /// CrossOver Data with some other element of the corpus. size_t Mutate_CrossOver(uint8_t *Data, size_t Size, size_t MaxSize); /// Applies one of the configured mutations. /// Returns the new size of data which could be up to MaxSize. size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize); /// Applies one of the default mutations. Provided as a service /// to mutation authors. size_t DefaultMutate(uint8_t *Data, size_t Size, size_t MaxSize); /// Creates a cross-over of two pieces of Data, returns its size. size_t CrossOver(const uint8_t *Data1, size_t Size1, const uint8_t *Data2, size_t Size2, uint8_t *Out, size_t MaxOutSize); void AddWordToManualDictionary(const Word &W); void AddWordToAutoDictionary(DictionaryEntry DE); void ClearAutoDictionary(); void PrintRecommendedDictionary(); void SetCorpus(const InputCorpus *Corpus) { this->Corpus = Corpus; } Random &GetRand() { return Rand; } private: struct Mutator { size_t (MutationDispatcher::*Fn)(uint8_t *Data, size_t Size, size_t Max); const char *Name; }; size_t AddWordFromDictionary(Dictionary &D, uint8_t *Data, size_t Size, size_t MaxSize); size_t MutateImpl(uint8_t *Data, size_t Size, size_t MaxSize, const std::vector &Mutators); size_t InsertPartOf(const uint8_t *From, size_t FromSize, uint8_t *To, size_t ToSize, size_t MaxToSize); size_t CopyPartOf(const uint8_t *From, size_t FromSize, uint8_t *To, size_t ToSize); Random &Rand; const FuzzingOptions Options; // Dictionary provided by the user via -dict=DICT_FILE. Dictionary ManualDictionary; // Temporary dictionary modified by the fuzzer itself, // recreated periodically. Dictionary TempAutoDictionary; // Persistent dictionary modified by the fuzzer, consists of // entries that led to successfull discoveries in the past mutations. Dictionary PersistentAutoDictionary; std::vector CurrentMutatorSequence; std::vector CurrentDictionaryEntrySequence; const InputCorpus *Corpus = nullptr; std::vector MutateInPlaceHere; std::vector Mutators; std::vector DefaultMutators; }; // See TracePC.cpp class TracePC { public: void HandleTrace(uintptr_t *guard, uintptr_t PC); void HandleInit(uintptr_t *start, uintptr_t *stop); void HandleCallerCallee(uintptr_t Caller, uintptr_t Callee); size_t GetTotalCoverage() { return TotalCoverage; } void SetUseCounters(bool UC) { UseCounters = UC; } size_t UpdateCounterMap(ValueBitMap *Map); void FinalizeTrace(); size_t GetNewPCsAndFlush(uintptr_t **NewPCsPtr = nullptr) { if (NewPCsPtr) *NewPCsPtr = NewPCs; size_t Res = NumNewPCs; NumNewPCs = 0; return Res; } void Reset() { TotalCoverage = 0; TotalCounterBits = 0; NumNewPCs = 0; CounterMap.Reset(); TotalCoverageMap.Reset(); ResetGuards(); } void PrintModuleInfo(); void PrintCoverage(); private: bool UseCounters = false; size_t TotalCoverage = 0; size_t TotalCounterBits = 0; static const size_t kMaxNewPCs = 64; uintptr_t NewPCs[kMaxNewPCs]; size_t NumNewPCs = 0; void AddNewPC(uintptr_t PC) { NewPCs[(NumNewPCs++) % kMaxNewPCs] = PC; } void ResetGuards(); struct Module { uintptr_t *Start, *Stop; }; Module Modules[4096]; size_t NumModules = 0; size_t NumGuards = 0; static const size_t kNumCounters = 1 << 14; uint8_t Counters[kNumCounters]; static const size_t kNumPCs = 1 << 20; uintptr_t PCs[kNumPCs]; ValueBitMap CounterMap; ValueBitMap TotalCoverageMap; }; extern TracePC TPC; class Fuzzer { public: // Aggregates all available coverage measurements. struct Coverage { Coverage() { Reset(); } void Reset() { BlockCoverage = 0; CallerCalleeCoverage = 0; CounterBitmapBits = 0; CounterBitmap.clear(); VPMap.Reset(); TPCMap.Reset(); VPMapBits = 0; } std::string DebugString() const; size_t BlockCoverage; size_t CallerCalleeCoverage; // Precalculated number of bits in CounterBitmap. size_t CounterBitmapBits; std::vector CounterBitmap; ValueBitMap TPCMap; ValueBitMap VPMap; size_t VPMapBits; }; Fuzzer(UserCallback CB, MutationDispatcher &MD, FuzzingOptions Options); ~Fuzzer(); void AddToCorpus(const Unit &U) { Corpus.push_back(U); UpdateCorpusDistribution(); } size_t ChooseUnitIdxToMutate(); const Unit &ChooseUnitToMutate() { return Corpus[ChooseUnitIdxToMutate()]; }; void Loop(); void ShuffleAndMinimize(UnitVector *V); void InitializeTraceState(); void AssignTaintLabels(uint8_t *Data, size_t Size); size_t CorpusSize() const { return Corpus.size(); } void ReadDir(const std::string &Path, long *Epoch, size_t MaxSize); void RereadOutputCorpus(size_t MaxSize); size_t secondsSinceProcessStartUp() { return duration_cast(system_clock::now() - ProcessStartTime) .count(); } size_t execPerSec() { size_t Seconds = secondsSinceProcessStartUp(); return Seconds ? TotalNumberOfRuns / Seconds : 0; } size_t getTotalNumberOfRuns() { return TotalNumberOfRuns; } static void StaticAlarmCallback(); static void StaticCrashSignalCallback(); static void StaticInterruptCallback(); void ExecuteCallback(const uint8_t *Data, size_t Size); bool RunOne(const uint8_t *Data, size_t Size); // Merge Corpora[1:] into Corpora[0]. void Merge(const std::vector &Corpora); // Returns a subset of 'Extra' that adds coverage to 'Initial'. UnitVector FindExtraUnits(const UnitVector &Initial, const UnitVector &Extra); MutationDispatcher &GetMD() { return MD; } void PrintFinalStats(); void SetMaxLen(size_t MaxLen); void RssLimitCallback(); // Public for tests. void ResetCoverage(); bool InFuzzingThread() const { return IsMyThread; } size_t GetCurrentUnitInFuzzingThead(const uint8_t **Data) const; private: void AlarmCallback(); void CrashCallback(); void InterruptCallback(); void MutateAndTestOne(); void ReportNewCoverage(const Unit &U); void PrintNewPCs(); void PrintOneNewPC(uintptr_t PC); bool RunOne(const Unit &U) { return RunOne(U.data(), U.size()); } void RunOneAndUpdateCorpus(const uint8_t *Data, size_t Size); void WriteToOutputCorpus(const Unit &U); void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix); void PrintStats(const char *Where, const char *End = "\n"); void PrintStatusForNewUnit(const Unit &U); void ShuffleCorpus(UnitVector *V); void TryDetectingAMemoryLeak(const uint8_t *Data, size_t Size, bool DuringInitialCorpusExecution); // Updates the probability distribution for the units in the corpus. // Must be called whenever the corpus or unit weights are changed. void UpdateCorpusDistribution(); bool UpdateMaxCoverage(); // Trace-based fuzzing: we run a unit with some kind of tracing // enabled and record potentially useful mutations. Then // We apply these mutations one by one to the unit and run it again. // Start tracing; forget all previously proposed mutations. void StartTraceRecording(); // Stop tracing. void StopTraceRecording(); void SetDeathCallback(); static void StaticDeathCallback(); void DumpCurrentUnit(const char *Prefix); void DeathCallback(); void ResetEdgeCoverage(); void ResetCounters(); void PrepareCounters(Fuzzer::Coverage *C); bool RecordMaxCoverage(Fuzzer::Coverage *C); void LazyAllocateCurrentUnitData(); uint8_t *CurrentUnitData = nullptr; std::atomic CurrentUnitSize; uint8_t BaseSha1[kSHA1NumBytes]; // Checksum of the base unit. size_t TotalNumberOfRuns = 0; size_t NumberOfNewUnitsAdded = 0; bool HasMoreMallocsThanFrees = false; size_t NumberOfLeakDetectionAttempts = 0; InputCorpus Corpus; std::piecewise_constant_distribution CorpusDistribution; UserCallback CB; MutationDispatcher &MD; FuzzingOptions Options; system_clock::time_point ProcessStartTime = system_clock::now(); system_clock::time_point UnitStartTime; long TimeOfLongestUnitInSeconds = 0; long EpochOfLastReadOfOutputCorpus = 0; // Maximum recorded coverage. Coverage MaxCoverage; // For -print_pcs uintptr_t* PcBuffer = nullptr; size_t PcBufferLen = 0; size_t PcBufferPos = 0, PrevPcBufferPos = 0; // Need to know our own thread. static thread_local bool IsMyThread; bool InMergeMode = false; }; // Global interface to functions that may or may not be available. extern ExternalFunctions *EF; }; // namespace fuzzer #endif // LLVM_FUZZER_INTERNAL_H