mirror of
https://github.com/RPCSX/llvm.git
synced 2024-12-23 04:28:40 +00:00
1a3900e81e
In an effort to get libfuzzer working on Windows, we need to make a distinction between what functions require platform specific code (e.g. different code on Windows vs Linux) and what code doesn't. IO functions, for example, tend to be platform specific. This patch separates out some of the functions which will need to have platform specific implementations into different headers, so that we can then provide different implementations for each platform. Aside from that, this patch contains no functional change. It is purely a re-organization. Patch by Marcos Pividori Differential Revision: https://reviews.llvm.org/D27230 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@288264 91177308-0d34-0410-b5e6-96231b3b80d8
218 lines
6.6 KiB
C++
218 lines
6.6 KiB
C++
//===- FuzzerCorpus.h - Internal header for the Fuzzer ----------*- C++ -* ===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
// fuzzer::InputCorpus
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LLVM_FUZZER_CORPUS
|
|
#define LLVM_FUZZER_CORPUS
|
|
|
|
#include <random>
|
|
#include <unordered_set>
|
|
|
|
#include "FuzzerDefs.h"
|
|
#include "FuzzerIO.h"
|
|
#include "FuzzerRandom.h"
|
|
#include "FuzzerSHA1.h"
|
|
#include "FuzzerTracePC.h"
|
|
|
|
namespace fuzzer {
|
|
|
|
struct InputInfo {
|
|
Unit U; // The actual input data.
|
|
uint8_t Sha1[kSHA1NumBytes]; // Checksum.
|
|
// Number of features that this input has and no smaller input has.
|
|
size_t NumFeatures = 0;
|
|
size_t Tmp = 0; // Used by ValidateFeatureSet.
|
|
// Stats.
|
|
size_t NumExecutedMutations = 0;
|
|
size_t NumSuccessfullMutations = 0;
|
|
bool MayDeleteFile = false;
|
|
};
|
|
|
|
class InputCorpus {
|
|
public:
|
|
static const size_t kFeatureSetSize = 1 << 16;
|
|
InputCorpus(const std::string &OutputCorpus) : OutputCorpus(OutputCorpus) {
|
|
memset(InputSizesPerFeature, 0, sizeof(InputSizesPerFeature));
|
|
memset(SmallestElementPerFeature, 0, sizeof(SmallestElementPerFeature));
|
|
}
|
|
~InputCorpus() {
|
|
for (auto II : Inputs)
|
|
delete II;
|
|
}
|
|
size_t size() const { return Inputs.size(); }
|
|
size_t SizeInBytes() const {
|
|
size_t Res = 0;
|
|
for (auto II : Inputs)
|
|
Res += II->U.size();
|
|
return Res;
|
|
}
|
|
size_t NumActiveUnits() const {
|
|
size_t Res = 0;
|
|
for (auto II : Inputs)
|
|
Res += !II->U.empty();
|
|
return Res;
|
|
}
|
|
bool empty() const { return Inputs.empty(); }
|
|
const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; }
|
|
void AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile = false) {
|
|
assert(!U.empty());
|
|
uint8_t Hash[kSHA1NumBytes];
|
|
if (FeatureDebug)
|
|
Printf("ADD_TO_CORPUS %zd NF %zd\n", Inputs.size(), NumFeatures);
|
|
ComputeSHA1(U.data(), U.size(), Hash);
|
|
Hashes.insert(Sha1ToString(Hash));
|
|
Inputs.push_back(new InputInfo());
|
|
InputInfo &II = *Inputs.back();
|
|
II.U = U;
|
|
II.NumFeatures = NumFeatures;
|
|
II.MayDeleteFile = MayDeleteFile;
|
|
memcpy(II.Sha1, Hash, kSHA1NumBytes);
|
|
UpdateCorpusDistribution();
|
|
ValidateFeatureSet();
|
|
}
|
|
|
|
bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); }
|
|
bool HasUnit(const std::string &H) { return Hashes.count(H); }
|
|
InputInfo &ChooseUnitToMutate(Random &Rand) {
|
|
InputInfo &II = *Inputs[ChooseUnitIdxToMutate(Rand)];
|
|
assert(!II.U.empty());
|
|
return II;
|
|
};
|
|
|
|
// Returns an index of random unit from the corpus to mutate.
|
|
// Hypothesis: units added to the corpus last are more likely to be
|
|
// interesting. This function gives more weight to the more recent units.
|
|
size_t ChooseUnitIdxToMutate(Random &Rand) {
|
|
size_t Idx = static_cast<size_t>(CorpusDistribution(Rand.Get_mt19937()));
|
|
assert(Idx < Inputs.size());
|
|
return Idx;
|
|
}
|
|
|
|
void PrintStats() {
|
|
for (size_t i = 0; i < Inputs.size(); i++) {
|
|
const auto &II = *Inputs[i];
|
|
Printf(" [%zd %s]\tsz: %zd\truns: %zd\tsucc: %zd\n", i,
|
|
Sha1ToString(II.Sha1).c_str(), II.U.size(),
|
|
II.NumExecutedMutations, II.NumSuccessfullMutations);
|
|
}
|
|
}
|
|
|
|
void PrintFeatureSet() {
|
|
for (size_t i = 0; i < kFeatureSetSize; i++) {
|
|
if(size_t Sz = GetFeature(i))
|
|
Printf("[%zd: id %zd sz%zd] ", i, SmallestElementPerFeature[i], Sz);
|
|
}
|
|
Printf("\n\t");
|
|
for (size_t i = 0; i < Inputs.size(); i++)
|
|
if (size_t N = Inputs[i]->NumFeatures)
|
|
Printf(" %zd=>%zd ", i, N);
|
|
Printf("\n");
|
|
}
|
|
|
|
void DeleteInput(size_t Idx) {
|
|
InputInfo &II = *Inputs[Idx];
|
|
if (!OutputCorpus.empty() && II.MayDeleteFile)
|
|
DeleteFile(DirPlusFile(OutputCorpus, Sha1ToString(II.Sha1)));
|
|
Unit().swap(II.U);
|
|
if (FeatureDebug)
|
|
Printf("EVICTED %zd\n", Idx);
|
|
}
|
|
|
|
bool AddFeature(size_t Idx, uint32_t NewSize, bool Shrink) {
|
|
assert(NewSize);
|
|
Idx = Idx % kFeatureSetSize;
|
|
uint32_t OldSize = GetFeature(Idx);
|
|
if (OldSize == 0 || (Shrink && OldSize > NewSize)) {
|
|
if (OldSize > 0) {
|
|
size_t OldIdx = SmallestElementPerFeature[Idx];
|
|
InputInfo &II = *Inputs[OldIdx];
|
|
assert(II.NumFeatures > 0);
|
|
II.NumFeatures--;
|
|
if (II.NumFeatures == 0)
|
|
DeleteInput(OldIdx);
|
|
}
|
|
if (FeatureDebug)
|
|
Printf("ADD FEATURE %zd sz %d\n", Idx, NewSize);
|
|
SmallestElementPerFeature[Idx] = Inputs.size();
|
|
InputSizesPerFeature[Idx] = NewSize;
|
|
CountingFeatures = true;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
size_t NumFeatures() const {
|
|
size_t Res = 0;
|
|
for (size_t i = 0; i < kFeatureSetSize; i++)
|
|
Res += GetFeature(i) != 0;
|
|
return Res;
|
|
}
|
|
|
|
void ResetFeatureSet() {
|
|
assert(Inputs.empty());
|
|
memset(InputSizesPerFeature, 0, sizeof(InputSizesPerFeature));
|
|
memset(SmallestElementPerFeature, 0, sizeof(SmallestElementPerFeature));
|
|
}
|
|
|
|
private:
|
|
|
|
static const bool FeatureDebug = false;
|
|
|
|
size_t GetFeature(size_t Idx) const { return InputSizesPerFeature[Idx]; }
|
|
|
|
void ValidateFeatureSet() {
|
|
if (!CountingFeatures) return;
|
|
if (FeatureDebug)
|
|
PrintFeatureSet();
|
|
for (size_t Idx = 0; Idx < kFeatureSetSize; Idx++)
|
|
if (GetFeature(Idx))
|
|
Inputs[SmallestElementPerFeature[Idx]]->Tmp++;
|
|
for (auto II: Inputs) {
|
|
if (II->Tmp != II->NumFeatures)
|
|
Printf("ZZZ %zd %zd\n", II->Tmp, II->NumFeatures);
|
|
assert(II->Tmp == II->NumFeatures);
|
|
II->Tmp = 0;
|
|
}
|
|
}
|
|
|
|
// Updates the probability distribution for the units in the corpus.
|
|
// Must be called whenever the corpus or unit weights are changed.
|
|
void UpdateCorpusDistribution() {
|
|
size_t N = Inputs.size();
|
|
Intervals.resize(N + 1);
|
|
Weights.resize(N);
|
|
std::iota(Intervals.begin(), Intervals.end(), 0);
|
|
if (CountingFeatures)
|
|
for (size_t i = 0; i < N; i++)
|
|
Weights[i] = Inputs[i]->NumFeatures * (i + 1);
|
|
else
|
|
std::iota(Weights.begin(), Weights.end(), 1);
|
|
CorpusDistribution = std::piecewise_constant_distribution<double>(
|
|
Intervals.begin(), Intervals.end(), Weights.begin());
|
|
}
|
|
std::piecewise_constant_distribution<double> CorpusDistribution;
|
|
|
|
std::vector<double> Intervals;
|
|
std::vector<double> Weights;
|
|
|
|
std::unordered_set<std::string> Hashes;
|
|
std::vector<InputInfo*> Inputs;
|
|
|
|
bool CountingFeatures = false;
|
|
uint32_t InputSizesPerFeature[kFeatureSetSize];
|
|
uint32_t SmallestElementPerFeature[kFeatureSetSize];
|
|
|
|
std::string OutputCorpus;
|
|
};
|
|
|
|
} // namespace fuzzer
|
|
|
|
#endif // LLVM_FUZZER_CORPUS
|