[clang][deps] Separate filesystem caches for minimized and original files

This patch separates the local and global caches of `DependencyScanningFilesystem` into two buckets: minimized files and original files. This is necessary to deal with precompiled modules/headers.

Consider a single worker with its instance of filesystem:
1. Build system uses the worker to scan dependencies of module A => filesystem cache gets populated with minimized input files.
2. Build system uses the results to explicitly build module A => explicitly built module captures the state of the real filesystem (containing non-minimized input files).
3. Build system uses the prebuilt module A as an explicit precompiled dependency for another compile job B.
4. Build system uses the same worker to scan dependencies for job B => worker uses implicit modular build to discover dependencies, which validates the filesystem state embedded in the prebuilt module (non-minimized files) to the current view of the filesystem (minimized files), resulting in validation failures.

This problem can be avoided in step 4 by collecting input files from the precompiled module and marking them as "ignored" in the minimizing filesystem. This way, the validation should succeed, since we should be always dealing with the original (non-minized) input files. However, the filesystem already minimized the input files in step 1 and put it in the cache, which gets used in step 4 as well even though it's marked ignored (do not minimize). This patch essentially fixes this oversight by making the `"file is minimized"` part of the cache key (from high level).

Depends on D106064.

Reviewed By: dexonsmith

Differential Revision: https://reviews.llvm.org/D106146
This commit is contained in:
Jan Svoboda 2021-07-20 11:42:34 +02:00
parent 9dc2636623
commit bc1a2979fc
4 changed files with 101 additions and 36 deletions

View File

@ -103,7 +103,8 @@ private:
};
/// This class is a shared cache, that caches the 'stat' and 'open' calls to the
/// underlying real file system.
/// underlying real file system. It distinguishes between minimized and original
/// files.
///
/// It is sharded based on the hash of the key to reduce the lock contention for
/// the worker threads.
@ -114,21 +115,62 @@ public:
CachedFileSystemEntry Value;
};
DependencyScanningFilesystemSharedCache();
/// Returns a cache entry for the corresponding key.
///
/// A new cache entry is created if the key is not in the cache. This is a
/// thread safe call.
SharedFileSystemEntry &get(StringRef Key);
SharedFileSystemEntry &get(StringRef Key, bool Minimized);
private:
struct CacheShard {
std::mutex CacheLock;
llvm::StringMap<SharedFileSystemEntry, llvm::BumpPtrAllocator> Cache;
class SingleCache {
public:
SingleCache();
SharedFileSystemEntry &get(StringRef Key);
private:
struct CacheShard {
std::mutex CacheLock;
llvm::StringMap<SharedFileSystemEntry, llvm::BumpPtrAllocator> Cache;
};
std::unique_ptr<CacheShard[]> CacheShards;
unsigned NumShards;
};
std::unique_ptr<CacheShard[]> CacheShards;
unsigned NumShards;
SingleCache CacheMinimized;
SingleCache CacheOriginal;
};
/// This class is a local cache, that caches the 'stat' and 'open' calls to the
/// underlying real file system. It distinguishes between minimized and original
/// files.
class DependencyScanningFilesystemLocalCache {
private:
using SingleCache =
llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator>;
SingleCache CacheMinimized;
SingleCache CacheOriginal;
SingleCache &selectCache(bool Minimized) {
return Minimized ? CacheMinimized : CacheOriginal;
}
public:
void setCachedEntry(StringRef Filename, bool Minimized,
const CachedFileSystemEntry *Entry) {
SingleCache &Cache = selectCache(Minimized);
bool IsInserted = Cache.try_emplace(Filename, Entry).second;
(void)IsInserted;
assert(IsInserted && "local cache is updated more than once");
}
const CachedFileSystemEntry *getCachedEntry(StringRef Filename,
bool Minimized) {
SingleCache &Cache = selectCache(Minimized);
auto It = Cache.find(Filename);
return It == Cache.end() ? nullptr : It->getValue();
}
};
/// A virtual file system optimized for the dependency discovery.
@ -159,24 +201,14 @@ public:
private:
bool shouldIgnoreFile(StringRef Filename);
void setCachedEntry(StringRef Filename, const CachedFileSystemEntry *Entry) {
bool IsInserted = Cache.try_emplace(Filename, Entry).second;
(void)IsInserted;
assert(IsInserted && "local cache is updated more than once");
}
const CachedFileSystemEntry *getCachedEntry(StringRef Filename) {
auto It = Cache.find(Filename);
return It == Cache.end() ? nullptr : It->getValue();
}
llvm::ErrorOr<const CachedFileSystemEntry *>
getOrCreateFileSystemEntry(const StringRef Filename);
/// The global cache shared between worker threads.
DependencyScanningFilesystemSharedCache &SharedCache;
/// The local cache is used by the worker thread to cache file system queries
/// locally instead of querying the global cache every time.
llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator> Cache;
DependencyScanningFilesystemLocalCache Cache;
/// The optional mapping structure which records information about the
/// excluded conditional directive skip mappings that are used by the
/// currently active preprocessor.

View File

@ -99,8 +99,7 @@ CachedFileSystemEntry::createDirectoryEntry(llvm::vfs::Status &&Stat) {
return Result;
}
DependencyScanningFilesystemSharedCache::
DependencyScanningFilesystemSharedCache() {
DependencyScanningFilesystemSharedCache::SingleCache::SingleCache() {
// This heuristic was chosen using a empirical testing on a
// reasonably high core machine (iMacPro 18 cores / 36 threads). The cache
// sharding gives a performance edge by reducing the lock contention.
@ -111,18 +110,20 @@ DependencyScanningFilesystemSharedCache::
CacheShards = std::make_unique<CacheShard[]>(NumShards);
}
/// Returns a cache entry for the corresponding key.
///
/// A new cache entry is created if the key is not in the cache. This is a
/// thread safe call.
DependencyScanningFilesystemSharedCache::SharedFileSystemEntry &
DependencyScanningFilesystemSharedCache::get(StringRef Key) {
DependencyScanningFilesystemSharedCache::SingleCache::get(StringRef Key) {
CacheShard &Shard = CacheShards[llvm::hash_value(Key) % NumShards];
std::unique_lock<std::mutex> LockGuard(Shard.CacheLock);
auto It = Shard.Cache.try_emplace(Key);
return It.first->getValue();
}
DependencyScanningFilesystemSharedCache::SharedFileSystemEntry &
DependencyScanningFilesystemSharedCache::get(StringRef Key, bool Minimized) {
SingleCache &Cache = Minimized ? CacheMinimized : CacheOriginal;
return Cache.get(Key);
}
/// Whitelist file extensions that should be minimized, treating no extension as
/// a source file that should be minimized.
///
@ -165,17 +166,17 @@ bool DependencyScanningWorkerFilesystem::shouldIgnoreFile(
llvm::ErrorOr<const CachedFileSystemEntry *>
DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
const StringRef Filename) {
if (const CachedFileSystemEntry *Entry = getCachedEntry(Filename)) {
bool ShouldMinimize =
!IgnoredFiles.count(Filename) && shouldMinimize(Filename);
if (const auto *Entry = Cache.getCachedEntry(Filename, ShouldMinimize))
return Entry;
}
// FIXME: Handle PCM/PCH files.
// FIXME: Handle module map files.
bool KeepOriginalSource = shouldIgnoreFile(Filename) ||
!shouldMinimize(Filename);
DependencyScanningFilesystemSharedCache::SharedFileSystemEntry
&SharedCacheEntry = SharedCache.get(Filename);
&SharedCacheEntry = SharedCache.get(Filename, ShouldMinimize);
const CachedFileSystemEntry *Result;
{
std::unique_lock<std::mutex> LockGuard(SharedCacheEntry.ValueLock);
@ -197,15 +198,15 @@ DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
CacheEntry = CachedFileSystemEntry::createDirectoryEntry(
std::move(*MaybeStatus));
else
CacheEntry = CachedFileSystemEntry::createFileEntry(
Filename, FS, !KeepOriginalSource);
CacheEntry = CachedFileSystemEntry::createFileEntry(Filename, FS,
ShouldMinimize);
}
Result = &CacheEntry;
}
// Store the result in the local cache.
setCachedEntry(Filename, Result);
Cache.setCachedEntry(Filename, ShouldMinimize, Result);
return Result;
}

View File

@ -68,6 +68,7 @@ clang_target_link_libraries(ToolingTests
clangAST
clangASTMatchers
clangBasic
clangDependencyScanning
clangFormat
clangFrontend
clangLex

View File

@ -15,6 +15,7 @@
#include "clang/Frontend/FrontendActions.h"
#include "clang/Tooling/CompilationDatabase.h"
#include "clang/Tooling/Tooling.h"
#include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Path.h"
@ -203,5 +204,35 @@ TEST(DependencyScanner, ScanDepsReuseFilemanagerHasInclude) {
EXPECT_EQ(convert_to_slash(Deps[5]), "/root/symlink.h");
}
namespace dependencies {
TEST(DependencyScanningFilesystem, IgnoredFilesHaveSeparateCache) {
auto VFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
VFS->addFile("/mod.h", 0, llvm::MemoryBuffer::getMemBuffer("// hi there!\n"));
DependencyScanningFilesystemSharedCache SharedCache;
auto Mappings = std::make_unique<ExcludedPreprocessorDirectiveSkipMapping>();
DependencyScanningWorkerFilesystem DepFS(SharedCache, VFS, Mappings.get());
auto StatusMinimized0 = DepFS.status("/mod.h");
DepFS.ignoreFile("/mod.h");
auto StatusFull1 = DepFS.status("/mod.h");
DepFS.clearIgnoredFiles();
auto StatusMinimized2 = DepFS.status("/mod.h");
DepFS.ignoreFile("/mod.h");
auto StatusFull3 = DepFS.status("/mod.h");
EXPECT_TRUE(StatusMinimized0);
EXPECT_EQ(StatusMinimized0->getSize(), 0u);
EXPECT_TRUE(StatusFull1);
EXPECT_EQ(StatusFull1->getSize(), 13u);
EXPECT_TRUE(StatusMinimized2);
EXPECT_EQ(StatusMinimized2->getSize(), 0u);
EXPECT_TRUE(StatusFull3);
EXPECT_EQ(StatusFull3->getSize(), 13u);
}
} // end namespace dependencies
} // end namespace tooling
} // end namespace clang