mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-21 07:42:52 +00:00
[profile] Support online merging with continuous sync mode
Make it possible to use online profile merging ("%m" mode) with continuous sync ("%c" mode). To implement this, the merged profile is locked in the runtime initialization step and either a) filled out for the first time or b) checked for compatibility. Then, the profile can simply be mmap()'d with MAP_SHARED set. With the mmap() in place, counter updates from every process which uses an image are mapped onto the same set of physical pages assigned by the filesystem cache. After the mmap() is set up, the profile is unlocked. Differential Revision: https://reviews.llvm.org/D69586
This commit is contained in:
parent
cc1b0bc24d
commit
2492b5a125
@ -90,12 +90,11 @@ directory structure will be created. Additionally, the following special
|
||||
* "%c" expands out to nothing, but enables a mode in which profile counter
|
||||
updates are continuously synced to a file. This means that if the
|
||||
instrumented program crashes, or is killed by a signal, perfect coverage
|
||||
information can still be recovered. Continuous mode is not yet compatible with
|
||||
the "%Nm" merging mode described above, does not support value profiling for
|
||||
PGO, and is only supported on Darwin. Support for Linux may be mostly
|
||||
complete but requires testing, and support for Fuchsia/Windows may require
|
||||
more extensive changes: please get involved if you are interested in porting
|
||||
this feature.
|
||||
information can still be recovered. Continuous mode does not support value
|
||||
profiling for PGO, and is only supported on Darwin at the moment. Support for
|
||||
Linux may be mostly complete but requires testing, and support for
|
||||
Fuchsia/Windows may require more extensive changes: please get involved if
|
||||
you are interested in porting this feature.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
|
@ -10,11 +10,7 @@
|
||||
#include "InstrProfilingInternal.h"
|
||||
#include "InstrProfilingPort.h"
|
||||
|
||||
/* When continuous mode is enabled (%c), this parameter is set to 1. This is
|
||||
* incompatible with the in-process merging mode. Lifting this restriction
|
||||
* may be complicated, as merging mode requires a lock on the profile, and
|
||||
* mmap() mode would require that lock to be held for the entire process
|
||||
* lifetime.
|
||||
/* When continuous mode is enabled (%c), this parameter is set to 1.
|
||||
*
|
||||
* This parameter is defined here in InstrProfilingBuffer.o, instead of in
|
||||
* InstrProfilingFile.o, to sequester all libc-dependent code in
|
||||
|
@ -380,11 +380,6 @@ static void truncateCurrentFile(void) {
|
||||
if (!Filename)
|
||||
return;
|
||||
|
||||
/* By pass file truncation to allow online raw profile
|
||||
* merging. */
|
||||
if (lprofCurFilename.MergePoolSize)
|
||||
return;
|
||||
|
||||
/* Only create the profile directory and truncate an existing profile once.
|
||||
* In continuous mode, this is necessary, as the profile is written-to by the
|
||||
* runtime initializer. */
|
||||
@ -397,8 +392,14 @@ static void truncateCurrentFile(void) {
|
||||
setenv(LPROF_INIT_ONCE_ENV, LPROF_INIT_ONCE_ENV, 1);
|
||||
#endif
|
||||
|
||||
/* Create the profile dir (even if online merging is enabled), so that
|
||||
* the profile file can be set up if continuous mode is enabled. */
|
||||
createProfileDir(Filename);
|
||||
|
||||
/* By pass file truncation to allow online raw profile merging. */
|
||||
if (lprofCurFilename.MergePoolSize)
|
||||
return;
|
||||
|
||||
/* Truncate the file. Later we'll reopen and append. */
|
||||
File = fopen(Filename, "w");
|
||||
if (!File)
|
||||
@ -406,6 +407,33 @@ static void truncateCurrentFile(void) {
|
||||
fclose(File);
|
||||
}
|
||||
|
||||
#ifndef _MSC_VER
|
||||
static void assertIsZero(int *i) {
|
||||
if (*i)
|
||||
PROF_WARN("Expected flag to be 0, but got: %d\n", *i);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Write a partial profile to \p Filename, which is required to be backed by
|
||||
* the open file object \p File. */
|
||||
static int writeProfileWithFileObject(const char *Filename, FILE *File) {
|
||||
setProfileFile(File);
|
||||
int rc = writeFile(Filename);
|
||||
if (rc)
|
||||
PROF_ERR("Failed to write file \"%s\": %s\n", Filename, strerror(errno));
|
||||
setProfileFile(NULL);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Unlock the profile \p File and clear the unlock flag. */
|
||||
static void unlockProfile(int *ProfileRequiresUnlock, FILE *File) {
|
||||
if (!*ProfileRequiresUnlock) {
|
||||
PROF_WARN("%s", "Expected to require profile unlock\n");
|
||||
}
|
||||
lprofUnlockFileHandle(File);
|
||||
*ProfileRequiresUnlock = 0;
|
||||
}
|
||||
|
||||
static void initializeProfileForContinuousMode(void) {
|
||||
#if defined(__Fuchsia__) || defined(_WIN32)
|
||||
PROF_ERR("%s\n", "Continuous mode not yet supported on Fuchsia or Windows.");
|
||||
@ -438,28 +466,72 @@ static void initializeProfileForContinuousMode(void) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Open the raw profile in append mode. */
|
||||
int Length = getCurFilenameLength();
|
||||
char *FilenameBuf = (char *)COMPILER_RT_ALLOCA(Length + 1);
|
||||
const char *Filename = getCurFilename(FilenameBuf, 0);
|
||||
if (!Filename)
|
||||
return;
|
||||
FILE *File = fopen(Filename, "a+b");
|
||||
if (!File)
|
||||
return;
|
||||
|
||||
FILE *File = NULL;
|
||||
off_t CurrentFileOffset = 0;
|
||||
off_t OffsetModPage = 0;
|
||||
|
||||
/* Whether an exclusive lock on the profile must be dropped after init.
|
||||
* Use a cleanup to warn if the unlock does not occur. */
|
||||
COMPILER_RT_CLEANUP(assertIsZero) int ProfileRequiresUnlock = 0;
|
||||
|
||||
if (!doMerging()) {
|
||||
/* We are not merging profiles, so open the raw profile in append mode. */
|
||||
File = fopen(Filename, "a+b");
|
||||
if (!File)
|
||||
return;
|
||||
|
||||
/* Check that the offset within the file is page-aligned. */
|
||||
CurrentFileOffset = ftello(File);
|
||||
OffsetModPage = CurrentFileOffset % PageSize;
|
||||
if (OffsetModPage != 0) {
|
||||
PROF_ERR("Continuous counter sync mode is enabled, but raw profile is not"
|
||||
"page-aligned. CurrentFileOffset = %" PRIu64 ", pagesz = %u.\n",
|
||||
(uint64_t)CurrentFileOffset, PageSize);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Grow the profile so that mmap() can succeed. Leak the file handle, as
|
||||
* the file should stay open. */
|
||||
if (writeProfileWithFileObject(Filename, File) != 0)
|
||||
return;
|
||||
} else {
|
||||
/* We are merging profiles. Map the counter section as shared memory into
|
||||
* the profile, i.e. into each participating process. An increment in one
|
||||
* process should be visible to every other process with the same counter
|
||||
* section mapped. */
|
||||
File = lprofOpenFileEx(Filename);
|
||||
if (!File)
|
||||
return;
|
||||
|
||||
ProfileRequiresUnlock = 1;
|
||||
|
||||
uint64_t ProfileFileSize;
|
||||
if (getProfileFileSizeForMerging(File, &ProfileFileSize) == -1)
|
||||
return unlockProfile(&ProfileRequiresUnlock, File);
|
||||
|
||||
if (ProfileFileSize == 0) {
|
||||
/* Grow the profile so that mmap() can succeed. Leak the file handle, as
|
||||
* the file should stay open. */
|
||||
if (writeProfileWithFileObject(Filename, File) != 0)
|
||||
return unlockProfile(&ProfileRequiresUnlock, File);
|
||||
} else {
|
||||
/* The merged profile has a non-zero length. Check that it is compatible
|
||||
* with the data in this process. */
|
||||
char *ProfileBuffer;
|
||||
if (mmapProfileForMerging(File, ProfileFileSize, &ProfileBuffer) == -1 ||
|
||||
munmap(ProfileBuffer, ProfileFileSize) == -1)
|
||||
return unlockProfile(&ProfileRequiresUnlock, File);
|
||||
}
|
||||
}
|
||||
|
||||
int Fileno = fileno(File);
|
||||
|
||||
/* Check that the offset within the file is page-aligned. */
|
||||
off_t CurrentFileOffset = ftello(File);
|
||||
off_t OffsetModPage = CurrentFileOffset % PageSize;
|
||||
if (OffsetModPage != 0) {
|
||||
PROF_ERR("Continuous counter sync mode is enabled, but raw profile is not"
|
||||
"page-aligned. CurrentFileOffset = %" PRIu64 ", pagesz = %u.\n",
|
||||
(uint64_t) CurrentFileOffset, PageSize);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Determine how much padding is needed before/after the counters and after
|
||||
* the names. */
|
||||
uint64_t PaddingBytesBeforeCounters, PaddingBytesAfterCounters,
|
||||
@ -474,14 +546,6 @@ static void initializeProfileForContinuousMode(void) {
|
||||
CurrentFileOffset + sizeof(__llvm_profile_header) +
|
||||
(DataSize * sizeof(__llvm_profile_data)) + PaddingBytesBeforeCounters;
|
||||
|
||||
/* Write the partial profile. This grows the file to a point where the mmap()
|
||||
* can succeed. Leak the file handle, as the file should stay open. */
|
||||
setProfileFile(File);
|
||||
int rc = writeFile(Filename);
|
||||
if (rc)
|
||||
PROF_ERR("Failed to write file \"%s\": %s\n", Filename, strerror(errno));
|
||||
setProfileFile(NULL);
|
||||
|
||||
uint64_t *CounterMmap = (uint64_t *)mmap(
|
||||
(void *)CountersBegin, PageAlignedCountersLength, PROT_READ | PROT_WRITE,
|
||||
MAP_FIXED | MAP_SHARED, Fileno, FileOffsetToCounters);
|
||||
@ -494,8 +558,9 @@ static void initializeProfileForContinuousMode(void) {
|
||||
" - FileOffsetToCounters: %" PRIu64 "\n",
|
||||
strerror(errno), CountersBegin, PageAlignedCountersLength, Fileno,
|
||||
FileOffsetToCounters);
|
||||
return;
|
||||
}
|
||||
|
||||
unlockProfile(&ProfileRequiresUnlock, File);
|
||||
#endif // defined(__Fuchsia__) || defined(_WIN32)
|
||||
}
|
||||
|
||||
@ -568,12 +633,6 @@ static int parseFilenamePattern(const char *FilenamePat,
|
||||
FilenamePat);
|
||||
return -1;
|
||||
}
|
||||
if (MergingEnabled) {
|
||||
PROF_WARN("%%c specifier can not be used with profile merging (%%m) "
|
||||
"in %s.\n",
|
||||
FilenamePat);
|
||||
return -1;
|
||||
}
|
||||
|
||||
__llvm_profile_enable_continuous_mode();
|
||||
I++; /* advance to 'c' */
|
||||
@ -583,12 +642,6 @@ static int parseFilenamePattern(const char *FilenamePat,
|
||||
FilenamePat);
|
||||
return -1;
|
||||
}
|
||||
if (__llvm_profile_is_continuous_mode_enabled()) {
|
||||
PROF_WARN("%%c specifier can not be used with profile merging (%%m) "
|
||||
"in %s.\n",
|
||||
FilenamePat);
|
||||
return -1;
|
||||
}
|
||||
MergingEnabled = 1;
|
||||
if (FilenamePat[I] == 'm')
|
||||
lprofCurFilename.MergePoolSize = 1;
|
||||
|
@ -22,6 +22,7 @@
|
||||
/* Need to include <stdio.h> and <io.h> */
|
||||
#define COMPILER_RT_FTRUNCATE(f,l) _chsize(_fileno(f),l)
|
||||
#define COMPILER_RT_ALWAYS_INLINE __forceinline
|
||||
#define COMPILER_RT_CLEANUP(x)
|
||||
#elif __GNUC__
|
||||
#define COMPILER_RT_ALIGNAS(x) __attribute__((aligned(x)))
|
||||
#define COMPILER_RT_VISIBILITY __attribute__((visibility("hidden")))
|
||||
@ -29,6 +30,7 @@
|
||||
#define COMPILER_RT_ALLOCA __builtin_alloca
|
||||
#define COMPILER_RT_FTRUNCATE(f,l) ftruncate(fileno(f),l)
|
||||
#define COMPILER_RT_ALWAYS_INLINE inline __attribute((always_inline))
|
||||
#define COMPILER_RT_CLEANUP(x) __attribute__((cleanup(x)))
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__)
|
||||
|
138
compiler-rt/test/profile/ContinuousSyncMode/online-merging.c
Normal file
138
compiler-rt/test/profile/ContinuousSyncMode/online-merging.c
Normal file
@ -0,0 +1,138 @@
|
||||
// Test the online merging mode (%m) along with continuous mode (%c).
|
||||
//
|
||||
// Create & cd into a temporary directory.
|
||||
// RUN: rm -rf %t.dir && mkdir -p %t.dir && cd %t.dir
|
||||
//
|
||||
// Create two DSOs and a driver program that uses them.
|
||||
// RUN: echo "void dso1(void) {}" > dso1.c
|
||||
// RUN: echo "void dso2(void) {}" > dso2.c
|
||||
// RUN: %clang_pgogen -dynamiclib -o %t.dir/dso1.dylib dso1.c
|
||||
// RUN: %clang_pgogen -dynamiclib -o %t.dir/dso2.dylib dso2.c
|
||||
// RUN: %clang_pgogen -o main.exe %s %t.dir/dso1.dylib %t.dir/dso2.dylib
|
||||
//
|
||||
// === Round 1 ===
|
||||
// Test merging+continuous mode without any file contention.
|
||||
//
|
||||
// RUN: env LLVM_PROFILE_FILE="%t.dir/profdir/%m%c.profraw" %run %t.dir/main.exe nospawn
|
||||
// RUN: llvm-profdata merge -o %t.profdata %t.dir/profdir
|
||||
// RUN: llvm-profdata show --counts --all-functions %t.profdata | FileCheck %s -check-prefix=ROUND1
|
||||
|
||||
// ROUND1-LABEL: Counters:
|
||||
// ROUND1-DAG: dso1:
|
||||
// ROUND1-DAG: Hash: 0x{{.*}}
|
||||
// ROUND1-DAG: Counters: 1
|
||||
// ROUND1-DAG: Block counts: [1]
|
||||
// ROUND1-DAG: dso2:
|
||||
// ROUND1-DAG: Hash: 0x{{.*}}
|
||||
// ROUND1-DAG: Counters: 1
|
||||
// ROUND1-DAG: Block counts: [1]
|
||||
// ROUND1-DAG: main:
|
||||
// ROUND1-DAG: Hash: 0x{{.*}}
|
||||
// ROUND1-LABEL: Instrumentation level: IR
|
||||
// ROUND1-NEXT: Functions shown: 3
|
||||
// ROUND1-NEXT: Total functions: 3
|
||||
// ROUND1-NEXT: Maximum function count: 1
|
||||
// ROUND1-NEXT: Maximum internal block count: 1
|
||||
//
|
||||
// === Round 2 ===
|
||||
// Test merging+continuous mode with some file contention.
|
||||
//
|
||||
// RUN: env LLVM_PROFILE_FILE="%t.dir/profdir/%m%c.profraw" %run %t.dir/main.exe spawn 'LLVM_PROFILE_FILE=%t.dir/profdir/%m%c.profraw'
|
||||
// RUN: llvm-profdata merge -o %t.profdata %t.dir/profdir
|
||||
// RUN: llvm-profdata show --counts --all-functions %t.profdata | FileCheck %s -check-prefix=ROUND2
|
||||
|
||||
// ROUND2-LABEL: Counters:
|
||||
// ROUND2-DAG: dso1:
|
||||
// ROUND2-DAG: Hash: 0x{{.*}}
|
||||
// ROUND2-DAG: Counters: 1
|
||||
// ROUND2-DAG: Block counts: [97]
|
||||
// ROUND2-DAG: dso2:
|
||||
// ROUND2-DAG: Hash: 0x{{.*}}
|
||||
// ROUND2-DAG: Counters: 1
|
||||
// ROUND2-DAG: Block counts: [97]
|
||||
// ROUND2-DAG: main:
|
||||
// ROUND2-DAG: Hash: 0x{{.*}}
|
||||
// ROUND2-LABEL: Instrumentation level: IR
|
||||
// ROUND2-NEXT: Functions shown: 3
|
||||
// ROUND2-NEXT: Total functions: 3
|
||||
// ROUND2-NEXT: Maximum function count: 97
|
||||
// ROUND2-NEXT: Maximum internal block count: 33
|
||||
|
||||
#include <spawn.h>
|
||||
#include <sys/wait.h>
|
||||
#include <sys/errno.h>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
const int num_child_procs_to_spawn = 32;
|
||||
|
||||
extern int __llvm_profile_is_continuous_mode_enabled(void);
|
||||
extern char *__llvm_profile_get_filename(void);
|
||||
|
||||
void dso1(void);
|
||||
void dso2(void);
|
||||
|
||||
// Change to "#define" for debug output.
|
||||
#undef DEBUG_TEST
|
||||
|
||||
#ifdef DEBUG_TEST
|
||||
#define DEBUG(...) fprintf(stderr, __VA_ARGS__);
|
||||
#else
|
||||
#define DEBUG(...)
|
||||
#endif
|
||||
|
||||
int main(int argc, char *const argv[]) {
|
||||
if (strcmp(argv[1], "nospawn") == 0) {
|
||||
DEBUG("Hello from child (pid = %d, cont-mode-enabled = %d, profile = %s).\n",
|
||||
getpid(), __llvm_profile_is_continuous_mode_enabled(), __llvm_profile_get_filename());
|
||||
|
||||
dso1();
|
||||
dso2();
|
||||
return 0;
|
||||
} else if (strcmp(argv[1], "spawn") == 0) {
|
||||
// This is the start of Round 2.
|
||||
// Expect Counts[dsoX] = 1, as this was the state at the end of Round 1.
|
||||
|
||||
int I;
|
||||
pid_t child_pids[num_child_procs_to_spawn];
|
||||
char *const child_argv[] = {argv[0], "nospawn", NULL};
|
||||
char *const child_envp[] = {argv[2], NULL};
|
||||
for (I = 0; I < num_child_procs_to_spawn; ++I) {
|
||||
dso1(); // Counts[dsoX] += 2 * num_child_procs_to_spawn
|
||||
dso2();
|
||||
|
||||
DEBUG("Spawning child with argv = {%s, %s, NULL} and envp = {%s, NULL}\n",
|
||||
child_argv[0], child_argv[1], child_envp[0]);
|
||||
|
||||
int ret = posix_spawn(&child_pids[I], argv[0], NULL, NULL, child_argv,
|
||||
child_envp);
|
||||
if (ret != 0) {
|
||||
fprintf(stderr, "Child %d could not be spawned: ret = %d, msg = %s\n",
|
||||
I, ret, strerror(ret));
|
||||
return 1;
|
||||
}
|
||||
|
||||
DEBUG("Spawned child %d (pid = %d).\n", I, child_pids[I]);
|
||||
}
|
||||
for (I = 0; I < num_child_procs_to_spawn; ++I) {
|
||||
dso1(); // Counts[dsoX] += num_child_procs_to_spawn
|
||||
dso2();
|
||||
|
||||
int status;
|
||||
waitpid(child_pids[I], &status, 0);
|
||||
if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
|
||||
fprintf(stderr, "Child %d did not exit with code 0.\n", I);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
// At the end of Round 2, we have:
|
||||
// Counts[dsoX] = 1 + (2 * num_child_procs_to_spawn) + num_child_procs_to_spawn
|
||||
// = 97
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user