Bug 644900 - Generate probes for stack expansion when allocating large frames on Windows (r=edwsmith)

Also extend lirasm with --stkskip option to exercise the fix.

--HG--
extra : convert_revision : 670889ebdbca23505e134a116155a2bb2ca41240
This commit is contained in:
William Maddox 2011-03-30 21:45:21 -07:00
parent 6d679dc92a
commit 4268a61cf9
7 changed files with 124 additions and 45 deletions

View File

@ -2233,7 +2233,8 @@ usageAndQuit(const string& progname)
" -v --verbose print LIR and assembly code\n"
" --execute execute LIR\n"
" --[no-]optimize enable or disable optimization of the LIR (default=off)\n"
" --random [N] generate a random LIR block of size N (default=1000)\n"
" --random [N] generate a random LIR block of size N (default=100)\n"
" --stkskip [N] push approximately N Kbytes of stack before execution (default=100)\n"
"\n"
"Build query options (these print a value for this build of lirasm and exit)\n"
" --show-arch show the architecture ('i386', 'X64', 'arm', 'ppc',\n"
@ -2265,10 +2266,33 @@ struct CmdLineOptions {
bool execute;
bool optimize;
int random;
int stkskip;
string filename;
Config config;
};
// Parse the optional positive integer that may follow the option at argv[*i].
//
//   argc, argv    - the command line being processed.
//   i             - in/out index of the current option; advanced by one only
//                   when the following argument is consumed as the number.
//   value         - out: receives the parsed number or 'defaultValue'.
//   defaultValue  - used when the option is the last argument, or when the
//                   following argument is not entirely a decimal number.
//
// Returns true on success.  Returns false, leaving *i and *value untouched,
// when the following argument is a number that is not greater than zero or
// that does not fit in an int.
bool parseOptionalInt(int argc, char** argv, int* i, int* value, int defaultValue)
{
    // Largest value an int can hold, derived from the unsigned range so that
    // no additional header is required.
    const long maxInt = (long)(~0u >> 1);

    if (*i == argc - 1) {
        *value = defaultValue;      // no numeric argument, use default
        return true;
    }

    char* endptr;
    long res = strtol(argv[*i+1], &endptr, 10);
    if ('\0' != *endptr) {
        *value = defaultValue;      // next arg is not a number
        return true;
    }

    // Keep the result as a long until it has been range-checked: storing it
    // straight into an int would silently narrow an oversized argument into
    // an unrelated (possibly positive) value.  Where long is no wider than
    // int, strtol itself saturates at LONG_MAX, which is accepted as-is.
    if (res <= 0 || res > maxInt) {
        return false;
    }

    *value = (int)res;              // next arg is a number, use that for the value
    (*i)++;
    return true;
}
static void
processCmdLine(int argc, char **argv, CmdLineOptions& opts)
{
@ -2277,6 +2301,7 @@ processCmdLine(int argc, char **argv, CmdLineOptions& opts)
opts.execute = false;
opts.random = 0;
opts.optimize = false;
opts.stkskip = 0;
// Architecture-specific options.
#if defined NANOJIT_IA32
@ -2301,22 +2326,12 @@ processCmdLine(int argc, char **argv, CmdLineOptions& opts)
else if (arg == "--no-optimize")
opts.optimize = false;
else if (arg == "--random") {
const int defaultSize = 100;
if (i == argc - 1) {
opts.random = defaultSize; // no numeric argument, use default
} else {
char* endptr;
int res = strtol(argv[i+1], &endptr, 10);
if ('\0' == *endptr) {
// We don't bother checking for overflow.
if (res <= 0)
errMsgAndQuit(opts.progname, "--random argument must be greater than zero");
opts.random = res; // next arg is a number, use that for the size
i++;
} else {
opts.random = defaultSize; // next arg is not a number
}
}
if (!parseOptionalInt(argc, argv, &i, &opts.random, 100))
errMsgAndQuit(opts.progname, "--random argument must be greater than zero");
}
else if (arg == "--stkskip") {
if (!parseOptionalInt(argc, argv, &i, &opts.stkskip, 100))
errMsgAndQuit(opts.progname, "--stkskip argument must be greater than zero");
}
else if (arg == "--show-arch") {
const char* str =
@ -2415,6 +2430,45 @@ processCmdLine(int argc, char **argv, CmdLineOptions& opts)
#endif
}
int32_t* dummy;
void
executeFragment(const LirasmFragment& fragment, int skip)
{
// Allocate a large frame, and make sure we don't optimize it away.
int32_t space[512];
dummy = space;
if (skip > 0) {
executeFragment(fragment, skip-1);
} else {
switch (fragment.mReturnType) {
case RT_INT: {
int res = fragment.rint();
cout << "Output is: " << res << endl;
break;
}
#ifdef NANOJIT_64BIT
case RT_QUAD: {
int res = fragment.rquad();
cout << "Output is: " << res << endl;
break;
}
#endif
case RT_DOUBLE: {
double res = fragment.rdouble();
cout << "Output is: " << res << endl;
break;
}
case RT_GUARD: {
LasmSideExit *ls = (LasmSideExit*) fragment.rguard()->exit;
cout << "Exited block on line: " << ls->line << endl;
break;
}
}
}
}
int
main(int argc, char **argv)
{
@ -2436,30 +2490,7 @@ main(int argc, char **argv)
i = lasm.mFragments.find("main");
if (i == lasm.mFragments.end())
errMsgAndQuit(opts.progname, "error: at least one fragment must be named 'main'");
switch (i->second.mReturnType) {
case RT_INT: {
int res = i->second.rint();
cout << "Output is: " << res << endl;
break;
}
#ifdef NANOJIT_64BIT
case RT_QUAD: {
int res = i->second.rquad();
cout << "Output is: " << res << endl;
break;
}
#endif
case RT_DOUBLE: {
double res = i->second.rdouble();
cout << "Output is: " << res << endl;
break;
}
case RT_GUARD: {
LasmSideExit *ls = (LasmSideExit*) i->second.rguard()->exit;
cout << "Exited block on line: " << ls->line << endl;
break;
}
}
executeFragment(i->second, opts.stkskip);
} else {
for (i = lasm.mFragments.begin(); i != lasm.mFragments.end(); i++)
dump_srecords(cout, i->second.fragptr);

View File

@ -0,0 +1,12 @@
; Allocate frame larger than a Win32 page
; and write to the end of the frame first.
; This test is just an exerciser for the page
; probing code, as lirasm will not crash without it.
foo = allocp 8192
bar = allocp 4
k = immi 555
sti k bar 0
sti k foo 0
res = ldi bar 0
reti res

View File

@ -0,0 +1 @@
Output is: 555

View File

@ -169,7 +169,7 @@ namespace nanojit {
#define asm_output(...) do { \
if (_logc->lcbits & LC_Native) { \
outline[0]='\0'; \
VMPI_sprintf(outline, "%p ", (void*)_nIns); \
VMPI_sprintf(outline, "%p ", _nIns); \
if (_logc->lcbits & LC_Bytes) { \
appendHexVals(outline, (char*)_nIns, (char*)_nInsAfter); \
padTo(outline, 3*15); \

View File

@ -1956,6 +1956,24 @@ namespace nanojit
uint32_t aligned = alignUp(stackNeeded + stackPushed, NJ_ALIGN_STACK);
uint32_t amt = aligned - stackPushed;
#ifdef _WIN64
// Windows uses a single guard page for extending the stack, so
// new stack pages must be first touched in stack-growth order.
// We touch each whole page that will be allocated to the frame
// (following the saved FP) to cause the OS to commit the page if
// necessary. Since we don't calculate page boundaries, but just
// probe at intervals of the pagesize, it is possible that the
// last page of the frame will be touched unnecessarily. Note that
// we must generate the probes in the reverse order of their execution.
// We require that the page size be a power of 2.
uint32_t pageSize = uint32_t(VMPI_getVMPageSize());
NanoAssert((pageSize & (pageSize-1)) == 0);
uint32_t pageRounded = amt & ~(pageSize-1);
for (int32_t d = pageRounded; d > 0; d -= pageSize) {
MOVLMI(RBP, -d, 0);
}
#endif
// Reserve stackNeeded bytes, padded
// to preserve NJ_ALIGN_STACK-byte alignment.
if (amt) {

View File

@ -910,10 +910,27 @@ namespace nanojit
uint32_t aligned = alignUp(stackNeeded + stackPushed, NJ_ALIGN_STACK);
uint32_t amt = aligned - stackPushed;
#ifdef _WIN32
// Windows uses a single guard page for extending the stack, so
// new stack pages must be first touched in stack-growth order.
// We touch each whole page that will be allocated to the frame
// (following the saved FP) to cause the OS to commit the page if
// necessary. Since we don't calculate page boundaries, but just
// probe at intervals of the pagesize, it is possible that the
// last page of the frame will be touched unnecessarily. Note that
// we must generate the probes in the reverse order of their execution.
// We require that the page size be a power of 2.
size_t pageSize = VMPI_getVMPageSize();
NanoAssert((pageSize & (pageSize-1)) == 0);
size_t pageRounded = amt & ~(pageSize-1);
for (int32_t d = pageRounded; d > 0; d -= pageSize) {
STi(rEBP, -d, 0);
}
#endif
// Reserve stackNeeded bytes, padded
// to preserve NJ_ALIGN_STACK-byte alignment.
if (amt)
{
if (amt) {
SUBi(SP, amt);
}

View File

@ -93,7 +93,7 @@ static long glock = LOCK_IS_FREE;
#define Lock(lock) while (_InterlockedCompareExchange(lock, LOCK_IS_TAKEN, LOCK_IS_FREE) == LOCK_IS_TAKEN){};
#define Unlock(lock) _InterlockedCompareExchange(lock, LOCK_IS_FREE, LOCK_IS_TAKEN);
#if defined(WIN32)
#if defined(WIN32) && !defined(UNDER_CE)
static void vprof_printf(const char* format, ...)
{
va_list args;