gecko-dev/build/automation.py.in
Andrew Halberstadt f94d7e71d8 Bug 1034290 - Use structured log output for test results in reftest, r=jmaher
Structured logs bring many benefits. We can stop parsing the logs for magic strings, we
can modify the format without breaking things, and we can stream results into systems like
ActiveData. The structured logs originate primarily in reftest.js. StructuredLog.jsm is
used to generate the JSON-based log stream. Finally OutputHandler in the python harness
reads structured output from stdout, and formats it into human readable form.

MozReview-Commit-ID: G3ZLkMRl6p7

--HG--
extra : commitid : J3ui9XYWR3Q
extra : rebase_source : 77ed0ba842cc8e557141fb3494212b06868c728a
extra : amend_source : 735d48225a2e627e0fe45fc11b50b6c49a885a4b
extra : source : d1779fe421c3c7cd8e3d191816776390dc104f37
2016-02-05 15:44:20 -05:00

608 lines
22 KiB
Python

#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import with_statement
import logging
import os
import re
import select
import signal
import subprocess
import sys
import tempfile
from datetime import datetime, timedelta
SCRIPT_DIR = os.path.abspath(os.path.realpath(os.path.dirname(sys.argv[0])))
sys.path.insert(0, SCRIPT_DIR)
# --------------------------------------------------------------
# TODO: this is a hack for mozbase without virtualenv, remove with bug 849900
# These paths refer to relative locations to test.zip, not the OBJDIR or SRCDIR
here = os.path.dirname(os.path.realpath(__file__))
mozbase = os.path.realpath(os.path.join(os.path.dirname(here), 'mozbase'))
if os.path.isdir(mozbase):
for package in os.listdir(mozbase):
package_path = os.path.join(mozbase, package)
if package_path not in sys.path:
sys.path.append(package_path)
import mozcrash
from mozscreenshot import printstatus, dump_screen
# ---------------------------------------------------------------
_DEFAULT_PREFERENCE_FILE = os.path.join(SCRIPT_DIR, 'prefs_general.js')
_DEFAULT_APPS_FILE = os.path.join(SCRIPT_DIR, 'webapps_mochitest.json')
_DEFAULT_WEB_SERVER = "127.0.0.1"
_DEFAULT_HTTP_PORT = 8888
_DEFAULT_SSL_PORT = 4443
_DEFAULT_WEBSOCKET_PORT = 9988
# from nsIPrincipal.idl
_APP_STATUS_NOT_INSTALLED = 0
_APP_STATUS_INSTALLED = 1
_APP_STATUS_PRIVILEGED = 2
_APP_STATUS_CERTIFIED = 3
#expand _DIST_BIN = __XPC_BIN_PATH__
#expand _IS_WIN32 = len("__WIN32__") != 0
#expand _IS_MAC = __IS_MAC__ != 0
#expand _IS_LINUX = __IS_LINUX__ != 0
#ifdef IS_CYGWIN
#expand _IS_CYGWIN = __IS_CYGWIN__ == 1
#else
_IS_CYGWIN = False
#endif
#expand _BIN_SUFFIX = __BIN_SUFFIX__
#expand _DEFAULT_APP = "./" + __BROWSER_PATH__
#expand _CERTS_SRC_DIR = __CERTS_SRC_DIR__
#expand _IS_TEST_BUILD = __IS_TEST_BUILD__
#expand _IS_DEBUG_BUILD = __IS_DEBUG_BUILD__
#expand _CRASHREPORTER = __CRASHREPORTER__ == 1
#expand _IS_ASAN = __IS_ASAN__ == 1
if _IS_WIN32:
import ctypes, ctypes.wintypes, time, msvcrt
else:
import errno
def resetGlobalLog(log):
while _log.handlers:
_log.removeHandler(_log.handlers[0])
handler = logging.StreamHandler(log)
_log.setLevel(logging.INFO)
_log.addHandler(handler)
# We use the logging system here primarily because it'll handle multiple
# threads, which is needed to process the output of the server and application
# processes simultaneously.
_log = logging.getLogger()
resetGlobalLog(sys.stdout)
#################
# PROFILE SETUP #
#################
class Automation(object):
"""
Runs the browser from a script, and provides useful utilities
for setting up the browser environment.
"""
DIST_BIN = _DIST_BIN
IS_WIN32 = _IS_WIN32
IS_MAC = _IS_MAC
IS_LINUX = _IS_LINUX
IS_CYGWIN = _IS_CYGWIN
BIN_SUFFIX = _BIN_SUFFIX
UNIXISH = not IS_WIN32 and not IS_MAC
DEFAULT_APP = _DEFAULT_APP
CERTS_SRC_DIR = _CERTS_SRC_DIR
IS_TEST_BUILD = _IS_TEST_BUILD
IS_DEBUG_BUILD = _IS_DEBUG_BUILD
CRASHREPORTER = _CRASHREPORTER
IS_ASAN = _IS_ASAN
# timeout, in seconds
DEFAULT_TIMEOUT = 60.0
DEFAULT_WEB_SERVER = _DEFAULT_WEB_SERVER
DEFAULT_HTTP_PORT = _DEFAULT_HTTP_PORT
DEFAULT_SSL_PORT = _DEFAULT_SSL_PORT
DEFAULT_WEBSOCKET_PORT = _DEFAULT_WEBSOCKET_PORT
def __init__(self):
self.log = _log
self.lastTestSeen = "automation.py"
self.haveDumpedScreen = False
def setServerInfo(self,
webServer = _DEFAULT_WEB_SERVER,
httpPort = _DEFAULT_HTTP_PORT,
sslPort = _DEFAULT_SSL_PORT,
webSocketPort = _DEFAULT_WEBSOCKET_PORT):
self.webServer = webServer
self.httpPort = httpPort
self.sslPort = sslPort
self.webSocketPort = webSocketPort
@property
def __all__(self):
return [
"UNIXISH",
"IS_WIN32",
"IS_MAC",
"log",
"runApp",
"Process",
"DIST_BIN",
"DEFAULT_APP",
"CERTS_SRC_DIR",
"environment",
"IS_TEST_BUILD",
"IS_DEBUG_BUILD",
"DEFAULT_TIMEOUT",
]
class Process(subprocess.Popen):
"""
Represents our view of a subprocess.
It adds a kill() method which allows it to be stopped explicitly.
"""
def __init__(self,
args,
bufsize=0,
executable=None,
stdin=None,
stdout=None,
stderr=None,
preexec_fn=None,
close_fds=False,
shell=False,
cwd=None,
env=None,
universal_newlines=False,
startupinfo=None,
creationflags=0):
_log.info("INFO | automation.py | Launching: %s", subprocess.list2cmdline(args))
subprocess.Popen.__init__(self, args, bufsize, executable,
stdin, stdout, stderr,
preexec_fn, close_fds,
shell, cwd, env,
universal_newlines, startupinfo, creationflags)
self.log = _log
def kill(self):
if Automation().IS_WIN32:
import platform
pid = "%i" % self.pid
if platform.release() == "2000":
# Windows 2000 needs 'kill.exe' from the
#'Windows 2000 Resource Kit tools'. (See bug 475455.)
try:
subprocess.Popen(["kill", "-f", pid]).wait()
except:
self.log.info("TEST-UNEXPECTED-FAIL | automation.py | Missing 'kill' utility to kill process with pid=%s. Kill it manually!", pid)
else:
# Windows XP and later.
subprocess.Popen(["taskkill", "/F", "/PID", pid]).wait()
else:
os.kill(self.pid, signal.SIGKILL)
def environment(self, env=None, xrePath=None, crashreporter=True, debugger=False, dmdPath=None, lsanPath=None):
if xrePath == None:
xrePath = self.DIST_BIN
if env == None:
env = dict(os.environ)
ldLibraryPath = os.path.abspath(os.path.join(SCRIPT_DIR, xrePath))
dmdLibrary = None
preloadEnvVar = None
if self.UNIXISH or self.IS_MAC:
envVar = "LD_LIBRARY_PATH"
preloadEnvVar = "LD_PRELOAD"
if self.IS_MAC:
envVar = "DYLD_LIBRARY_PATH"
dmdLibrary = "libdmd.dylib"
else: # unixish
env['MOZILLA_FIVE_HOME'] = xrePath
dmdLibrary = "libdmd.so"
if envVar in env:
ldLibraryPath = ldLibraryPath + ":" + env[envVar]
env[envVar] = ldLibraryPath
elif self.IS_WIN32:
env["PATH"] = env["PATH"] + ";" + str(ldLibraryPath)
dmdLibrary = "dmd.dll"
preloadEnvVar = "MOZ_REPLACE_MALLOC_LIB"
if dmdPath and dmdLibrary and preloadEnvVar:
env[preloadEnvVar] = os.path.join(dmdPath, dmdLibrary)
if crashreporter and not debugger:
env['MOZ_CRASHREPORTER_NO_REPORT'] = '1'
env['MOZ_CRASHREPORTER'] = '1'
else:
env['MOZ_CRASHREPORTER_DISABLE'] = '1'
# Crash on non-local network connections by default.
# MOZ_DISABLE_NONLOCAL_CONNECTIONS can be set to "0" to temporarily
# enable non-local connections for the purposes of local testing. Don't
# override the user's choice here. See bug 1049688.
env.setdefault('MOZ_DISABLE_NONLOCAL_CONNECTIONS', '1')
env['GNOME_DISABLE_CRASH_DIALOG'] = '1'
env['XRE_NO_WINDOWS_CRASH_DIALOG'] = '1'
# Set WebRTC logging in case it is not set yet
env.setdefault('NSPR_LOG_MODULES', 'signaling:3,mtransport:4,datachannel:4,jsep:4,MediaPipelineFactory:4')
env.setdefault('R_LOG_LEVEL', '6')
env.setdefault('R_LOG_DESTINATION', 'stderr')
env.setdefault('R_LOG_VERBOSE', '1')
# ASan specific environment stuff
if self.IS_ASAN and (self.IS_LINUX or self.IS_MAC):
# Symbolizer support
llvmsym = os.path.join(xrePath, "llvm-symbolizer")
if os.path.isfile(llvmsym):
env["ASAN_SYMBOLIZER_PATH"] = llvmsym
self.log.info("INFO | automation.py | ASan using symbolizer at %s", llvmsym)
else:
self.log.info("TEST-UNEXPECTED-FAIL | automation.py | Failed to find ASan symbolizer at %s", llvmsym)
try:
totalMemory = int(os.popen("free").readlines()[1].split()[1])
# Only 4 GB RAM or less available? Use custom ASan options to reduce
# the amount of resources required to do the tests. Standard options
# will otherwise lead to OOM conditions on the current test slaves.
if totalMemory <= 1024 * 1024 * 4:
self.log.info("INFO | automation.py | ASan running in low-memory configuration")
env["ASAN_OPTIONS"] = "quarantine_size=50331648:malloc_context_size=5"
else:
self.log.info("INFO | automation.py | ASan running in default memory configuration")
except OSError,err:
self.log.info("Failed determine available memory, disabling ASan low-memory configuration: %s", err.strerror)
except:
self.log.info("Failed determine available memory, disabling ASan low-memory configuration")
return env
def killPid(self, pid):
try:
os.kill(pid, getattr(signal, "SIGKILL", signal.SIGTERM))
except WindowsError:
self.log.info("Failed to kill process %d." % pid)
if IS_WIN32:
PeekNamedPipe = ctypes.windll.kernel32.PeekNamedPipe
GetLastError = ctypes.windll.kernel32.GetLastError
def readWithTimeout(self, f, timeout):
"""
Try to read a line of output from the file object |f|. |f| must be a
pipe, like the |stdout| member of a subprocess.Popen object created
with stdout=PIPE. Returns a tuple (line, did_timeout), where |did_timeout|
is True if the read timed out, and False otherwise. If no output is
received within |timeout| seconds, returns a blank line.
"""
if timeout is None:
timeout = 0
x = msvcrt.get_osfhandle(f.fileno())
l = ctypes.c_long()
done = time.time() + timeout
buffer = ""
while timeout == 0 or time.time() < done:
if self.PeekNamedPipe(x, None, 0, None, ctypes.byref(l), None) == 0:
err = self.GetLastError()
if err == 38 or err == 109: # ERROR_HANDLE_EOF || ERROR_BROKEN_PIPE
return ('', False)
else:
self.log.error("readWithTimeout got error: %d", err)
# read a character at a time, checking for eol. Return once we get there.
index = 0
while index < l.value:
char = f.read(1)
buffer += char
if char == '\n':
return (buffer, False)
index = index + 1
time.sleep(0.01)
return (buffer, True)
def isPidAlive(self, pid):
STILL_ACTIVE = 259
PROCESS_QUERY_LIMITED_INFORMATION = 0x1000
pHandle = ctypes.windll.kernel32.OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, 0, pid)
if not pHandle:
return False
pExitCode = ctypes.wintypes.DWORD()
ctypes.windll.kernel32.GetExitCodeProcess(pHandle, ctypes.byref(pExitCode))
ctypes.windll.kernel32.CloseHandle(pHandle)
return pExitCode.value == STILL_ACTIVE
else:
def readWithTimeout(self, f, timeout):
"""Try to read a line of output from the file object |f|. If no output
is received within |timeout| seconds, return a blank line.
Returns a tuple (line, did_timeout), where |did_timeout| is True
if the read timed out, and False otherwise."""
(r, w, e) = select.select([f], [], [], timeout)
if len(r) == 0:
return ('', True)
return (f.readline(), False)
def isPidAlive(self, pid):
try:
# kill(pid, 0) checks for a valid PID without actually sending a signal
# The method throws OSError if the PID is invalid, which we catch below.
os.kill(pid, 0)
# Wait on it to see if it's a zombie. This can throw OSError.ECHILD if
# the process terminates before we get to this point.
wpid, wstatus = os.waitpid(pid, os.WNOHANG)
return wpid == 0
except OSError, err:
# Catch the errors we might expect from os.kill/os.waitpid,
# and re-raise any others
if err.errno == errno.ESRCH or err.errno == errno.ECHILD:
return False
raise
def dumpScreen(self, utilityPath):
if self.haveDumpedScreen:
self.log.info("Not taking screenshot here: see the one that was previously logged")
return
self.haveDumpedScreen = True;
dump_screen(utilityPath, self.log)
def killAndGetStack(self, processPID, utilityPath, debuggerInfo):
"""Kill the process, preferrably in a way that gets us a stack trace.
Also attempts to obtain a screenshot before killing the process."""
if not debuggerInfo:
self.dumpScreen(utilityPath)
self.killAndGetStackNoScreenshot(processPID, utilityPath, debuggerInfo)
def killAndGetStackNoScreenshot(self, processPID, utilityPath, debuggerInfo):
"""Kill the process, preferrably in a way that gets us a stack trace."""
if self.CRASHREPORTER and not debuggerInfo:
if not self.IS_WIN32:
# ABRT will get picked up by Breakpad's signal handler
os.kill(processPID, signal.SIGABRT)
return
else:
# We should have a "crashinject" program in our utility path
crashinject = os.path.normpath(os.path.join(utilityPath, "crashinject.exe"))
if os.path.exists(crashinject):
status = subprocess.Popen([crashinject, str(processPID)]).wait()
printstatus("crashinject", status)
if status == 0:
return
self.log.info("Can't trigger Breakpad, just killing process")
self.killPid(processPID)
def waitForFinish(self, proc, utilityPath, timeout, maxTime, startTime, debuggerInfo, symbolsPath, outputHandler=None):
""" Look for timeout or crashes and return the status after the process terminates """
stackFixerFunction = None
didTimeout = False
hitMaxTime = False
if proc.stdout is None:
self.log.info("TEST-INFO: Not logging stdout or stderr due to debugger connection")
else:
logsource = proc.stdout
if self.IS_DEBUG_BUILD and symbolsPath and os.path.exists(symbolsPath):
# Run each line through a function in fix_stack_using_bpsyms.py (uses breakpad symbol files)
# This method is preferred for Tinderbox builds, since native symbols may have been stripped.
sys.path.insert(0, utilityPath)
import fix_stack_using_bpsyms as stackFixerModule
stackFixerFunction = lambda line: stackFixerModule.fixSymbols(line, symbolsPath)
del sys.path[0]
elif self.IS_DEBUG_BUILD and self.IS_MAC:
# Run each line through a function in fix_macosx_stack.py (uses atos)
sys.path.insert(0, utilityPath)
import fix_macosx_stack as stackFixerModule
stackFixerFunction = lambda line: stackFixerModule.fixSymbols(line)
del sys.path[0]
elif self.IS_DEBUG_BUILD and self.IS_LINUX:
# Run each line through a function in fix_linux_stack.py (uses addr2line)
# This method is preferred for developer machines, so we don't have to run "make buildsymbols".
sys.path.insert(0, utilityPath)
import fix_linux_stack as stackFixerModule
stackFixerFunction = lambda line: stackFixerModule.fixSymbols(line)
del sys.path[0]
# With metro browser runs this script launches the metro test harness which launches the browser.
# The metro test harness hands back the real browser process id via log output which we need to
# pick up on and parse out. This variable tracks the real browser process id if we find it.
browserProcessId = -1
(line, didTimeout) = self.readWithTimeout(logsource, timeout)
while line != "" and not didTimeout:
if stackFixerFunction:
line = stackFixerFunction(line)
if outputHandler is None:
self.log.info(line.rstrip().decode("UTF-8", "ignore"))
else:
outputHandler(line)
if "TEST-START" in line and "|" in line:
self.lastTestSeen = line.split("|")[1].strip()
if not debuggerInfo and "TEST-UNEXPECTED-FAIL" in line and "Test timed out" in line:
self.dumpScreen(utilityPath)
(line, didTimeout) = self.readWithTimeout(logsource, timeout)
if not hitMaxTime and maxTime and datetime.now() - startTime > timedelta(seconds = maxTime):
# Kill the application.
hitMaxTime = True
self.log.info("TEST-UNEXPECTED-FAIL | %s | application ran for longer than allowed maximum time of %d seconds", self.lastTestSeen, int(maxTime))
self.killAndGetStack(proc.pid, utilityPath, debuggerInfo)
if didTimeout:
if line:
self.log.info(line.rstrip().decode("UTF-8", "ignore"))
self.log.info("TEST-UNEXPECTED-FAIL | %s | application timed out after %d seconds with no output", self.lastTestSeen, int(timeout))
if browserProcessId == -1:
browserProcessId = proc.pid
self.killAndGetStack(browserProcessId, utilityPath, debuggerInfo)
status = proc.wait()
printstatus("Main app process", status)
if status == 0:
self.lastTestSeen = "Main app process exited normally"
if status != 0 and not didTimeout and not hitMaxTime:
self.log.info("TEST-UNEXPECTED-FAIL | %s | Exited with code %d during test run", self.lastTestSeen, status)
return status
def buildCommandLine(self, app, debuggerInfo, profileDir, testURL, extraArgs):
""" build the application command line """
cmd = os.path.abspath(app)
if self.IS_MAC and os.path.exists(cmd + "-bin"):
# Prefer 'app-bin' in case 'app' is a shell script.
# We can remove this hack once bug 673899 etc are fixed.
cmd += "-bin"
args = []
if debuggerInfo:
args.extend(debuggerInfo.args)
args.append(cmd)
cmd = os.path.abspath(debuggerInfo.path)
if self.IS_MAC:
args.append("-foreground")
if self.IS_CYGWIN:
profileDirectory = commands.getoutput("cygpath -w \"" + profileDir + "/\"")
else:
profileDirectory = profileDir + "/"
args.extend(("-no-remote", "-profile", profileDirectory))
if testURL is not None:
args.append((testURL))
args.extend(extraArgs)
return cmd, args
def checkForZombies(self, processLog, utilityPath, debuggerInfo):
""" Look for hung processes """
if not os.path.exists(processLog):
self.log.info('Automation Error: PID log not found: %s', processLog)
# Whilst no hung process was found, the run should still display as a failure
return True
foundZombie = False
self.log.info('INFO | zombiecheck | Reading PID log: %s', processLog)
processList = []
pidRE = re.compile(r'launched child process (\d+)$')
processLogFD = open(processLog)
for line in processLogFD:
self.log.info(line.rstrip())
m = pidRE.search(line)
if m:
processList.append(int(m.group(1)))
processLogFD.close()
for processPID in processList:
self.log.info("INFO | zombiecheck | Checking for orphan process with PID: %d", processPID)
if self.isPidAlive(processPID):
foundZombie = True
self.log.info("TEST-UNEXPECTED-FAIL | zombiecheck | child process %d still alive after shutdown", processPID)
self.killAndGetStack(processPID, utilityPath, debuggerInfo)
return foundZombie
def checkForCrashes(self, minidumpDir, symbolsPath):
return mozcrash.check_for_crashes(minidumpDir, symbolsPath, test_name=self.lastTestSeen)
def runApp(self, testURL, env, app, profileDir, extraArgs, utilityPath = None,
xrePath = None, certPath = None,
debuggerInfo = None, symbolsPath = None,
timeout = -1, maxTime = None, onLaunch = None,
detectShutdownLeaks = False, screenshotOnFail=False, testPath=None, bisectChunk=None,
valgrindPath=None, valgrindArgs=None, valgrindSuppFiles=None, outputHandler=None):
"""
Run the app, log the duration it took to execute, return the status code.
Kills the app if it runs for longer than |maxTime| seconds, or outputs nothing for |timeout| seconds.
"""
if utilityPath == None:
utilityPath = self.DIST_BIN
if xrePath == None:
xrePath = self.DIST_BIN
if certPath == None:
certPath = self.CERTS_SRC_DIR
if timeout == -1:
timeout = self.DEFAULT_TIMEOUT
# copy env so we don't munge the caller's environment
env = dict(env);
env["NO_EM_RESTART"] = "1"
tmpfd, processLog = tempfile.mkstemp(suffix='pidlog')
os.close(tmpfd)
env["MOZ_PROCESS_LOG"] = processLog
cmd, args = self.buildCommandLine(app, debuggerInfo, profileDir, testURL, extraArgs)
startTime = datetime.now()
if debuggerInfo and debuggerInfo.interactive:
# If an interactive debugger is attached, don't redirect output,
# don't use timeouts, and don't capture ctrl-c.
timeout = None
maxTime = None
outputPipe = None
signal.signal(signal.SIGINT, lambda sigid, frame: None)
else:
outputPipe = subprocess.PIPE
self.lastTestSeen = "automation.py"
proc = self.Process([cmd] + args,
env = self.environment(env, xrePath = xrePath,
crashreporter = not debuggerInfo),
stdout = outputPipe,
stderr = subprocess.STDOUT)
self.log.info("INFO | automation.py | Application pid: %d", proc.pid)
if onLaunch is not None:
# Allow callers to specify an onLaunch callback to be fired after the
# app is launched.
onLaunch()
status = self.waitForFinish(proc, utilityPath, timeout, maxTime, startTime, debuggerInfo, symbolsPath,
outputHandler=outputHandler)
self.log.info("INFO | automation.py | Application ran for: %s", str(datetime.now() - startTime))
# Do a final check for zombie child processes.
zombieProcesses = self.checkForZombies(processLog, utilityPath, debuggerInfo)
crashed = self.checkForCrashes(os.path.join(profileDir, "minidumps"), symbolsPath)
if crashed or zombieProcesses:
status = 1
if os.path.exists(processLog):
os.unlink(processLog)
return status
def elf_arm(self, filename):
data = open(filename, 'rb').read(20)
return data[:4] == "\x7fELF" and ord(data[18]) == 40 # EM_ARM