#!/usr/bin/env python
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Usage: symbolstore.py <params> <dump_syms path> <symbol store path>
#                       <debug info files or dirs>
#   Runs dump_syms on each debug info file specified on the command line,
#   then places the resulting symbol file in the proper directory
#   structure in the symbol store path.  Accepts multiple files
#   on the command line, so can be called as part of a pipe using
#     find <dir> | xargs symbolstore.py <dump_syms> <storepath>
#   But really, you might just want to pass it <dir>.
#
#   Parameters accepted:
#     -c           : Copy debug info files to the same directory structure
#                    as sym files
#     -a "<archs>" : Run dump_syms -a <arch> for each space separated
#                    cpu architecture in <archs> (only on OS X)
#     -s <srcdir>  : Use <srcdir> as the top source directory to
#                    generate relative filenames.
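#
#   For example, a typical invocation might look like this (a sketch; all
#   paths are illustrative):
#     python symbolstore.py -c -s /src/mozilla-central \
#         /tools/breakpad/dump_syms /tmp/symbols obj-dir/dist/bin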

import sys
import platform
import os
import re
import shutil
import textwrap
import fnmatch
import subprocess
import urlparse
import multiprocessing
import collections

from optparse import OptionParser
from xml.dom.minidom import parse

# Utility classes

class VCSFileInfo:
    """ A base class for version-controlled file information. Ensures that the
        following attributes are generated only once (successfully):

            self.root
            self.clean_root
            self.revision
            self.filename

        The attributes are generated by a single call to the GetRoot,
        GetRevision, and GetFilename methods. Those methods are explicitly not
        implemented here and must be implemented in derived classes. """

    def __init__(self, file):
        if not file:
            raise ValueError
        self.file = file

    def __getattr__(self, name):
        """ __getattr__ is only called for attributes that are not set on self,
            so setting self.[attr] will prevent future calls to the GetRoot,
            GetRevision, and GetFilename methods. We don't set the values on
            failure on the off chance that a future call might succeed. """

        if name == "root":
            root = self.GetRoot()
            if root:
                self.root = root
            return root

        elif name == "clean_root":
            clean_root = self.GetCleanRoot()
            if clean_root:
                self.clean_root = clean_root
            return clean_root

        elif name == "revision":
            revision = self.GetRevision()
            if revision:
                self.revision = revision
            return revision

        elif name == "filename":
            filename = self.GetFilename()
            if filename:
                self.filename = filename
            return filename

        raise AttributeError

    def GetRoot(self):
        """ This method should return the unmodified root for the file or 'None'
            on failure. """
        raise NotImplementedError

    def GetCleanRoot(self):
        """ This method should return the repository root for the file or 'None'
            on failure. """
        raise NotImplementedError

    def GetRevision(self):
        """ This method should return the revision number for the file or 'None'
            on failure. """
        raise NotImplementedError

    def GetFilename(self):
        """ This method should return the repository-specific filename for the
            file or 'None' on failure. """
        raise NotImplementedError

# This regex separates protocol and optional username/password from a url.
# For instance, all the following urls will be transformed into
# 'foo.com/bar':
#
#   http://foo.com/bar
#   svn+ssh://user@foo.com/bar
#   svn+ssh://user:pass@foo.com/bar
#
rootRegex = re.compile(r'^\S+?:/+(?:[^\s/]*@)?(\S+)$')
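
# For example (an illustrative use of the regex above):
#   m = rootRegex.match("svn+ssh://user:pass@foo.com/bar")
#   if m:
#       print m.group(1)   # prints "foo.com/bar"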

def read_output(*args):
    (stdout, _) = subprocess.Popen(args=args, stdout=subprocess.PIPE).communicate()
    return stdout.rstrip()
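
# For example, read_output('hg', 'id', '-i') would return the short changeset
# hash of the working copy (assuming hg is available on PATH).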

class HGRepoInfo:
    def __init__(self, path):
        self.path = path
        rev = read_output('hg', '-R', path,
                          'parent', '--template={node|short}')
        # Look for the default hg path. If SRCSRV_ROOT is set, we
        # don't bother asking hg.
        hg_root = os.environ.get("SRCSRV_ROOT")
        if hg_root:
            root = hg_root
        else:
            root = read_output('hg', '-R', path,
                               'showconfig', 'paths.default')
            if not root:
                print >> sys.stderr, "Failed to get HG Repo for %s" % path
        cleanroot = None
        if root:
            match = rootRegex.match(root)
            if match:
                cleanroot = match.group(1)
                if cleanroot.endswith('/'):
                    cleanroot = cleanroot[:-1]
        if cleanroot is None:
            print >> sys.stderr, textwrap.dedent("""\
            Could not determine repo info for %s.  This is either not a clone of the web-based
            repository, or you have not specified SRCSRV_ROOT, or the clone is corrupt.""") % path
            sys.exit(1)
        self.rev = rev
        self.cleanroot = cleanroot

    def GetFileInfo(self, file):
        return HGFileInfo(file, self)

class HGFileInfo(VCSFileInfo):
    def __init__(self, file, repo):
        VCSFileInfo.__init__(self, file)
        self.repo = repo
        self.file = os.path.relpath(file, repo.path)

    def GetRoot(self):
        return self.repo.path

    def GetCleanRoot(self):
        return self.repo.cleanroot

    def GetRevision(self):
        return self.repo.rev

    def GetFilename(self):
        if self.revision and self.clean_root:
            return "hg:%s:%s:%s" % (self.clean_root, self.file, self.revision)
        return self.file
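
# A GetFilename result for an hg-tracked file has the form
# "hg:<clean root>:<relative file>:<revision>", for example (illustrative
# values):
#   hg:hg.mozilla.org/mozilla-central:browser/app/nsBrowserApp.cpp:abcdef012345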

class GitRepoInfo:
    """
    Info about a local git repository. Does not currently
    support discovering info about a git clone, the info must be
    provided out-of-band.
    """
    def __init__(self, path, rev, root):
        self.path = path
        cleanroot = None
        if root:
            match = rootRegex.match(root)
            if match:
                cleanroot = match.group(1)
                if cleanroot.endswith('/'):
                    cleanroot = cleanroot[:-1]
        if cleanroot is None:
            print >> sys.stderr, textwrap.dedent("""\
            Could not determine repo info for %s (%s).  This is either not a clone of a web-based
            repository, or you have not specified SRCSRV_ROOT, or the clone is corrupt.""") % (path, root)
            sys.exit(1)
        self.rev = rev
        self.cleanroot = cleanroot

    def GetFileInfo(self, file):
        return GitFileInfo(file, self)

class GitFileInfo(VCSFileInfo):
    def __init__(self, file, repo):
        VCSFileInfo.__init__(self, file)
        self.repo = repo
        self.file = os.path.relpath(file, repo.path)

    def GetRoot(self):
        return self.repo.path

    def GetCleanRoot(self):
        return self.repo.cleanroot

    def GetRevision(self):
        return self.repo.rev

    def GetFilename(self):
        if self.revision and self.clean_root:
            return "git:%s:%s:%s" % (self.clean_root, self.file, self.revision)
        return self.file

# Utility functions

# A cache of repo info for each srcdir.
srcdirRepoInfo = {}

# A cache of files for which VCS info has already been determined. Used to
# prevent extra filesystem activity or process launching.
vcsFileInfoCache = {}

def IsInDir(file, dir):
    # the lower() is to handle win32+vc8, where
    # the source filenames come out all lowercase,
    # but the srcdir can be mixed case
    return os.path.abspath(file).lower().startswith(os.path.abspath(dir).lower())

def GetVCSFilenameFromSrcdir(file, srcdir):
    if srcdir not in srcdirRepoInfo:
        # Not in cache, so find it and cache it
        if os.path.isdir(os.path.join(srcdir, '.hg')):
            srcdirRepoInfo[srcdir] = HGRepoInfo(srcdir)
        else:
            # Unknown VCS or file is not in a repo.
            return None
    return srcdirRepoInfo[srcdir].GetFileInfo(file)

def GetVCSFilename(file, srcdirs):
    """Given a full path to a file, and the top source directory,
    look for version control information about this file, and return
    a tuple containing
    1) a specially formatted filename that contains the VCS type,
    VCS location, relative filename, and revision number, formatted like:
    vcs:vcs location:filename:revision
    For example:
    cvs:cvs.mozilla.org/cvsroot:mozilla/browser/app/nsBrowserApp.cpp:1.36
    2) the unmodified root information if it exists"""
    (path, filename) = os.path.split(file)
    if path == '' or filename == '':
        return (file, None)

    fileInfo = None
    root = ''
    if file in vcsFileInfoCache:
        # Already cached this info, use it.
        fileInfo = vcsFileInfoCache[file]
    else:
        for srcdir in srcdirs:
            if not IsInDir(file, srcdir):
                continue
            fileInfo = GetVCSFilenameFromSrcdir(file, srcdir)
            if fileInfo:
                vcsFileInfoCache[file] = fileInfo
                break

    if fileInfo:
        file = fileInfo.filename
        root = fileInfo.root

    # we want forward slashes on win32 paths
    return (file.replace("\\", "/"), root)

def GetPlatformSpecificDumper(**kwargs):
    """This function simply returns an instance of a subclass of Dumper
    that is appropriate for the current platform."""
    # Python 2.5 has a bug where platform.system() returns 'Microsoft'.
    # Remove this when we no longer support Python 2.5.
    return {'Windows': Dumper_Win32,
            'Microsoft': Dumper_Win32,
            'Linux': Dumper_Linux,
            'Sunos5': Dumper_Solaris,
            'Darwin': Dumper_Mac}[platform.system()](**kwargs)

def SourceIndex(fileStream, outputPath, vcs_root):
    """Takes a list of files, writes info to a data block in a .stream file"""
    # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing
    # Create the srcsrv data block that indexes the pdb file
    result = True
    pdbStreamFile = open(outputPath, "w")
    pdbStreamFile.write('''SRCSRV: ini ------------------------------------------------\r\nVERSION=2\r\nINDEXVERSION=2\r\nVERCTRL=http\r\nSRCSRV: variables ------------------------------------------\r\nHGSERVER=''')
    pdbStreamFile.write(vcs_root)
    pdbStreamFile.write('''\r\nSRCSRVVERCTRL=http\r\nHTTP_EXTRACT_TARGET=%hgserver%/raw-file/%var3%/%var2%\r\nSRCSRVTRG=%http_extract_target%\r\nSRCSRV: source files ---------------------------------------\r\n''')
    pdbStreamFile.write(fileStream) # can't do string interpolation because the source server also uses this and so there are % in the above
    pdbStreamFile.write("SRCSRV: end ------------------------------------------------\r\n\n")
    pdbStreamFile.close()
    return result
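
# Each line of |fileStream| passed to SourceIndex maps a local source path to
# its repo-relative path and revision, separated by '*'. For example
# (illustrative values):
#   c:\build\src\browser\app\nsBrowserApp.cpp*browser/app/nsBrowserApp.cpp*abcdef012345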

def WorkerInitializer(cls, lock):
    """Windows worker processes won't have run GlobalInit, and due to a lack of fork(),
    won't inherit the class variables from the parent. The only one they need is the lock,
    so we run an initializer to set it. Redundant but harmless on other platforms."""
    cls.lock = lock

def StartProcessFilesWork(dumper, files, arch_num, arch, vcs_root, after, after_arg):
    """multiprocessing can't handle methods as Process targets, so we define
    a simple wrapper function around the work method."""
    return dumper.ProcessFilesWork(files, arch_num, arch, vcs_root, after, after_arg)

class Dumper:
    """This class can dump symbols from a file with debug info, and
    store the output in a directory structure that is valid for use as
    a Breakpad symbol server.  Requires a path to a dump_syms binary--
    |dump_syms| and a directory to store symbols in--|symbol_path|.
    Optionally takes a list of processor architectures to process from
    each debug file--|archs|, the full path to the top source
    directory--|srcdir|, for generating relative source file names,
    and an option to copy debug info files alongside the dumped
    symbol files--|copy_debug|, mostly useful for creating a
    Microsoft Symbol Server from the resulting output.

    You don't want to use this directly if you intend to call
    ProcessDir.  Instead, call GetPlatformSpecificDumper to
    get an instance of a subclass.

    Processing is performed asynchronously via worker processes; in
    order to wait for processing to finish and cleanup correctly, you
    must call Finish after all Process/ProcessDir calls have been made.
    You must also call Dumper.GlobalInit before creating or using any
    instances."""
    def __init__(self, dump_syms, symbol_path,
                 archs=None,
                 srcdirs=[],
                 copy_debug=False,
                 vcsinfo=False,
                 srcsrv=False,
                 exclude=[],
                 repo_manifest=None):
        # popen likes absolute paths, at least on windows
        self.dump_syms = os.path.abspath(dump_syms)
        self.symbol_path = symbol_path
        if archs is None:
            # makes the loop logic simpler
            self.archs = ['']
        else:
            self.archs = ['-a %s' % a for a in archs.split()]
        self.srcdirs = [os.path.normpath(a) for a in srcdirs]
        self.copy_debug = copy_debug
        self.vcsinfo = vcsinfo
        self.srcsrv = srcsrv
        self.exclude = exclude[:]
        if repo_manifest:
            self.parse_repo_manifest(repo_manifest)

        # book-keeping to keep track of our jobs and the cleanup work per file tuple
        self.files_record = {}
        self.jobs_record = collections.defaultdict(int)

    @classmethod
    def GlobalInit(cls, module=multiprocessing):
        """Initialize the class globals for the multiprocessing setup; must
        be called before any Dumper instances are created and used. Test cases
        may pass in a different module to supply Manager and Pool objects,
        usually multiprocessing.dummy."""
        num_cpus = module.cpu_count()
        if num_cpus is None:
            # assume a dual core machine if we can't find out for some reason
            # probably better on single core anyway due to I/O constraints
            num_cpus = 2

        # have to create any locks etc before the pool
        cls.manager = module.Manager()
        cls.jobs_condition = Dumper.manager.Condition()
        cls.lock = Dumper.manager.RLock()
        cls.pool = module.Pool(num_cpus, WorkerInitializer, (cls, cls.lock))

    def JobStarted(self, file_key):
        """Increments the number of submitted jobs for the specified key file,
        defined as the original file we processed; note that a single key file
        can generate up to 1 + len(self.archs) jobs in the Mac case."""
        with Dumper.jobs_condition:
            self.jobs_record[file_key] += 1
            Dumper.jobs_condition.notify_all()

    def JobFinished(self, file_key):
        """Decrements the number of submitted jobs for the specified key file,
        defined as the original file we processed; once the count is back to 0,
        remove the entry from our record."""
        with Dumper.jobs_condition:
            self.jobs_record[file_key] -= 1
            if self.jobs_record[file_key] == 0:
                del self.jobs_record[file_key]
            Dumper.jobs_condition.notify_all()

    def output(self, dest, output_str):
        """Writes |output_str| to |dest|, holding |lock|;
        terminates with a newline."""
        with Dumper.lock:
            dest.write(output_str + "\n")
            dest.flush()

    def output_pid(self, dest, output_str):
        """Debugging output; prepends the pid to the string."""
        self.output(dest, "%d: %s" % (os.getpid(), output_str))

    def parse_repo_manifest(self, repo_manifest):
        """
        Parse an XML manifest of repository info as produced
        by the `repo manifest -r` command.
        """
        doc = parse(repo_manifest)
        if doc.firstChild.tagName != "manifest":
            return
        # First, get remotes.
        remotes = dict([(r.getAttribute("name"), r.getAttribute("fetch")) for r in doc.getElementsByTagName("remote")])
        # And default remote.
        default_remote = None
        if doc.getElementsByTagName("default"):
            default_remote = doc.getElementsByTagName("default")[0].getAttribute("remote")
        # Now get projects. Assume they're relative to repo_manifest.
        base_dir = os.path.abspath(os.path.dirname(repo_manifest))
        for proj in doc.getElementsByTagName("project"):
            # name is the repository URL relative to the remote path.
            name = proj.getAttribute("name")
            # path is the path on-disk, relative to the manifest file.
            path = proj.getAttribute("path")
            # revision is the changeset ID.
            rev = proj.getAttribute("revision")
            # remote is the base URL to use.
            remote = proj.getAttribute("remote")
            # remote defaults to the <default remote>.
            if not remote:
                remote = default_remote
            # path defaults to name.
            if not path:
                path = name
            if not (name and path and rev and remote):
                print "Skipping project %s" % proj.toxml()
                continue
            remote = remotes[remote]
            # Turn git URLs into http URLs so that urljoin works.
            if remote.startswith("git:"):
                remote = "http" + remote[3:]
            # Add this project to srcdirs.
            srcdir = os.path.join(base_dir, path)
            self.srcdirs.append(srcdir)
            # And cache its VCS file info. Currently all repos mentioned
            # in a repo manifest are assumed to be git.
            root = urlparse.urljoin(remote, name)
            srcdirRepoInfo[srcdir] = GitRepoInfo(srcdir, rev, root)
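
    # A minimal manifest accepted by parse_repo_manifest might look like this
    # (a sketch; all values are illustrative):
    #   <manifest>
    #     <remote name="moz" fetch="git://git.example.com/"/>
    #     <default remote="moz"/>
    #     <project name="releng" path="build/releng" revision="abcdef012345"/>
    #   </manifest>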

    # subclasses override this
    def ShouldProcess(self, file):
        return not any(fnmatch.fnmatch(os.path.basename(file), exclude) for exclude in self.exclude)

    # and can override this
    def ShouldSkipDir(self, dir):
        return False

    def RunFileCommand(self, file):
        """Utility function, returns the output of file(1)"""
        try:
            # we use -L to read the targets of symlinks,
            # and -b to print just the content, not the filename
            return os.popen("file -Lb " + file).read()
        except:
            return ""

    # This is a no-op except on Win32
    def FixFilenameCase(self, file):
        return file

    # This is a no-op except on Win32
    def SourceServerIndexing(self, debug_file, guid, sourceFileStream, vcs_root):
        return ""

    # subclasses override this if they want to support this
    def CopyDebug(self, file, debug_file, guid):
        pass

    def Finish(self, stop_pool=True):
        """Wait for the expected number of jobs to be submitted, and then
        wait for the pool to finish processing them. By default, will close
        and clear the pool, but for testcases that need multiple runs, pass
        stop_pool = False."""
        with Dumper.jobs_condition:
            while len(self.jobs_record) != 0:
                Dumper.jobs_condition.wait()
        if stop_pool:
            Dumper.pool.close()
            Dumper.pool.join()

    def Process(self, file_or_dir):
        """Process a file or all the (valid) files in a directory; processing
        is performed asynchronously, and Finish must be called to wait for it
        to complete and cleanup."""
        if os.path.isdir(file_or_dir) and not self.ShouldSkipDir(file_or_dir):
            self.ProcessDir(file_or_dir)
        elif os.path.isfile(file_or_dir):
            self.ProcessFiles((file_or_dir,))

    def ProcessDir(self, dir):
        """Process all the valid files in this directory.  Valid files
        are determined by calling ShouldProcess; processing is performed
        asynchronously, and Finish must be called to wait for it to complete
        and cleanup."""
        for root, dirs, files in os.walk(dir):
            for d in dirs[:]:
                if self.ShouldSkipDir(d):
                    dirs.remove(d)
            for f in files:
                fullpath = os.path.join(root, f)
                if self.ShouldProcess(fullpath):
                    self.ProcessFiles((fullpath,))

    def SubmitJob(self, file_key, func, args, callback):
        """Submits a job to the pool of workers; increments the number of submitted jobs."""
        self.JobStarted(file_key)
        Dumper.pool.apply_async(func, args=args, callback=callback)

    def ProcessFilesFinished(self, res):
        """Callback from multiprocessing when ProcessFilesWork finishes;
        run the cleanup work, if any"""
        self.JobFinished(res['files'][-1])
        # only run the cleanup function once per tuple of files
        self.files_record[res['files']] += 1
        if self.files_record[res['files']] == len(self.archs):
            del self.files_record[res['files']]
            if res['after']:
                res['after'](res['status'], res['after_arg'])

    def ProcessFiles(self, files, after=None, after_arg=None):
        """Dump symbols from these files into a symbol file, stored
        in the proper directory structure in |symbol_path|; processing is
        performed asynchronously, and Finish must be called to wait for it
        to complete and cleanup.
        All files after the first are fallbacks in case the first file does not
        process successfully; if it does, no other files will be touched."""
        self.output_pid(sys.stderr, "Submitting jobs for files: %s" % str(files))

        # tries to get the vcs root from the .mozconfig first - if it's not set
        # the tinderbox vcs path will be assigned further down
        vcs_root = os.environ.get("SRCSRV_ROOT")
        for arch_num, arch in enumerate(self.archs):
            self.files_record[files] = 0 # record that we submitted jobs for this tuple of files
            self.SubmitJob(files[-1], StartProcessFilesWork, args=(self, files, arch_num, arch, vcs_root, after, after_arg), callback=self.ProcessFilesFinished)

    def ProcessFilesWork(self, files, arch_num, arch, vcs_root, after, after_arg):
        self.output_pid(sys.stderr, "Worker processing files: %s" % (files,))

        # our result is a status, a cleanup function, an argument to that function, and the tuple of files we were called on
        result = {'status': False, 'after': after, 'after_arg': after_arg, 'files': files}

        sourceFileStream = ''
        for file in files:
            # files is a tuple of files, containing fallbacks in case the first file doesn't process successfully
            try:
                proc = subprocess.Popen([self.dump_syms] + arch.split() + [file],
                                        stdout=subprocess.PIPE)
                module_line = proc.stdout.next()
                if module_line.startswith("MODULE"):
                    # MODULE os cpu guid debug_file
                    (guid, debug_file) = (module_line.split())[3:5]
                    # strip off .pdb extensions, and append .sym
                    sym_file = re.sub("\.pdb$", "", debug_file) + ".sym"
                    # we do want forward slashes here
                    rel_path = os.path.join(debug_file,
                                            guid,
                                            sym_file).replace("\\", "/")
                    full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                              rel_path))
                    try:
                        os.makedirs(os.path.dirname(full_path))
                    except OSError: # already exists
                        pass
                    f = open(full_path, "w")
                    f.write(module_line)
                    # now process the rest of the output
                    for line in proc.stdout:
                        if line.startswith("FILE"):
                            # FILE index filename
                            (x, index, filename) = line.rstrip().split(None, 2)
                            if sys.platform == "sunos5":
                                for srcdir in self.srcdirs:
                                    start = filename.find(srcdir)
                                    if start != -1:
                                        filename = filename[start:]
                                        break
                            filename = self.FixFilenameCase(filename)
                            sourcepath = filename
                            if self.vcsinfo:
                                (filename, rootname) = GetVCSFilename(filename, self.srcdirs)
                                # sets vcs_root in case the loop through files were to end on an empty rootname
                                if vcs_root is None:
                                    if rootname:
                                        vcs_root = rootname
                            # gather up files with hg for indexing
                            if filename.startswith("hg"):
                                (ver, checkout, source_file, revision) = filename.split(":", 3)
                                sourceFileStream += sourcepath + "*" + source_file + '*' + revision + "\r\n"
                            f.write("FILE %s %s\n" % (index, filename))
                        else:
                            # pass through all other lines unchanged
                            f.write(line)
                            # we want to return true only if at least one line is not a MODULE or FILE line
                            result['status'] = True
                    f.close()
                    proc.wait()
                    # we output relative paths so callers can get a list of what
                    # was generated
                    self.output(sys.stdout, rel_path)
                    if self.srcsrv and vcs_root:
                        # add source server indexing to the pdb file
                        self.SourceServerIndexing(file, guid, sourceFileStream, vcs_root)
                    # only copy debug the first time if we have multiple architectures
                    if self.copy_debug and arch_num == 0:
                        self.CopyDebug(file, debug_file, guid)
            except StopIteration:
                pass
            except Exception as e:
                self.output(sys.stderr, "Unexpected error: %s" % (str(e),))
                raise
            if result['status']:
                # we only need 1 file to work
                break
        return result
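
# For reference, the dump_syms output parsed above begins with a MODULE record
# followed by FILE records, e.g. (illustrative values):
#   MODULE windows x86 0123456789ABCDEF0123456789ABCDEF1 firefox.pdb
#   FILE 0 c:/build/src/browser/app/nsBrowserApp.cpp
#   FUNC 1000 30 0 main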

# Platform-specific subclasses.  For the most part, these just have
# logic to determine what files to extract symbols from.

class Dumper_Win32(Dumper):
    fixedFilenameCaseCache = {}

    def ShouldProcess(self, file):
        """This function will allow processing of pdb files that have dll
        or exe files with the same base name next to them."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".pdb"):
            (path, ext) = os.path.splitext(file)
            if os.path.isfile(path + ".exe") or os.path.isfile(path + ".dll"):
                return True
        return False

    def FixFilenameCase(self, file):
        """Recent versions of Visual C++ put filenames into
        PDB files as all lowercase.  If the file exists
        on the local filesystem, fix it."""

        # Use a cached version if we have one.
        if file in self.fixedFilenameCaseCache:
            return self.fixedFilenameCaseCache[file]

        result = file

        (path, filename) = os.path.split(file)
        if os.path.isdir(path):
            lc_filename = filename.lower()
            for f in os.listdir(path):
                if f.lower() == lc_filename:
                    result = os.path.join(path, f)
                    break

        # Cache the corrected version to avoid future filesystem hits.
        self.fixedFilenameCaseCache[file] = result
        return result

    def CopyDebug(self, file, debug_file, guid):
        rel_path = os.path.join(debug_file,
                                guid,
                                debug_file).replace("\\", "/")
        full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                  rel_path))
        shutil.copyfile(file, full_path)
        # try compressing it
        compressed_file = os.path.splitext(full_path)[0] + ".pd_"
        # ignore makecab's output
        success = subprocess.call(["makecab.exe", "/D", "CompressionType=LZX", "/D",
                                   "CompressionMemory=21",
                                   full_path, compressed_file],
                                  stdout=open("NUL:", "w"), stderr=subprocess.STDOUT)
        if success == 0 and os.path.exists(compressed_file):
            os.unlink(full_path)
            self.output(sys.stdout, os.path.splitext(rel_path)[0] + ".pd_")
        else:
            self.output(sys.stdout, rel_path)

    def SourceServerIndexing(self, debug_file, guid, sourceFileStream, vcs_root):
        # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing
        debug_file = os.path.abspath(debug_file)
        streamFilename = debug_file + ".stream"
        stream_output_path = os.path.abspath(streamFilename)
        # Call SourceIndex to create the .stream file
        result = SourceIndex(sourceFileStream, stream_output_path, vcs_root)
        if self.copy_debug:
            pdbstr_path = os.environ.get("PDBSTR_PATH")
            pdbstr = os.path.normpath(pdbstr_path)
            subprocess.call([pdbstr, "-w", "-p:" + os.path.basename(debug_file),
                             "-i:" + os.path.basename(streamFilename), "-s:srcsrv"],
                            cwd=os.path.dirname(stream_output_path))
            # clean up all the .stream files when done
            os.remove(stream_output_path)
        return result

class Dumper_Linux(Dumper):
    objcopy = os.environ['OBJCOPY'] if 'OBJCOPY' in os.environ else 'objcopy'

    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .so extension, and additionally
        file(1) reports as being ELF files.  It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".so") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("ELF")
        return False

    def CopyDebug(self, file, debug_file, guid):
        # We want to strip out the debug info, and add a
        # .gnu_debuglink section to the object, so the debugger can
        # actually load our debug info later.
        file_dbg = file + ".dbg"
        if subprocess.call([self.objcopy, '--only-keep-debug', file, file_dbg]) == 0 and \
           subprocess.call([self.objcopy, '--add-gnu-debuglink=%s' % file_dbg, file]) == 0:
            rel_path = os.path.join(debug_file,
                                    guid,
                                    debug_file + ".dbg")
            full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                      rel_path))
            shutil.move(file_dbg, full_path)
            # gzip the shipped debug files
            os.system("gzip %s" % full_path)
            self.output(sys.stdout, rel_path + ".gz")
        else:
            if os.path.isfile(file_dbg):
                os.unlink(file_dbg)

class Dumper_Solaris(Dumper):
    def RunFileCommand(self, file):
        """Utility function, returns the output of file(1)"""
        try:
            output = os.popen("file " + file).read()
            return output.split('\t')[1]
        except:
            return ""

    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .so extension, and additionally
        file(1) reports as being ELF files.  It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".so") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("ELF")
        return False

def StartProcessFilesWorkMac(dumper, file):
    """multiprocessing can't handle methods as Process targets, so we define
    a simple wrapper function around the work method."""
    return dumper.ProcessFilesWorkMac(file)

def AfterMac(status, dsymbundle):
    """Cleanup function to run on Macs after we process the file(s)."""
    # CopyDebug will already have been run from Dumper.ProcessFiles
    shutil.rmtree(dsymbundle)

class Dumper_Mac(Dumper):
    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .dylib extension, and additionally
        file(1) reports as being Mach-O files.  It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".dylib") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("Mach-O")
        return False

    def ShouldSkipDir(self, dir):
        """We create .dSYM bundles on the fly, but if someone runs
        buildsymbols twice, we should skip any bundles we created
        previously, otherwise we'll recurse into them and try to
        dump the inner bits again."""
        if dir.endswith(".dSYM"):
            return True
        return False

    def ProcessFiles(self, files, after=None, after_arg=None):
        # also note, files must be len 1 here, since we're the only ones
        # that ever add more than one file to the list
        self.output_pid(sys.stderr, "Submitting job for Mac pre-processing on file: %s" % (files[0]))
        self.SubmitJob(files[0], StartProcessFilesWorkMac, args=(self, files[0]), callback=self.ProcessFilesMacFinished)

    def ProcessFilesMacFinished(self, result):
        if result['status']:
            # kick off new jobs per-arch with our new list of files
            Dumper.ProcessFiles(self, result['files'], after=AfterMac, after_arg=result['files'][0])
        # only decrement jobs *after* that, since otherwise we'll remove the record for this file
        self.JobFinished(result['files'][-1])

    def ProcessFilesWorkMac(self, file):
        """dump_syms on Mac needs to be run on a dSYM bundle produced
        by dsymutil(1), so run dsymutil here and pass the bundle name
        down to the superclass method instead."""
        self.output_pid(sys.stderr, "Worker running Mac pre-processing on file: %s" % (file,))

        # our return is a status and a tuple of files to dump symbols for
        # the extra files are fallbacks; as soon as one is dumped successfully, we stop
        result = {'status': False, 'files': None, 'file_key': file}
        dsymbundle = file + ".dSYM"
        if os.path.exists(dsymbundle):
            shutil.rmtree(dsymbundle)
        # dsymutil takes --arch=foo instead of -a foo like everything else
        subprocess.call(["dsymutil"] + [a.replace('-a ', '--arch=') for a in self.archs if a]
                        + [file],
                        stdout=open("/dev/null", "w"))
        if not os.path.exists(dsymbundle):
            # dsymutil won't produce a .dSYM for files without symbols
            result['status'] = False
            return result

        result['status'] = True
        result['files'] = (dsymbundle, file)
        return result

    def CopyDebug(self, file, debug_file, guid):
        """ProcessFiles has already produced a dSYM bundle, so we should just
        copy that to the destination directory. However, we'll package it
        into a .tar.bz2 because the debug symbols are pretty huge, and
        also because it's a bundle, so it's a directory. |file| here is the
        dSYM bundle, and |debug_file| is the original filename."""
        rel_path = os.path.join(debug_file,
                                guid,
                                os.path.basename(file) + ".tar.bz2")
        full_path = os.path.abspath(os.path.join(self.symbol_path,
                                                 rel_path))
        success = subprocess.call(["tar", "cjf", full_path, os.path.basename(file)],
                                  cwd=os.path.dirname(file),
                                  stdout=open("/dev/null", "w"), stderr=subprocess.STDOUT)
        if success == 0 and os.path.exists(full_path):
            self.output(sys.stdout, rel_path)

# Entry point if called as a standalone program
def main():
    parser = OptionParser(usage="usage: %prog [options] <dump_syms binary> <symbol store path> <debug info files>")
    parser.add_option("-c", "--copy",
                      action="store_true", dest="copy_debug", default=False,
                      help="Copy debug info files into the same directory structure as symbol files")
    parser.add_option("-a", "--archs",
                      action="store", dest="archs",
                      help="Run dump_syms -a <arch> for each space separated cpu architecture in ARCHS (only on OS X)")
    parser.add_option("-s", "--srcdir",
                      action="append", dest="srcdir", default=[],
                      help="Use SRCDIR to determine relative paths to source files")
    parser.add_option("-v", "--vcs-info",
                      action="store_true", dest="vcsinfo",
                      help="Try to retrieve VCS info for each FILE listed in the output")
    parser.add_option("-i", "--source-index",
                      action="store_true", dest="srcsrv", default=False,
                      help="Add source index information to debug files, making them suitable for use in a source server.")
    parser.add_option("-x", "--exclude",
                      action="append", dest="exclude", default=[], metavar="PATTERN",
                      help="Skip processing files matching PATTERN.")
    parser.add_option("--repo-manifest",
                      action="store", dest="repo_manifest",
                      help="""Get source information from this XML manifest
produced by the `repo manifest -r` command.
""")
    (options, args) = parser.parse_args()

    # check to see if pdbstr.exe exists
    if options.srcsrv:
        pdbstr = os.environ.get("PDBSTR_PATH")
        if not pdbstr or not os.path.exists(pdbstr):
            print >> sys.stderr, "Invalid path to pdbstr.exe - please set/check PDBSTR_PATH.\n"
            sys.exit(1)

    if len(args) < 3:
        parser.error("not enough arguments")
        exit(1)

    dumper = GetPlatformSpecificDumper(dump_syms=args[0],
                                       symbol_path=args[1],
                                       copy_debug=options.copy_debug,
                                       archs=options.archs,
                                       srcdirs=options.srcdir,
                                       vcsinfo=options.vcsinfo,
                                       srcsrv=options.srcsrv,
                                       exclude=options.exclude,
                                       repo_manifest=options.repo_manifest)
    for arg in args[2:]:
        dumper.Process(arg)
    dumper.Finish()

# run main if run directly
if __name__ == "__main__":
    # set up the multiprocessing infrastructure before we start;
    # note that this needs to be in the __main__ guard, or else Windows will choke
    Dumper.GlobalInit()

    main()