Bug 794472 - Allow pymake to run more commands without sending them to a shell. r=ted

2024-10-08 19:04:45 +00:00 · 2012-10-18 14:46:33 +02:00 · 2012-10-18 14:46:33 +02:00 · b843df8e20
commit b843df8e20
parent 5055bfb2f5
3 changed files with 226 additions and 65 deletions
--- a/build/pymake/pymake/data.py
+++ b/build/pymake/pymake/data.py
@ -1404,24 +1404,24 @@ class _NativeWrapper(_CommandWrapper):
                 pycommandpath, **kwargs):
        _CommandWrapper.__init__(self, cline, ignoreErrors, loc, context,
                                 **kwargs)
-        # get the module and method to call
-        parts, badchar = process.clinetoargv(cline, blacklist_gray=False)
-        if parts is None:
-            raise DataError("native command '%s': shell metacharacter '%s' in command line" % (cline, badchar), self.loc)
-        if len(parts) < 2:
-            raise DataError("native command '%s': no method name specified" % cline, self.loc)
        if pycommandpath:
            self.pycommandpath = re.split('[%s\s]+' % os.pathsep,
                                          pycommandpath)
        else:
            self.pycommandpath = None
-        self.module = parts[0]
-        self.method = parts[1]
-        self.cline_list = parts[2:]

    def __call__(self, cb):
+        # get the module and method to call
+        parts, badchar = process.clinetoargv(self.cline, self.kwargs['cwd'])
+        if parts is None:
+            raise DataError("native command '%s': shell metacharacter '%s' in command line" % (cline, badchar), self.loc)
+        if len(parts) < 2:
+            raise DataError("native command '%s': no method name specified" % cline, self.loc)
+        module = parts[0]
+        method = parts[1]
+        cline_list = parts[2:]
        self.usercb = cb
-        process.call_native(self.module, self.method, self.cline_list,
+        process.call_native(module, method, cline_list,
                            loc=self.loc, cb=self._cb, context=self.context,
                            pycommandpath=self.pycommandpath, **self.kwargs)

--- a/build/pymake/pymake/functions.py
+++ b/build/pymake/pymake/functions.py
@ -3,7 +3,7 @@ Makefile functions.
 """

 import parser, util
-import subprocess, os, logging
+import subprocess, os, logging, sys
 from globrelative import glob
 from cStringIO import StringIO

@ -766,16 +766,28 @@ class ShellFunction(Function):
    __slots__ = Function.__slots__

    def resolve(self, makefile, variables, fd, setting):
-        #TODO: call this once up-front somewhere and save the result?
-        shell, msys = util.checkmsyscompat()
+        from process import prepare_command
        cline = self._arguments[0].resolvestr(makefile, variables, setting)
+        executable, cline = prepare_command(cline, makefile.workdir, self.loc)
+
+        # subprocess.Popen doesn't use the PATH set in the env argument for
+        # finding the executable on some platforms (but strangely it does on
+        # others!), so set os.environ['PATH'] explicitly.
+        oldpath = os.environ['PATH']
+        if makefile.env is not None and 'PATH' in makefile.env:
+            os.environ['PATH'] = makefile.env['PATH']
+
+        log.debug("%s: running command '%s'" % (self.loc, ' '.join(cline)))
+        try:
+            p = subprocess.Popen(cline, executable=executable, env=makefile.env, shell=False,
+                                 stdout=subprocess.PIPE, cwd=makefile.workdir)
+        except OSError, e:
+            print >>sys.stderr, "Error executing command %s" % cline[0], e
+            return
+        finally:
+            os.environ['PATH'] = oldpath

-        log.debug("%s: running shell command '%s'" % (self.loc, cline))
-        cline = [shell, "-c", cline]
-        p = subprocess.Popen(cline, env=makefile.env, shell=False,
-                             stdout=subprocess.PIPE, cwd=makefile.workdir)
        stdout, stderr = p.communicate()
-
        stdout = stdout.replace('\r\n', '\n')
        if stdout.endswith('\n'):
            stdout = stdout[:-1]
--- a/build/pymake/pymake/process.py
+++ b/build/pymake/pymake/process.py
@ -15,83 +15,238 @@ if sys.platform=='win32':
 _log = logging.getLogger('pymake.process')

 _escapednewlines = re.compile(r'\\\n')
-# Characters that most likely indicate a shell script and that native commands
-# should reject
-_blacklist = re.compile(r'[$><;\[~`|&]' +
-    r'|\${|(?:^|\s){(?:$|\s)')  # Blacklist ${foo} and { commands }
-# Characters that probably indicate a shell script, but that native commands
-# shouldn't just reject
-_graylist = re.compile(r'[()]')
-# Characters that indicate we need to glob
-_needsglob = re.compile(r'[\*\?]')

-def clinetoargv(cline, blacklist_gray):
+def tokens2re(tokens):
+    # Create a pattern for non-escaped tokens, in the form:
+    #   (?<!\\)(?:a|b|c...)
+    # This is meant to match patterns a, b, or c, or ... if they are not
+    # preceded by a backslash.
+    # where a, b, c... are in the form
+    #   (?P<name>pattern)
+    # which matches the pattern and captures it in a named match group.
+    # The group names and patterns come are given as a dict in the function
+    # argument.
+    nonescaped = r'(?<!\\)(?:%s)' % '|'.join('(?P<%s>%s)' % (name, value) for name, value in tokens.iteritems())
+    # The final pattern matches either the above pattern, or an escaped
+    # backslash, captured in the "escape" match group.
+    return re.compile('(?:%s|%s)' % (nonescaped, r'(?P<escape>\\\\)'))
+
+_unquoted_tokens = tokens2re({
+  'whitespace': r'[\t\r\n ]',
+  'quote': r'[\'"]',
+  'comment': '#',
+  'special': r'[<>&|`~(){}$;]',
+  'backslashed': r'\\[^\\]',
+  'glob': r'[\*\?]',
+})
+
+_doubly_quoted_tokens = tokens2re({
+  'quote': '"',
+  'backslashedquote': r'\\"',
+  'special': '\$',
+  'backslashed': r'\\[^\\"]',
+})
+
+class MetaCharacterException(Exception):
+    def __init__(self, char):
+        self.char = char
+
+class ClineSplitter(list):
+    """
+    Parses a given command line string and creates a list of command
+    and arguments, with wildcard expansion.
+    """
+    def __init__(self, cline, cwd):
+        self.cwd = cwd
+        self.arg = ''
+        self.cline = cline
+        self.glob = False
+        self._parse_unquoted()
+
+    def _push(self, str):
+        """
+        Push the given string as part of the current argument
+        """
+        self.arg += str
+
+    def _next(self):
+        """
+        Finalize current argument, effectively adding it to the list.
+        Perform globbing if needed.
+        """
+        if not self.arg:
+            return
+        if self.glob:
+            if os.path.isabs(self.arg):
+                path = self.arg
+            else:
+                path = os.path.join(self.cwd, self.arg)
+            globbed = glob.glob(path)
+            if not globbed:
+                # If globbing doesn't find anything, the literal string is
+                # used.
+                self.append(self.arg)
+            else:
+                self.extend(f[len(path)-len(self.arg):] for f in globbed)
+            self.glob = False
+        else:
+            self.append(self.arg)
+        self.arg = ''
+
+    def _parse_unquoted(self):
+        """
+        Parse command line remainder in the context of an unquoted string.
+        """
+        while self.cline:
+            # Find the next token
+            m = _unquoted_tokens.search(self.cline)
+            # If we find none, the remainder of the string can be pushed to
+            # the current argument and the argument finalized
+            if not m:
+                self._push(self.cline)
+                break
+            # The beginning of the string, up to the found token, is part of
+            # the current argument
+            self._push(self.cline[:m.start()])
+            self.cline = self.cline[m.end():]
+
+            match = dict([(name, value) for name, value in m.groupdict().items() if value])
+            if 'quote' in match:
+                # " or ' start a quoted string
+                if match['quote'] == '"':
+                    self._parse_doubly_quoted()
+                else:
+                    self._parse_quoted()
+            elif 'comment' in match:
+                # Comments are ignored. The current argument can be finalized,
+                # and parsing stopped.
+                break
+            elif 'special' in match:
+                # Unquoted, non-escaped special characters need to be sent to a
+                # shell.
+                raise MetaCharacterException, match['special']
+            elif 'whitespace' in match:
+                # Whitespaces terminate current argument.
+                self._next()
+            elif 'escape' in match:
+                # Escaped backslashes turn into a single backslash
+                self._push('\\')
+            elif 'backslashed' in match:
+                # Backslashed characters are unbackslashed
+                # e.g. echo \a -> a
+                self._push(match['backslashed'][1])
+            elif 'glob' in match:
+                # ? or * will need globbing
+                self.glob = True
+                self._push(m.group(0))
+            else:
+                raise Exception, "Shouldn't reach here"
+        self._next()
+
+    def _parse_quoted(self):
+        # Single quoted strings are preserved, except for the final quote
+        index = self.cline.find("'")
+        if index == -1:
+            raise Exception, 'Unterminated quoted string in command'
+        self._push(self.cline[:index])
+        self.cline = self.cline[index+1:]
+
+    def _parse_doubly_quoted(self):
+        if not self.cline:
+            raise Exception, 'Unterminated quoted string in command'
+        while self.cline:
+            m = _doubly_quoted_tokens.search(self.cline)
+            if not m:
+                raise Exception, 'Unterminated quoted string in command'
+            self._push(self.cline[:m.start()])
+            self.cline = self.cline[m.end():]
+            match = dict([(name, value) for name, value in m.groupdict().items() if value])
+            if 'quote' in match:
+                # a double quote ends the quoted string, so go back to
+                # unquoted parsing
+                return
+            elif 'special' in match:
+                # Unquoted, non-escaped special characters in a doubly quoted
+                # string still have a special meaning and need to be sent to a
+                # shell.
+                raise MetaCharacterException, match['special']
+            elif 'escape' in match:
+                # Escaped backslashes turn into a single backslash
+                self._push('\\')
+            elif 'backslashedquote' in match:
+                # Backslashed double quotes are un-backslashed
+                self._push('"')
+            elif 'backslashed' in match:
+                # Backslashed characters are kept backslashed
+                self._push(match['backslashed'])
+
+def clinetoargv(cline, cwd):
    """
    If this command line can safely skip the shell, return an argv array.
    @returns argv, badchar
    """
    str = _escapednewlines.sub('', cline)
-    m = _blacklist.search(str)
-    if m is not None:
-        return None, m.group(0)
-    if blacklist_gray:
-        m = _graylist.search(str)
-        if m is not None:
-            return None, m.group(0)
-
-    args = shlex.split(str, comments=True)
+    try:
+        args = ClineSplitter(str, cwd)
+    except MetaCharacterException, e:
+        return None, e.char

    if len(args) and args[0].find('=') != -1:
        return None, '='

    return args, None

-def doglobbing(args, cwd):
-    """
-    Perform any needed globbing on the argument list passed in
-    """
-    globbedargs = []
-    for arg in args:
-        if _needsglob.search(arg):
-            globbedargs.extend(glob.glob(os.path.join(cwd, arg)))
-        else:
-            globbedargs.append(arg)
-
-    return globbedargs
-
+# shellwords contains a set of shell builtin commands that need to be
+# executed within a shell. It also contains a set of commands that are known
+# to be giving problems when run directly instead of through the msys shell.
 shellwords = (':', '.', 'break', 'cd', 'continue', 'exec', 'exit', 'export',
              'getopts', 'hash', 'pwd', 'readonly', 'return', 'shift', 
              'test', 'times', 'trap', 'umask', 'unset', 'alias',
              'set', 'bind', 'builtin', 'caller', 'command', 'declare',
              'echo', 'enable', 'help', 'let', 'local', 'logout', 
              'printf', 'read', 'shopt', 'source', 'type', 'typeset',
-              'ulimit', 'unalias', 'set')
+              'ulimit', 'unalias', 'set', 'find')
+
+def prepare_command(cline, cwd, loc):
+    """
+    Returns a list of command and arguments for the given command line string.
+    If the command needs to be run through a shell for some reason, the
+    returned list contains the shell invocation.
+    """

-def call(cline, env, cwd, loc, cb, context, echo, justprint=False):
    #TODO: call this once up-front somewhere and save the result?
    shell, msys = util.checkmsyscompat()

    shellreason = None
+    executable = None
    if msys and cline.startswith('/'):
        shellreason = "command starts with /"
    else:
-        argv, badchar = clinetoargv(cline, blacklist_gray=True)
+        argv, badchar = clinetoargv(cline, cwd)
        if argv is None:
            shellreason = "command contains shell-special character '%s'" % (badchar,)
        elif len(argv) and argv[0] in shellwords:
            shellreason = "command starts with shell primitive '%s'" % (argv[0],)
-        else:
-            argv = doglobbing(argv, cwd)
+        elif argv and (os.sep in argv[0] or os.altsep and os.altsep in argv[0]):
+            executable = util.normaljoin(cwd, argv[0])
+            # Avoid "%1 is not a valid Win32 application" errors, assuming
+            # that if the executable path is to be resolved with PATH, it will
+            # be a Win32 executable.
+            if sys.platform == 'win32' and os.path.isfile(executable) and open(executable, 'rb').read(2) == "#!":
+                shellreason = "command executable starts with a hashbang"

    if shellreason is not None:
        _log.debug("%s: using shell: %s: '%s'", loc, shellreason, cline)
        if msys:
            if len(cline) > 3 and cline[1] == ':' and cline[2] == '/':
                cline = '/' + cline[0] + cline[2:]
-        cline = [shell, "-c", cline]
-        context.call(cline, shell=False, env=env, cwd=cwd, cb=cb, echo=echo,
-                     justprint=justprint)
-        return
+        argv = [shell, "-c", cline]
+        executable = None
+
+    return executable, argv
+
+def call(cline, env, cwd, loc, cb, context, echo, justprint=False):
+    executable, argv = prepare_command(cline, cwd, loc)

    if not len(argv):
        cb(res=0)
@ -106,17 +261,11 @@ def call(cline, env, cwd, loc, cb, context, echo, justprint=False):
        command.main(argv[2:], env, cwd, cb)
        return

-    if argv[0].find('/') != -1:
-        executable = util.normaljoin(cwd, argv[0])
-    else:
-        executable = None
-
    context.call(argv, executable=executable, shell=False, env=env, cwd=cwd, cb=cb,
                 echo=echo, justprint=justprint)

 def call_native(module, method, argv, env, cwd, loc, cb, context, echo, justprint=False,
                pycommandpath=None):
-    argv = doglobbing(argv, cwd)
    context.call_native(module, method, argv, env=env, cwd=cwd, cb=cb,
                        echo=echo, justprint=justprint, pycommandpath=pycommandpath)