Teach lit to expand glob expressions.

This will enable removing hacks throughout the clang and
compiler-rt codebases that feed multiple inputs to a testing
utility by globbing; all of these are currently either disabled
on Windows or rely on xargs / find workarounds.

Differential Revision: https://reviews.llvm.org/D30380

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@296904 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Zachary Turner 2017-03-03 18:55:24 +00:00
parent ab26ff431c
commit 985631dcc8
5 changed files with 86 additions and 9 deletions

View File

View File

@ -0,0 +1,28 @@
RUN: echo TA > %T/TA.txt
RUN: echo TB > %T/TB.txt
RUN: echo TAB > %T/TAB.txt
RUN: echo %T/TA* | FileCheck -check-prefix=STAR %s
RUN: echo %T/'TA'* | FileCheck -check-prefix=STAR %s
RUN: echo %T/T'A'* | FileCheck -check-prefix=STAR %s
RUN: echo %T/T?.txt | FileCheck -check-prefix=QUESTION %s
RUN: echo %T/'T'?.txt | FileCheck -check-prefix=QUESTION %s
RUN: echo %T/T??.txt | FileCheck -check-prefix=QUESTION2 %s
RUN: echo %T/'T'??.txt | FileCheck -check-prefix=QUESTION2 %s
RUN: echo 'T*' 'T?.txt' 'T??.txt' | FileCheck -check-prefix=QUOTEDARGS %s
STAR-NOT: TB.txt
STAR: {{(TA.txt.*TAB.txt|TAB.txt.*TA.txt)}}
QUESTION-NOT: TAB.txt
QUESTION: {{(TA.txt.*TB.txt|TB.txt.*TA.txt)}}
QUESTION2-NOT: TA.txt
QUESTION2-NOT: TB.txt
QUESTION2: TAB.txt
QUOTEDARGS-NOT: .txt
QUOTEDARGS: T* T?.txt T??.txt

View File

@ -35,6 +35,24 @@ class Command:
else:
file.write("%s%s '%s'" % (r[0][1], r[0][0], r[1]))
class GlobItem:
    """A shell argument containing unquoted glob characters.

    The lexer produces a GlobItem instead of a plain string when an
    argument should be expanded against the filesystem before the command
    is executed.
    """

    def __init__(self, pattern):
        # The raw glob pattern exactly as it appeared in the command line.
        self.pattern = pattern

    def __repr__(self):
        return self.pattern

    def __eq__(self, other):
        # Compare against other GlobItems only; anything else (including a
        # plain string with the same text) is never equal.
        if not isinstance(other, GlobItem):
            return False
        return (self.pattern == other.pattern)

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so define it explicitly.
        return not self.__eq__(other)

    def __hash__(self):
        # Keep hashing consistent with __eq__.
        return hash(self.pattern)

    def resolve(self):
        """Return the list of paths matching the pattern.

        If nothing matches, the pattern itself is returned as the sole
        element, so the argument is passed through literally.
        """
        import glob
        results = glob.glob(self.pattern)
        return results if results else [self.pattern]
class Pipeline:
def __init__(self, commands, negate=False, pipe_err=False):
self.commands = commands

View File

@ -2,7 +2,7 @@ from __future__ import absolute_import
import itertools
import lit.util
from lit.ShCommands import Command, Pipeline, Seq
from lit.ShCommands import Command, GlobItem, Pipeline, Seq
class ShLexer:
def __init__(self, data, win32Escapes = False):
@ -40,13 +40,15 @@ class ShLexer:
return None
self.pos = self.pos - 1 + len(chunk)
return chunk
return GlobItem(chunk) if '*' in chunk or '?' in chunk else chunk
def lex_arg_slow(self, c):
if c in "'\"":
str = self.lex_arg_quoted(c)
else:
str = c
unquoted_glob_char = False
quoted_glob_char = False
while self.pos != self.end:
c = self.look()
if c.isspace() or c in "|&;":
@ -65,12 +67,12 @@ class ShLexer:
tok = self.lex_one_token()
assert isinstance(tok, tuple) and len(tok) == 1
return (tok[0], num)
elif c == '"':
elif c == '"' or c == "'":
self.eat()
str += self.lex_arg_quoted('"')
elif c == "'":
self.eat()
str += self.lex_arg_quoted("'")
quoted_arg = self.lex_arg_quoted(c)
if '*' in quoted_arg or '?' in quoted_arg:
quoted_glob_char = True
str += quoted_arg
elif not self.win32Escapes and c == '\\':
# Outside of a string, '\\' escapes everything.
self.eat()
@ -79,9 +81,25 @@ class ShLexer:
"escape at end of quoted argument in: %r" % self.data)
return str
str += self.eat()
elif c in '*?':
unquoted_glob_char = True
str += self.eat()
else:
str += self.eat()
return str
# If a quote character is present, lex_arg_quoted will remove the quotes
# and append the argument directly. This causes a problem when the
# quoted portion contains a glob character, as the character will no
# longer be treated literally. If glob characters occur *only* inside
# of quotes, then we can handle this by not globbing at all, and if
# glob characters occur *only* outside of quotes, we can still glob just
# fine. But if a glob character occurs both inside and outside of
# quotes this presents a problem. In practice this is such an obscure
# edge case that it doesn't seem worth the added complexity to support.
# By adding an assertion, it means some bot somewhere will catch this
# and flag the user of a non-portable test (which could almost certainly
# be re-written to work correctly without triggering this).
assert not (quoted_glob_char and unquoted_glob_char)
return GlobItem(str) if unquoted_glob_char else str
def lex_arg_quoted(self, delim):
str = ''
@ -202,7 +220,7 @@ class ShParser:
break
# If this is an argument, just add it to the current command.
if isinstance(tok, str):
if isinstance(tok, (str, GlobItem)):
args.append(self.lex())
continue

View File

@ -5,6 +5,7 @@ import platform
import tempfile
import threading
from lit.ShCommands import GlobItem
import lit.ShUtil as ShUtil
import lit.Test as Test
import lit.util
@ -141,6 +142,15 @@ def executeShCmd(cmd, shenv, results, timeout=0):
return (finalExitCode, timeoutInfo)
def expand_glob_expressions(cmd, args):
    """Return a copy of args with every GlobItem replaced by its matches.

    The first element (the program being run) is always copied through
    unchanged. Each later GlobItem is replaced by the list returned from
    its resolve() method; all other arguments are kept as they are.
    The cmd parameter is not used by this function.
    """
    program, rest = args[0], args[1:]
    expanded = [program]
    for item in rest:
        expanded += item.resolve() if isinstance(item, GlobItem) else [item]
    return expanded
def quote_windows_command(seq):
"""
Reimplement Python's private subprocess.list2cmdline for MSys compatibility
@ -372,6 +382,9 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper):
named_temp_files.append(f.name)
args[i] = f.name
# Expand all glob expressions
args = expand_glob_expressions(j, args)
# On Windows, do our own command line quoting for better compatibility
# with some core utility distributions.
if kIsWindows: