Add test utility 'split-file'

See https://lists.llvm.org/pipermail/llvm-dev/2020-July/143373.html
"[llvm-dev] Multiple documents in one test file" for some discussions.

This patch has explored several alternatives. The current semantics are similar to
what @dblaikie proposed.
`split-file filename output` splits the input file into multiple parts separated by
regex `^(.|//)--- filename` and writes each part to the file `output/filename`
(`filename` can include path separators).

Use case A (organizing input of different formats (e.g. linker
script+assembly) in one file).

```
# RUN: split-file %s %t
# RUN: llvm-mc %t/asm -o %t.o
# RUN: ld.lld -T %t/lds %t.o -o %t
This is sometimes better than the %S/Inputs/ approach because the user
can see the auxiliary files immediately and doesn't have to open another file.

# asm
...
# lds
...
```

Use case B (for utilities which don't have built-in input splitting
feature):

```
// RUN: split-file %s %t
// RUN: llc < %t/1.ll | FileCheck %s --check-prefix=CASE1
// RUN: llc < %t/2.ll | FileCheck %s --check-prefix=CASE2
Combining tests prudently can improve readability.
For example, when testing parsing errors where error recovery isn't possible,
grouping the tests in one file makes the test coverage/strategy easier to see.

//--- 1.ll
...
//--- 2.ll
...
```

Since this is a new utility, there are no git history concerns for
UpperCase variable names. I use lowerCase variable names like mlir/lld.

Reviewed By: jhenderson, lattner

Differential Revision: https://reviews.llvm.org/D83834
This commit is contained in:
Fangrui Song 2020-08-03 10:17:55 -07:00 committed by Fangrui Song
parent e56626e438
commit bcea3a7a28
22 changed files with 373 additions and 36 deletions

View File

@ -28,7 +28,7 @@ if (NOT LLD_BUILT_STANDALONE)
FileCheck count llc llvm-ar llvm-as llvm-bcanalyzer llvm-config llvm-cvtres
llvm-dis llvm-dwarfdump llvm-lib llvm-lipo llvm-mc llvm-nm llvm-objcopy
llvm-objdump llvm-pdbutil llvm-readelf llvm-readobj llvm-strip not obj2yaml
opt yaml2obj
opt split-file yaml2obj
)
endif()

View File

@ -1,12 +1,8 @@
# REQUIRES: x86
# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o
# RUN: echo "SECTIONS { \
# RUN: .data_noload_a (NOLOAD) : { *(.data_noload_a) } \
# RUN: .data_noload_b (0x10000) (NOLOAD) : { *(.data_noload_b) } \
# RUN: .no_input_sec_noload (NOLOAD) : { . += 1; } \
# RUN: .text (0x20000) : { *(.text) } };" > %t.script
# RUN: ld.lld -o %t --script %t.script %t.o
# RUN: llvm-readelf -S -l %t | FileCheck %s
# RUN: split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/asm -o %t.o
# RUN: ld.lld --script %t/lds %t.o -o %t/out
# RUN: llvm-readelf -S -l %t/out | FileCheck %s
# CHECK: Name Type Address Off Size
# CHECK: .data_noload_a NOBITS 0000000000000000 [[OFF:[0-9a-f]+]] 001000
@ -16,6 +12,7 @@
# CHECK: Type Offset VirtAddr PhysAddr
# CHECK-NEXT: LOAD 0x001000 0x0000000000020000 0x0000000000020000
#--- asm
.section .text,"ax",@progbits
nop
@ -24,3 +21,11 @@
.section .data_noload_b,"aw",@progbits
.zero 4096
#--- lds
SECTIONS {
.data_noload_a (NOLOAD) : { *(.data_noload_a) }
.data_noload_b (0x10000) (NOLOAD) : { *(.data_noload_b) }
.no_input_sec_noload (NOLOAD) : { . += 1; }
.text (0x20000) : { *(.text) }
}

View File

@ -271,8 +271,27 @@ adding your code there instead of creating a new file.
Extra files
-----------
If your test requires extra files besides the file containing the ``RUN:``
lines, the idiomatic place to put them is in a subdirectory ``Inputs``.
If your test requires extra files besides the file containing the ``RUN:`` lines
and the extra files are small, consider specifying them in the same file and
using ``split-file`` to extract them. For example,
.. code-block:: llvm
; RUN: split-file %s %t
; RUN: llvm-link -S %t/a.ll %t/b.ll | FileCheck %s
; CHECK: ...
;--- a.ll
...
;--- b.ll
...
The parts are separated by the regex ``^(.|//)--- <part>``. By default the
extracted content has leading empty lines to preserve line numbers. Specify
``--no-leading-lines`` to drop leading lines.
If the extra files are large, the idiomatic place to put them is in a subdirectory ``Inputs``.
You can then refer to the extra files as ``%S/Inputs/foo.bar``.
For example, consider ``test/Linker/ident.ll``. The directory structure is

View File

@ -119,6 +119,7 @@ set(LLVM_TEST_DEPENDS
opt
sancov
sanstats
split-file
verify-uselistorder
yaml-bench
yaml2obj

View File

@ -141,6 +141,7 @@ tools = [
ToolSubst('%llvm-objcopy', FindTool('llvm-objcopy')),
ToolSubst('%llvm-strip', FindTool('llvm-strip')),
ToolSubst('%llvm-install-name-tool', FindTool('llvm-install-name-tool')),
ToolSubst('%split-file', FindTool('split-file')),
]
# FIXME: Why do we have both `lli` and `%lli` that do slightly different things?

View File

@ -1,10 +1,8 @@
; RUN: echo ".text.tin" > %t_order_lto.txt
; RUN: echo ".text._start" >> %t_order_lto.txt
; RUN: echo ".text.pat" >> %t_order_lto.txt
; RUN: llvm-as %s -o %t.o
; RUN: split-file %s %t
; RUN: llvm-as %t/a.ll -o %t.o
; RUN: %gold -plugin %llvmshlibdir/LLVMgold%shlibext \
; RUN: -m elf_x86_64 -o %t.exe %t.o \
; RUN: --section-ordering-file=%t_order_lto.txt
; RUN: --section-ordering-file=%t/order
; RUN: llvm-readelf -s %t.exe | FileCheck %s
; Check that the order of the sections is tin -> _start -> pat.
@ -13,6 +11,12 @@
; CHECK: 00000000004000b0 1 FUNC LOCAL DEFAULT 1 tin
; CHECK: 00000000004000c0 15 FUNC GLOBAL DEFAULT 1 _start
;--- order
.text.tin
.text._start
.text.pat
;--- a.ll
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

View File

@ -1,29 +1,32 @@
## Show that llvm-strings can handle the -t/--radix switch properly.
RUN: echo one > %t
RUN: echo two >> %t
RUN: echo three >> %t
RUN: echo four >> %t
RUN: echo five >> %t
RUN: echo six >> %t
RUN: echo seven >> %t
RUN: echo eight >> %t
RUN: echo nine >> %t
RUN: echo ten >> %t
RUN: split-file --no-leading-lines %s %t
#--- a.txt
one
two
three
four
five
six
seven
eight
nine
ten
#--- end
RUN: llvm-strings %t | FileCheck %s -check-prefix CHECK-NONE --implicit-check-not={{.}}
RUN: llvm-strings -t d %t | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace --implicit-check-not={{.}}
RUN: llvm-strings -t o %t | FileCheck %s -check-prefix CHECK-OCT --strict-whitespace --implicit-check-not={{.}}
RUN: llvm-strings -t x %t | FileCheck %s -check-prefix CHECK-HEX --strict-whitespace --implicit-check-not={{.}}
RUN: llvm-strings %t/a.txt | FileCheck %s -check-prefix CHECK-NONE --implicit-check-not={{.}}
RUN: llvm-strings -t d %t/a.txt | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace --implicit-check-not={{.}}
RUN: llvm-strings -t o %t/a.txt | FileCheck %s -check-prefix CHECK-OCT --strict-whitespace --implicit-check-not={{.}}
RUN: llvm-strings -t x %t/a.txt | FileCheck %s -check-prefix CHECK-HEX --strict-whitespace --implicit-check-not={{.}}
## Show --radix works too.
RUN: llvm-strings --radix d %t | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace
RUN: llvm-strings --radix o %t | FileCheck %s -check-prefix CHECK-OCT --strict-whitespace
RUN: llvm-strings --radix x %t | FileCheck %s -check-prefix CHECK-HEX --strict-whitespace
RUN: llvm-strings --radix d %t/a.txt | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace
RUN: llvm-strings --radix o %t/a.txt | FileCheck %s -check-prefix CHECK-OCT --strict-whitespace
RUN: llvm-strings --radix x %t/a.txt | FileCheck %s -check-prefix CHECK-HEX --strict-whitespace
## Show different syntaxes work.
RUN: llvm-strings --radix=d %t | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace
RUN: llvm-strings -t=d %t | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace
RUN: llvm-strings --radix=d %t/a.txt | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace
RUN: llvm-strings -t=d %t/a.txt | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace
CHECK-NONE: {{^}}three
CHECK-NONE: {{^}}four
@ -54,5 +57,5 @@ CHECK-HEX: {{^}} 22 eight
CHECK-HEX: {{^}} 28 nine
## Show that an invalid value is rejected.
RUN: not llvm-strings --radix z %t 2>&1 | FileCheck %s --check-prefix=INVALID
RUN: not llvm-strings --radix z %t/a.txt 2>&1 | FileCheck %s --check-prefix=INVALID
INVALID: llvm-strings{{.*}}: for the --radix option: Cannot find option named 'z'!

View File

@ -0,0 +1,2 @@
aa

View File

@ -0,0 +1,6 @@
; Comments are preserved.
bb

View File

@ -0,0 +1,8 @@
cc

View File

@ -0,0 +1,40 @@
#--- aa
aa
;--- bb
; Comments are preserved.
bb
//--- subdir/cc
cc
//--- end
# RUN: rm -rf %t
# RUN: split-file %s %t
# RUN: diff %S/Inputs/basic-aa.txt %t/aa
# RUN: diff %S/Inputs/basic-bb.txt %t/bb
# RUN: diff %S/Inputs/basic-cc.txt %t/subdir/cc
# RUN: FileCheck %s --check-prefix=END < %t/end
## Can be called on a non-empty directory.
# RUN: split-file %s %t
# RUN: diff %S/Inputs/basic-aa.txt %t/aa
## Test that we will delete the output if it is a file, so that we can create
## a directory.
# RUN: rm -rf %t && touch %t
# RUN: split-file %s %t
# RUN: diff %S/Inputs/basic-aa.txt %t/aa
# END: RUN: split-file %s %t
# RUN: not %split-file 2>&1 | FileCheck %s --check-prefix=NO_INPUT
# NO_INPUT: split-file: error: input filename is not specified
# RUN: not %split-file %s '' 2>&1 | FileCheck %s --check-prefix=NO_OUTPUT
# NO_OUTPUT: split-file: error: output directory is not specified
# RUN: not %split-file %S/Inputs/basic-aa.txt %t 2>&1 | FileCheck %s --check-prefix=NOT_EXIST
# NOT_EXIST: split-file: error: {{.*}}.txt: no part separator was found

View File

@ -0,0 +1,4 @@
# RUN: split-file --no-leading-lines %s %t
# RUN: count 0 < %t/empty
#--- empty

View File

@ -0,0 +1,16 @@
# RUN: not %split-file %s %t 2>&1 | FileCheck %s
# RUN: not ls %t/dup
# CHECK: {{.*}}.test:[[#@LINE+1]]: error: empty part name
//---
# CHECK: {{.*}}.test:[[#@LINE+1]]: error: part name cannot have leading or trailing space
//--- leading_space
# CHECK: {{.*}}.test:[[#@LINE+1]]: error: part name cannot have leading or trailing space
//--- trailing_space
;--- dup
# CHECK: {{.*}}.test:[[#@LINE+1]]: error: ';--- dup' occurs more than once
;--- dup

View File

@ -0,0 +1,6 @@
RUN: split-file --help 2>&1 | FileCheck --implicit-check-not='General Options:' %s
CHECK: OVERVIEW: Split input {{.*}}
CHECK: USAGE: split-file [options] filename directory
CHECK: Generic Options:
CHECK: split-file Options:
CHECK: --no-leading-lines

View File

@ -0,0 +1,10 @@
## With --no-leading-lines, don't add leading lines (which is used to preserve line numbers).
# RUN: split-file --no-leading-lines %s %t
# RUN: count 1 < %t/a.txt
# RUN: FileCheck %s < %t/a.txt
# CHECK: input
#--- a.txt
input

View File

@ -0,0 +1,8 @@
# UNSUPPORTED: system-windows
# REQUIRES: shell
## Don't delete the output if it is special, otherwise root may accidentally
## remove important special files.
# RUN: not split-file %s /dev/null 2>&1 | FileCheck %s
# CHECK: error: /dev/null: output cannot be a special file

View File

@ -0,0 +1,19 @@
# Almost identical to the top-level .clang-tidy, except that {Member,Parameter,Variable}Case use camelBack.
Checks: '-*,clang-diagnostic-*,llvm-*,misc-*,-misc-unused-parameters,-misc-non-private-member-variables-in-classes,readability-identifier-naming'
CheckOptions:
- key: readability-identifier-naming.ClassCase
value: CamelCase
- key: readability-identifier-naming.EnumCase
value: CamelCase
- key: readability-identifier-naming.FunctionCase
value: camelBack
- key: readability-identifier-naming.MemberCase
value: camelBack
- key: readability-identifier-naming.ParameterCase
value: camelBack
- key: readability-identifier-naming.UnionCase
value: CamelCase
- key: readability-identifier-naming.VariableCase
value: camelBack
- key: readability-identifier-naming.IgnoreMainLikeFunctions
value: 1

View File

@ -0,0 +1,7 @@
set(LLVM_LINK_COMPONENTS
Support
)
add_llvm_tool(split-file
split-file.cpp
)

View File

@ -0,0 +1,172 @@
//===- split-file.cpp - Input splitting utility ---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Split input into multiple parts separated by regex '^(.|//)--- ' and write
// each part to a file under the output directory.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/WithColor.h"
#include <string>
#include <system_error>
using namespace llvm;
static cl::OptionCategory cat("split-file Options");
static cl::opt<std::string> input(cl::Positional, cl::desc("filename"),
cl::cat(cat));
static cl::opt<std::string> output(cl::Positional, cl::desc("directory"),
cl::value_desc("directory"), cl::cat(cat));
static cl::opt<bool> noLeadingLines("no-leading-lines",
cl::desc("Don't preserve line numbers"),
cl::cat(cat));
static StringRef toolName;
static int errorCount;
/// Report an unrecoverable error on stderr and terminate with exit status 1.
///
/// The diagnostic is prefixed with the tool name. When \p filename is
/// non-empty it is printed, followed by ": ", before \p message.
LLVM_ATTRIBUTE_NORETURN static void fatal(StringRef filename,
                                          const Twine &message) {
  auto &os = WithColor::error(errs(), toolName);
  if (!filename.empty())
    os << filename << ": ";
  os << message << '\n';
  exit(1);
}
/// Report a recoverable diagnostic attached to a location in the input.
///
/// Prints "<filename>:<line>: " followed by the error-prefixed \p message to
/// stderr and increments the global errorCount.
static void error(StringRef filename, int64_t line, const Twine &message) {
  auto &os = errs();
  os << filename << ':' << line << ": ";
  WithColor::error(os) << message << '\n';
  ++errorCount;
}
namespace {
/// Describes one part of the input file: the byte range of its content and
/// the number of newlines to emit in front of that content.
struct Part {
  /// Start of the part's content; null until a start has been recorded.
  const char *begin{nullptr};
  /// One past the last byte of the part's content; null until recorded.
  const char *end{nullptr};
  /// Number of '\n' characters written before the content.
  int64_t leadingLines{0};
};
} // namespace
/// Split \p inputBuf into parts and write each part to its own file below the
/// directory named by the `output` option.
///
/// A separator line consists of one arbitrary character or "//", followed by
/// "--- " and the part name. The content of a part runs from the line after
/// its separator up to the next accepted separator (or the end of the buffer).
///
/// \param inputBuf the contents of the input file.
/// \param input the input filename; used only for diagnostics.
/// \returns 0 on success, or 1 if any separator line had an empty name, a name
///          with leading/trailing space, or a duplicate name. Calls fatal()
///          (which exits) if no separator was accepted or if an output
///          directory/file cannot be created.
static int handle(MemoryBuffer &inputBuf, StringRef input) {
  DenseMap<StringRef, Part> partToBegin;
  StringRef lastPart, separator;
  for (line_iterator i(inputBuf, /*SkipBlanks=*/false, '\0'); !i.is_at_eof();) {
    const int64_t lineNo = i.line_number();
    const StringRef line = *i++;
    // The marker is "//--- " (6 characters) or "<any char>--- " (5 characters).
    const size_t markerLen = line.startswith("//") ? 6 : 5;
    if (!(line.size() >= markerLen &&
          line.substr(markerLen - 4).startswith("--- ")))
      continue;
    separator = line.substr(0, markerLen);
    const StringRef partName = line.substr(markerLen);
    if (partName.empty()) {
      error(input, lineNo, "empty part name");
      continue;
    }
    if (isSpace(partName.front()) || isSpace(partName.back())) {
      error(input, lineNo, "part name cannot have leading or trailing space");
      continue;
    }

    auto res = partToBegin.try_emplace(partName);
    if (!res.second) {
      error(input, lineNo,
            "'" + separator + partName + "' occurs more than once");
      continue;
    }
    // The previous part ends where this separator line starts.
    if (!lastPart.empty())
      partToBegin[lastPart].end = line.data();
    Part &cur = res.first->second;
    // `i` already points at the line following the separator. If the separator
    // is the last line, the part has no content and `begin` stays null.
    if (!i.is_at_eof())
      cur.begin = i->data();
    // If --no-leading-lines is specified, leadingLines is 0. Otherwise emit
    // one newline per input line preceding the part's content so that the
    // extracted part preserves line numbers.
    cur.leadingLines = noLeadingLines ? 0 : i.line_number() - 1;

    lastPart = partName;
  }
  if (lastPart.empty())
    fatal(input, "no part separator was found");
  if (errorCount)
    return 1;
  partToBegin[lastPart].end = inputBuf.getBufferEnd();

  std::vector<std::unique_ptr<ToolOutputFile>> outputFiles;
  SmallString<256> partPath;
  for (auto &keyValue : partToBegin) {
    partPath.clear();
    sys::path::append(partPath, output, keyValue.first);
    std::error_code ec =
        sys::fs::create_directories(sys::path::parent_path(partPath));
    if (ec)
      fatal(input, ec.message());
    auto f = std::make_unique<ToolOutputFile>(partPath.str(), ec,
                                              llvm::sys::fs::OF_None);
    // std::make_unique never returns null; a failure to open the output file
    // is reported through `ec`, so that is what must be checked.
    if (ec)
      fatal(input, ec.message());

    Part &part = keyValue.second;
    for (int64_t i = 0; i != part.leadingLines; ++i)
      f->os().write('\n');
    if (part.begin)
      f->os().write(part.begin, part.end - part.begin);
    outputFiles.push_back(std::move(f));
  }

  // Keep the outputs only after every part was written, so that a fatal error
  // part-way through does not leave a partial set of files marked as kept.
  for (std::unique_ptr<ToolOutputFile> &outputFile : outputFiles)
    outputFile->keep();
  return 0;
}
int main(int argc, const char **argv) {
toolName = sys::path::stem(argv[0]);
cl::HideUnrelatedOptions({&cat});
cl::ParseCommandLineOptions(
argc, argv,
"Split input into multiple parts separated by regex '^(.|//)--- ' and "
"extract the part specified by '^(.|//)--- <part>'\n",
nullptr,
/*EnvVar=*/nullptr,
/*LongOptionsUseDoubleDash=*/true);
if (input.empty())
fatal("", "input filename is not specified");
if (output.empty())
fatal("", "output directory is not specified");
ErrorOr<std::unique_ptr<MemoryBuffer>> bufferOrErr =
MemoryBuffer::getFileOrSTDIN(input);
if (std::error_code ec = bufferOrErr.getError())
fatal(input, ec.message());
// Delete output if it is a file or an empty directory, so that we can create
// a directory.
sys::fs::file_status status;
if (std::error_code ec = sys::fs::status(output, status))
if (ec.value() != static_cast<int>(std::errc::no_such_file_or_directory))
fatal(output, ec.message());
if (status.type() != sys::fs::file_type::file_not_found &&
status.type() != sys::fs::file_type::directory_file &&
status.type() != sys::fs::file_type::regular_file)
fatal(output, "output cannot be a special file");
if (std::error_code ec = sys::fs::remove(output, /*IgnoreNonExisting=*/true))
if (ec.value() != static_cast<int>(std::errc::directory_not_empty))
fatal(output, ec.message());
return handle(**bufferOrErr, input);
}

View File

@ -94,6 +94,7 @@ group("test") {
"//llvm/tools/llvm-readobj:symlinks",
"//llvm/tools/obj2yaml",
"//llvm/tools/opt",
"//llvm/tools/split-file",
"//llvm/tools/yaml2obj",
"//llvm/utils/FileCheck",
"//llvm/utils/count",

View File

@ -260,6 +260,7 @@ group("test") {
"//llvm/tools/opt",
"//llvm/tools/sancov",
"//llvm/tools/sanstats",
"//llvm/tools/split-file",
"//llvm/tools/verify-uselistorder",
"//llvm/tools/yaml2obj",
"//llvm/unittests",

View File

@ -0,0 +1,4 @@
executable("split-file") {
deps = [ "//llvm/lib/Support" ]
sources = [ "split-file.cpp" ]
}