From 6cfd76633ff61da359dd58bcf83ed178170a9771 Mon Sep 17 00:00:00 2001 From: Andrew Litteken Date: Mon, 7 Jun 2021 10:57:39 -0500 Subject: [PATCH] [IRSim] Adding basic implementation of llvm-sim. This is a similarity visualization tool that accepts a Module and passes it to the IRSimilarityIdentifier. The resulting SimilarityGroups are output in a JSON file. Tests are found in test/tools/llvm-sim and check for the file not found, a bad module, and that the JSON is created correctly. Reviewers: paquette, jroelofs, MaskRay Recommit of: 15645d044bcfe2a0f63156048b302f997a717688 to fix linking errors and GN build system. Differential Revision: https://reviews.llvm.org/D86974 --- test/CMakeLists.txt | 1 + test/lit.cfg.py | 2 +- test/tools/llvm-sim/Inputs/sim1.ll | 27 ++++ test/tools/llvm-sim/fail-cases.test | 8 + test/tools/llvm-sim/single-sim-file.test | 57 +++++++ test/tools/llvm-sim/single-sim.test | 56 +++++++ tools/llvm-sim/CMakeLists.txt | 9 ++ tools/llvm-sim/llvm-sim.cpp | 149 ++++++++++++++++++ utils/gn/secondary/llvm/test/BUILD.gn | 1 + .../gn/secondary/llvm/tools/llvm-sim/BUILD.gn | 10 ++ 10 files changed, 319 insertions(+), 1 deletion(-) create mode 100644 test/tools/llvm-sim/Inputs/sim1.ll create mode 100644 test/tools/llvm-sim/fail-cases.test create mode 100644 test/tools/llvm-sim/single-sim-file.test create mode 100644 test/tools/llvm-sim/single-sim.test create mode 100644 tools/llvm-sim/CMakeLists.txt create mode 100644 tools/llvm-sim/llvm-sim.cpp create mode 100644 utils/gn/secondary/llvm/tools/llvm-sim/BUILD.gn diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ddc6e645c8b..84be4dd3b32 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -111,6 +111,7 @@ set(LLVM_TEST_DEPENDS llvm-readelf llvm-reduce llvm-rtdyld + llvm-sim llvm-size llvm-split llvm-strings diff --git a/test/lit.cfg.py b/test/lit.cfg.py index 765e00267b0..b5b8ee9f4d2 100644 --- a/test/lit.cfg.py +++ b/test/lit.cfg.py @@ -165,7 +165,7 @@ tools.extend([ 'llvm-link', 
'llvm-lto', 'llvm-lto2', 'llvm-mc', 'llvm-mca', 'llvm-modextract', 'llvm-nm', 'llvm-objcopy', 'llvm-objdump', 'llvm-otool', 'llvm-pdbutil', 'llvm-profdata', 'llvm-profgen', 'llvm-ranlib', 'llvm-rc', 'llvm-readelf', - 'llvm-readobj', 'llvm-rtdyld', 'llvm-size', 'llvm-split', 'llvm-strings', + 'llvm-readobj', 'llvm-rtdyld', 'llvm-sim', 'llvm-size', 'llvm-split', 'llvm-strings', 'llvm-strip', 'llvm-tblgen', 'llvm-tapi-diff', 'llvm-undname', 'llvm-windres', 'llvm-c-test', 'llvm-cxxfilt', 'llvm-xray', 'yaml2obj', 'obj2yaml', 'yaml-bench', 'verify-uselistorder', diff --git a/test/tools/llvm-sim/Inputs/sim1.ll b/test/tools/llvm-sim/Inputs/sim1.ll new file mode 100644 index 00000000000..facc27d285b --- /dev/null +++ b/test/tools/llvm-sim/Inputs/sim1.ll @@ -0,0 +1,27 @@ +define void @similar_func1() { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +define void @similar_func2() { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} diff --git a/test/tools/llvm-sim/fail-cases.test b/test/tools/llvm-sim/fail-cases.test new file mode 100644 index 00000000000..8de658abe3e --- /dev/null +++ b/test/tools/llvm-sim/fail-cases.test @@ -0,0 +1,8 @@ +# RUN: not llvm-sim %s 2>&1 | FileCheck %s +# RUN: not llvm-sim %s.2 2>&1 | FileCheck -DMSG=%errc_ENOENT %s --check-prefix=EXIST + +# File reading error messaging tests. 
+ +# CHECK: error: expected top-level entity + +# EXIST: error: Could not open input file: [[MSG]] diff --git a/test/tools/llvm-sim/single-sim-file.test b/test/tools/llvm-sim/single-sim-file.test new file mode 100644 index 00000000000..5e45edf12c2 --- /dev/null +++ b/test/tools/llvm-sim/single-sim-file.test @@ -0,0 +1,57 @@ +# RUN: llvm-sim -o %t %S/Inputs/sim1.ll +# RUN: FileCheck %s < %t + +# Checking the output of a single module test. + +# CHECK: { +# CHECK-NEXT: "1": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 8, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 18, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "2": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 7, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 17, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "3": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 6, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 16, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "4": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 5, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 15, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "5": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 4, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 14, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ] +# CHECK-NEXT: } diff --git a/test/tools/llvm-sim/single-sim.test b/test/tools/llvm-sim/single-sim.test new file mode 100644 index 00000000000..4e04682e294 --- /dev/null +++ b/test/tools/llvm-sim/single-sim.test @@ -0,0 +1,56 @@ +# RUN: llvm-sim -o - %S/Inputs/sim1.ll | FileCheck %s + +# Checking the output of a single module test. 
+ +# CHECK: { +# CHECK-NEXT: "1": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 8, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 18, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "2": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 7, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 17, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "3": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 6, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 16, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "4": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 5, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 15, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "5": [ +# CHECK-NEXT: { +# CHECK-NEXT: "start": 4, +# CHECK-NEXT: "end": 9 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "start": 14, +# CHECK-NEXT: "end": 19 +# CHECK-NEXT: } +# CHECK-NEXT: ] +# CHECK-NEXT: } diff --git a/tools/llvm-sim/CMakeLists.txt b/tools/llvm-sim/CMakeLists.txt new file mode 100644 index 00000000000..76299050392 --- /dev/null +++ b/tools/llvm-sim/CMakeLists.txt @@ -0,0 +1,9 @@ +set(LLVM_LINK_COMPONENTS + Core + Support + Analysis + IRReader) + +add_llvm_tool(llvm-sim + llvm-sim.cpp +) diff --git a/tools/llvm-sim/llvm-sim.cpp b/tools/llvm-sim/llvm-sim.cpp new file mode 100644 index 00000000000..26e370ff30f --- /dev/null +++ b/tools/llvm-sim/llvm-sim.cpp @@ -0,0 +1,149 @@ +//===-- llvm-sim.cpp - Find similar sections of programs -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This program finds similar sections of a Module, and exports them as a JSON
+// file.
+//
+// To find similarities contained across multiple modules, please use llvm-link
+// first to merge the modules.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/IRSimilarityIdentifier.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/ToolOutputFile.h"
+
+using namespace llvm;
+using namespace IRSimilarity;
+
+static cl::opt<std::string> OutputFilename("o", cl::desc("Output Filename"),
+                                           cl::init("-"),
+                                           cl::value_desc("filename"));
+
+static cl::opt<std::string> InputSourceFile(cl::Positional,
+                                            cl::desc("<Source file>"),
+                                            cl::init("-"),
+                                            cl::value_desc("filename"));
+
+/// Retrieve the unique number \p I is mapped to in \p LLVMInstNum.
+///
+/// \param I - The Instruction to find the instruction number for.
+/// \param LLVMInstNum - The mapping of Instructions to their location in the
+/// module represented by an unsigned integer.
+/// \returns The instruction number for \p I, or None if \p I is not mapped.
+Optional<unsigned>
+getPositionInModule(const Instruction *I,
+                    const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
+  assert(I && "Instruction is nullptr!");
+  auto It = LLVMInstNum.find(I);
+  if (It == LLVMInstNum.end())
+    return None;
+  return It->second;
+}
+
+/// Exports the given SimilarityGroups to a JSON file at \p FilePath.
+///
+/// \param FilePath - The path to the output location.
+/// \param SimSections - The similarity groups to process.
+/// \param LLVMInstNum - The mapping of Instructions to their location in the
+/// module represented by an unsigned integer.
+/// \returns A nonzero error code if there was a failure creating the file.
+std::error_code
+exportToFile(const StringRef FilePath,
+             const SimilarityGroupList &SimSections,
+             const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
+  std::error_code EC;
+  auto Out = std::make_unique<ToolOutputFile>(FilePath, EC, sys::fs::OF_None);
+  if (EC)
+    return EC;
+
+  json::OStream J(Out->os(), 1);
+  J.objectBegin();
+
+  unsigned SimOption = 1;
+  // Process each list of SimilarityGroups organized by the Module.
+  for (const SimilarityGroup &G : SimSections) {
+    J.attributeBegin(std::to_string(SimOption));
+    J.arrayBegin();
+    // For each file there is a list of the range where the similarity
+    // exists.
+    for (const IRSimilarityCandidate &C : G) {
+      Optional<unsigned> Start =
+          getPositionInModule(C.front()->Inst, LLVMInstNum);
+      Optional<unsigned> End =
+          getPositionInModule(C.back()->Inst, LLVMInstNum);
+
+      assert(Start.hasValue() &&
+             "Could not find instruction number for first instruction");
+      assert(End.hasValue() &&
+             "Could not find instruction number for last instruction");
+
+      J.object([&] {
+        J.attribute("start", Start.getValue());
+        J.attribute("end", End.getValue());
+      });
+    }
+    J.arrayEnd();
+    J.attributeEnd();
+    SimOption++;
+  }
+  J.objectEnd();
+
+  Out->keep();
+
+  return EC;
+}
+
+/// Parse the input IR, number its instructions, and export similar sections
+/// as JSON. \returns 0 on success, 1 on a parse failure, 2 on an output error.
+int main(int argc, const char *argv[]) {
+  InitLLVM X(argc, argv);
+
+  cl::ParseCommandLineOptions(argc, argv, "LLVM IR Similarity Visualizer\n");
+
+  LLVMContext CurrContext;
+  SMDiagnostic Err;
+  std::unique_ptr<Module> ModuleToAnalyze =
+      parseIRFile(InputSourceFile, Err, CurrContext);
+
+  if (!ModuleToAnalyze) {
+    Err.print(argv[0], errs());
+    return 1;
+  }
+
+  // Mapping from an Instruction pointer to its occurrence in a sequential
+  // list of all the Instructions in a Module.
+  DenseMap<Instruction *, unsigned> LLVMInstNum;
+
+  // We give each instruction a number, which gives us a start and end value
+  // for the beginning and end of each IRSimilarityCandidate.
+  unsigned InstructionNumber = 1;
+  for (Function &F : *ModuleToAnalyze)
+    for (BasicBlock &BB : F)
+      for (Instruction &I : BB.instructionsWithoutDebug())
+        LLVMInstNum[&I] = InstructionNumber++;
+
+  // The similarity identifier we will use to find the similar sections.
+  IRSimilarityIdentifier SimIdent;
+  SimilarityGroupList SimilaritySections =
+      SimIdent.findSimilarity(*ModuleToAnalyze);
+
+  std::error_code E =
+      exportToFile(OutputFilename, SimilaritySections, LLVMInstNum);
+  if (E) {
+    errs() << argv[0] << ": " << E.message() << '\n';
+    return 2;
+  }
+
+  return 0;
+}
diff --git a/utils/gn/secondary/llvm/test/BUILD.gn b/utils/gn/secondary/llvm/test/BUILD.gn
index 6c35f398944..e87dc566270 100644
--- a/utils/gn/secondary/llvm/test/BUILD.gn
+++ b/utils/gn/secondary/llvm/test/BUILD.gn
@@ -276,6 +276,7 @@ group("test") {
     "//llvm/tools/llvm-readobj:symlinks",
     "//llvm/tools/llvm-reduce",
     "//llvm/tools/llvm-rtdyld",
+    "//llvm/tools/llvm-sim",
     "//llvm/tools/llvm-size",
     "//llvm/tools/llvm-split",
     "//llvm/tools/llvm-strings",
diff --git a/utils/gn/secondary/llvm/tools/llvm-sim/BUILD.gn b/utils/gn/secondary/llvm/tools/llvm-sim/BUILD.gn
new file mode 100644
index 00000000000..3451049f2ec
--- /dev/null
+++ b/utils/gn/secondary/llvm/tools/llvm-sim/BUILD.gn
@@ -0,0 +1,10 @@
+executable("llvm-sim") {
+  deps = [
+    "//llvm/lib/Analysis",
+    "//llvm/lib/IRReader",
+    "//llvm/lib/Support",
+  ]
+  sources = [
+    "llvm-sim.cpp",
+  ]
+}