[AMDGPU] Fix some Clang-tidy modernize and Include What You Use warnings; other minor fixes (NFC).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292623 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Eugene Zelenko 2017-01-20 17:52:16 +00:00
parent 0e0a42437b
commit 68c521d030
11 changed files with 299 additions and 199 deletions

View File

@ -15,92 +15,75 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
#include "AMDGPUMCInstLower.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include <cstddef>
#include <cstdint>
#include <limits>
#include <memory>
#include <string>
#include <vector>
namespace llvm {
class MCOperand;
class AMDGPUAsmPrinter final : public AsmPrinter {
private:
struct SIProgramInfo {
SIProgramInfo() :
VGPRBlocks(0),
SGPRBlocks(0),
Priority(0),
FloatMode(0),
Priv(0),
DX10Clamp(0),
DebugMode(0),
IEEEMode(0),
ScratchSize(0),
ComputePGMRSrc1(0),
LDSBlocks(0),
ScratchBlocks(0),
ComputePGMRSrc2(0),
NumVGPR(0),
NumSGPR(0),
FlatUsed(false),
NumSGPRsForWavesPerEU(0),
NumVGPRsForWavesPerEU(0),
ReservedVGPRFirst(0),
ReservedVGPRCount(0),
DebuggerWavefrontPrivateSegmentOffsetSGPR((uint16_t)-1),
DebuggerPrivateSegmentBufferSGPR((uint16_t)-1),
VCCUsed(false),
CodeLen(0) {}
// Fields set in PGM_RSRC1 pm4 packet.
uint32_t VGPRBlocks;
uint32_t SGPRBlocks;
uint32_t Priority;
uint32_t FloatMode;
uint32_t Priv;
uint32_t DX10Clamp;
uint32_t DebugMode;
uint32_t IEEEMode;
uint32_t ScratchSize;
uint32_t VGPRBlocks = 0;
uint32_t SGPRBlocks = 0;
uint32_t Priority = 0;
uint32_t FloatMode = 0;
uint32_t Priv = 0;
uint32_t DX10Clamp = 0;
uint32_t DebugMode = 0;
uint32_t IEEEMode = 0;
uint32_t ScratchSize = 0;
uint64_t ComputePGMRSrc1;
uint64_t ComputePGMRSrc1 = 0;
// Fields set in PGM_RSRC2 pm4 packet.
uint32_t LDSBlocks;
uint32_t ScratchBlocks;
uint32_t LDSBlocks = 0;
uint32_t ScratchBlocks = 0;
uint64_t ComputePGMRSrc2;
uint64_t ComputePGMRSrc2 = 0;
uint32_t NumVGPR;
uint32_t NumSGPR;
uint32_t NumVGPR = 0;
uint32_t NumSGPR = 0;
uint32_t LDSSize;
bool FlatUsed;
bool FlatUsed = false;
// Number of SGPRs that meets the requested number of waves per execution unit.
uint32_t NumSGPRsForWavesPerEU;
uint32_t NumSGPRsForWavesPerEU = 0;
// Number of VGPRs that meets the requested number of waves per execution unit.
uint32_t NumVGPRsForWavesPerEU;
uint32_t NumVGPRsForWavesPerEU = 0;
// If ReservedVGPRCount is 0 then this must be 0. Otherwise, this is the first
// fixed VGPR number reserved.
uint16_t ReservedVGPRFirst;
uint16_t ReservedVGPRFirst = 0;
// The number of consecutive VGPRs reserved.
uint16_t ReservedVGPRCount;
uint16_t ReservedVGPRCount = 0;
// Fixed SGPR number used to hold wave scratch offset for entire kernel
// execution, or uint16_t(-1) if the register is not used or not known.
uint16_t DebuggerWavefrontPrivateSegmentOffsetSGPR;
uint16_t DebuggerWavefrontPrivateSegmentOffsetSGPR =
std::numeric_limits<uint16_t>::max();
// Fixed SGPR number of the first 4 SGPRs used to hold scratch V# for entire
// kernel execution, or uint16_t(-1) if the register is not used or not
// known.
uint16_t DebuggerPrivateSegmentBufferSGPR;
uint16_t DebuggerPrivateSegmentBufferSGPR =
std::numeric_limits<uint16_t>::max();
// Bonus information for debugging.
bool VCCUsed;
uint64_t CodeLen;
bool VCCUsed = false;
uint64_t CodeLen = 0;
SIProgramInfo() = default;
};
void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF) const;
@ -155,6 +138,6 @@ protected:
size_t DisasmLineMaxLen;
};
} // End anonymous llvm
} // end namespace llvm
#endif
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H

View File

@ -17,13 +17,29 @@
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include <cassert>
#include <iterator>
#define DEBUG_TYPE "amdgpu-codegenprepare"
@ -34,10 +50,10 @@ namespace {
class AMDGPUCodeGenPrepare : public FunctionPass,
public InstVisitor<AMDGPUCodeGenPrepare, bool> {
const GCNTargetMachine *TM;
const SISubtarget *ST;
DivergenceAnalysis *DA;
Module *Mod;
bool HasUnsafeFPMath;
const SISubtarget *ST = nullptr;
DivergenceAnalysis *DA = nullptr;
Module *Mod = nullptr;
bool HasUnsafeFPMath = false;
/// \brief Copies exact/nsw/nuw flags (if any) from binary operation \p I to
/// binary operation \p V.
@ -113,13 +129,9 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
public:
static char ID;
AMDGPUCodeGenPrepare(const TargetMachine *TM = nullptr) :
FunctionPass(ID),
TM(static_cast<const GCNTargetMachine *>(TM)),
ST(nullptr),
DA(nullptr),
Mod(nullptr),
HasUnsafeFPMath(false) { }
FunctionPass(ID), TM(static_cast<const GCNTargetMachine *>(TM)) {}
bool visitFDiv(BinaryOperator &I);
@ -142,7 +154,7 @@ public:
}
};
} // End anonymous namespace
} // end anonymous namespace
Value *AMDGPUCodeGenPrepare::copyFlags(
const BinaryOperator &I, Value *V) const {

View File

@ -14,12 +14,49 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <map>
#include <tuple>
#include <utility>
#include <vector>
#define DEBUG_TYPE "amdgpu-promote-alloca"
@ -31,16 +68,16 @@ namespace {
class AMDGPUPromoteAlloca : public FunctionPass {
private:
const TargetMachine *TM;
Module *Mod;
const DataLayout *DL;
MDNode *MaxWorkGroupSizeRange;
Module *Mod = nullptr;
const DataLayout *DL = nullptr;
MDNode *MaxWorkGroupSizeRange = nullptr;
// FIXME: This should be per-kernel.
uint32_t LocalMemLimit;
uint32_t CurrentLocalMemUsage;
uint32_t LocalMemLimit = 0;
uint32_t CurrentLocalMemUsage = 0;
bool IsAMDGCN;
bool IsAMDHSA;
bool IsAMDGCN = false;
bool IsAMDHSA = false;
std::pair<Value *, Value *> getLocalSizeYZ(IRBuilder<> &Builder);
Value *getWorkitemID(IRBuilder<> &Builder, unsigned N);
@ -63,15 +100,7 @@ public:
static char ID;
AMDGPUPromoteAlloca(const TargetMachine *TM_ = nullptr) :
FunctionPass(ID),
TM(TM_),
Mod(nullptr),
DL(nullptr),
MaxWorkGroupSizeRange(nullptr),
LocalMemLimit(0),
CurrentLocalMemUsage(0),
IsAMDGCN(false),
IsAMDHSA(false) { }
FunctionPass(ID), TM(TM_) {}
bool doInitialization(Module &M) override;
bool runOnFunction(Function &F) override;
@ -86,7 +115,7 @@ public:
}
};
} // End anonymous namespace
} // end anonymous namespace
char AMDGPUPromoteAlloca::ID = 0;
@ -95,7 +124,6 @@ INITIALIZE_TM_PASS(AMDGPUPromoteAlloca, DEBUG_TYPE,
char &llvm::AMDGPUPromoteAllocaID = AMDGPUPromoteAlloca::ID;
bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
if (!TM)
return false;
@ -298,7 +326,7 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
Value *GEPZU = Builder.CreateConstInBoundsGEP1_64(CastDispatchPtr, 2);
LoadInst *LoadZU = Builder.CreateAlignedLoad(GEPZU, 4);
MDNode *MD = llvm::MDNode::get(Mod->getContext(), None);
MDNode *MD = MDNode::get(Mod->getContext(), None);
LoadXY->setMetadata(LLVMContext::MD_invariant_load, MD);
LoadZU->setMetadata(LLVMContext::MD_invariant_load, MD);
LoadZU->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);

View File

@ -37,7 +37,6 @@
#include <string>
namespace AMDGPU {
namespace RuntimeMD {
// Version and revision of runtime metadata
@ -46,6 +45,7 @@ namespace RuntimeMD {
// Name of keys for runtime metadata.
namespace KeyName {
const char MDVersion[] = "amd.MDVersion"; // Runtime metadata version
const char Language[] = "amd.Language"; // Language
const char LanguageVersion[] = "amd.LanguageVersion"; // Language version
@ -72,9 +72,11 @@ namespace RuntimeMD {
const char PrintfInfo[] = "amd.PrintfInfo"; // Printf function call information
const char ArgActualAcc[] = "amd.ArgActualAcc"; // The actual kernel argument access qualifier
const char ArgPointeeAlign[] = "amd.ArgPointeeAlign"; // Alignment of pointee type
}
} // end namespace KeyName
namespace KernelArg {
enum Kind : uint8_t {
ByValue = 0,
GlobalBuffer = 1,
@ -123,7 +125,8 @@ namespace RuntimeMD {
Generic = 4,
Region = 5,
};
} // namespace KernelArg
} // end namespace KernelArg
// Invalid values are used to indicate an optional key should not be emitted.
const uint8_t INVALID_ADDR_QUAL = 0xff;
@ -131,28 +134,30 @@ namespace RuntimeMD {
const uint32_t INVALID_KERNEL_INDEX = ~0U;
namespace KernelArg {
// In-memory representation of kernel argument information.
struct Metadata {
uint32_t Size;
uint32_t Align;
uint32_t PointeeAlign;
uint8_t Kind;
uint16_t ValueType;
uint32_t Size = 0;
uint32_t Align = 0;
uint32_t PointeeAlign = 0;
uint8_t Kind = 0;
uint16_t ValueType = 0;
std::string TypeName;
std::string Name;
uint8_t AddrQual;
uint8_t AccQual;
uint8_t IsVolatile;
uint8_t IsConst;
uint8_t IsRestrict;
uint8_t IsPipe;
Metadata() : Size(0), Align(0), PointeeAlign(0), Kind(0), ValueType(0),
AddrQual(INVALID_ADDR_QUAL), AccQual(INVALID_ACC_QUAL), IsVolatile(0),
IsConst(0), IsRestrict(0), IsPipe(0) {}
uint8_t AddrQual = INVALID_ADDR_QUAL;
uint8_t AccQual = INVALID_ACC_QUAL;
uint8_t IsVolatile = 0;
uint8_t IsConst = 0;
uint8_t IsRestrict = 0;
uint8_t IsPipe = 0;
Metadata() = default;
};
}
} // end namespace KernelArg
namespace Kernel {
// In-memory representation of kernel information.
struct Metadata {
std::string Name;
@ -161,21 +166,24 @@ namespace RuntimeMD {
std::vector<uint32_t> ReqdWorkGroupSize;
std::vector<uint32_t> WorkGroupSizeHint;
std::string VecTypeHint;
uint32_t KernelIndex;
uint8_t NoPartialWorkGroups;
uint32_t KernelIndex = INVALID_KERNEL_INDEX;
uint8_t NoPartialWorkGroups = 0;
std::vector<KernelArg::Metadata> Args;
Metadata() : KernelIndex(INVALID_KERNEL_INDEX), NoPartialWorkGroups(0) {}
Metadata() = default;
};
}
} // end namespace Kernel
namespace Program {
// In-memory representation of program information.
struct Metadata {
std::vector<uint8_t> MDVersionSeq;
std::vector<std::string> PrintfInfo;
std::vector<Kernel::Metadata> Kernels;
explicit Metadata(){}
explicit Metadata() = default;
// Construct from a YAML string.
explicit Metadata(const std::string &YAML);
@ -186,8 +194,10 @@ namespace RuntimeMD {
// Convert from YAML string.
static Metadata fromYAML(const std::string &S);
};
}
} // namespace RuntimeMD
} // namespace AMDGPU
} // end namespace Program
} // end namespace RuntimeMD
} // end namespace AMDGPU
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H

View File

@ -13,27 +13,36 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include <algorithm>
#include <cassert>
using namespace llvm;
namespace {
namespace kOCLMD {
const char SpirVer[] = "opencl.spir.version";
const char OCLVer[] = "opencl.ocl.version";
const char UsedExt[] = "opencl.used.extensions";
const char UsedOptCoreFeat[] = "opencl.used.optional.core.features";
const char CompilerOptions[] = "opencl.compiler.options";
const char LLVMIdent[] = "llvm.ident";
}
} // end namespace kOCLMD
/// \brief Unify multiple OpenCL metadata due to linking.
class AMDGPUUnifyMetadata : public FunctionPass {
public:
static char ID;
explicit AMDGPUUnifyMetadata() : FunctionPass(ID) {};
explicit AMDGPUUnifyMetadata() : FunctionPass(ID) {}
private:
// This should really be a module pass but we have to run it as early
@ -43,7 +52,7 @@ namespace {
virtual bool runOnModule(Module &M);
// \todo: Convert to a module pass.
virtual bool runOnFunction(Function &F);
bool runOnFunction(Function &F) override;
/// \brief Unify version metadata.
/// \return true if changes are made.

View File

@ -11,11 +11,24 @@
//
//===----------------------------------------------------------------------===//
#include "GCNHazardRecognizer.h"
#include "AMDGPUSubtarget.h"
#include "GCNHazardRecognizer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/Support/Debug.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <limits>
#include <set>
#include <vector>
using namespace llvm;
@ -59,7 +72,6 @@ static bool isRFE(unsigned Opcode) {
}
static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
AMDGPU::OpName::simm16);
return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_;
@ -142,7 +154,6 @@ void GCNHazardRecognizer::EmitNoop() {
}
void GCNHazardRecognizer::AdvanceCycle() {
// When the scheduler detects a stall, it will call AdvanceCycle() without
// emitting any instructions.
if (!CurrCycleInstr)
@ -180,7 +191,6 @@ void GCNHazardRecognizer::RecedeCycle() {
int GCNHazardRecognizer::getWaitStatesSince(
function_ref<bool(MachineInstr *)> IsHazard) {
int WaitStates = -1;
for (MachineInstr *MI : EmittedInstrs) {
++WaitStates;
@ -204,7 +214,6 @@ int GCNHazardRecognizer::getWaitStatesSinceDef(
int GCNHazardRecognizer::getWaitStatesSinceSetReg(
function_ref<bool(MachineInstr *)> IsHazard) {
auto IsHazardFn = [IsHazard] (MachineInstr *MI) {
return isSSetReg(MI->getOpcode()) && IsHazard(MI);
};
@ -486,7 +495,6 @@ int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
}
int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
return 0;

View File

@ -12,20 +12,29 @@
/// Generates AMDGPU runtime metadata for YAML mapping.
//
//===----------------------------------------------------------------------===//
//
#include "AMDGPU.h"
#include "AMDGPURuntimeMetadata.h"
#include "MCTargetDesc/AMDGPURuntimeMD.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/YAMLTraits.h"
#include <cassert>
#include <cstdint>
#include <limits>
#include <vector>
#include "AMDGPURuntimeMD.h"
using namespace llvm;
using namespace ::AMDGPU::RuntimeMD;
@ -198,7 +207,6 @@ static KernelArg::Metadata getRuntimeMDForKernelArg(const DataLayout &DL,
Type *T, KernelArg::Kind Kind, StringRef BaseTypeName = "",
StringRef TypeName = "", StringRef ArgName = "", StringRef TypeQual = "",
StringRef AccQual = "") {
KernelArg::Metadata Arg;
// Set ArgSize and ArgAlign.
@ -350,10 +358,11 @@ Program::Metadata::Metadata(const std::string &YAML) {
Input >> *this;
}
std::string Program::Metadata::toYAML(void) {
std::string Program::Metadata::toYAML() {
std::string Text;
raw_string_ostream Stream(Text);
yaml::Output Output(Stream, nullptr, INT_MAX /* do not wrap line */);
yaml::Output Output(Stream, nullptr,
std::numeric_limits<int>::max() /* do not wrap line */);
Output << *this;
return Stream.str();
}
@ -366,11 +375,11 @@ Program::Metadata Program::Metadata::fromYAML(const std::string &S) {
static void checkRuntimeMDYAMLString(const std::string &YAML) {
auto P = Program::Metadata::fromYAML(YAML);
auto S = P.toYAML();
llvm::errs() << "AMDGPU runtime metadata parser test "
<< (YAML == S ? "passes" : "fails") << ".\n";
errs() << "AMDGPU runtime metadata parser test "
<< (YAML == S ? "passes" : "fails") << ".\n";
if (YAML != S) {
llvm::errs() << "First output: " << YAML << '\n'
<< "Second output: " << S << '\n';
errs() << "First output: " << YAML << '\n'
<< "Second output: " << S << '\n';
}
}
@ -399,7 +408,7 @@ std::string llvm::getRuntimeMDYAMLString(Module &M) {
auto YAML = Prog.toYAML();
if (DumpRuntimeMD)
llvm::errs() << "AMDGPU runtime metadata:\n" << YAML << '\n';
errs() << "AMDGPU runtime metadata:\n" << YAML << '\n';
if (CheckRuntimeMDParser)
checkRuntimeMDYAMLString(YAML);

View File

@ -19,10 +19,26 @@
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <new>
#include <set>
#include <utility>
#include <vector>
using namespace llvm;
@ -43,13 +59,12 @@ struct CFStack {
std::vector<StackItem> BranchStack;
std::vector<StackItem> LoopStack;
unsigned MaxStackSize;
unsigned CurrentEntries;
unsigned CurrentSubEntries;
unsigned CurrentEntries = 0;
unsigned CurrentSubEntries = 0;
CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st),
// We need to reserve a stack entry for CALL_FS in vertex shaders.
MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0),
CurrentEntries(0), CurrentSubEntries(0) { }
MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {}
unsigned getLoopDepth();
bool branchStackContains(CFStack::StackItem);
@ -198,9 +213,8 @@ void CFStack::popLoop() {
}
class R600ControlFlowFinalizer : public MachineFunctionPass {
private:
typedef std::pair<MachineInstr *, std::vector<MachineInstr *> > ClauseFile;
typedef std::pair<MachineInstr *, std::vector<MachineInstr *>> ClauseFile;
enum ControlFlowInstruction {
CF_TC,
@ -217,10 +231,10 @@ private:
};
static char ID;
const R600InstrInfo *TII;
const R600RegisterInfo *TRI;
const R600InstrInfo *TII = nullptr;
const R600RegisterInfo *TRI = nullptr;
unsigned MaxFetchInst;
const R600Subtarget *ST;
const R600Subtarget *ST = nullptr;
bool IsTrivialInst(MachineInstr &MI) const {
switch (MI.getOpcode()) {
@ -355,7 +369,7 @@ private:
continue;
int64_t Imm = Src.second;
std::vector<MachineOperand *>::iterator It =
find_if(Lits, [&](MachineOperand *val) {
llvm::find_if(Lits, [&](MachineOperand *val) {
return val->isImm() && (val->getImm() == Imm);
});
@ -485,8 +499,7 @@ private:
}
public:
R600ControlFlowFinalizer(TargetMachine &tm)
: MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), ST(nullptr) {}
R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override {
ST = &MF.getSubtarget<R600Subtarget>();
@ -501,7 +514,7 @@ public:
++MB) {
MachineBasicBlock &MBB = *MB;
unsigned CfCount = 0;
std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
std::vector<MachineInstr * > IfThenElseStack;
if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_VS) {
BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
@ -554,7 +567,7 @@ public:
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_WHILE_LOOP))
.addImm(1);
std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,
std::set<MachineInstr *>());
Pair.second.insert(MIb);
LoopStack.push_back(std::move(Pair));
@ -564,7 +577,7 @@ public:
}
case AMDGPU::ENDLOOP: {
CFStack.popLoop();
std::pair<unsigned, std::set<MachineInstr *> > Pair =
std::pair<unsigned, std::set<MachineInstr *>> Pair =
std::move(LoopStack.back());
LoopStack.pop_back();
CounterPropagateAddr(Pair.second, CfCount);
@ -693,7 +706,6 @@ char R600ControlFlowFinalizer::ID = 0;
} // end anonymous namespace
llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
return new R600ControlFlowFinalizer(TM);
}

View File

@ -17,26 +17,37 @@
#include "AMDGPU.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>
using namespace llvm;
namespace llvm {
void initializeR600EmitClauseMarkersPass(PassRegistry&);
}
} // end namespace llvm
namespace {
class R600EmitClauseMarkers : public MachineFunctionPass {
private:
const R600InstrInfo *TII;
int Address;
const R600InstrInfo *TII = nullptr;
int Address = 0;
unsigned OccupiedDwords(MachineInstr &MI) const {
switch (MI.getOpcode()) {
@ -118,7 +129,7 @@ private:
SubstituteKCacheBank(MachineInstr &MI,
std::vector<std::pair<unsigned, unsigned>> &CachedConsts,
bool UpdateInstr = true) const {
std::vector<std::pair<unsigned, unsigned> > UsedKCache;
std::vector<std::pair<unsigned, unsigned>> UsedKCache;
if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != AMDGPU::DOT_4)
return true;
@ -181,7 +192,7 @@ private:
bool canClauseLocalKillFitInClause(
unsigned AluInstCount,
std::vector<std::pair<unsigned, unsigned> > KCacheBanks,
std::vector<std::pair<unsigned, unsigned>> KCacheBanks,
MachineBasicBlock::iterator Def,
MachineBasicBlock::iterator BBEnd) {
const R600RegisterInfo &TRI = TII->getRegisterInfo();
@ -228,7 +239,7 @@ private:
MachineBasicBlock::iterator
MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) {
MachineBasicBlock::iterator ClauseHead = I;
std::vector<std::pair<unsigned, unsigned> > KCacheBanks;
std::vector<std::pair<unsigned, unsigned>> KCacheBanks;
bool PushBeforeModifier = false;
unsigned AluInstCount = 0;
for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
@ -294,8 +305,8 @@ private:
public:
static char ID;
R600EmitClauseMarkers() : MachineFunctionPass(ID), TII(nullptr), Address(0) {
R600EmitClauseMarkers() : MachineFunctionPass(ID) {
initializeR600EmitClauseMarkersPass(*PassRegistry::getPassRegistry());
}
@ -333,7 +344,6 @@ INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers",
INITIALIZE_PASS_END(R600EmitClauseMarkers, "emitclausemarkers",
"R600 Emit Clause Markters", false, false)
llvm::FunctionPass *llvm::createR600EmitClauseMarkers() {
FunctionPass *llvm::createR600EmitClauseMarkers() {
return new R600EmitClauseMarkers();
}

View File

@ -12,16 +12,34 @@
//
//===----------------------------------------------------------------------===//
#include "R600InstrInfo.h"
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
#include "R600FrameLowering.h"
#include "R600InstrInfo.h"
#include "R600RegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <cstring>
#include <cstdint>
#include <iterator>
#include <utility>
#include <vector>
using namespace llvm;
@ -191,7 +209,7 @@ bool R600InstrInfo::usesTextureCache(const MachineInstr &MI) const {
const MachineFunction *MF = MI.getParent()->getParent();
return (AMDGPU::isCompute(MF->getFunction()->getCallingConv()) &&
usesVertexCache(MI.getOpcode())) ||
usesTextureCache(MI.getOpcode());
usesTextureCache(MI.getOpcode());
}
bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
@ -321,7 +339,7 @@ R600InstrInfo::ExtractSrcs(MachineInstr &MI,
unsigned &ConstCount) const {
ConstCount = 0;
const std::pair<int, unsigned> DummyPair(-1, 0);
std::vector<std::pair<int, unsigned> > Result;
std::vector<std::pair<int, unsigned>> Result;
unsigned i = 0;
for (const auto &Src : getSrcs(MI)) {
++i;
@ -348,8 +366,8 @@ R600InstrInfo::ExtractSrcs(MachineInstr &MI,
return Result;
}
static std::vector<std::pair<int, unsigned> >
Swizzle(std::vector<std::pair<int, unsigned> > Src,
static std::vector<std::pair<int, unsigned>>
Swizzle(std::vector<std::pair<int, unsigned>> Src,
R600InstrInfo::BankSwizzle Swz) {
if (Src[0] == Src[1])
Src[1].first = -1;
@ -404,14 +422,14 @@ static unsigned getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
/// in the same Instruction Group while meeting read port limitations given a
/// Swz swizzle sequence.
unsigned R600InstrInfo::isLegalUpTo(
const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
const std::vector<std::vector<std::pair<int, unsigned>>> &IGSrcs,
const std::vector<R600InstrInfo::BankSwizzle> &Swz,
const std::vector<std::pair<int, unsigned> > &TransSrcs,
const std::vector<std::pair<int, unsigned>> &TransSrcs,
R600InstrInfo::BankSwizzle TransSwz) const {
int Vector[4][3];
memset(Vector, -1, sizeof(Vector));
for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
const std::vector<std::pair<int, unsigned> > &Srcs =
const std::vector<std::pair<int, unsigned>> &Srcs =
Swizzle(IGSrcs[i], Swz[i]);
for (unsigned j = 0; j < 3; j++) {
const std::pair<int, unsigned> &Src = Srcs[j];
@ -473,9 +491,9 @@ NextPossibleSolution(
/// Enumerate all possible Swizzle sequence to find one that can meet all
/// read port requirements.
bool R600InstrInfo::FindSwizzleForVectorSlot(
const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
const std::vector<std::vector<std::pair<int, unsigned>>> &IGSrcs,
std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
const std::vector<std::pair<int, unsigned> > &TransSrcs,
const std::vector<std::pair<int, unsigned>> &TransSrcs,
R600InstrInfo::BankSwizzle TransSwz) const {
unsigned ValidUpTo = 0;
do {
@ -490,7 +508,7 @@ bool R600InstrInfo::FindSwizzleForVectorSlot(
/// a const, and can't read a gpr at cycle 1 if they read 2 const.
static bool
isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
const std::vector<std::pair<int, unsigned> > &TransOps,
const std::vector<std::pair<int, unsigned>> &TransOps,
unsigned ConstCount) {
// TransALU can't read 3 constants
if (ConstCount > 2)
@ -516,7 +534,7 @@ R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
const {
// TODO: support shared src0 - src1 operand
std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
std::vector<std::vector<std::pair<int, unsigned>>> IGSrcs;
ValidSwizzle.clear();
unsigned ConstCount;
BankSwizzle TransBS = ALU_VEC_012_SCL_210;
@ -527,7 +545,7 @@ R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
IG[i]->getOperand(Op).getImm());
}
std::vector<std::pair<int, unsigned> > TransOps;
std::vector<std::pair<int, unsigned>> TransOps;
if (!isLastAluTrans)
return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);
@ -556,7 +574,6 @@ R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
return false;
}
bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
const {
@ -780,7 +797,7 @@ unsigned R600InstrInfo::insertBranch(MachineBasicBlock &MBB,
unsigned R600InstrInfo::removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved) const {
assert(!BytesRemoved && "code size not handled");
assert(!BytesRemoved && "code size not handled");
// Note : we leave PRED* instructions there.
// They may be needed when predicating instructions.
@ -874,7 +891,6 @@ bool R600InstrInfo::isPredicable(MachineInstr &MI) const {
}
}
bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
unsigned NumCyles,
@ -908,7 +924,6 @@ R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
return false;
}
bool
R600InstrInfo::reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
MachineOperand &MO = Cond[1];
@ -948,7 +963,6 @@ bool R600InstrInfo::DefinesPredicate(MachineInstr &MI,
return isPredicateSetter(MI.getOpcode());
}
bool R600InstrInfo::PredicateInstruction(MachineInstr &MI,
ArrayRef<MachineOperand> Pred) const {
int PIdx = MI.findFirstPredOperandIdx();
@ -1067,7 +1081,7 @@ bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
const MachineFunction &MF) const {
const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
const R600FrameLowering *TFL = ST.getFrameLowering();

View File

@ -51,13 +51,23 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <cassert>
#include <iterator>
using namespace llvm;
@ -67,10 +77,10 @@ namespace {
class SILowerControlFlow : public MachineFunctionPass {
private:
const SIRegisterInfo *TRI;
const SIInstrInfo *TII;
LiveIntervals *LIS;
MachineRegisterInfo *MRI;
const SIRegisterInfo *TRI = nullptr;
const SIInstrInfo *TII = nullptr;
LiveIntervals *LIS = nullptr;
MachineRegisterInfo *MRI = nullptr;
void emitIf(MachineInstr &MI);
void emitElse(MachineInstr &MI);
@ -88,12 +98,7 @@ private:
public:
static char ID;
SILowerControlFlow() :
MachineFunctionPass(ID),
TRI(nullptr),
TII(nullptr),
LIS(nullptr),
MRI(nullptr) {}
SILowerControlFlow() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@ -113,7 +118,7 @@ public:
}
};
} // End anonymous namespace
} // end anonymous namespace
char SILowerControlFlow::ID = 0;