mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-07 12:30:57 +00:00
[AMDGPU] Emit debugger prologue and emit the rest of the debugger fields in the kernel code header
Debugger prologue is emitted if -mattr=+amdgpu-debugger-emit-prologue is specified.
Debugger prologue writes work group IDs and work item IDs to scratch memory at a fixed location in the following format:
- offset 0: work group ID x
- offset 4: work group ID y
- offset 8: work group ID z
- offset 16: work item ID x
- offset 20: work item ID y
- offset 24: work item ID z
Set:
- amd_kernel_code_t::debug_wavefront_private_segment_offset_sgpr to scratch wave offset reg
- amd_kernel_code_t::debug_private_segment_buffer_sgpr to scratch rsrc reg
- amd_kernel_code_t::is_debug_supported to true if all debugger features are enabled
Differential Revision: http://reviews.llvm.org/D20335
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@273769 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
005f32a475
commit
20c7a48718
@ -329,6 +329,13 @@ def FeatureDebuggerReserveRegs : SubtargetFeature<
|
||||
"Reserve registers for debugger usage"
|
||||
>;
|
||||
|
||||
// Subtarget feature requested with -mattr=+amdgpu-debugger-emit-prologue.
// The four strings are the feature name, "DebuggerEmitPrologue", "true" and
// the help text; presumably the second and third name the subtarget member
// and the value it receives (confirm against the SubtargetFeature class).
def FeatureDebuggerEmitPrologue : SubtargetFeature<
|
||||
"amdgpu-debugger-emit-prologue",
|
||||
"DebuggerEmitPrologue",
|
||||
"true",
|
||||
"Emit debugger prologue"
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def AMDGPUInstrInfo : InstrInfo {
|
||||
|
@ -200,6 +200,13 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
||||
OutStreamer->emitRawComment(" ReservedVGPRCount: " + Twine(KernelInfo.ReservedVGPRCount),
|
||||
false);
|
||||
|
||||
if (MF.getSubtarget<SISubtarget>().debuggerEmitPrologue()) {
|
||||
OutStreamer->emitRawComment(" DebuggerWavefrontPrivateSegmentOffsetSGPR: s" +
|
||||
Twine(KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR), false);
|
||||
OutStreamer->emitRawComment(" DebuggerPrivateSegmentBufferSGPR: s" +
|
||||
Twine(KernelInfo.DebuggerPrivateSegmentBufferSGPR), false);
|
||||
}
|
||||
|
||||
OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +
|
||||
Twine(G_00B84C_USER_SGPR(KernelInfo.ComputePGMRSrc2)),
|
||||
false);
|
||||
@ -444,6 +451,16 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
||||
MaxVGPR += MFI->getDebuggerReservedVGPRCount();
|
||||
}
|
||||
|
||||
// Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
|
||||
// DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
|
||||
// attribute was specified.
|
||||
if (STM.debuggerEmitPrologue()) {
|
||||
ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR =
|
||||
RI->getHWRegIndex(MFI->getScratchWaveOffsetReg());
|
||||
ProgInfo.DebuggerPrivateSegmentBufferSGPR =
|
||||
RI->getHWRegIndex(MFI->getScratchRSrcReg());
|
||||
}
|
||||
|
||||
// We found the maximum register index. They start at 0, so add one to get the
|
||||
// number of registers.
|
||||
ProgInfo.NumVGPR = MaxVGPR + 1;
|
||||
@ -670,6 +687,9 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
|
||||
if (MFI->hasDispatchPtr())
|
||||
header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
|
||||
|
||||
if (STM.debuggerSupported())
|
||||
header.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED;
|
||||
|
||||
if (STM.isXNACKEnabled())
|
||||
header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
|
||||
|
||||
@ -681,6 +701,13 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
|
||||
header.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst;
|
||||
header.reserved_vgpr_count = KernelInfo.ReservedVGPRCount;
|
||||
|
||||
if (STM.debuggerEmitPrologue()) {
|
||||
header.debug_wavefront_private_segment_offset_sgpr =
|
||||
KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
|
||||
header.debug_private_segment_buffer_sgpr =
|
||||
KernelInfo.DebuggerPrivateSegmentBufferSGPR;
|
||||
}
|
||||
|
||||
AMDGPUTargetStreamer *TS =
|
||||
static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
|
||||
|
||||
|
@ -42,6 +42,8 @@ private:
|
||||
FlatUsed(false),
|
||||
ReservedVGPRFirst(0),
|
||||
ReservedVGPRCount(0),
|
||||
DebuggerWavefrontPrivateSegmentOffsetSGPR((uint16_t)-1),
|
||||
DebuggerPrivateSegmentBufferSGPR((uint16_t)-1),
|
||||
VCCUsed(false),
|
||||
CodeLen(0) {}
|
||||
|
||||
@ -75,6 +77,14 @@ private:
|
||||
// The number of consecutive VGPRs reserved.
|
||||
uint16_t ReservedVGPRCount;
|
||||
|
||||
// Fixed SGPR number used to hold wave scratch offset for entire kernel
|
||||
// execution, or uint16_t(-1) if the register is not used or not known.
|
||||
uint16_t DebuggerWavefrontPrivateSegmentOffsetSGPR;
|
||||
// Fixed SGPR number of the first 4 SGPRs used to hold scratch V# for entire
|
||||
// kernel execution, or uint16_t(-1) if the register is not used or not
|
||||
// known.
|
||||
uint16_t DebuggerPrivateSegmentBufferSGPR;
|
||||
|
||||
// Bonus information for debugging.
|
||||
bool VCCUsed;
|
||||
uint64_t CodeLen;
|
||||
|
@ -101,6 +101,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
EnableXNACK(false),
|
||||
DebuggerInsertNops(false),
|
||||
DebuggerReserveRegs(false),
|
||||
DebuggerEmitPrologue(false),
|
||||
|
||||
EnableVGPRSpilling(false),
|
||||
EnablePromoteAlloca(false),
|
||||
|
@ -77,6 +77,7 @@ protected:
|
||||
bool EnableXNACK;
|
||||
bool DebuggerInsertNops;
|
||||
bool DebuggerReserveRegs;
|
||||
bool DebuggerEmitPrologue;
|
||||
|
||||
// Used as options.
|
||||
bool EnableVGPRSpilling;
|
||||
@ -402,6 +403,11 @@ public:
|
||||
return EnableSIScheduler;
|
||||
}
|
||||
|
||||
bool debuggerSupported() const {
|
||||
return debuggerInsertNops() && debuggerReserveRegs() &&
|
||||
debuggerEmitPrologue();
|
||||
}
|
||||
|
||||
bool debuggerInsertNops() const {
|
||||
return DebuggerInsertNops;
|
||||
}
|
||||
@ -410,6 +416,10 @@ public:
|
||||
return DebuggerReserveRegs;
|
||||
}
|
||||
|
||||
/// \returns the value of the DebuggerEmitPrologue subtarget flag.
bool debuggerEmitPrologue() const {
|
||||
return DebuggerEmitPrologue;
|
||||
}
|
||||
|
||||
bool loadStoreOptEnabled() const {
|
||||
return EnableLoadStoreOpt;
|
||||
}
|
||||
|
@ -39,6 +39,12 @@ static ArrayRef<MCPhysReg> getAllSGPRs() {
|
||||
|
||||
void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB) const {
|
||||
// Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was
|
||||
// specified.
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
if (ST.debuggerEmitPrologue())
|
||||
emitDebuggerPrologue(MF, MBB);
|
||||
|
||||
if (!MF.getFrameInfo()->hasStackObjects())
|
||||
return;
|
||||
|
||||
@ -54,7 +60,6 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
if (hasOnlySGPRSpills(MFI, MF.getFrameInfo()))
|
||||
return;
|
||||
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
const SIRegisterInfo *TRI = &TII->getRegisterInfo();
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
@ -87,6 +92,8 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
// pointer. Because we only detect if flat instructions are used at all,
|
||||
// this will be used more often than necessary on VI.
|
||||
|
||||
// Debug location must be unknown since the first debug location is used to
|
||||
// determine the end of the prologue.
|
||||
DebugLoc DL;
|
||||
|
||||
unsigned FlatScratchInitReg
|
||||
@ -289,3 +296,44 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
|
||||
RS->addScavengingFrameIndex(ScavengeFI);
|
||||
}
|
||||
}
|
||||
|
||||
/// Inserts, at the start of \p MBB, the stores that save the work group ID
/// and work item ID of each of the three dimensions into their debugger
/// stack objects.
void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF,
                                           MachineBasicBlock &MBB) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  const SISubtarget &Subtarget = MF.getSubtarget<SISubtarget>();
  const SIInstrInfo *InstrInfo = Subtarget.getInstrInfo();
  const SIRegisterInfo *RegInfo = &InstrInfo->getRegisterInfo();
  auto &MRI = MF.getRegInfo();

  // Everything is inserted ahead of the block's current first instruction and
  // carries a default-constructed debug location.
  MachineBasicBlock::iterator InsertPt = MBB.begin();
  DebugLoc NoLoc;

  for (unsigned Dim = 0; Dim != 3; ++Dim) {
    // Work group ID: mark its SGPR live-in again for the function and block.
    const unsigned GroupIDSGPR = FuncInfo->getWorkGroupIDSGPR(Dim);
    MRI.addLiveIn(GroupIDSGPR);
    MBB.addLiveIn(GroupIDSGPR);

    // SGPRs are spilled into VGPRs, so the ID is first moved into a fresh
    // VGPR, which is then stored to its stack slot.
    const unsigned GroupIDCopy =
        MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    BuildMI(MBB, InsertPt, NoLoc, InstrInfo->get(AMDGPU::V_MOV_B32_e32),
            GroupIDCopy)
        .addReg(GroupIDSGPR);
    InstrInfo->storeRegToStackSlot(
        MBB, InsertPt, GroupIDCopy, /*isKill=*/false,
        FuncInfo->getDebuggerWorkGroupIDStackObjectIndex(Dim),
        &AMDGPU::VGPR_32RegClass, RegInfo);

    // Work item ID: already in a VGPR, so mark it live-in again and store it
    // directly.
    const unsigned ItemIDVGPR = FuncInfo->getWorkItemIDVGPR(Dim);
    MRI.addLiveIn(ItemIDVGPR);
    MBB.addLiveIn(ItemIDVGPR);
    InstrInfo->storeRegToStackSlot(
        MBB, InsertPt, ItemIDVGPR, /*isKill=*/false,
        FuncInfo->getDebuggerWorkItemIDStackObjectIndex(Dim),
        &AMDGPU::VGPR_32RegClass, RegInfo);
  }
}
|
||||
|
@ -29,6 +29,10 @@ public:
|
||||
void processFunctionBeforeFrameFinalized(
|
||||
MachineFunction &MF,
|
||||
RegScavenger *RS = nullptr) const override;
|
||||
|
||||
private:
|
||||
/// \brief Emits debugger prologue.
|
||||
void emitDebuggerPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -596,6 +596,11 @@ SDValue SITargetLowering::LowerFormalArguments(
|
||||
return DAG.getEntryNode();
|
||||
}
|
||||
|
||||
// Create stack objects that are used for emitting debugger prologue if
|
||||
// "amdgpu-debugger-emit-prologue" attribute was specified.
|
||||
if (ST.debuggerEmitPrologue())
|
||||
createDebuggerPrologueStackObjects(MF);
|
||||
|
||||
SmallVector<ISD::InputArg, 16> Splits;
|
||||
BitVector Skipped(Ins.size());
|
||||
|
||||
@ -1258,6 +1263,32 @@ bool SITargetLowering::isCFIntrinsic(const SDNode *Intr) const {
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates the fixed stack objects the debugger prologue stores into and
/// records their indices in the function's SIMachineFunctionInfo.
///
/// Each object is 4 bytes, at these offsets:
///   offset 0:  work group ID x
///   offset 4:  work group ID y
///   offset 8:  work group ID z
///   offset 16: work item ID x
///   offset 20: work item ID y
///   offset 24: work item ID z
void SITargetLowering::createDebuggerPrologueStackObjects(
    MachineFunction &MF) const {
  auto *FrameInfo = MF.getFrameInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  for (unsigned Dim = 0; Dim != 3; ++Dim) {
    // Work group IDs occupy the first three dwords.
    const int GroupIDIdx = FrameInfo->CreateFixedObject(4, Dim * 4, true);
    FuncInfo->setDebuggerWorkGroupIDStackObjectIndex(Dim, GroupIDIdx);

    // Work item IDs follow, starting at byte 16.
    const int ItemIDIdx = FrameInfo->CreateFixedObject(4, Dim * 4 + 16, true);
    FuncInfo->setDebuggerWorkItemIDStackObjectIndex(Dim, ItemIDIdx);
  }
}
|
||||
|
||||
/// This transforms the control flow intrinsics to get the branch destination as
|
||||
/// last parameter, also switches branch target with BR if the need arises
|
||||
SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
|
||||
|
@ -70,6 +70,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
|
||||
bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;
|
||||
|
||||
bool isCFIntrinsic(const SDNode *Intr) const;
|
||||
|
||||
void createDebuggerPrologueStackObjects(MachineFunction &MF) const;
|
||||
public:
|
||||
SITargetLowering(const TargetMachine &tm, const SISubtarget &STI);
|
||||
|
||||
|
@ -54,6 +54,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
|
||||
ReturnsVoid(true),
|
||||
MaximumWorkGroupSize(0),
|
||||
DebuggerReservedVGPRCount(0),
|
||||
DebuggerWorkGroupIDStackObjectIndices{0, 0, 0},
|
||||
DebuggerWorkItemIDStackObjectIndices{0, 0, 0},
|
||||
LDSWaveSpillSize(0),
|
||||
PSInputEna(0),
|
||||
NumUserSGPRs(0),
|
||||
@ -92,16 +94,16 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
|
||||
WorkItemIDX = true;
|
||||
}
|
||||
|
||||
if (F->hasFnAttribute("amdgpu-work-group-id-y"))
|
||||
if (F->hasFnAttribute("amdgpu-work-group-id-y") || ST.debuggerEmitPrologue())
|
||||
WorkGroupIDY = true;
|
||||
|
||||
if (F->hasFnAttribute("amdgpu-work-group-id-z"))
|
||||
if (F->hasFnAttribute("amdgpu-work-group-id-z") || ST.debuggerEmitPrologue())
|
||||
WorkGroupIDZ = true;
|
||||
|
||||
if (F->hasFnAttribute("amdgpu-work-item-id-y"))
|
||||
if (F->hasFnAttribute("amdgpu-work-item-id-y") || ST.debuggerEmitPrologue())
|
||||
WorkItemIDY = true;
|
||||
|
||||
if (F->hasFnAttribute("amdgpu-work-item-id-z"))
|
||||
if (F->hasFnAttribute("amdgpu-work-item-id-z") || ST.debuggerEmitPrologue())
|
||||
WorkItemIDZ = true;
|
||||
|
||||
// X, XY, and XYZ are the only supported combinations, so make sure Y is
|
||||
|
@ -64,6 +64,10 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
|
||||
|
||||
// Number of reserved VGPRs for debugger usage.
|
||||
unsigned DebuggerReservedVGPRCount;
|
||||
// Stack object indices for work group IDs.
|
||||
int DebuggerWorkGroupIDStackObjectIndices[3];
|
||||
// Stack object indices for work item IDs.
|
||||
int DebuggerWorkItemIDStackObjectIndices[3];
|
||||
|
||||
public:
|
||||
// FIXME: Make private
|
||||
@ -334,6 +338,62 @@ public:
|
||||
return DebuggerReservedVGPRCount;
|
||||
}
|
||||
|
||||
/// \returns Stack object index for \p Dim's work group ID.
|
||||
int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
|
||||
assert(Dim < 3);
|
||||
return DebuggerWorkGroupIDStackObjectIndices[Dim];
|
||||
}
|
||||
|
||||
/// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
|
||||
void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
  // The index array has exactly three entries, one per dimension.
  assert(Dim < 3 && "unexpected dimension");
  DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
}
|
||||
|
||||
/// \returns Stack object index for \p Dim's work item ID.
|
||||
int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
|
||||
assert(Dim < 3);
|
||||
return DebuggerWorkItemIDStackObjectIndices[Dim];
|
||||
}
|
||||
|
||||
/// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
|
||||
void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
  // The index array has exactly three entries, one per dimension.
  assert(Dim < 3 && "unexpected dimension");
  DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
}
|
||||
|
||||
/// \returns SGPR used for \p Dim's work group ID.
|
||||
unsigned getWorkGroupIDSGPR(unsigned Dim) const {
|
||||
switch (Dim) {
|
||||
case 0:
|
||||
assert(hasWorkGroupIDX());
|
||||
return WorkGroupIDXSystemSGPR;
|
||||
case 1:
|
||||
assert(hasWorkGroupIDY());
|
||||
return WorkGroupIDYSystemSGPR;
|
||||
case 2:
|
||||
assert(hasWorkGroupIDZ());
|
||||
return WorkGroupIDZSystemSGPR;
|
||||
}
|
||||
llvm_unreachable("unexpected dimension");
|
||||
}
|
||||
|
||||
/// \returns VGPR used for \p Dim's work item ID.
|
||||
unsigned getWorkItemIDVGPR(unsigned Dim) const {
|
||||
switch (Dim) {
|
||||
case 0:
|
||||
assert(hasWorkItemIDX());
|
||||
return AMDGPU::VGPR0;
|
||||
case 1:
|
||||
assert(hasWorkItemIDY());
|
||||
return AMDGPU::VGPR1;
|
||||
case 2:
|
||||
assert(hasWorkItemIDZ());
|
||||
return AMDGPU::VGPR2;
|
||||
}
|
||||
llvm_unreachable("unexpected dimension");
|
||||
}
|
||||
|
||||
unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const;
|
||||
};
|
||||
|
||||
|
80
test/CodeGen/AMDGPU/debugger-emit-prologue.ll
Normal file
80
test/CodeGen/AMDGPU/debugger-emit-prologue.ll
Normal file
@ -0,0 +1,80 @@
|
||||
; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-emit-prologue -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s --check-prefix=NOATTR
|
||||
|
||||
; CHECK: debug_wavefront_private_segment_offset_sgpr = [[SOFF:[0-9]+]]
|
||||
; CHECK: debug_private_segment_buffer_sgpr = [[SREG:[0-9]+]]
|
||||
|
||||
; CHECK: v_mov_b32_e32 [[WGIDX:v[0-9]+]], s{{[0-9]+}}
|
||||
; CHECK: buffer_store_dword [[WGIDX]], off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]]
|
||||
; CHECK: buffer_store_dword v0, off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]] offset:16
|
||||
|
||||
; CHECK: v_mov_b32_e32 [[WGIDY:v[0-9]+]], s{{[0-9]+}}
|
||||
; CHECK: buffer_store_dword [[WGIDY]], off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]] offset:4
|
||||
; CHECK: buffer_store_dword v1, off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]] offset:20
|
||||
|
||||
; CHECK: v_mov_b32_e32 [[WGIDZ:v[0-9]+]], s{{[0-9]+}}
|
||||
; CHECK: buffer_store_dword [[WGIDZ]], off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]] offset:8
|
||||
; CHECK: buffer_store_dword v2, off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]] offset:24
|
||||
|
||||
; CHECK: DebuggerWavefrontPrivateSegmentOffsetSGPR: s[[SOFF]]
|
||||
; CHECK: DebuggerPrivateSegmentBufferSGPR: s[[SREG]]
|
||||
|
||||
; NOATTR-NOT: DebuggerWavefrontPrivateSegmentOffsetSGPR
|
||||
; NOATTR-NOT: DebuggerPrivateSegmentBufferSGPR
|
||||
|
||||
; Function Attrs: nounwind
|
||||
; Test kernel: spills the pointer argument %A to a stack slot, then reloads it
; three times to store 1, 2 and 3 to elements 0, 1 and 2 of the buffer.
define void @test(i32 addrspace(1)* %A) #0 !dbg !12 {
|
||||
entry:
|
||||
%A.addr = alloca i32 addrspace(1)*, align 4
|
||||
store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 4
|
||||
call void @llvm.dbg.declare(metadata i32 addrspace(1)** %A.addr, metadata !17, metadata !18), !dbg !19
|
||||
%0 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !20
|
||||
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 0, !dbg !20
|
||||
store i32 1, i32 addrspace(1)* %arrayidx, align 4, !dbg !21
|
||||
%1 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !22
|
||||
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 1, !dbg !22
|
||||
store i32 2, i32 addrspace(1)* %arrayidx1, align 4, !dbg !23
|
||||
%2 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !24
|
||||
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %2, i32 2, !dbg !24
|
||||
store i32 3, i32 addrspace(1)* %arrayidx2, align 4, !dbg !25
|
||||
ret void, !dbg !26
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
|
||||
|
||||
attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="fiji" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!opencl.kernels = !{!3}
|
||||
!llvm.module.flags = !{!9, !10}
|
||||
!llvm.ident = !{!11}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 269772)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
|
||||
!1 = !DIFile(filename: "test01.cl", directory: "/home/kzhuravl/Lightning/testing")
|
||||
!2 = !{}
|
||||
!3 = !{void (i32 addrspace(1)*)* @test, !4, !5, !6, !7, !8}
|
||||
!4 = !{!"kernel_arg_addr_space", i32 1}
|
||||
!5 = !{!"kernel_arg_access_qual", !"none"}
|
||||
!6 = !{!"kernel_arg_type", !"int*"}
|
||||
!7 = !{!"kernel_arg_base_type", !"int*"}
|
||||
!8 = !{!"kernel_arg_type_qual", !""}
|
||||
!9 = !{i32 2, !"Dwarf Version", i32 2}
|
||||
!10 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!11 = !{!"clang version 3.9.0 (trunk 269772)"}
|
||||
!12 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !13, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
|
||||
!13 = !DISubroutineType(types: !14)
|
||||
!14 = !{null, !15}
|
||||
!15 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !16, size: 64, align: 32)
|
||||
!16 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
|
||||
!17 = !DILocalVariable(name: "A", arg: 1, scope: !12, file: !1, line: 1, type: !15)
|
||||
!18 = !DIExpression()
|
||||
!19 = !DILocation(line: 1, column: 30, scope: !12)
|
||||
!20 = !DILocation(line: 2, column: 3, scope: !12)
|
||||
!21 = !DILocation(line: 2, column: 8, scope: !12)
|
||||
!22 = !DILocation(line: 3, column: 3, scope: !12)
|
||||
!23 = !DILocation(line: 3, column: 8, scope: !12)
|
||||
!24 = !DILocation(line: 4, column: 3, scope: !12)
|
||||
!25 = !DILocation(line: 4, column: 8, scope: !12)
|
||||
!26 = !DILocation(line: 5, column: 1, scope: !12)
|
Loading…
Reference in New Issue
Block a user