mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-02-20 18:27:20 +00:00
[OpenMP][libomptarget][AMDGPU] Update print launch info
Clean up for the AMD-specific kernel launch info in the NextGen Plugins. - Fixes a mistake introduced with the initial commit that added printing of an AMD-only property. - Removes another AMD-only property (not clear on upstream status) - Adds some more comment to what info is printed. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D145924
This commit is contained in:
parent
70562607ab
commit
13a0b48f37
@ -2614,33 +2614,39 @@ Error AMDGPUKernelTy::printLaunchInfoDetails(GenericDeviceTy &GenericDevice,
|
||||
return Plugin::success();
|
||||
|
||||
// General Info
|
||||
auto ConstWGSize = getDefaultNumThreads(GenericDevice);
|
||||
auto NumGroups = NumBlocks;
|
||||
auto ThreadsPerGroup = getDefaultNumThreads(GenericDevice);
|
||||
auto NumTeams = KernelArgs.NumTeams[0]; // Only first dimension
|
||||
auto ThreadLimit = KernelArgs.ThreadLimit[0]; // Only first dimension
|
||||
auto ThreadsPerGroup = NumThreads;
|
||||
|
||||
// Kernel Arguments Info
|
||||
auto ArgNum = KernelArgs.NumArgs;
|
||||
auto LoopTripCount = KernelArgs.Tripcount;
|
||||
|
||||
// Details for AMDGPU kernels
|
||||
// Details for AMDGPU kernels (read from image)
|
||||
// https://www.llvm.org/docs/AMDGPUUsage.html#code-object-v4-metadata
|
||||
auto GroupSegmentSize = (*KernelInfo).GroupSegmentList;
|
||||
auto SGPRCount = (*KernelInfo).SGPRCount;
|
||||
auto VGPRCount = (*KernelInfo).VGPRCount;
|
||||
auto SGPRSpillCount = (*KernelInfo).SGPRSpillCount;
|
||||
auto VGPRSpillCount = (*KernelInfo).VGPRSpillCount;
|
||||
auto MaxFlatWorkgroupSize = (*KernelInfo).MaxFlatWorkgroupSize;
|
||||
|
||||
// TODO set correctly once host services available
|
||||
auto HostCallRequired = false;
|
||||
// Prints additional launch info that contains the following.
|
||||
// Num Args: The number of kernel arguments
|
||||
// Teams x Thrds: The number of teams and the number of threads actually
|
||||
// running.
|
||||
// MaxFlatWorkgroupSize: Maximum flat work-group size supported by the
|
||||
// kernel in work-items
|
||||
// LDS Usage: Amount of bytes used in LDS storage
|
||||
// S/VGPR Count: the number of S/V GPRs occupied by the kernel
|
||||
// S/VGPR Spill Count: how many S/VGPRs are spilled by the kernel
|
||||
// Tripcount: loop tripcount for the kernel
|
||||
INFO(OMP_INFOTYPE_PLUGIN_KERNEL, GenericDevice.getDeviceId(),
|
||||
"SGN:%s ConstWGSize:%d args:%d teamsXthrds:(%4luX%4d) "
|
||||
"reqd:(%4dX%4d) lds_usage:%uB sgpr_count:%u vgpr_count:%u "
|
||||
"sgpr_spill_count:%u vgpr_spill_count:%u tripcount:%lu rpc:%d n:%s\n",
|
||||
getExecutionModeName(), ConstWGSize, ArgNum, NumGroups, ThreadsPerGroup,
|
||||
NumTeams, ThreadLimit, GroupSegmentSize, SGPRCount, VGPRCount,
|
||||
SGPRSpillCount, VGPRSpillCount, LoopTripCount, HostCallRequired,
|
||||
getName());
|
||||
"#Args: %d Teams x Thrds: %4lux%4u (MaxFlatWorkGroupSize: %u) LDS "
|
||||
"Usage: %uB #SGPRs/VGPRs: %u/%u #SGPR/VGPR Spills: %u/%u Tripcount: "
|
||||
"%lu\n",
|
||||
ArgNum, NumGroups, ThreadsPerGroup, MaxFlatWorkgroupSize,
|
||||
GroupSegmentSize, SGPRCount, VGPRCount, SGPRSpillCount, VGPRSpillCount,
|
||||
LoopTripCount);
|
||||
|
||||
return Plugin::success();
|
||||
}
|
||||
|
@ -146,6 +146,10 @@ struct KernelMetaDataTy {
|
||||
uint32_t KernelSegmentSize;
|
||||
uint32_t ExplicitArgumentCount;
|
||||
uint32_t ImplicitArgumentCount;
|
||||
uint32_t RequestedWorkgroupSize[3];
|
||||
uint32_t WorkgroupSizeHint[3];
|
||||
uint32_t WavefronSize;
|
||||
uint32_t MaxFlatWorkgroupSize;
|
||||
};
|
||||
namespace {
|
||||
|
||||
@ -194,6 +198,19 @@ private:
|
||||
return DK.getString() == SK;
|
||||
};
|
||||
|
||||
const auto getSequenceOfThreeInts = [](msgpack::DocNode &DN,
|
||||
uint32_t *Vals) {
|
||||
assert(DN.isArray() && "MsgPack DocNode is an array node");
|
||||
auto DNA = DN.getArray();
|
||||
assert(DNA.size() == 3 && "ArrayNode has at most three elements");
|
||||
|
||||
int i = 0;
|
||||
for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
|
||||
++DNABegin) {
|
||||
Vals[i++] = DNABegin->getUInt();
|
||||
}
|
||||
};
|
||||
|
||||
if (isKey(V.first, ".name")) {
|
||||
KernelName = V.second.toString();
|
||||
} else if (isKey(V.first, ".sgpr_count")) {
|
||||
@ -208,6 +225,14 @@ private:
|
||||
KernelData.PrivateSegmentSize = V.second.getUInt();
|
||||
} else if (isKey(V.first, ".group_segement_fixed_size")) {
|
||||
KernelData.GroupSegmentList = V.second.getUInt();
|
||||
} else if (isKey(V.first, ".reqd_workgroup_size")) {
|
||||
getSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
|
||||
} else if (isKey(V.first, ".workgroup_size_hint")) {
|
||||
getSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
|
||||
} else if (isKey(V.first, ".wavefront_size")) {
|
||||
KernelData.WavefronSize = V.second.getUInt();
|
||||
} else if (isKey(V.first, ".max_flat_workgroup_size")) {
|
||||
KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
|
||||
}
|
||||
|
||||
return Error::success();
|
||||
@ -295,6 +320,7 @@ Error readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
|
||||
|
||||
return Error::success();
|
||||
}
|
||||
|
||||
} // namespace utils
|
||||
} // namespace plugin
|
||||
} // namespace target
|
||||
|
@ -39,7 +39,7 @@ int main() {
|
||||
// INFO: info: Entering OpenMP kernel at info.c:{{[0-9]+}}:{{[0-9]+}} with 1 arguments:
|
||||
// INFO: info: firstprivate(val)[4]
|
||||
// INFO: info: Launching kernel __omp_offloading_{{.*}}main{{.*}} with {{[0-9]+}} blocks and {{[0-9]+}} threads in Generic mode
|
||||
// AMDGPU: AMDGPU device {{[0-9]}} info: SGN:Generic ConstWGSize:{{[0-9]+}} args:{{[0-9]}} teamsXthrds:({{ [0-9]+}}X {{[0-9]+}}) reqd:( {{[0-9]+}}X {{[0-9]+}}) lds_usage:{{[0-9]+}}B sgpr_count:{{[0-9]+}} vgpr_count:{{[0-9]+}} sgpr_spill_count:{{[0-9]+}} vgpr_spill_count:{{[0-9]+}} tripcount:{{[0-9]+}} rpc:0 n:__omp_offloading_{{.*}}main{{.*}}
|
||||
// AMDGPU: AMDGPU device {{[0-9]}} info: #Args: {{[0-9]}} Teams x Thrds: {{[0-9]+}}x {{[0-9]+}} (MaxFlatWorkGroupSize: {{[0-9]+}}) LDS Usage: {{[0-9]+}}B #SGPRs/VGPRs: {{[0-9]+}}/{{[0-9]+}} #SGPR/VGPR Spills: {{[0-9]+}}/{{[0-9]+}} Tripcount: {{[0-9]+}}
|
||||
// INFO: info: OpenMP Host-Device pointer mappings after block at info.c:{{[0-9]+}}:{{[0-9]+}}:
|
||||
// INFO: info: Host Ptr Target Ptr Size (B) DynRefCount HoldRefCount Declaration
|
||||
// INFO: info: {{.*}} {{.*}} 256 1 0 C[0:64] at info.c:{{[0-9]+}}:{{[0-9]+}}
|
||||
|
Loading…
x
Reference in New Issue
Block a user