[OpenMP][libomptarget][AMDGPU] Update print launch info

Clean up for the AMD-specific kernel launch info in the NextGen Plugins.
- Fixes a mistake introduced with the initial commit that added printing
  of an AMD-only property.
- Removes another AMD-only property (not clear on upstream status)
- Adds some more comment to what info is printed.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D145924
This commit is contained in:
JP Lehr 2023-03-15 05:58:01 -04:00
parent 70562607ab
commit 13a0b48f37
3 changed files with 47 additions and 15 deletions

View File

@ -2614,33 +2614,39 @@ Error AMDGPUKernelTy::printLaunchInfoDetails(GenericDeviceTy &GenericDevice,
return Plugin::success();
// General Info
auto ConstWGSize = getDefaultNumThreads(GenericDevice);
auto NumGroups = NumBlocks;
auto ThreadsPerGroup = getDefaultNumThreads(GenericDevice);
auto NumTeams = KernelArgs.NumTeams[0]; // Only first dimension
auto ThreadLimit = KernelArgs.ThreadLimit[0]; // Only first dimension
auto ThreadsPerGroup = NumThreads;
// Kernel Arguments Info
auto ArgNum = KernelArgs.NumArgs;
auto LoopTripCount = KernelArgs.Tripcount;
// Details for AMDGPU kernels
// Details for AMDGPU kernels (read from image)
// https://www.llvm.org/docs/AMDGPUUsage.html#code-object-v4-metadata
auto GroupSegmentSize = (*KernelInfo).GroupSegmentList;
auto SGPRCount = (*KernelInfo).SGPRCount;
auto VGPRCount = (*KernelInfo).VGPRCount;
auto SGPRSpillCount = (*KernelInfo).SGPRSpillCount;
auto VGPRSpillCount = (*KernelInfo).VGPRSpillCount;
auto MaxFlatWorkgroupSize = (*KernelInfo).MaxFlatWorkgroupSize;
// TODO set correctly once host services available
auto HostCallRequired = false;
// Prints additional launch info that contains the following.
// Num Args: The number of kernel arguments
// Teams x Thrds: The number of teams and the number of threads actually
// running.
// MaxFlatWorkgroupSize: Maximum flat work-group size supported by the
// kernel in work-items
// LDS Usage: Amount of bytes used in LDS storage
// S/VGPR Count: the number of S/V GPRs occupied by the kernel
// S/VGPR Spill Count: how many S/VGPRs are spilled by the kernel
// Tripcount: loop tripcount for the kernel
INFO(OMP_INFOTYPE_PLUGIN_KERNEL, GenericDevice.getDeviceId(),
"SGN:%s ConstWGSize:%d args:%d teamsXthrds:(%4luX%4d) "
"reqd:(%4dX%4d) lds_usage:%uB sgpr_count:%u vgpr_count:%u "
"sgpr_spill_count:%u vgpr_spill_count:%u tripcount:%lu rpc:%d n:%s\n",
getExecutionModeName(), ConstWGSize, ArgNum, NumGroups, ThreadsPerGroup,
NumTeams, ThreadLimit, GroupSegmentSize, SGPRCount, VGPRCount,
SGPRSpillCount, VGPRSpillCount, LoopTripCount, HostCallRequired,
getName());
"#Args: %d Teams x Thrds: %4lux%4u (MaxFlatWorkGroupSize: %u) LDS "
"Usage: %uB #SGPRs/VGPRs: %u/%u #SGPR/VGPR Spills: %u/%u Tripcount: "
"%lu\n",
ArgNum, NumGroups, ThreadsPerGroup, MaxFlatWorkgroupSize,
GroupSegmentSize, SGPRCount, VGPRCount, SGPRSpillCount, VGPRSpillCount,
LoopTripCount);
return Plugin::success();
}

View File

@ -146,6 +146,10 @@ struct KernelMetaDataTy {
uint32_t KernelSegmentSize;
uint32_t ExplicitArgumentCount;
uint32_t ImplicitArgumentCount;
uint32_t RequestedWorkgroupSize[3];
uint32_t WorkgroupSizeHint[3];
uint32_t WavefronSize;
uint32_t MaxFlatWorkgroupSize;
};
namespace {
@ -194,6 +198,19 @@ private:
return DK.getString() == SK;
};
const auto getSequenceOfThreeInts = [](msgpack::DocNode &DN,
uint32_t *Vals) {
assert(DN.isArray() && "MsgPack DocNode is an array node");
auto DNA = DN.getArray();
assert(DNA.size() == 3 && "ArrayNode has at most three elements");
int i = 0;
for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
++DNABegin) {
Vals[i++] = DNABegin->getUInt();
}
};
if (isKey(V.first, ".name")) {
KernelName = V.second.toString();
} else if (isKey(V.first, ".sgpr_count")) {
@ -208,6 +225,14 @@ private:
KernelData.PrivateSegmentSize = V.second.getUInt();
} else if (isKey(V.first, ".group_segement_fixed_size")) {
KernelData.GroupSegmentList = V.second.getUInt();
} else if (isKey(V.first, ".reqd_workgroup_size")) {
getSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
} else if (isKey(V.first, ".workgroup_size_hint")) {
getSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
} else if (isKey(V.first, ".wavefront_size")) {
KernelData.WavefronSize = V.second.getUInt();
} else if (isKey(V.first, ".max_flat_workgroup_size")) {
KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
}
return Error::success();
@ -295,6 +320,7 @@ Error readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
return Error::success();
}
} // namespace utils
} // namespace plugin
} // namespace target

View File

@ -39,7 +39,7 @@ int main() {
// INFO: info: Entering OpenMP kernel at info.c:{{[0-9]+}}:{{[0-9]+}} with 1 arguments:
// INFO: info: firstprivate(val)[4]
// INFO: info: Launching kernel __omp_offloading_{{.*}}main{{.*}} with {{[0-9]+}} blocks and {{[0-9]+}} threads in Generic mode
// AMDGPU: AMDGPU device {{[0-9]}} info: SGN:Generic ConstWGSize:{{[0-9]+}} args:{{[0-9]}} teamsXthrds:({{ [0-9]+}}X {{[0-9]+}}) reqd:( {{[0-9]+}}X {{[0-9]+}}) lds_usage:{{[0-9]+}}B sgpr_count:{{[0-9]+}} vgpr_count:{{[0-9]+}} sgpr_spill_count:{{[0-9]+}} vgpr_spill_count:{{[0-9]+}} tripcount:{{[0-9]+}} rpc:0 n:__omp_offloading_{{.*}}main{{.*}}
// AMDGPU: AMDGPU device {{[0-9]}} info: #Args: {{[0-9]}} Teams x Thrds: {{[0-9]+}}x {{[0-9]+}} (MaxFlatWorkGroupSize: {{[0-9]+}}) LDS Usage: {{[0-9]+}}B #SGPRs/VGPRs: {{[0-9]+}}/{{[0-9]+}} #SGPR/VGPR Spills: {{[0-9]+}}/{{[0-9]+}} Tripcount: {{[0-9]+}}
// INFO: info: OpenMP Host-Device pointer mappings after block at info.c:{{[0-9]+}}:{{[0-9]+}}:
// INFO: info: Host Ptr Target Ptr Size (B) DynRefCount HoldRefCount Declaration
// INFO: info: {{.*}} {{.*}} 256 1 0 C[0:64] at info.c:{{[0-9]+}}:{{[0-9]+}}