mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-24 06:10:12 +00:00
[OPENMP][AMDGCN] Improvements to print_kernel_trace (bit mask)
Allow bit masking to select various trace features: bit 0 => launch tracing (stderr); bit 1 => timing of runtime (stdout); bit 2 => detailed launch tracing (stderr); bit 3 => timing goes to stdout instead of stderr. Examples: LIBOMPTARGET_KERNEL_TRACE=7 does it all; LIBOMPTARGET_KERNEL_TRACE=5 launch + details; LIBOMPTARGET_KERNEL_TRACE=2 timings + launch to stderr; LIBOMPTARGET_KERNEL_TRACE=10 timings + launch to stdout. Differential Revision: https://reviews.llvm.org/D96998
This commit is contained in:
parent
8181dcd30f
commit
30c0d5b4c3
21
openmp/libomptarget/plugins/amdgpu/src/print_tracing.h
Normal file
21
openmp/libomptarget/plugins/amdgpu/src/print_tracing.h
Normal file
@ -0,0 +1,21 @@
|
||||
//===--- print_tracing.h - OpenMP interface definitions -------- AMDGPU -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Include guard for print_tracing.h.
// NOTE(review): the macro spells "AMGGPU"; presumably "AMDGPU" was intended.
// Harmless while the #ifndef and #define names match each other — confirm
// before renaming.
#ifndef LIBOMPTARGET_PLUGINS_AMGGPU_SRC_PRINT_TRACING_H_INCLUDED
|
||||
#define LIBOMPTARGET_PLUGINS_AMGGPU_SRC_PRINT_TRACING_H_INCLUDED
|
||||
|
||||
// Bit flags tested against print_kernel_trace. Each value is a distinct
// power of two, so flags can be combined and checked with bitwise AND
// (e.g. `print_kernel_trace & STARTUP_DETAILS`).
enum PrintTraceControlBits {
|
||||
LAUNCH = 1, // bit 0: print a message for each kernel launch (stderr, or stdout when RTL_TIMING or RTL_TO_STDOUT is also set)
|
||||
RTL_TIMING = 2, // bit 1: print timing info around each RTL step
|
||||
STARTUP_DETAILS = 4, // bit 2: details around loading up kernel (device info and launch-size values, printed to stderr)
|
||||
RTL_TO_STDOUT = 8 // bit 3: redirect RTL tracing to stdout
|
||||
};
|
||||
|
||||
extern int print_kernel_trace; // mask of PrintTraceControlBits; set by environment variable (LIBOMPTARGET_KERNEL_TRACE)
|
||||
|
||||
#endif
|
@ -38,6 +38,7 @@
|
||||
#include "Debug.h"
|
||||
#include "get_elf_mach_gfx_name.h"
|
||||
#include "omptargetplugin.h"
|
||||
#include "print_tracing.h"
|
||||
|
||||
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
|
||||
|
||||
@ -714,7 +715,7 @@ int32_t __tgt_rtl_init_device(int device_id) {
|
||||
DeviceInfo.GPUName[device_id] = GetInfoName;
|
||||
}
|
||||
|
||||
if (print_kernel_trace == 4)
|
||||
if (print_kernel_trace & STARTUP_DETAILS)
|
||||
fprintf(stderr, "Device#%-2d CU's: %2d %s\n", device_id,
|
||||
DeviceInfo.ComputeUnits[device_id],
|
||||
DeviceInfo.GPUName[device_id].c_str());
|
||||
@ -1568,7 +1569,7 @@ void getLaunchVals(int &threadsPerGroup, int &num_groups, int ConstWGSize,
|
||||
if (Max_Teams > DeviceInfo.HardTeamLimit)
|
||||
Max_Teams = DeviceInfo.HardTeamLimit;
|
||||
|
||||
if (print_kernel_trace == 4) {
|
||||
if (print_kernel_trace & STARTUP_DETAILS) {
|
||||
fprintf(stderr, "RTLDeviceInfoTy::Max_Teams: %d\n",
|
||||
RTLDeviceInfoTy::Max_Teams);
|
||||
fprintf(stderr, "Max_Teams: %d\n", Max_Teams);
|
||||
@ -1601,7 +1602,7 @@ void getLaunchVals(int &threadsPerGroup, int &num_groups, int ConstWGSize,
|
||||
DP("Reduced threadsPerGroup to flat-attr-group-size limit %d\n",
|
||||
threadsPerGroup);
|
||||
}
|
||||
if (print_kernel_trace == 4)
|
||||
if (print_kernel_trace & STARTUP_DETAILS)
|
||||
fprintf(stderr, "threadsPerGroup: %d\n", threadsPerGroup);
|
||||
DP("Preparing %d threads\n", threadsPerGroup);
|
||||
|
||||
@ -1614,7 +1615,7 @@ void getLaunchVals(int &threadsPerGroup, int &num_groups, int ConstWGSize,
|
||||
num_groups = Max_Teams;
|
||||
DP("Set default num of groups %d\n", num_groups);
|
||||
|
||||
if (print_kernel_trace == 4) {
|
||||
if (print_kernel_trace & STARTUP_DETAILS) {
|
||||
fprintf(stderr, "num_groups: %d\n", num_groups);
|
||||
fprintf(stderr, "num_teams: %d\n", num_teams);
|
||||
}
|
||||
@ -1634,7 +1635,7 @@ void getLaunchVals(int &threadsPerGroup, int &num_groups, int ConstWGSize,
|
||||
if (num_teams > 0) {
|
||||
num_groups = (num_teams < num_groups) ? num_teams : num_groups;
|
||||
}
|
||||
if (print_kernel_trace == 4) {
|
||||
if (print_kernel_trace & STARTUP_DETAILS) {
|
||||
fprintf(stderr, "num_groups: %d\n", num_groups);
|
||||
fprintf(stderr, "DeviceInfo.EnvNumTeams %d\n", DeviceInfo.EnvNumTeams);
|
||||
fprintf(stderr, "DeviceInfo.EnvTeamLimit %d\n", DeviceInfo.EnvTeamLimit);
|
||||
@ -1667,13 +1668,13 @@ void getLaunchVals(int &threadsPerGroup, int &num_groups, int ConstWGSize,
|
||||
}
|
||||
if (num_groups > Max_Teams) {
|
||||
num_groups = Max_Teams;
|
||||
if (print_kernel_trace == 4)
|
||||
if (print_kernel_trace & STARTUP_DETAILS)
|
||||
fprintf(stderr, "Limiting num_groups %d to Max_Teams %d \n", num_groups,
|
||||
Max_Teams);
|
||||
}
|
||||
if (num_groups > num_teams && num_teams > 0) {
|
||||
num_groups = num_teams;
|
||||
if (print_kernel_trace == 4)
|
||||
if (print_kernel_trace & STARTUP_DETAILS)
|
||||
fprintf(stderr, "Limiting num_groups %d to clause num_teams %d \n",
|
||||
num_groups, num_teams);
|
||||
}
|
||||
@ -1687,7 +1688,7 @@ void getLaunchVals(int &threadsPerGroup, int &num_groups, int ConstWGSize,
|
||||
num_groups > DeviceInfo.EnvMaxTeamsDefault)
|
||||
num_groups = DeviceInfo.EnvMaxTeamsDefault;
|
||||
}
|
||||
if (print_kernel_trace == 4) {
|
||||
if (print_kernel_trace & STARTUP_DETAILS) {
|
||||
fprintf(stderr, "threadsPerGroup: %d\n", threadsPerGroup);
|
||||
fprintf(stderr, "num_groups: %d\n", num_groups);
|
||||
fprintf(stderr, "loop_tripcount: %ld\n", loop_tripcount);
|
||||
@ -1767,14 +1768,17 @@ int32_t __tgt_rtl_run_target_team_region_locked(
|
||||
loop_tripcount, // From run_region arg
|
||||
KernelInfo->device_id);
|
||||
|
||||
if (print_kernel_trace >= 1)
|
||||
if (print_kernel_trace >= LAUNCH) {
|
||||
// enum modes are SPMD, GENERIC, NONE 0,1,2
|
||||
fprintf(stderr,
|
||||
// if doing rtl timing, print to stderr, unless stdout requested.
|
||||
bool traceToStdout = print_kernel_trace & (RTL_TO_STDOUT | RTL_TIMING);
|
||||
fprintf(traceToStdout ? stdout : stderr,
|
||||
"DEVID:%2d SGN:%1d ConstWGSize:%-4d args:%2d teamsXthrds:(%4dX%4d) "
|
||||
"reqd:(%4dX%4d) n:%s\n",
|
||||
device_id, KernelInfo->ExecutionMode, KernelInfo->ConstWGSize,
|
||||
arg_num, num_groups, threadsPerGroup, num_teams, thread_limit,
|
||||
KernelInfo->Name);
|
||||
}
|
||||
|
||||
// Run on the device.
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user