From addd0733250af38da0ff5503d65250156a9c179a Mon Sep 17 00:00:00 2001 From: Wael Yehia Date: Sat, 5 Feb 2022 00:08:21 -0500 Subject: [PATCH] [AIX][PowerPC][PGO] Generate .ref for some PGO sections For PGO on AIX, when we switch to the linux-style PGO variable access (via _start and _stop labels), we need the compiler to generate a .ref assembly for each of the three csects: - __llvm_prf_data[RW] - __llvm_prf_names[RO] - __llvm_prf_vnds[RW] We insert the .ref inside the __llvm_prf_cnts[RW] csect so that if it's live then the 3 csects are live. For example, for a testcase with at least one function definition, when compiled with -fprofile-generate we should generate: .csect __llvm_prf_cnts[RW],3 .ref __llvm_prf_data[RW] <<============ needs to be inserted .ref __llvm_prf_names[RO] <<=========== the __llvm_prf_vnds is not always present, so we reference it only when it's present. Reviewed By: sfertile, daltenty Differential Revision: https://reviews.llvm.org/D116607 --- llvm/include/llvm/MC/MCContext.h | 3 + llvm/include/llvm/MC/MCStreamer.h | 6 ++ llvm/include/llvm/MC/MCXCOFFStreamer.h | 4 + llvm/lib/MC/MCAsmStreamer.cpp | 7 ++ llvm/lib/MC/MCContext.cpp | 6 ++ llvm/lib/MC/MCStreamer.cpp | 4 + llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 29 +++++++ .../test/CodeGen/PowerPC/pgo-ref-directive.ll | 82 +++++++++++++++++++ 8 files changed, 141 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/pgo-ref-directive.ll diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h index d2307d692278..96a4024ff8d8 100644 --- a/llvm/include/llvm/MC/MCContext.h +++ b/llvm/include/llvm/MC/MCContext.h @@ -650,6 +650,9 @@ namespace llvm { unsigned Flags, const MCSymbolWasm *Group, unsigned UniqueID, const char *BeginSymName); + bool hasXCOFFSection(StringRef Section, + XCOFF::CsectProperties CsectProp) const; + MCSectionXCOFF *getXCOFFSection( StringRef Section, SectionKind K, Optional<XCOFF::CsectProperties> CsectProp = None, diff --git a/llvm/include/llvm/MC/MCStreamer.h 
b/llvm/include/llvm/MC/MCStreamer.h index 3d6c512bfe73..3be890cc6fea 100644 --- a/llvm/include/llvm/MC/MCStreamer.h +++ b/llvm/include/llvm/MC/MCStreamer.h @@ -615,6 +615,12 @@ public: /// changed at the end of assembly. virtual void emitXCOFFRenameDirective(const MCSymbol *Name, StringRef Rename); + /// Emit an XCOFF .ref directive which creates an R_REF type entry in the + /// relocation table for one or more symbols. + /// + /// \param Sym - The symbol on the .ref directive. + virtual void emitXCOFFRefDirective(StringRef Sym); + /// Emit an ELF .size directive. /// /// This corresponds to an assembler statement such as: diff --git a/llvm/include/llvm/MC/MCXCOFFStreamer.h b/llvm/include/llvm/MC/MCXCOFFStreamer.h index 5fc2efbe5284..3faa03fa69e9 100644 --- a/llvm/include/llvm/MC/MCXCOFFStreamer.h +++ b/llvm/include/llvm/MC/MCXCOFFStreamer.h @@ -32,6 +32,10 @@ public: void emitXCOFFSymbolLinkageWithVisibility(MCSymbol *Symbol, MCSymbolAttr Linkage, MCSymbolAttr Visibility) override; + void emitXCOFFRefDirective(StringRef Name) override { + report_fatal_error("emitXCOFFRefDirective is not implemented yet on object " + "generation path"); + } void emitXCOFFRenameDirective(const MCSymbol *Name, StringRef Rename) override { report_fatal_error("emitXCOFFRenameDirective is not implemented yet on " diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index 61ec941f50b8..3ca1813a9093 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -198,6 +198,8 @@ public: void emitXCOFFRenameDirective(const MCSymbol *Name, StringRef Rename) override; + void emitXCOFFRefDirective(StringRef Name) override; + void emitELFSize(MCSymbol *Symbol, const MCExpr *Value) override; void emitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) override; @@ -931,6 +933,11 @@ void MCAsmStreamer::emitXCOFFRenameDirective(const MCSymbol *Name, EmitEOL(); } +void MCAsmStreamer::emitXCOFFRefDirective(StringRef Name) { + OS << "\t.ref " << 
Name; + EmitEOL(); +} + void MCAsmStreamer::emitELFSize(MCSymbol *Symbol, const MCExpr *Value) { assert(MAI->hasDotTypeDotSizeDirective()); OS << "\t.size\t"; diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp index eafcee1e0607..bbbbfe5731b5 100644 --- a/llvm/lib/MC/MCContext.cpp +++ b/llvm/lib/MC/MCContext.cpp @@ -732,6 +732,12 @@ MCSectionWasm *MCContext::getWasmSection(const Twine &Section, SectionKind Kind, return Result; } +bool MCContext::hasXCOFFSection(StringRef Section, + XCOFF::CsectProperties CsectProp) const { + return XCOFFUniquingMap.count( + XCOFFSectionKey(Section.str(), CsectProp.MappingClass)) != 0; +} + MCSectionXCOFF *MCContext::getXCOFFSection( StringRef Section, SectionKind Kind, Optional<XCOFF::CsectProperties> CsectProp, bool MultiSymbolsAllowed, diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp index a14f0de65a9d..8608d7a90fec 100644 --- a/llvm/lib/MC/MCStreamer.cpp +++ b/llvm/lib/MC/MCStreamer.cpp @@ -1180,6 +1180,10 @@ void MCStreamer::emitXCOFFRenameDirective(const MCSymbol *Name, "XCOFF targets"); } +void MCStreamer::emitXCOFFRefDirective(StringRef Name) { + llvm_unreachable("emitXCOFFRefDirective is only supported on XCOFF targets"); +} + void MCStreamer::emitELFSize(MCSymbol *Symbol, const MCExpr *Value) {} void MCStreamer::emitELFSymverDirective(const MCSymbol *OriginalSym, StringRef Name, bool KeepOriginalSym) {} diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 780981806996..387fa027054a 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -254,6 +254,8 @@ public: void emitFunctionBodyEnd() override; + void emitPGORefs(); + void emitEndOfAsmFile(Module &) override; void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const override; @@ -2475,12 +2477,39 @@ void PPCAIXAsmPrinter::emitFunctionEntryLabel() { }); } +void PPCAIXAsmPrinter::emitPGORefs() { + if (OutContext.hasXCOFFSection( + "__llvm_prf_cnts", + 
XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD))) { + MCSection *CntsSection = OutContext.getXCOFFSection( + "__llvm_prf_cnts", SectionKind::getData(), + XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD), + /*MultiSymbolsAllowed*/ true); + + OutStreamer->SwitchSection(CntsSection); + if (OutContext.hasXCOFFSection( + "__llvm_prf_data", + XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD))) + OutStreamer->emitXCOFFRefDirective("__llvm_prf_data[RW]"); + if (OutContext.hasXCOFFSection( + "__llvm_prf_names", + XCOFF::CsectProperties(XCOFF::XMC_RO, XCOFF::XTY_SD))) + OutStreamer->emitXCOFFRefDirective("__llvm_prf_names[RO]"); + if (OutContext.hasXCOFFSection( + "__llvm_prf_vnds", + XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD))) + OutStreamer->emitXCOFFRefDirective("__llvm_prf_vnds[RW]"); + } +} + void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) { // If there are no functions and there are no toc-data definitions in this // module, we will never need to reference the TOC base. if (M.empty() && TOCDataGlobalVars.empty()) return; + emitPGORefs(); + // Switch to section to emit TOC base. 
OutStreamer->SwitchSection(getObjFileLowering().getTOCBaseSection()); diff --git a/llvm/test/CodeGen/PowerPC/pgo-ref-directive.ll b/llvm/test/CodeGen/PowerPC/pgo-ref-directive.ll new file mode 100644 index 000000000000..92deedf4a026 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pgo-ref-directive.ll @@ -0,0 +1,82 @@ +; RUN: rm -rf %t && split-file %s %t + +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff -xcoff-traceback-table=false < %t/no-ref.ll | FileCheck %s --check-prefixes=NOREF +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff -xcoff-traceback-table=false < %t/no-vnds.ll | FileCheck %s --check-prefixes=NOVNDS +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff -xcoff-traceback-table=false < %t/with-vnds.ll | FileCheck %s --check-prefixes=WITHVNDS + + +;--- no-ref.ll +; The absence of a __llvm_prf_cnts section should stop generating the .refs. +; +target datalayout = "E-m:a-p:32:32-i64:64-n32" +target triple = "powerpc-ibm-aix7.2.0.0" + +@__profd_main = private global i64 zeroinitializer, section "__llvm_prf_data", align 8 +@__llvm_prf_nm = private constant [6 x i8] c"\04\00main", section "__llvm_prf_names", align 1 + +@llvm.used = appending global [2 x i8*] + [i8* bitcast (i64* @__profd_main to i8*), + i8* getelementptr inbounds ([6 x i8], [6 x i8]* @__llvm_prf_nm, i32 0, i32 0)], section "llvm.metadata" + +define i32 @main() #0 { +entry: + ret i32 1 +} + +; NOREF-NOT: .ref __llvm_prf_data +; NOREF-NOT: .ref __llvm_prf_names +; NOREF-NOT: .ref __llvm_prf_vnds + +;--- no-vnds.ll +; This is the most common case. When -fprofile-generate is used and there exists executable code, we generate the __llvm_prf_cnts, __llvm_prf_data, and __llvm_prf_names sections. 
+; +target datalayout = "E-m:a-p:32:32-i64:64-n32" +target triple = "powerpc-ibm-aix7.2.0.0" + +@__profc_main = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", align 8 +@__profd_main = private global i64 zeroinitializer, section "__llvm_prf_data", align 8 +@__llvm_prf_nm = private constant [6 x i8] c"\04\00main", section "__llvm_prf_names", align 1 + +@llvm.used = appending global [3 x i8*] + [i8* bitcast ([1 x i64]* @__profc_main to i8*), + i8* bitcast (i64* @__profd_main to i8*), + i8* getelementptr inbounds ([6 x i8], [6 x i8]* @__llvm_prf_nm, i32 0, i32 0)], section "llvm.metadata" + +define i32 @main() #0 { +entry: + ret i32 1 +} +; There will be two __llvm_prf_cnts .csects, one to represent the actual csect +; that holds @__profc_main, and one generated to hold the .ref directives. In +; XCOFF, a csect can be defined in pieces, so this is legal assembly. +; +; NOVNDS: .csect __llvm_prf_cnts[RW],3 +; NOVNDS: .csect __llvm_prf_cnts[RW],3 +; NOVNDS-NEXT: .ref __llvm_prf_data[RW] +; NOVNDS-NEXT: .ref __llvm_prf_names[RO] +; NOVNDS-NOT: .ref __llvm_prf_vnds + +;--- with-vnds.ll +; When value profiling is needed, the PGO instrumentation generates variables in the __llvm_prf_vnds section, so we generate a .ref for them too. 
+; +@__profc_main = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", align 8 +@__profd_main = private global i64 zeroinitializer, section "__llvm_prf_data", align 8 +@__llvm_prf_nm = private constant [6 x i8] c"\04\00main", section "__llvm_prf_names", align 1 +@__llvm_prf_vnodes = private global [10 x { i64, i64, i8* }] zeroinitializer, section "__llvm_prf_vnds" + +@llvm.used = appending global [4 x i8*] + [i8* bitcast ([1 x i64]* @__profc_main to i8*), + i8* bitcast (i64* @__profd_main to i8*), + i8* getelementptr inbounds ([6 x i8], [6 x i8]* @__llvm_prf_nm, i32 0, i32 0), + i8* bitcast ([10 x { i64, i64, i8* }]* @__llvm_prf_vnodes to i8*)], section "llvm.metadata" + +define i32 @main() #0 { +entry: + ret i32 1 +} + +; WITHVNDS: .csect __llvm_prf_cnts[RW],3 +; WITHVNDS: .csect __llvm_prf_cnts[RW],3 +; WITHVNDS-NEXT: .ref __llvm_prf_data[RW] +; WITHVNDS-NEXT: .ref __llvm_prf_names[RO] +; WITHVNDS-NEXT: .ref __llvm_prf_vnds[RW]