From 01571ef1e9d58ad097c61226d2f81426d048e287 Mon Sep 17 00:00:00 2001 From: Gordon Henriksen Date: Sun, 24 Aug 2008 03:18:23 +0000 Subject: [PATCH] Update GC docs for clarified naming and AsmWriter refactoring. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@55275 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/GarbageCollection.html | 159 +++++++++++++++++++++--------------- 1 file changed, 93 insertions(+), 66 deletions(-) diff --git a/docs/GarbageCollection.html b/docs/GarbageCollection.html index 91768f1e533..3909fb616bf 100644 --- a/docs/GarbageCollection.html +++ b/docs/GarbageCollection.html @@ -75,7 +75,7 @@
  • Generating safe points: NeededSafePoints
  • Emitting assembly code: - beginAssembly and finishAssembly
  • + GCMetadataPrinter @@ -205,7 +205,7 @@ garbage collector implementations in two manners:

    your compiler. For llc, use the -load option.
  • Selecting the collection algorithm by applying the gc "..." attribute to your garbage collected functions, or equivalently with - the setCollector method.
  • + the setGC method.
  • Linking your final executable with the garbage collector runtime.
  • @@ -352,8 +352,7 @@ specified by the runtime.

    The gc function attribute is used to specify the desired collector algorithm to the compiler. It is equivalent to specifying the collector name -programmatically using the setCollector method of -Function.

    +programmatically using the setGC method of Function.

    Specifying the collector on a per-function basis allows LLVM to link together programs that use different garbage collection algorithms.

    @@ -616,35 +615,39 @@ TODO
    -

    User code specifies which collector plugin to use with the gc -function attribute or, equivalently, with the setCollector method of +

    User code specifies which GC code generation to use with the gc +function attribute or, equivalently, with the setGC method of Function.

    -

    To implement a collector plugin, it is necessary to subclass -llvm::Collector, which can be accomplished in a few lines of +

    To implement a GC plugin, it is necessary to subclass +llvm::GCStrategy, which can be accomplished in a few lines of boilerplate code. LLVM's infrastructure provides access to several important algorithms. For an uncontroversial collector, all that remains may be to emit the assembly code for the collector's unique stack map data structure, which might be accomplished in as few as 100 LOC.

    -

    To subclass llvm::Collector and register a collector:

    +

    This is not the appropriate place to implement a garbage collected heap or a +garbage collector itself. That code should exist in the language's runtime +library. The compiler plugin is responsible for generating code which is +compatible with that runtime library.

    -
    // lib/MyGC/MyGC.cpp - Example LLVM collector plugin
    +

    To subclass llvm::GCStrategy and register it with the compiler:

    -#include "llvm/CodeGen/Collector.h" -#include "llvm/CodeGen/Collectors.h" -#include "llvm/CodeGen/CollectorMetadata.h" +
    // lib/MyGC/MyGC.cpp - Example LLVM GC plugin
    +
    +#include "llvm/CodeGen/GCStrategy.h"
    +#include "llvm/CodeGen/GCMetadata.h"
     #include "llvm/Support/Compiler.h"
     
     using namespace llvm;
     
     namespace {
    -  class VISIBILITY_HIDDEN MyCollector : public Collector {
    +  class VISIBILITY_HIDDEN MyGC : public GCStrategy {
       public:
    -    MyCollector() {}
    +    MyGC() {}
       };
       
    -  CollectorRegistry::Add<MyCollector>
    +  GCRegistry::Add<MyGC>
       X("mygc", "My bespoke garbage collector.");
     }
    @@ -986,21 +989,21 @@ interest.

    for (iterator I = begin(), E = end(); I != E; ++I) {
    -  CollectorMetadata *MD = *I;
    -  unsigned FrameSize = MD->getFrameSize();
    -  size_t RootCount = MD->roots_size();
    +  GCFunctionInfo *FI = *I;
    +  unsigned FrameSize = FI->getFrameSize();
    +  size_t RootCount = FI->roots_size();
     
    -  for (CollectorMetadata::roots_iterator RI = MD->roots_begin(),
    -                                         RE = MD->roots_end();
    -                                         RI != RE; ++RI) {
    +  for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(),
    +                                      RE = FI->roots_end();
    +                                      RI != RE; ++RI) {
         int RootNum = RI->Num;
         int RootStackOffset = RI->StackOffset;
         Constant *RootMetadata = RI->Metadata;
       }
     }
    -

    LLVM automatically computes a stack map. All a Collector needs to do -is access it using CollectorMetadata::roots_begin() and +

    LLVM automatically computes a stack map. All a GCStrategy needs to do +is access it using GCFunctionInfo::roots_begin() and -end(). If the llvm.gcroot intrinsic is eliminated before code generation by a custom lowering pass, LLVM's stack map will be empty.

    @@ -1015,19 +1018,19 @@ generation by a custom lowering pass, LLVM's stack map will be empty.

    -MyCollector::MyCollector() {
    +MyGC::MyGC() {
       InitRoots = true;
     }

    When set, LLVM will automatically initialize each root to null upon -entry to the function. This prevents the reachability analysis from finding -uninitialized values in stack roots at runtime, which will almost certainly -cause it to segfault. This initialization occurs before custom lowering, so the -two may be used together.

    +entry to the function. This prevents the GC's sweep phase from visiting +uninitialized pointers, which will almost certainly cause it to crash. This +initialization occurs before custom lowering, so the two may be used +together.

    -

    Since LLVM does not yet compute liveness information, this feature should be -used by all collectors which do not custom lower llvm.gcroot, and even -some that do.

    +

    Since LLVM does not yet compute liveness information, there is no means of +distinguishing an uninitialized stack root from an initialized one. Therefore, +this feature should be used by all GC plugins. It is enabled by default.

    @@ -1040,14 +1043,14 @@ some that do.

    -

    For collectors with barriers or unusual treatment of stack roots, these -flags allow the collector to perform any required transformation on the LLVM +

    For GCs which use barriers or unusual treatment of stack roots, these +flags allow the collector to perform arbitrary transformations of the LLVM IR:

    -class MyCollector : public Collector {
    +class MyGC : public GCStrategy {
     public:
    -  MyCollector() {
    +  MyGC() {
         CustomRoots = true;
         CustomReadBarriers = true;
         CustomWriteBarriers = true;
    @@ -1058,8 +1061,8 @@ public:
     };

    If any of these flags are set, then LLVM suppresses its default lowering for -the corresponding intrinsics and instead passes them on to a custom lowering -pass specified by the collector.

    +the corresponding intrinsics and instead calls +performCustomLowering.

    LLVM's default action for each intrinsic is as follows:

    @@ -1074,11 +1077,12 @@ pass specified by the collector.

    then performCustomLowering must eliminate the corresponding barriers.

    -

    performCustomLowering, must comply with the same restrictions as runOnFunction, and -that initializeCustomLowering has the same semantics as doInitialization(Module -&).

    +

    performCustomLowering must comply with the same restrictions as FunctionPass::runOnFunction. +Likewise, initializeCustomLowering has the same semantics as Pass::doInitialization(Module&).

    The following can be used as a template:

    @@ -1086,11 +1090,11 @@ href="WritingAnLLVMPass.html#doInitialization_mod">doInitialization(Module >#include "llvm/Module.h" #include "llvm/IntrinsicInst.h" -bool MyCollector::initializeCustomLowering(Module &M) { +bool MyGC::initializeCustomLowering(Module &M) { return false; } -bool MyCollector::performCustomLowering(Function &F) { +bool MyGC::performCustomLowering(Function &F) { bool MadeChange = false; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) @@ -1146,7 +1150,7 @@ bool MyCollector::performCustomLowering(Function &F) { NeededSafePoints mask:

    -MyCollector::MyCollector() {
    +MyGC::MyGC() {
       NeededSafePoints = 1 << GC::Loop
                        | 1 << GC::Return
                        | 1 << GC::PreCall
    @@ -1157,11 +1161,11 @@ bool MyCollector::performCustomLowering(Function &F) {
     
     
    for (iterator I = begin(), E = end(); I != E; ++I) {
    -  CollectorMetadata *MD = *I;
    +  GCFunctionInfo *MD = *I;
       size_t PointCount = MD->size();
     
    -  for (CollectorMetadata::iterator PI = MD->begin(),
    -                                   PE = MD->end(); PI != PE; ++PI) {
    +  for (GCFunctionInfo::iterator PI = MD->begin(),
    +                                PE = MD->end(); PI != PE; ++PI) {
         GC::PointKind PointKind = PI->Kind;
         unsigned PointNum = PI->Num;
       }
    @@ -1187,33 +1191,56 @@ safe point (because only the topmost function has been patched).

    -

    LLVM allows a collector to print arbitrary assembly code before and after -the rest of a module's assembly code. From the latter callback, the collector -can print stack maps built by the code generator.

    +

    LLVM allows a GC to print arbitrary assembly code before and after the rest +of a module's assembly code. At the end of the module, the GC can print stack +maps built by the code generator. (At the beginning, this information is not +yet computed.)

    + +

    Since AsmWriter and CodeGen are separate components of LLVM, a separate +abstract base class and registry are provided for printing assembly code, the +GCMetadataPrinter and GCMetadataPrinterRegistry. The AsmWriter +will look for such a subclass if the GCStrategy sets +UsesMetadata:

    + +
    MyGC::MyGC() {
    +  UsesMetadata = true;
    +}

    Note that LLVM does not currently have analogous APIs to support code generation in the JIT, nor using the object writers.

    -class MyCollector : public Collector {
    -public:
    -  virtual void beginAssembly(std::ostream &OS, AsmPrinter &AP,
    -                             const TargetAsmInfo &TAI);
    +// lib/MyGC/MyGCPrinter.cpp - Example LLVM GC printer
     
    -  virtual void finishAssembly(std::ostream &OS, AsmPrinter &AP,
    -                              const TargetAsmInfo &TAI);
    +#include "llvm/CodeGen/GCMetadataPrinter.h"
    +#include "llvm/Support/Compiler.h"
    +
    +using namespace llvm;
    +
    +namespace {
    +  class VISIBILITY_HIDDEN MyGCPrinter : public GCMetadataPrinter {
    +  public:
    +    virtual void beginAssembly(std::ostream &OS, AsmPrinter &AP,
    +                               const TargetAsmInfo &TAI);
    +  
    +    virtual void finishAssembly(std::ostream &OS, AsmPrinter &AP,
    +                                const TargetAsmInfo &TAI);
    +  };
    +  
    +  GCMetadataPrinterRegistry::Add<MyGCPrinter>
    +  X("mygc", "My bespoke garbage collector.");
     }

    The collector should use AsmPrinter and TargetAsmInfo to print portable assembly code to the std::ostream. The collector itself contains the stack map for the entire module, and may access the -CollectorMetadata using its own begin() and end() +GCFunctionInfo using its own begin() and end() methods. Here's a realistic example:

     #include "llvm/Target/TargetData.h"
     #include "llvm/Target/TargetAsmInfo.h"
     
    -void MyCollector::beginAssembly(std::ostream &OS, AsmPrinter &AP,
    +void MyGCPrinter::beginAssembly(std::ostream &OS, AsmPrinter &AP,
                                     const TargetAsmInfo &TAI) {
       // Nothing to do.
     }
     
    -void MyCollector::finishAssembly(std::ostream &OS, AsmPrinter &AP,
    +void MyGCPrinter::finishAssembly(std::ostream &OS, AsmPrinter &AP,
                                      const TargetAsmInfo &TAI) {
       // Set up for emitting addresses.
       const char *AddressDirective;
    @@ -1246,7 +1273,7 @@ void MyCollector::finishAssembly(std::ostream &OS, AsmPrinter &AP,
       
       // For each function...
       for (iterator FI = begin(), FE = end(); FI != FE; ++FI) {
    -    CollectorMetadata &MD = **FI;
    +    GCFunctionInfo &MD = **FI;
         
         // Emit this data structure:
         // 
    @@ -1276,7 +1303,7 @@ void MyCollector::finishAssembly(std::ostream &OS, AsmPrinter &AP,
         AP.EOL("safe point count");
         
         // And each safe point...
    -    for (CollectorMetadata::iterator PI = MD.begin(),
    +    for (GCFunctionInfo::iterator PI = MD.begin(),
                                          PE = MD.end(); PI != PE; ++PI) {
           // Align to address width.
           AP.EmitAlignment(AddressAlignLog);
    @@ -1295,7 +1322,7 @@ void MyCollector::finishAssembly(std::ostream &OS, AsmPrinter &AP,
           AP.EOL("live root count");
           
           // And for each live root...
    -      for (CollectorMetadata::live_iterator LI = MD.live_begin(PI),
    +      for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI),
                                                 LE = MD.live_end(PI);
                                                 LI != LE; ++LI) {
             // Print its offset within the stack frame.