//===-- AsmWriter.cpp - Printing LLVM as an assembly file -----------------===// // // This library implements the functionality defined in llvm/Assembly/Writer.h // // Note that these routines must be extremely tolerant of various errors in the // LLVM code, because it can be used for debugging transformations. // //===----------------------------------------------------------------------===// #include "llvm/Assembly/CachedWriter.h" #include "llvm/Assembly/Writer.h" #include "llvm/Assembly/PrintModulePass.h" #include "llvm/SlotCalculator.h" #include "llvm/DerivedTypes.h" #include "llvm/Instruction.h" #include "llvm/Module.h" #include "llvm/Constants.h" #include "llvm/iMemory.h" #include "llvm/iTerminators.h" #include "llvm/iPHINode.h" #include "llvm/iOther.h" #include "llvm/SymbolTable.h" #include "llvm/Support/CFG.h" #include "Support/StringExtras.h" #include "Support/STLExtras.h" #include static RegisterPass X("printm", "Print module to stderr",PassInfo::Analysis|PassInfo::Optimization); static RegisterPass Y("print","Print function to stderr",PassInfo::Analysis|PassInfo::Optimization); static void WriteAsOperandInternal(std::ostream &Out, const Value *V, bool PrintName, std::map &TypeTable, SlotCalculator *Table); static const Module *getModuleFromVal(const Value *V) { if (const Argument *MA = dyn_cast(V)) return MA->getParent() ? MA->getParent()->getParent() : 0; else if (const BasicBlock *BB = dyn_cast(V)) return BB->getParent() ? BB->getParent()->getParent() : 0; else if (const Instruction *I = dyn_cast(V)) { const Function *M = I->getParent() ? I->getParent()->getParent() : 0; return M ? M->getParent() : 0; } else if (const GlobalValue *GV = dyn_cast(V)) return GV->getParent(); return 0; } static SlotCalculator *createSlotCalculator(const Value *V) { assert(!isa(V) && "Can't create an SC for a type!"); if (const Argument *FA = dyn_cast(V)) { return new SlotCalculator(FA->getParent(), true); } else if (const Instruction *I = dyn_cast(V)) { return new SlotCalculator(I->getParent()->getParent(), true); } else if (const BasicBlock *BB = dyn_cast(V)) { return new SlotCalculator(BB->getParent(), true); } else if (const GlobalVariable *GV = dyn_cast(V)){ return new SlotCalculator(GV->getParent(), true); } else if (const Function *Func = dyn_cast(V)) { return new SlotCalculator(Func, true); } return 0; } // getLLVMName - Turn the specified string into an 'LLVM name', which is either // prefixed with % (if the string only contains simple characters) or is // surrounded with ""'s (if it has special chars in it). static std::string getLLVMName(const std::string &Name) { assert(!Name.empty() && "Cannot get empty name!"); // First character cannot start with a number... if (Name[0] >= '0' && Name[0] <= '9') return "\"" + Name + "\""; // Scan to see if we have any characters that are not on the "white list" for (unsigned i = 0, e = Name.size(); i != e; ++i) { char C = Name[i]; assert(C != '"' && "Illegal character in LLVM value name!"); if ((C < 'a' || C > 'z') && (C < 'A' || C > 'Z') && (C < '0' || C > '9') && C != '-' && C != '.' && C != '_') return "\"" + Name + "\""; } // If we get here, then the identifier is legal to use as a "VarID". return "%"+Name; } // If the module has a symbol table, take all global types and stuff their // names into the TypeNames map. // static void fillTypeNameTable(const Module *M, std::map &TypeNames) { if (!M) return; const SymbolTable &ST = M->getSymbolTable(); SymbolTable::const_iterator PI = ST.find(Type::TypeTy); if (PI != ST.end()) { SymbolTable::type_const_iterator I = PI->second.begin(); for (; I != PI->second.end(); ++I) { // As a heuristic, don't insert pointer to primitive types, because // they are used too often to have a single useful name. // const Type *Ty = cast(I->second); if (!isa(Ty) || !cast(Ty)->getElementType()->isPrimitiveType()) TypeNames.insert(std::make_pair(Ty, getLLVMName(I->first))); } } } static std::string calcTypeName(const Type *Ty, std::vector &TypeStack, std::map &TypeNames){ if (Ty->isPrimitiveType()) return Ty->getDescription(); // Base case // Check to see if the type is named. std::map::iterator I = TypeNames.find(Ty); if (I != TypeNames.end()) return I->second; // Check to see if the Type is already on the stack... unsigned Slot = 0, CurSize = TypeStack.size(); while (Slot < CurSize && TypeStack[Slot] != Ty) ++Slot; // Scan for type // This is another base case for the recursion. In this case, we know // that we have looped back to a type that we have previously visited. // Generate the appropriate upreference to handle this. // if (Slot < CurSize) return "\\" + utostr(CurSize-Slot); // Here's the upreference TypeStack.push_back(Ty); // Recursive case: Add us to the stack.. std::string Result; switch (Ty->getPrimitiveID()) { case Type::FunctionTyID: { const FunctionType *FTy = cast(Ty); Result = calcTypeName(FTy->getReturnType(), TypeStack, TypeNames) + " ("; for (FunctionType::ParamTypes::const_iterator I = FTy->getParamTypes().begin(), E = FTy->getParamTypes().end(); I != E; ++I) { if (I != FTy->getParamTypes().begin()) Result += ", "; Result += calcTypeName(*I, TypeStack, TypeNames); } if (FTy->isVarArg()) { if (!FTy->getParamTypes().empty()) Result += ", "; Result += "..."; } Result += ")"; break; } case Type::StructTyID: { const StructType *STy = cast(Ty); Result = "{ "; for (StructType::ElementTypes::const_iterator I = STy->getElementTypes().begin(), E = STy->getElementTypes().end(); I != E; ++I) { if (I != STy->getElementTypes().begin()) Result += ", "; Result += calcTypeName(*I, TypeStack, TypeNames); } Result += " }"; break; } case Type::PointerTyID: Result = calcTypeName(cast(Ty)->getElementType(), TypeStack, TypeNames) + "*"; break; case Type::ArrayTyID: { const ArrayType *ATy = cast(Ty); Result = "[" + utostr(ATy->getNumElements()) + " x "; Result += calcTypeName(ATy->getElementType(), TypeStack, TypeNames) + "]"; break; } case Type::OpaqueTyID: Result = "opaque"; break; default: Result = ""; } TypeStack.pop_back(); // Remove self from stack... return Result; } // printTypeInt - The internal guts of printing out a type that has a // potentially named portion. // static std::ostream &printTypeInt(std::ostream &Out, const Type *Ty, std::map &TypeNames) { // Primitive types always print out their description, regardless of whether // they have been named or not. // if (Ty->isPrimitiveType()) return Out << Ty->getDescription(); // Check to see if the type is named. std::map::iterator I = TypeNames.find(Ty); if (I != TypeNames.end()) return Out << I->second; // Otherwise we have a type that has not been named but is a derived type. // Carefully recurse the type hierarchy to print out any contained symbolic // names. // std::vector TypeStack; std::string TypeName = calcTypeName(Ty, TypeStack, TypeNames); TypeNames.insert(std::make_pair(Ty, TypeName));//Cache type name for later use return Out << TypeName; } // WriteTypeSymbolic - This attempts to write the specified type as a symbolic // type, iff there is an entry in the modules symbol table for the specified // type or one of it's component types. This is slower than a simple x << Type; // std::ostream &WriteTypeSymbolic(std::ostream &Out, const Type *Ty, const Module *M) { Out << " "; // If they want us to print out a type, attempt to make it symbolic if there // is a symbol table in the module... if (M) { std::map TypeNames; fillTypeNameTable(M, TypeNames); return printTypeInt(Out, Ty, TypeNames); } else { return Out << Ty->getDescription(); } } static void WriteConstantInt(std::ostream &Out, const Constant *CV, bool PrintName, std::map &TypeTable, SlotCalculator *Table) { if (const ConstantBool *CB = dyn_cast(CV)) { Out << (CB == ConstantBool::True ? "true" : "false"); } else if (const ConstantSInt *CI = dyn_cast(CV)) { Out << CI->getValue(); } else if (const ConstantUInt *CI = dyn_cast(CV)) { Out << CI->getValue(); } else if (const ConstantFP *CFP = dyn_cast(CV)) { // We would like to output the FP constant value in exponential notation, // but we cannot do this if doing so will lose precision. Check here to // make sure that we only output it in exponential format if we can parse // the value back and get the same value. // std::string StrVal = ftostr(CFP->getValue()); // Check to make sure that the stringized number is not some string like // "Inf" or NaN, that atof will accept, but the lexer will not. Check that // the string matches the "[-+]?[0-9]" regex. // if ((StrVal[0] >= '0' && StrVal[0] <= '9') || ((StrVal[0] == '-' || StrVal[0] == '+') && (StrVal[1] >= '0' && StrVal[1] <= '9'))) // Reparse stringized version! if (atof(StrVal.c_str()) == CFP->getValue()) { Out << StrVal; return; } // Otherwise we could not reparse it to exactly the same value, so we must // output the string in hexadecimal format! // // Behave nicely in the face of C TBAA rules... see: // http://www.nullstone.com/htmls/category/aliastyp.htm // double Val = CFP->getValue(); char *Ptr = (char*)&Val; assert(sizeof(double) == sizeof(uint64_t) && sizeof(double) == 8 && "assuming that double is 64 bits!"); Out << "0x" << utohexstr(*(uint64_t*)Ptr); } else if (const ConstantArray *CA = dyn_cast(CV)) { if (CA->getNumOperands() > 5 && CA->isNullValue()) { Out << "zeroinitializer"; return; } // As a special case, print the array as a string if it is an array of // ubytes or an array of sbytes with positive values. // const Type *ETy = CA->getType()->getElementType(); bool isString = (ETy == Type::SByteTy || ETy == Type::UByteTy); if (ETy == Type::SByteTy) for (unsigned i = 0; i < CA->getNumOperands(); ++i) if (cast(CA->getOperand(i))->getValue() < 0) { isString = false; break; } if (isString) { Out << "c\""; for (unsigned i = 0; i < CA->getNumOperands(); ++i) { unsigned char C = cast(CA->getOperand(i))->getRawValue(); if (isprint(C) && C != '"' && C != '\\') { Out << C; } else { Out << '\\' << (char) ((C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A')) << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A')); } } Out << "\""; } else { // Cannot output in string format... Out << "["; if (CA->getNumOperands()) { Out << " "; printTypeInt(Out, ETy, TypeTable); WriteAsOperandInternal(Out, CA->getOperand(0), PrintName, TypeTable, Table); for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) { Out << ", "; printTypeInt(Out, ETy, TypeTable); WriteAsOperandInternal(Out, CA->getOperand(i), PrintName, TypeTable, Table); } } Out << " ]"; } } else if (const ConstantStruct *CS = dyn_cast(CV)) { if (CS->getNumOperands() > 5 && CS->isNullValue()) { Out << "zeroinitializer"; return; } Out << "{"; if (CS->getNumOperands()) { Out << " "; printTypeInt(Out, CS->getOperand(0)->getType(), TypeTable); WriteAsOperandInternal(Out, CS->getOperand(0), PrintName, TypeTable, Table); for (unsigned i = 1; i < CS->getNumOperands(); i++) { Out << ", "; printTypeInt(Out, CS->getOperand(i)->getType(), TypeTable); WriteAsOperandInternal(Out, CS->getOperand(i), PrintName, TypeTable, Table); } } Out << " }"; } else if (isa(CV)) { Out << "null"; } else if (const ConstantPointerRef *PR = dyn_cast(CV)) { const GlobalValue *V = PR->getValue(); if (V->hasName()) { Out << getLLVMName(V->getName()); } else if (Table) { int Slot = Table->getSlot(V); if (Slot >= 0) Out << "%" << Slot; else Out << ""; } else { Out << ""; } } else if (const ConstantExpr *CE = dyn_cast(CV)) { Out << CE->getOpcodeName() << " ("; for (User::const_op_iterator OI=CE->op_begin(); OI != CE->op_end(); ++OI) { printTypeInt(Out, (*OI)->getType(), TypeTable); WriteAsOperandInternal(Out, *OI, PrintName, TypeTable, Table); if (OI+1 != CE->op_end()) Out << ", "; } if (CE->getOpcode() == Instruction::Cast) { Out << " to "; printTypeInt(Out, CE->getType(), TypeTable); } Out << ")"; } else { Out << ""; } } // WriteAsOperand - Write the name of the specified value out to the specified // ostream. This can be useful when you just want to print int %reg126, not the // whole instruction that generated it. // static void WriteAsOperandInternal(std::ostream &Out, const Value *V, bool PrintName, std::map &TypeTable, SlotCalculator *Table) { Out << " "; if (PrintName && V->hasName()) { Out << getLLVMName(V->getName()); } else { if (const Constant *CV = dyn_cast(V)) { WriteConstantInt(Out, CV, PrintName, TypeTable, Table); } else { int Slot; if (Table) { Slot = Table->getSlot(V); } else { if (const Type *Ty = dyn_cast(V)) { Out << Ty->getDescription(); return; } Table = createSlotCalculator(V); if (Table == 0) { Out << "BAD VALUE TYPE!"; return; } Slot = Table->getSlot(V); delete Table; } if (Slot >= 0) Out << "%" << Slot; else if (PrintName) Out << ""; // Not embedded into a location? } } } // WriteAsOperand - Write the name of the specified value out to the specified // ostream. This can be useful when you just want to print int %reg126, not the // whole instruction that generated it. // std::ostream &WriteAsOperand(std::ostream &Out, const Value *V, bool PrintType, bool PrintName, const Module *Context) { std::map TypeNames; if (Context == 0) Context = getModuleFromVal(V); if (Context) fillTypeNameTable(Context, TypeNames); if (PrintType) printTypeInt(Out, V->getType(), TypeNames); WriteAsOperandInternal(Out, V, PrintName, TypeNames, 0); return Out; } class AssemblyWriter { std::ostream &Out; SlotCalculator &Table; const Module *TheModule; std::map TypeNames; public: inline AssemblyWriter(std::ostream &o, SlotCalculator &Tab, const Module *M) : Out(o), Table(Tab), TheModule(M) { // If the module has a symbol table, take all global types and stuff their // names into the TypeNames map. // fillTypeNameTable(M, TypeNames); } inline void write(const Module *M) { printModule(M); } inline void write(const GlobalVariable *G) { printGlobal(G); } inline void write(const Function *F) { printFunction(F); } inline void write(const BasicBlock *BB) { printBasicBlock(BB); } inline void write(const Instruction *I) { printInstruction(*I); } inline void write(const Constant *CPV) { printConstant(CPV); } inline void write(const Type *Ty) { printType(Ty); } void writeOperand(const Value *Op, bool PrintType, bool PrintName = true); private : void printModule(const Module *M); void printSymbolTable(const SymbolTable &ST); void printConstant(const Constant *CPV); void printGlobal(const GlobalVariable *GV); void printFunction(const Function *F); void printArgument(const Argument *FA); void printBasicBlock(const BasicBlock *BB); void printInstruction(const Instruction &I); // printType - Go to extreme measures to attempt to print out a short, // symbolic version of a type name. // std::ostream &printType(const Type *Ty) { return printTypeInt(Out, Ty, TypeNames); } // printTypeAtLeastOneLevel - Print out one level of the possibly complex type // without considering any symbolic types that we may have equal to it. // std::ostream &printTypeAtLeastOneLevel(const Type *Ty); // printInfoComment - Print a little comment after the instruction indicating // which slot it occupies. void printInfoComment(const Value &V); }; // printTypeAtLeastOneLevel - Print out one level of the possibly complex type // without considering any symbolic types that we may have equal to it. // std::ostream &AssemblyWriter::printTypeAtLeastOneLevel(const Type *Ty) { if (const FunctionType *FTy = dyn_cast(Ty)) { printType(FTy->getReturnType()) << " ("; for (FunctionType::ParamTypes::const_iterator I = FTy->getParamTypes().begin(), E = FTy->getParamTypes().end(); I != E; ++I) { if (I != FTy->getParamTypes().begin()) Out << ", "; printType(*I); } if (FTy->isVarArg()) { if (!FTy->getParamTypes().empty()) Out << ", "; Out << "..."; } Out << ")"; } else if (const StructType *STy = dyn_cast(Ty)) { Out << "{ "; for (StructType::ElementTypes::const_iterator I = STy->getElementTypes().begin(), E = STy->getElementTypes().end(); I != E; ++I) { if (I != STy->getElementTypes().begin()) Out << ", "; printType(*I); } Out << " }"; } else if (const PointerType *PTy = dyn_cast(Ty)) { printType(PTy->getElementType()) << "*"; } else if (const ArrayType *ATy = dyn_cast(Ty)) { Out << "[" << ATy->getNumElements() << " x "; printType(ATy->getElementType()) << "]"; } else if (const OpaqueType *OTy = dyn_cast(Ty)) { Out << "opaque"; } else { if (!Ty->isPrimitiveType()) Out << ""; printType(Ty); } return Out; } void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType, bool PrintName) { if (PrintType) { Out << " "; printType(Operand->getType()); } WriteAsOperandInternal(Out, Operand, PrintName, TypeNames, &Table); } void AssemblyWriter::printModule(const Module *M) { switch (M->getEndianness()) { case Module::LittleEndian: Out << "target endian = little\n"; break; case Module::BigEndian: Out << "target endian = big\n"; break; case Module::AnyEndianness: break; } switch (M->getPointerSize()) { case Module::Pointer32: Out << "target pointersize = 32\n"; break; case Module::Pointer64: Out << "target pointersize = 64\n"; break; case Module::AnyPointerSize: break; } // Loop over the symbol table, emitting all named constants... printSymbolTable(M->getSymbolTable()); for (Module::const_giterator I = M->gbegin(), E = M->gend(); I != E; ++I) printGlobal(I); Out << "\nimplementation ; Functions:\n"; // Output all of the functions... for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) printFunction(I); } void AssemblyWriter::printGlobal(const GlobalVariable *GV) { if (GV->hasName()) Out << getLLVMName(GV->getName()) << " = "; if (!GV->hasInitializer()) Out << "external "; else switch (GV->getLinkage()) { case GlobalValue::InternalLinkage: Out << "internal "; break; case GlobalValue::LinkOnceLinkage: Out << "linkonce "; break; case GlobalValue::WeakLinkage: Out << "weak "; break; case GlobalValue::AppendingLinkage: Out << "appending "; break; case GlobalValue::ExternalLinkage: break; } Out << (GV->isConstant() ? "constant " : "global "); printType(GV->getType()->getElementType()); if (GV->hasInitializer()) writeOperand(GV->getInitializer(), false, false); printInfoComment(*GV); Out << "\n"; } // printSymbolTable - Run through symbol table looking for named constants // if a named constant is found, emit it's declaration... // void AssemblyWriter::printSymbolTable(const SymbolTable &ST) { for (SymbolTable::const_iterator TI = ST.begin(); TI != ST.end(); ++TI) { SymbolTable::type_const_iterator I = ST.type_begin(TI->first); SymbolTable::type_const_iterator End = ST.type_end(TI->first); for (; I != End; ++I) { const Value *V = I->second; if (const Constant *CPV = dyn_cast(V)) { printConstant(CPV); } else if (const Type *Ty = dyn_cast(V)) { Out << "\t" << getLLVMName(I->first) << " = type "; // Make sure we print out at least one level of the type structure, so // that we do not get %FILE = type %FILE // printTypeAtLeastOneLevel(Ty) << "\n"; } } } } // printConstant - Print out a constant pool entry... // void AssemblyWriter::printConstant(const Constant *CPV) { // Don't print out unnamed constants, they will be inlined if (!CPV->hasName()) return; // Print out name... Out << "\t" << getLLVMName(CPV->getName()) << " ="; // Write the value out now... writeOperand(CPV, true, false); printInfoComment(*CPV); Out << "\n"; } // printFunction - Print all aspects of a function. // void AssemblyWriter::printFunction(const Function *F) { // Print out the return type and name... Out << "\n"; if (F->isExternal()) Out << "declare "; else switch (F->getLinkage()) { case GlobalValue::InternalLinkage: Out << "internal "; break; case GlobalValue::LinkOnceLinkage: Out << "linkonce "; break; case GlobalValue::WeakLinkage: Out << "weak "; break; case GlobalValue::AppendingLinkage: Out << "appending "; break; case GlobalValue::ExternalLinkage: break; } printType(F->getReturnType()) << " "; if (!F->getName().empty()) Out << getLLVMName(F->getName()); else Out << "\"\""; Out << "("; Table.incorporateFunction(F); // Loop over the arguments, printing them... const FunctionType *FT = F->getFunctionType(); for(Function::const_aiterator I = F->abegin(), E = F->aend(); I != E; ++I) printArgument(I); // Finish printing arguments... if (FT->isVarArg()) { if (FT->getParamTypes().size()) Out << ", "; Out << "..."; // Output varargs portion of signature! } Out << ")"; if (F->isExternal()) { Out << "\n"; } else { Out << " {"; // Output all of its basic blocks... for the function for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I) printBasicBlock(I); Out << "}\n"; } Table.purgeFunction(); } // printArgument - This member is called for every argument that // is passed into the function. Simply print it out // void AssemblyWriter::printArgument(const Argument *Arg) { // Insert commas as we go... the first arg doesn't get a comma if (Arg != &Arg->getParent()->afront()) Out << ", "; // Output type... printType(Arg->getType()); // Output name, if available... if (Arg->hasName()) Out << " " << getLLVMName(Arg->getName()); else if (Table.getSlot(Arg) < 0) Out << ""; } // printBasicBlock - This member is called for each basic block in a method. // void AssemblyWriter::printBasicBlock(const BasicBlock *BB) { if (BB->hasName()) { // Print out the label if it exists... Out << "\n" << BB->getName() << ":"; } else if (!BB->use_empty()) { // Don't print block # of no uses... int Slot = Table.getSlot(BB); Out << "\n;