mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-30 17:21:10 +00:00
[AVR] Expand large shifts early in IR
This patch makes sure shift instructions such as this one: %result = shl i32 %n, %amount are expanded just before the IR to SelectionDAG conversion to a loop so that calls to non-existing library functions such as __ashlsi3 are avoided. The generated code is currently pretty bad but there's a lot of room for improvement: the shift itself can be done in just four instructions. Differential Revision: https://reviews.llvm.org/D96677
This commit is contained in:
parent
431a941465
commit
6aa9e746eb
@ -22,6 +22,7 @@ namespace llvm {
|
||||
class AVRTargetMachine;
|
||||
class FunctionPass;
|
||||
|
||||
Pass *createAVRShiftExpandPass();
|
||||
FunctionPass *createAVRISelDag(AVRTargetMachine &TM,
|
||||
CodeGenOpt::Level OptLevel);
|
||||
FunctionPass *createAVRExpandPseudoPass();
|
||||
@ -30,6 +31,7 @@ FunctionPass *createAVRRelaxMemPass();
|
||||
FunctionPass *createAVRDynAllocaSRPass();
|
||||
FunctionPass *createAVRBranchSelectionPass();
|
||||
|
||||
void initializeAVRShiftExpandPass(PassRegistry &);
|
||||
void initializeAVRExpandPseudoPass(PassRegistry&);
|
||||
void initializeAVRRelaxMemPass(PassRegistry&);
|
||||
|
||||
|
147
llvm/lib/Target/AVR/AVRShiftExpand.cpp
Normal file
147
llvm/lib/Target/AVR/AVRShiftExpand.cpp
Normal file
@ -0,0 +1,147 @@
|
||||
//===- AVRShift.cpp - Shift Expansion Pass --------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
/// Expand 32-bit shift instructions (shl, lshr, ashr) to inline loops, just
|
||||
/// like avr-gcc. This must be done in IR because otherwise the type legalizer
|
||||
/// will turn 32-bit shifts into (non-existing) library calls such as __ashlsi3.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AVR.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/InstIterator.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
class AVRShiftExpand : public FunctionPass {
|
||||
public:
|
||||
static char ID;
|
||||
|
||||
AVRShiftExpand() : FunctionPass(ID) {}
|
||||
|
||||
bool runOnFunction(Function &F) override;
|
||||
|
||||
StringRef getPassName() const override { return "AVR Shift Expansion"; }
|
||||
|
||||
private:
|
||||
void expand(BinaryOperator *BI);
|
||||
};
|
||||
|
||||
} // end of anonymous namespace
|
||||
|
||||
char AVRShiftExpand::ID = 0;
|
||||
|
||||
INITIALIZE_PASS(AVRShiftExpand, "avr-shift-expand", "AVR Shift Expansion",
|
||||
false, false)
|
||||
|
||||
Pass *llvm::createAVRShiftExpandPass() { return new AVRShiftExpand(); }
|
||||
|
||||
bool AVRShiftExpand::runOnFunction(Function &F) {
|
||||
SmallVector<BinaryOperator *, 1> ShiftInsts;
|
||||
auto &Ctx = F.getContext();
|
||||
for (Instruction &I : instructions(F)) {
|
||||
if (!I.isShift())
|
||||
// Only expand shift instructions (shl, lshr, ashr).
|
||||
continue;
|
||||
if (I.getType() != Type::getInt32Ty(Ctx))
|
||||
// Only expand plain i32 types.
|
||||
continue;
|
||||
if (isa<ConstantInt>(I.getOperand(1)))
|
||||
// Only expand when the shift amount is not known.
|
||||
// Known shift amounts are (currently) better expanded inline.
|
||||
continue;
|
||||
ShiftInsts.push_back(cast<BinaryOperator>(&I));
|
||||
}
|
||||
|
||||
// The expanding itself needs to be done separately as expand() will remove
|
||||
// these instructions. Removing instructions while iterating over a basic
|
||||
// block is not a great idea.
|
||||
for (auto *I : ShiftInsts) {
|
||||
expand(I);
|
||||
}
|
||||
|
||||
// Return whether this function expanded any shift instructions.
|
||||
return ShiftInsts.size() > 0;
|
||||
}
|
||||
|
||||
void AVRShiftExpand::expand(BinaryOperator *BI) {
|
||||
auto &Ctx = BI->getContext();
|
||||
IRBuilder<> Builder(BI);
|
||||
Type *Int32Ty = Type::getInt32Ty(Ctx);
|
||||
Type *Int8Ty = Type::getInt8Ty(Ctx);
|
||||
Value *Int8Zero = ConstantInt::get(Int8Ty, 0);
|
||||
|
||||
// Split the current basic block at the point of the existing shift
|
||||
// instruction and insert a new basic block for the loop.
|
||||
BasicBlock *BB = BI->getParent();
|
||||
Function *F = BB->getParent();
|
||||
BasicBlock *EndBB = BB->splitBasicBlock(BI, "shift.done");
|
||||
BasicBlock *LoopBB = BasicBlock::Create(Ctx, "shift.loop", F, EndBB);
|
||||
|
||||
// Truncate the shift amount to i8, which is trivially lowered to a single
|
||||
// AVR register.
|
||||
Builder.SetInsertPoint(&BB->back());
|
||||
Value *ShiftAmount = Builder.CreateTrunc(BI->getOperand(1), Int8Ty);
|
||||
|
||||
// Replace the unconditional branch that splitBasicBlock created with a
|
||||
// conditional branch.
|
||||
Value *Cmp1 = Builder.CreateICmpEQ(ShiftAmount, Int8Zero);
|
||||
Builder.CreateCondBr(Cmp1, EndBB, LoopBB);
|
||||
BB->back().eraseFromParent();
|
||||
|
||||
// Create the loop body starting with PHI nodes.
|
||||
Builder.SetInsertPoint(LoopBB);
|
||||
PHINode *ShiftAmountPHI = Builder.CreatePHI(Int8Ty, 2);
|
||||
ShiftAmountPHI->addIncoming(ShiftAmount, BB);
|
||||
PHINode *ValuePHI = Builder.CreatePHI(Int32Ty, 2);
|
||||
ValuePHI->addIncoming(BI->getOperand(0), BB);
|
||||
|
||||
// Subtract the shift amount by one, as we're shifting one this loop
|
||||
// iteration.
|
||||
Value *ShiftAmountSub =
|
||||
Builder.CreateSub(ShiftAmountPHI, ConstantInt::get(Int8Ty, 1));
|
||||
ShiftAmountPHI->addIncoming(ShiftAmountSub, LoopBB);
|
||||
|
||||
// Emit the actual shift instruction. The difference is that this shift
|
||||
// instruction has a constant shift amount, which can be emitted inline
|
||||
// without a library call.
|
||||
Value *ValueShifted;
|
||||
switch (BI->getOpcode()) {
|
||||
case Instruction::Shl:
|
||||
ValueShifted = Builder.CreateShl(ValuePHI, ConstantInt::get(Int32Ty, 1));
|
||||
break;
|
||||
case Instruction::LShr:
|
||||
ValueShifted = Builder.CreateLShr(ValuePHI, ConstantInt::get(Int32Ty, 1));
|
||||
break;
|
||||
case Instruction::AShr:
|
||||
ValueShifted = Builder.CreateAShr(ValuePHI, ConstantInt::get(Int32Ty, 1));
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("asked to expand an instruction that is not a shift");
|
||||
}
|
||||
ValuePHI->addIncoming(ValueShifted, LoopBB);
|
||||
|
||||
// Branch to either the loop again (if there is more to shift) or to the
|
||||
// basic block after the loop (if all bits are shifted).
|
||||
Value *Cmp2 = Builder.CreateICmpEQ(ShiftAmountSub, Int8Zero);
|
||||
Builder.CreateCondBr(Cmp2, EndBB, LoopBB);
|
||||
|
||||
// Collect the resulting value. This is necessary in the IR but won't produce
|
||||
// any actual instructions.
|
||||
Builder.SetInsertPoint(BI);
|
||||
PHINode *Result = Builder.CreatePHI(Int32Ty, 2);
|
||||
Result->addIncoming(BI->getOperand(0), BB);
|
||||
Result->addIncoming(ValueShifted, LoopBB);
|
||||
|
||||
// Replace the original shift instruction.
|
||||
BI->replaceAllUsesWith(Result);
|
||||
BI->eraseFromParent();
|
||||
}
|
@ -65,6 +65,7 @@ public:
|
||||
return getTM<AVRTargetMachine>();
|
||||
}
|
||||
|
||||
void addIRPasses() override;
|
||||
bool addInstSelector() override;
|
||||
void addPreSched2() override;
|
||||
void addPreEmitPass() override;
|
||||
@ -76,6 +77,15 @@ TargetPassConfig *AVRTargetMachine::createPassConfig(PassManagerBase &PM) {
|
||||
return new AVRPassConfig(*this, PM);
|
||||
}
|
||||
|
||||
void AVRPassConfig::addIRPasses() {
|
||||
// Expand instructions like
|
||||
// %result = shl i32 %n, %amount
|
||||
// to a loop so that library calls are avoided.
|
||||
addPass(createAVRShiftExpandPass());
|
||||
|
||||
TargetPassConfig::addIRPasses();
|
||||
}
|
||||
|
||||
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRTarget() {
|
||||
// Register the target.
|
||||
RegisterTargetMachine<AVRTargetMachine> X(getTheAVRTarget());
|
||||
@ -83,6 +93,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRTarget() {
|
||||
auto &PR = *PassRegistry::getPassRegistry();
|
||||
initializeAVRExpandPseudoPass(PR);
|
||||
initializeAVRRelaxMemPass(PR);
|
||||
initializeAVRShiftExpandPass(PR);
|
||||
}
|
||||
|
||||
const AVRSubtarget *AVRTargetMachine::getSubtargetImpl() const {
|
||||
|
@ -24,6 +24,7 @@ add_llvm_target(AVRCodeGen
|
||||
AVRMCInstLower.cpp
|
||||
AVRRelaxMemOperations.cpp
|
||||
AVRRegisterInfo.cpp
|
||||
AVRShiftExpand.cpp
|
||||
AVRSubtarget.cpp
|
||||
AVRTargetMachine.cpp
|
||||
AVRTargetObjectFile.cpp
|
||||
|
89
llvm/test/CodeGen/AVR/shift-expand.ll
Normal file
89
llvm/test/CodeGen/AVR/shift-expand.ll
Normal file
@ -0,0 +1,89 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -avr-shift-expand -S %s -o - | FileCheck %s
|
||||
|
||||
; The avr-shift-expand pass expands large shifts with a non-constant shift
|
||||
; amount to a loop. These loops avoid generating a (non-existing) builtin such
|
||||
; as __ashlsi3.
|
||||
|
||||
target datalayout = "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8"
|
||||
target triple = "avr"
|
||||
|
||||
define i32 @shl(i32 %value, i32 %amount) addrspace(1) {
|
||||
; CHECK-LABEL: @shl(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[AMOUNT:%.*]] to i8
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
|
||||
; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
|
||||
; CHECK: shift.loop:
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ]
|
||||
; CHECK-NEXT: [[TMP5]] = sub i8 [[TMP3]], 1
|
||||
; CHECK-NEXT: [[TMP6]] = shl i32 [[TMP4]], 1
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0
|
||||
; CHECK-NEXT: br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
|
||||
; CHECK: shift.done:
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ]
|
||||
; CHECK-NEXT: ret i32 [[TMP8]]
|
||||
;
|
||||
%result = shl i32 %value, %amount
|
||||
ret i32 %result
|
||||
}
|
||||
|
||||
define i32 @lshr(i32 %value, i32 %amount) addrspace(1) {
|
||||
; CHECK-LABEL: @lshr(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[AMOUNT:%.*]] to i8
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
|
||||
; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
|
||||
; CHECK: shift.loop:
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ]
|
||||
; CHECK-NEXT: [[TMP5]] = sub i8 [[TMP3]], 1
|
||||
; CHECK-NEXT: [[TMP6]] = lshr i32 [[TMP4]], 1
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0
|
||||
; CHECK-NEXT: br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
|
||||
; CHECK: shift.done:
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ]
|
||||
; CHECK-NEXT: ret i32 [[TMP8]]
|
||||
;
|
||||
%result = lshr i32 %value, %amount
|
||||
ret i32 %result
|
||||
}
|
||||
|
||||
define i32 @ashr(i32 %0, i32 %1) addrspace(1) {
|
||||
; CHECK-LABEL: @ashr(
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1:%.*]] to i8
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i8 [[TMP3]], 0
|
||||
; CHECK-NEXT: br i1 [[TMP4]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
|
||||
; CHECK: shift.loop:
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP3]], [[TMP2:%.*]] ], [ [[TMP7:%.*]], [[SHIFT_LOOP]] ]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = phi i32 [ [[TMP0:%.*]], [[TMP2]] ], [ [[TMP8:%.*]], [[SHIFT_LOOP]] ]
|
||||
; CHECK-NEXT: [[TMP7]] = sub i8 [[TMP5]], 1
|
||||
; CHECK-NEXT: [[TMP8]] = ashr i32 [[TMP6]], 1
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP7]], 0
|
||||
; CHECK-NEXT: br i1 [[TMP9]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
|
||||
; CHECK: shift.done:
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP0]], [[TMP2]] ], [ [[TMP8]], [[SHIFT_LOOP]] ]
|
||||
; CHECK-NEXT: ret i32 [[TMP10]]
|
||||
;
|
||||
%3 = ashr i32 %0, %1
|
||||
ret i32 %3
|
||||
}
|
||||
|
||||
; This function is not modified because it is not an i32.
|
||||
define i40 @shl40(i40 %value, i40 %amount) addrspace(1) {
|
||||
; CHECK-LABEL: @shl40(
|
||||
; CHECK-NEXT: [[RESULT:%.*]] = shl i40 [[VALUE:%.*]], [[AMOUNT:%.*]]
|
||||
; CHECK-NEXT: ret i40 [[RESULT]]
|
||||
;
|
||||
%result = shl i40 %value, %amount
|
||||
ret i40 %result
|
||||
}
|
||||
|
||||
; This function isn't either, although perhaps it should.
|
||||
define i24 @shl24(i24 %value, i24 %amount) addrspace(1) {
|
||||
; CHECK-LABEL: @shl24(
|
||||
; CHECK-NEXT: [[RESULT:%.*]] = shl i24 [[VALUE:%.*]], [[AMOUNT:%.*]]
|
||||
; CHECK-NEXT: ret i24 [[RESULT]]
|
||||
;
|
||||
%result = shl i24 %value, %amount
|
||||
ret i24 %result
|
||||
}
|
Loading…
Reference in New Issue
Block a user