mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-24 20:29:53 +00:00
cd54bb4874
There are several requirements that ended up with this design; 1. Matching bitreversals is too heavyweight for InstCombine and doesn't really need to be done so early. 2. Bitreversals and byteswaps are very related in their matching logic. 3. We want to implement support for matching more advanced bswap/bitreverse patterns like partial bswaps/bitreverses. 4. Bswaps are best matched early in InstCombine. The result of these is that a new utility function is created in Transforms/Utils/Local.h that can be configured to search for bswaps, bitreverses or both. InstCombine uses it to find only bswaps, CGP uses it to find only bitreversals. We can then extend the matching logic in one place only. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@257875 91177308-0d34-0410-b5e6-96231b3b80d8
54 lines
1.9 KiB
LLVM
54 lines
1.9 KiB
LLVM
; RUN: opt < %s -loop-unroll -instcombine -S | FileCheck %s
|
|
|
|
; This test is a worst-case scenario for bitreversal/byteswap detection.
|
|
; After loop unrolling (the unrolled loop is unreadably large so it has been kept
|
|
; rolled here), we have a binary tree of OR operands (as bitreversal detection
|
|
; looks straight through shifts):
|
|
;
|
|
; OR
|
|
; | \
|
|
; | LSHR
|
|
; | /
|
|
; OR
|
|
; | \
|
|
; | LSHR
|
|
; | /
|
|
; OR
|
|
;
|
|
; This results in exponential runtime. The loop here is 32 iterations which will
|
|
; totally hang if we don't deal with this case cleverly.
|
|
|
|
@b = common global i32 0, align 4
|
|
|
|
; CHECK: define i32 @fn1
|
|
define i32 @fn1() #0 {
|
|
entry:
|
|
%b.promoted = load i32, i32* @b, align 4, !tbaa !2
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %for.body, %entry
|
|
%or4 = phi i32 [ %b.promoted, %entry ], [ %or, %for.body ]
|
|
%i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
|
%shr = lshr i32 %or4, 1
|
|
%or = or i32 %shr, %or4
|
|
%inc = add nuw nsw i32 %i.03, 1
|
|
%exitcond = icmp eq i32 %inc, 32
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
store i32 %or, i32* @b, align 4, !tbaa !2
|
|
ret i32 undef
|
|
}
|
|
|
|
attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
|
|
!llvm.module.flags = !{!0}
|
|
!llvm.ident = !{!1}
|
|
|
|
!0 = !{i32 1, !"PIC Level", i32 2}
|
|
!1 = !{!"clang version 3.8.0 (http://llvm.org/git/clang.git eb70f4e9cc9a4dc3dd57b032fb858d56b4b64a0e)"}
|
|
!2 = !{!3, !3, i64 0}
|
|
!3 = !{!"int", !4, i64 0}
|
|
!4 = !{!"omnipotent char", !5, i64 0}
|
|
!5 = !{!"Simple C/C++ TBAA"}
|