mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-24 03:25:00 +00:00
x86: preserve flags when folding atomic operations
D4796 taught LLVM to fold some atomic integer operations into a single instruction. The folding patterns did not declare that the resulting instructions clobber EFLAGS. I fixed part of this issue in D13680 but missed INC/DEC. This patch adds the missing EFLAGS definition for INC and DEC.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@250438 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
8b9371cdcf
commit
4b6405d130
@ -821,24 +821,29 @@ multiclass RELEASE_UNOP<dag dag8, dag dag16, dag dag32, dag dag64> {
|
||||
[(atomic_store_64 addr:$dst, dag64)]>;
|
||||
}
|
||||
|
||||
defm RELEASE_INC : RELEASE_UNOP<
|
||||
(add (atomic_load_8 addr:$dst), (i8 1)),
|
||||
(add (atomic_load_16 addr:$dst), (i16 1)),
|
||||
(add (atomic_load_32 addr:$dst), (i32 1)),
|
||||
(add (atomic_load_64 addr:$dst), (i64 1))>, Requires<[NotSlowIncDec]>;
|
||||
defm RELEASE_DEC : RELEASE_UNOP<
|
||||
(add (atomic_load_8 addr:$dst), (i8 -1)),
|
||||
(add (atomic_load_16 addr:$dst), (i16 -1)),
|
||||
(add (atomic_load_32 addr:$dst), (i32 -1)),
|
||||
(add (atomic_load_64 addr:$dst), (i64 -1))>, Requires<[NotSlowIncDec]>;
|
||||
let Defs = [EFLAGS] in {
|
||||
defm RELEASE_INC : RELEASE_UNOP<
|
||||
(add (atomic_load_8 addr:$dst), (i8 1)),
|
||||
(add (atomic_load_16 addr:$dst), (i16 1)),
|
||||
(add (atomic_load_32 addr:$dst), (i32 1)),
|
||||
(add (atomic_load_64 addr:$dst), (i64 1))>, Requires<[NotSlowIncDec]>;
|
||||
defm RELEASE_DEC : RELEASE_UNOP<
|
||||
(add (atomic_load_8 addr:$dst), (i8 -1)),
|
||||
(add (atomic_load_16 addr:$dst), (i16 -1)),
|
||||
(add (atomic_load_32 addr:$dst), (i32 -1)),
|
||||
(add (atomic_load_64 addr:$dst), (i64 -1))>, Requires<[NotSlowIncDec]>;
|
||||
}
|
||||
/*
|
||||
TODO: These don't work because the type inference of TableGen fails.
|
||||
TODO: find a way to fix it.
|
||||
defm RELEASE_NEG : RELEASE_UNOP<
|
||||
(ineg (atomic_load_8 addr:$dst)),
|
||||
(ineg (atomic_load_16 addr:$dst)),
|
||||
(ineg (atomic_load_32 addr:$dst)),
|
||||
(ineg (atomic_load_64 addr:$dst))>;
|
||||
let Defs = [EFLAGS] in {
|
||||
defm RELEASE_NEG : RELEASE_UNOP<
|
||||
(ineg (atomic_load_8 addr:$dst)),
|
||||
(ineg (atomic_load_16 addr:$dst)),
|
||||
(ineg (atomic_load_32 addr:$dst)),
|
||||
(ineg (atomic_load_64 addr:$dst))>;
|
||||
}
|
||||
// NOT doesn't set flags.
|
||||
defm RELEASE_NOT : RELEASE_UNOP<
|
||||
(not (atomic_load_8 addr:$dst)),
|
||||
(not (atomic_load_16 addr:$dst)),
|
||||
|
@ -3,8 +3,8 @@
|
||||
|
||||
; Make sure that flags are properly preserved despite atomic optimizations.
|
||||
|
||||
define i32 @atomic_and_flags(i8* %p, i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: atomic_and_flags:
|
||||
define i32 @atomic_and_flags_1(i8* %p, i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: atomic_and_flags_1:
|
||||
|
||||
; Generate flags value, and use it.
|
||||
; CHECK: cmpl
|
||||
@ -14,9 +14,9 @@ define i32 @atomic_and_flags(i8* %p, i32 %a, i32 %b) {
|
||||
|
||||
L1:
|
||||
; The following pattern will get folded.
|
||||
; CHECK: addb
|
||||
; CHECK: incb
|
||||
%1 = load atomic i8, i8* %p seq_cst, align 1
|
||||
%2 = add i8 %1, 2
|
||||
%2 = add i8 %1, 1 ; This forces the INC instruction to be generated.
|
||||
store atomic i8 %2, i8* %p release, align 1
|
||||
|
||||
; Use the comparison result again. We need to rematerialize the comparison
|
||||
@ -36,3 +36,26 @@ L3:
|
||||
L4:
|
||||
ret i32 4
|
||||
}
|
||||
|
||||
; Same as above, but using 2 as immediate to avoid the INC instruction.
|
||||
; Test function: compares %a with %b once (%cmp) and branches on that same
; value twice -- first to L1/L2, then, after an atomic update of the byte at
; %p, to L3/L4. Returns 2, 3 or 4 depending on the path taken.
; The check lines require a cmpl/jne pair before the addb and a second
; cmpl/jne pair directly after it, i.e. the comparison is expected to be
; emitted again after the addb rather than its earlier flags being reused.
define i32 @atomic_and_flags_2(i8* %p, i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: atomic_and_flags_2:
|
||||
; CHECK: cmpl
|
||||
; CHECK-NEXT: jne
|
||||
%cmp = icmp eq i32 %a, %b
|
||||
br i1 %cmp, label %L1, label %L2
|
||||
L1:
|
||||
; Atomic load (seq_cst), add 2, atomic store (release) on the same byte at %p.
; The check line below expects this sequence to show up as an addb.
; CHECK: addb
|
||||
%1 = load atomic i8, i8* %p seq_cst, align 1
|
||||
%2 = add i8 %1, 2
|
||||
store atomic i8 %2, i8* %p release, align 1
|
||||
; Second use of %cmp: the compare and branch must follow the addb immediately.
; CHECK-NEXT: cmpl
|
||||
; CHECK-NEXT: jne
|
||||
br i1 %cmp, label %L3, label %L4
|
||||
; Reached when %a != %b (first branch not taken to L1).
L2:
|
||||
ret i32 2
|
||||
; Reached from L1 when %cmp is true.
L3:
|
||||
ret i32 3
|
||||
; Reached from L1 when %cmp is false.
L4:
|
||||
ret i32 4
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user