From 04a5b0845c0a5c51fd36ea3b87b070d448c4cd35 Mon Sep 17 00:00:00 2001
From: JF Bastien
Date: Thu, 15 Oct 2015 18:24:52 +0000
Subject: [PATCH] x86: preserve flags when folding atomic operations

D4796 taught LLVM to fold some atomic integer operations into a single
instruction. The pattern was unaware that the instructions clobbered
flags. I fixed some of this issue in D13680 but had missed INC/DEC.

This patch adds the missing EFLAGS definition.

llvm-svn: 250438
---
NOTE(review): the line structure of this patch was reconstructed from a
copy whose newlines and runs of spaces had been collapsed. Hunk line counts
and the diffstat were re-checked and agree with the headers, but the exact
whitespace of context lines is unverified; apply with
`git apply --ignore-whitespace`, or regenerate the patch from llvm-svn
r250438. The author's e-mail address is missing from the From: header and
the heading after the first hunk marker appears truncated; neither could be
recovered from this copy.

 lib/Target/X86/X86InstrCompiler.td | 35 +++++++++++++++++-------------
 test/CodeGen/X86/atomic-flags.ll   | 31 ++++++++++++++++++++++----
 2 files changed, 47 insertions(+), 19 deletions(-)

diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 4c793e9e4d8..3e16eedfe70 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -821,24 +821,29 @@ multiclass RELEASE_UNOP {
         [(atomic_store_64 addr:$dst, dag64)]>;
 }
 
-defm RELEASE_INC : RELEASE_UNOP<
-    (add (atomic_load_8 addr:$dst), (i8 1)),
-    (add (atomic_load_16 addr:$dst), (i16 1)),
-    (add (atomic_load_32 addr:$dst), (i32 1)),
-    (add (atomic_load_64 addr:$dst), (i64 1))>, Requires<[NotSlowIncDec]>;
-defm RELEASE_DEC : RELEASE_UNOP<
-    (add (atomic_load_8 addr:$dst), (i8 -1)),
-    (add (atomic_load_16 addr:$dst), (i16 -1)),
-    (add (atomic_load_32 addr:$dst), (i32 -1)),
-    (add (atomic_load_64 addr:$dst), (i64 -1))>, Requires<[NotSlowIncDec]>;
+let Defs = [EFLAGS] in {
+  defm RELEASE_INC : RELEASE_UNOP<
+      (add (atomic_load_8 addr:$dst), (i8 1)),
+      (add (atomic_load_16 addr:$dst), (i16 1)),
+      (add (atomic_load_32 addr:$dst), (i32 1)),
+      (add (atomic_load_64 addr:$dst), (i64 1))>, Requires<[NotSlowIncDec]>;
+  defm RELEASE_DEC : RELEASE_UNOP<
+      (add (atomic_load_8 addr:$dst), (i8 -1)),
+      (add (atomic_load_16 addr:$dst), (i16 -1)),
+      (add (atomic_load_32 addr:$dst), (i32 -1)),
+      (add (atomic_load_64 addr:$dst), (i64 -1))>, Requires<[NotSlowIncDec]>;
+}
 /*
 TODO: These don't work because the type inference of TableGen fails.
 TODO: find a way to fix it.
-defm RELEASE_NEG : RELEASE_UNOP<
-    (ineg (atomic_load_8 addr:$dst)),
-    (ineg (atomic_load_16 addr:$dst)),
-    (ineg (atomic_load_32 addr:$dst)),
-    (ineg (atomic_load_64 addr:$dst))>;
+let Defs = [EFLAGS] in {
+  defm RELEASE_NEG : RELEASE_UNOP<
+      (ineg (atomic_load_8 addr:$dst)),
+      (ineg (atomic_load_16 addr:$dst)),
+      (ineg (atomic_load_32 addr:$dst)),
+      (ineg (atomic_load_64 addr:$dst))>;
+}
+// NOT doesn't set flags.
 defm RELEASE_NOT : RELEASE_UNOP<
     (not (atomic_load_8 addr:$dst)),
     (not (atomic_load_16 addr:$dst)),
diff --git a/test/CodeGen/X86/atomic-flags.ll b/test/CodeGen/X86/atomic-flags.ll
index 141a7690dba..e0c4a915965 100644
--- a/test/CodeGen/X86/atomic-flags.ll
+++ b/test/CodeGen/X86/atomic-flags.ll
@@ -3,8 +3,8 @@
 
 ; Make sure that flags are properly preserved despite atomic optimizations.
 
-define i32 @atomic_and_flags(i8* %p, i32 %a, i32 %b) {
-; CHECK-LABEL: atomic_and_flags:
+define i32 @atomic_and_flags_1(i8* %p, i32 %a, i32 %b) {
+; CHECK-LABEL: atomic_and_flags_1:
 
   ; Generate flags value, and use it.
   ; CHECK: cmpl
@@ -14,9 +14,9 @@ define i32 @atomic_and_flags(i8* %p, i32 %a, i32 %b) {
 
 L1:
   ; The following pattern will get folded.
-  ; CHECK: addb
+  ; CHECK: incb
   %1 = load atomic i8, i8* %p seq_cst, align 1
-  %2 = add i8 %1, 2
+  %2 = add i8 %1, 1 ; This forces the INC instruction to be generated.
   store atomic i8 %2, i8* %p release, align 1
 
   ; Use the comparison result again. We need to rematerialize the comparison
@@ -36,3 +36,26 @@ L3:
 L4:
   ret i32 4
 }
+
+; Same as above, but using 2 as immediate to avoid the INC instruction.
+define i32 @atomic_and_flags_2(i8* %p, i32 %a, i32 %b) {
+; CHECK-LABEL: atomic_and_flags_2:
+  ; CHECK: cmpl
+  ; CHECK-NEXT: jne
+  %cmp = icmp eq i32 %a, %b
+  br i1 %cmp, label %L1, label %L2
+L1:
+  ; CHECK: addb
+  %1 = load atomic i8, i8* %p seq_cst, align 1
+  %2 = add i8 %1, 2
+  store atomic i8 %2, i8* %p release, align 1
+  ; CHECK-NEXT: cmpl
+  ; CHECK-NEXT: jne
+  br i1 %cmp, label %L3, label %L4
+L2:
+  ret i32 2
+L3:
+  ret i32 3
+L4:
+  ret i32 4
+}