From 1d928ef8811578916ab00c1273409d55a8e2d40f Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@gmail.com>
Date: Tue, 10 Jan 2017 06:01:16 +0000
Subject: [PATCH] AMD family 17h (znver1) enablement

Summary:
This patch enables the following
1. AMD family 17h architecture using "znver1" tune flag (-march, -mcpu).
2. ISAs that are enabled for "znver1" architecture.
3. Checks ADX isa from cpuid to identify "znver1" flag when -march=native is used.
4. ISAs FMA4, XOP are disabled as they are dropped from amdfam17.
5. For the time being, it uses the btver2 scheduler model.
6. Test file is updated to check this flag.

This item is linked to clang review item https://reviews.llvm.org/D28018

Patch by Ganesh Gopalasubramanian

Reviewers: RKSimon, craig.topper

Subscribers: vprasad, RKSimon, ashutosh.nema, llvm-commits

Differential Revision: https://reviews.llvm.org/D28017

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291543 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Support/Host.cpp                         | 19 +++++++++++
 lib/Target/X86/X86.td                        | 36 ++++++++++++++++++++
 test/CodeGen/X86/cpus.ll                     |  1 +
 test/CodeGen/X86/lzcnt-zext-cmp.ll           |  2 ++
 test/CodeGen/X86/slow-unaligned-mem.ll       |  1 +
 test/CodeGen/X86/x86-64-double-shifts-var.ll |  1 +
 6 files changed, 60 insertions(+)

diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index 8a09589aa88..d1b40412a6f 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -111,6 +111,7 @@ enum ProcessorTypes {
   AMDATHLON,
   AMDFAM14H,
   AMDFAM16H,
+  AMDFAM17H,
   CPU_TYPE_MAX
 };
 
@@ -149,6 +150,7 @@ enum ProcessorSubtypes {
   AMD_BTVER2,
   AMDFAM15H_BDVER3,
   AMDFAM15H_BDVER4,
+  AMDFAM17H_ZNVER1,
   CPU_SUBTYPE_MAX
 };
 
@@ -742,6 +744,14 @@ static void getAMDProcessorTypeAndSubtype(unsigned int Family,
     }
     *Subtype = AMD_BTVER2;
     break; // "btver2"
+  case 23:
+    *Type = AMDFAM17H;
+    if (Features & (1 << FEATURE_ADX)) {
+      *Subtype = AMDFAM17H_ZNVER1;
+      break; // "znver1"
+    }
+    *Subtype =  AMD_BTVER1;
+    break;
   default:
     break; // "generic"
   }
@@ -950,6 +960,15 @@ StringRef sys::getHostCPUName() {
       default:
         return "amdfam16";
       }
+    case AMDFAM17H:
+      switch (Subtype) {
+      case AMD_BTVER1:
+        return "btver1";
+      case AMDFAM17H_ZNVER1:
+        return "znver1";
+      default:
+        return "amdfam17";
+      }
     default:
       return "generic";
     }
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index dc18a59a30b..f95022077d3 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -760,6 +760,42 @@ def : Proc<"bdver4", [
   FeatureMWAITX
 ]>;
 
+// TODO: The scheduler model falls to BTVER2 model.
+// The znver1 model has to be put in place.
+// Zen
+def: ProcessorModel<"znver1", BtVer2Model, [
+  FeatureADX,
+  FeatureAES,
+  FeatureAVX2,
+  FeatureBMI,
+  FeatureBMI2,
+  FeatureCLFLUSHOPT,
+  FeatureCMPXCHG16B,
+  FeatureF16C,
+  FeatureFMA,
+  FeatureFSGSBase,
+  FeatureFXSR,
+  FeatureFastLZCNT,
+  FeatureLAHFSAHF,
+  FeatureLZCNT,
+  FeatureMMX,
+  FeatureMOVBE,
+  FeatureMWAITX,
+  FeaturePCLMUL,
+  FeaturePOPCNT,
+  FeaturePRFCHW,
+  FeatureRDRAND,
+  FeatureRDSEED,
+  FeatureSHA,
+  FeatureSMAP,
+  FeatureSSE4A,
+  FeatureSlowSHLD,
+  FeatureX87,
+  FeatureXSAVE,
+  FeatureXSAVEC,
+  FeatureXSAVEOPT,
+  FeatureXSAVES]>;
+
 def : Proc<"geode",           [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>;
 
 def : Proc<"winchip-c6",      [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
diff --git a/test/CodeGen/X86/cpus.ll b/test/CodeGen/X86/cpus.ll
index ee1f7bb5295..20ce932a184 100644
--- a/test/CodeGen/X86/cpus.ll
+++ b/test/CodeGen/X86/cpus.ll
@@ -33,3 +33,4 @@
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
diff --git a/test/CodeGen/X86/lzcnt-zext-cmp.ll b/test/CodeGen/X86/lzcnt-zext-cmp.ll
index 6f4cb84a2b9..c69dbf573f4 100644
--- a/test/CodeGen/X86/lzcnt-zext-cmp.ll
+++ b/test/CodeGen/X86/lzcnt-zext-cmp.ll
@@ -3,6 +3,8 @@
 ; Eg: zext(or(setcc(cmp), setcc(cmp))) -> shr(or(lzcnt, lzcnt))
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=NOFASTLZCNT %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 -mattr=-fast-lzcnt | FileCheck --check-prefix=NOFASTLZCNT %s
 
 ; Test one 32-bit input, output is 32-bit, no transformations expected.
 define i32 @test_zext_cmp0(i32 %a) {
diff --git a/test/CodeGen/X86/slow-unaligned-mem.ll b/test/CodeGen/X86/slow-unaligned-mem.ll
index 41e9a95bcdd..8251eb324a7 100644
--- a/test/CodeGen/X86/slow-unaligned-mem.ll
+++ b/test/CodeGen/X86/slow-unaligned-mem.ll
@@ -46,6 +46,7 @@
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver2        2>&1 | FileCheck %s --check-prefix=FAST
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3        2>&1 | FileCheck %s --check-prefix=FAST
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4        2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver1        2>&1 | FileCheck %s --check-prefix=FAST
 
 ; Other chips with slow unaligned memory accesses
 
diff --git a/test/CodeGen/X86/x86-64-double-shifts-var.ll b/test/CodeGen/X86/x86-64-double-shifts-var.ll
index 8d2dbbdb5d2..c025ee874b2 100644
--- a/test/CodeGen/X86/x86-64-double-shifts-var.ll
+++ b/test/CodeGen/X86/x86-64-double-shifts-var.ll
@@ -17,6 +17,7 @@
 ; RUN: llc < %s -march=x86-64 -mcpu=bdver2 | FileCheck %s
 ; RUN: llc < %s -march=x86-64 -mcpu=bdver3 | FileCheck %s
 ; RUN: llc < %s -march=x86-64 -mcpu=bdver4 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=znver1 | FileCheck %s
 
 ; Verify that for the X86_64 processors that are known to have poor latency 
 ; double precision shift instructions we do not generate 'shld' or 'shrd'