ARM-NEON: make type modifiers orthogonal and allow multiple modifiers.

The modifier system used to mutate types on NEON intrinsic definitions had a separate letter for all kinds of transformations that might be needed, and we were quite quickly running out of letters to use. This patch converts to a much smaller set of orthogonal modifiers that can be applied together to achieve the desired effect. When merging with downstream it is likely to cause a conflict with any local modifications to the .td files. There is a new script in utils/convert_arm_neon.py that was used to convert all .td definitions and I would suggest running it on the last downstream version of those files before this commit rather than resolving conflicts manually.
2025-01-17 21:49:21 +00:00 · 2019-10-11 13:46:47 +01:00 · 2019-10-11 13:46:47 +01:00 · 3f91705ca5
commit 3f91705ca5
parent e23d6f3184
7 changed files with 1163 additions and 1132 deletions
--- a/clang/include/clang/Basic/arm_fp16.td
+++ b/clang/include/clang/Basic/arm_fp16.td
@ -17,118 +17,118 @@ include "arm_neon_incl.td"
 let ArchGuard = "defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)" in {

  // Negate
-  def VNEGSH          : SInst<"vneg", "ss", "Sh">;
+  def VNEGSH          : SInst<"vneg", "11", "Sh">;

  // Reciprocal/Sqrt
-  def SCALAR_FRECPSH  : IInst<"vrecps", "sss", "Sh">;
-  def FSQRTSH         : SInst<"vsqrt", "ss", "Sh">;
-  def SCALAR_FRSQRTSH : IInst<"vrsqrts", "sss", "Sh">;
+  def SCALAR_FRECPSH  : IInst<"vrecps", "111", "Sh">;
+  def FSQRTSH         : SInst<"vsqrt", "11", "Sh">;
+  def SCALAR_FRSQRTSH : IInst<"vrsqrts", "111", "Sh">;

  // Reciprocal Estimate
-  def SCALAR_FRECPEH  : IInst<"vrecpe", "ss", "Sh">;
+  def SCALAR_FRECPEH  : IInst<"vrecpe", "11", "Sh">;

  // Reciprocal Exponent
-  def SCALAR_FRECPXH  : IInst<"vrecpx", "ss", "Sh">;
+  def SCALAR_FRECPXH  : IInst<"vrecpx", "11", "Sh">;

  // Reciprocal Square Root Estimate
-  def SCALAR_FRSQRTEH : IInst<"vrsqrte", "ss", "Sh">;
+  def SCALAR_FRSQRTEH : IInst<"vrsqrte", "11", "Sh">;

  // Rounding
-  def FRINTZ_S64H     : SInst<"vrnd", "ss", "Sh">;
-  def FRINTA_S64H     : SInst<"vrnda", "ss", "Sh">;
-  def FRINTI_S64H     : SInst<"vrndi", "ss", "Sh">;
-  def FRINTM_S64H     : SInst<"vrndm", "ss", "Sh">;
-  def FRINTN_S64H     : SInst<"vrndn", "ss", "Sh">;
-  def FRINTP_S64H     : SInst<"vrndp", "ss", "Sh">;
-  def FRINTX_S64H     : SInst<"vrndx", "ss", "Sh">;
+  def FRINTZ_S64H     : SInst<"vrnd", "11", "Sh">;
+  def FRINTA_S64H     : SInst<"vrnda", "11", "Sh">;
+  def FRINTI_S64H     : SInst<"vrndi", "11", "Sh">;
+  def FRINTM_S64H     : SInst<"vrndm", "11", "Sh">;
+  def FRINTN_S64H     : SInst<"vrndn", "11", "Sh">;
+  def FRINTP_S64H     : SInst<"vrndp", "11", "Sh">;
+  def FRINTX_S64H     : SInst<"vrndx", "11", "Sh">;

  // Conversion
-  def SCALAR_SCVTFSH  : SInst<"vcvth_f16", "Ys", "sUs">;
-  def SCALAR_SCVTFSH1 : SInst<"vcvth_f16", "Ys", "iUi">;
-  def SCALAR_SCVTFSH2 : SInst<"vcvth_f16", "Ys", "lUl">;
-  def SCALAR_FCVTZSH  : SInst<"vcvt_s16", "$s", "Sh">;
-  def SCALAR_FCVTZSH1 : SInst<"vcvt_s32", "Is", "Sh">;
-  def SCALAR_FCVTZSH2 : SInst<"vcvt_s64", "Ls", "Sh">;
-  def SCALAR_FCVTZUH  : SInst<"vcvt_u16", "bs", "Sh">;
-  def SCALAR_FCVTZUH1 : SInst<"vcvt_u32", "Us", "Sh">;
-  def SCALAR_FCVTZUH2 : SInst<"vcvt_u64", "Os", "Sh">;
-  def SCALAR_FCVTASH  : SInst<"vcvta_s16", "$s", "Sh">;
-  def SCALAR_FCVTASH1 : SInst<"vcvta_s32", "Is", "Sh">;
-  def SCALAR_FCVTASH2 : SInst<"vcvta_s64", "Ls", "Sh">;
-  def SCALAR_FCVTAUH  : SInst<"vcvta_u16", "bs", "Sh">;
-  def SCALAR_FCVTAUH1 : SInst<"vcvta_u32", "Us", "Sh">;
-  def SCALAR_FCVTAUH2 : SInst<"vcvta_u64", "Os", "Sh">;
-  def SCALAR_FCVTMSH  : SInst<"vcvtm_s16", "$s", "Sh">;
-  def SCALAR_FCVTMSH1 : SInst<"vcvtm_s32", "Is", "Sh">;
-  def SCALAR_FCVTMSH2 : SInst<"vcvtm_s64", "Ls", "Sh">;
-  def SCALAR_FCVTMUH  : SInst<"vcvtm_u16", "bs", "Sh">;
-  def SCALAR_FCVTMUH1 : SInst<"vcvtm_u32", "Us", "Sh">;
-  def SCALAR_FCVTMUH2 : SInst<"vcvtm_u64", "Os", "Sh">;
-  def SCALAR_FCVTNSH  : SInst<"vcvtn_s16", "$s", "Sh">;
-  def SCALAR_FCVTNSH1 : SInst<"vcvtn_s32", "Is", "Sh">;
-  def SCALAR_FCVTNSH2 : SInst<"vcvtn_s64", "Ls", "Sh">;
-  def SCALAR_FCVTNUH  : SInst<"vcvtn_u16", "bs", "Sh">;
-  def SCALAR_FCVTNUH1 : SInst<"vcvtn_u32", "Us", "Sh">;
-  def SCALAR_FCVTNUH2 : SInst<"vcvtn_u64", "Os", "Sh">;
-  def SCALAR_FCVTPSH  : SInst<"vcvtp_s16", "$s", "Sh">;
-  def SCALAR_FCVTPSH1 : SInst<"vcvtp_s32", "Is", "Sh">;
-  def SCALAR_FCVTPSH2 : SInst<"vcvtp_s64", "Ls", "Sh">;
-  def SCALAR_FCVTPUH  : SInst<"vcvtp_u16", "bs", "Sh">;
-  def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "Us", "Sh">;
-  def SCALAR_FCVTPUH2 : SInst<"vcvtp_u64", "Os", "Sh">;
+  def SCALAR_SCVTFSH  : SInst<"vcvth_f16", "(1F)(1!)", "sUs">;
+  def SCALAR_SCVTFSH1 : SInst<"vcvth_f16", "(1F<)(1!)", "iUi">;
+  def SCALAR_SCVTFSH2 : SInst<"vcvth_f16", "(1F<<)(1!)", "lUl">;
+  def SCALAR_FCVTZSH  : SInst<"vcvt_s16", "(1S)1", "Sh">;
+  def SCALAR_FCVTZSH1 : SInst<"vcvt_s32", "(1S>)1", "Sh">;
+  def SCALAR_FCVTZSH2 : SInst<"vcvt_s64", "(1S>>)1", "Sh">;
+  def SCALAR_FCVTZUH  : SInst<"vcvt_u16", "(1U)1", "Sh">;
+  def SCALAR_FCVTZUH1 : SInst<"vcvt_u32", "(1U>)1", "Sh">;
+  def SCALAR_FCVTZUH2 : SInst<"vcvt_u64", "(1U>>)1", "Sh">;
+  def SCALAR_FCVTASH  : SInst<"vcvta_s16", "(1S)1", "Sh">;
+  def SCALAR_FCVTASH1 : SInst<"vcvta_s32", "(1S>)1", "Sh">;
+  def SCALAR_FCVTASH2 : SInst<"vcvta_s64", "(1S>>)1", "Sh">;
+  def SCALAR_FCVTAUH  : SInst<"vcvta_u16", "(1U)1", "Sh">;
+  def SCALAR_FCVTAUH1 : SInst<"vcvta_u32", "(1U>)1", "Sh">;
+  def SCALAR_FCVTAUH2 : SInst<"vcvta_u64", "(1U>>)1", "Sh">;
+  def SCALAR_FCVTMSH  : SInst<"vcvtm_s16", "(1S)1", "Sh">;
+  def SCALAR_FCVTMSH1 : SInst<"vcvtm_s32", "(1S>)1", "Sh">;
+  def SCALAR_FCVTMSH2 : SInst<"vcvtm_s64", "(1S>>)1", "Sh">;
+  def SCALAR_FCVTMUH  : SInst<"vcvtm_u16", "(1U)1", "Sh">;
+  def SCALAR_FCVTMUH1 : SInst<"vcvtm_u32", "(1U>)1", "Sh">;
+  def SCALAR_FCVTMUH2 : SInst<"vcvtm_u64", "(1U>>)1", "Sh">;
+  def SCALAR_FCVTNSH  : SInst<"vcvtn_s16", "(1S)1", "Sh">;
+  def SCALAR_FCVTNSH1 : SInst<"vcvtn_s32", "(1S>)1", "Sh">;
+  def SCALAR_FCVTNSH2 : SInst<"vcvtn_s64", "(1S>>)1", "Sh">;
+  def SCALAR_FCVTNUH  : SInst<"vcvtn_u16", "(1U)1", "Sh">;
+  def SCALAR_FCVTNUH1 : SInst<"vcvtn_u32", "(1U>)1", "Sh">;
+  def SCALAR_FCVTNUH2 : SInst<"vcvtn_u64", "(1U>>)1", "Sh">;
+  def SCALAR_FCVTPSH  : SInst<"vcvtp_s16", "(1S)1", "Sh">;
+  def SCALAR_FCVTPSH1 : SInst<"vcvtp_s32", "(1S>)1", "Sh">;
+  def SCALAR_FCVTPSH2 : SInst<"vcvtp_s64", "(1S>>)1", "Sh">;
+  def SCALAR_FCVTPUH  : SInst<"vcvtp_u16", "(1U)1", "Sh">;
+  def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "(1U>)1", "Sh">;
+  def SCALAR_FCVTPUH2 : SInst<"vcvtp_u64", "(1U>>)1", "Sh">;
  let isVCVT_N = 1 in {
-    def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "Ysi", "sUs">;
-    def SCALAR_SCVTFSH1O: SInst<"vcvth_n_f16", "Ysi", "iUi">;
-    def SCALAR_SCVTFSH2O: SInst<"vcvth_n_f16", "Ysi", "lUl">;
-    def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "$si", "Sh">;
-    def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "Isi", "Sh">;
-    def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "Lsi", "Sh">;
-    def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "bsi", "Sh">;
-    def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "Usi", "Sh">;
-    def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "Osi", "Sh">;
+    def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "(1F)(1!)I", "sUs">;
+    def SCALAR_SCVTFSH1O: SInst<"vcvth_n_f16", "(1F<)(1!)I", "iUi">;
+    def SCALAR_SCVTFSH2O: SInst<"vcvth_n_f16", "(1F<<)(1!)I", "lUl">;
+    def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "(1S)1I", "Sh">;
+    def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "(1S>)1I", "Sh">;
+    def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "(1S>>)1I", "Sh">;
+    def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "(1U)1I", "Sh">;
+    def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "(1U>)1I", "Sh">;
+    def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "(1U>>)1I", "Sh">;
  }
  // Comparison
-  def SCALAR_CMEQRH   : SInst<"vceq", "bss", "Sh">;
-  def SCALAR_CMEQZH   : SInst<"vceqz", "bs", "Sh">;
-  def SCALAR_CMGERH   : SInst<"vcge", "bss", "Sh">;
-  def SCALAR_CMGEZH   : SInst<"vcgez", "bs", "Sh">;
-  def SCALAR_CMGTRH   : SInst<"vcgt", "bss", "Sh">;
-  def SCALAR_CMGTZH   : SInst<"vcgtz", "bs", "Sh">;
-  def SCALAR_CMLERH   : SInst<"vcle", "bss", "Sh">;
-  def SCALAR_CMLEZH   : SInst<"vclez", "bs", "Sh">;
-  def SCALAR_CMLTH    : SInst<"vclt", "bss", "Sh">;
-  def SCALAR_CMLTZH   : SInst<"vcltz", "bs", "Sh">;
+  def SCALAR_CMEQRH   : SInst<"vceq", "(1U)11", "Sh">;
+  def SCALAR_CMEQZH   : SInst<"vceqz", "(1U)1", "Sh">;
+  def SCALAR_CMGERH   : SInst<"vcge", "(1U)11", "Sh">;
+  def SCALAR_CMGEZH   : SInst<"vcgez", "(1U)1", "Sh">;
+  def SCALAR_CMGTRH   : SInst<"vcgt", "(1U)11", "Sh">;
+  def SCALAR_CMGTZH   : SInst<"vcgtz", "(1U)1", "Sh">;
+  def SCALAR_CMLERH   : SInst<"vcle", "(1U)11", "Sh">;
+  def SCALAR_CMLEZH   : SInst<"vclez", "(1U)1", "Sh">;
+  def SCALAR_CMLTH    : SInst<"vclt", "(1U)11", "Sh">;
+  def SCALAR_CMLTZH   : SInst<"vcltz", "(1U)1", "Sh">;

  // Absolute Compare Mask Greater Than Or Equal
-  def SCALAR_FACGEH   : IInst<"vcage", "bss", "Sh">;
-  def SCALAR_FACLEH   : IInst<"vcale", "bss", "Sh">;
+  def SCALAR_FACGEH   : IInst<"vcage", "(1U)11", "Sh">;
+  def SCALAR_FACLEH   : IInst<"vcale", "(1U)11", "Sh">;

  // Absolute Compare Mask Greater Than
-  def SCALAR_FACGT    : IInst<"vcagt", "bss", "Sh">;
-  def SCALAR_FACLT    : IInst<"vcalt", "bss", "Sh">;
+  def SCALAR_FACGT    : IInst<"vcagt", "(1U)11", "Sh">;
+  def SCALAR_FACLT    : IInst<"vcalt", "(1U)11", "Sh">;

  // Scalar Absolute Value
-  def SCALAR_ABSH     : SInst<"vabs", "ss", "Sh">;
+  def SCALAR_ABSH     : SInst<"vabs", "11", "Sh">;

  // Scalar Absolute Difference
-  def SCALAR_ABDH: IInst<"vabd", "sss", "Sh">;
+  def SCALAR_ABDH: IInst<"vabd", "111", "Sh">;

  // Add/Sub
-  def VADDSH          : SInst<"vadd", "sss", "Sh">;
-  def VSUBHS          : SInst<"vsub", "sss", "Sh">;
+  def VADDSH          : SInst<"vadd", "111", "Sh">;
+  def VSUBHS          : SInst<"vsub", "111", "Sh">;

  // Max/Min
-  def VMAXHS          : SInst<"vmax", "sss", "Sh">;
-  def VMINHS          : SInst<"vmin", "sss", "Sh">;
-  def FMAXNMHS        : SInst<"vmaxnm", "sss", "Sh">;
-  def FMINNMHS        : SInst<"vminnm", "sss", "Sh">;
+  def VMAXHS          : SInst<"vmax", "111", "Sh">;
+  def VMINHS          : SInst<"vmin", "111", "Sh">;
+  def FMAXNMHS        : SInst<"vmaxnm", "111", "Sh">;
+  def FMINNMHS        : SInst<"vminnm", "111", "Sh">;

  // Multiplication/Division
-  def VMULHS          : SInst<"vmul", "sss", "Sh">;
-  def MULXHS          : SInst<"vmulx", "sss", "Sh">;
-  def FDIVHS          : SInst<"vdiv", "sss",  "Sh">;
+  def VMULHS          : SInst<"vmul", "111", "Sh">;
+  def MULXHS          : SInst<"vmulx", "111", "Sh">;
+  def FDIVHS          : SInst<"vdiv", "111",  "Sh">;

  // Vector fused multiply-add operations
-  def VFMAHS          : SInst<"vfma", "ssss", "Sh">;
-  def VFMSHS          : SInst<"vfms", "ssss", "Sh">;
+  def VFMAHS          : SInst<"vfma", "1111", "Sh">;
+  def VFMSHS          : SInst<"vfms", "1111", "Sh">;
 }
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
--- a/clang/include/clang/Basic/arm_neon_incl.td
+++ b/clang/include/clang/Basic/arm_neon_incl.td
@ -198,10 +198,8 @@ def OP_UNAVAILABLE : Operation {
 //
 // The prototype is a string that defines the return type of the intrinsic
 // and the type of each argument. The return type and every argument gets a
-// "modifier" that can change in some way the "base type" of the intrinsic.
-//
-// The modifier 'd' means "default" and does not modify the base type in any
-// way. The available modifiers are given below.
+// set of "modifiers" that can change in some way the "base type" of the
+// intrinsic.
 //
 // Typespecs
 // ---------
@ -226,41 +224,34 @@ def OP_UNAVAILABLE : Operation {
 // -------------------
 // prototype: return (arg, arg, ...)
 //
-// v: void
-// t: best-fit integer (int/poly args)
-// x: signed integer   (int/float args)
-// u: unsigned integer (int/float args)
-// f: float (int args)
-// F: double (int args)
-// H: half (int args)
-// 0: half (int args), ignore 'Q' size modifier.
-// 1: half (int args), force 'Q' size modifier.
-// d: default
-// g: default, ignore 'Q' size modifier.
-// j: default, force 'Q' size modifier.
-// w: double width elements, same num elts
-// n: double width elements, half num elts
-// h: half width elements, double num elts
-// q: half width elements, quad num elts
-// e: half width elements, double num elts, unsigned
-// m: half width elements, same num elts
-// i: constant int
-// l: constant uint64
-// s: scalar of element type
-// z: scalar of half width element type, signed
-// r: scalar of double width element type, signed
-// b: scalar of unsigned integer/long type (int/float args)
-// $: scalar of signed integer/long type (int/float args)
-// y: scalar of float
-// o: scalar of double
-// k: default elt width, double num elts
-// 2,3,4: array of default vectors
-// B,C,D: array of default elts, force 'Q' size modifier.
-// p: pointer type
-// c: const pointer type
-// 7: vector of 8-bit elements, ignore 'Q' size modifier
-// 8: vector of 8-bit elements, same width as default type
-// 9: vector of 8-bit elements, force 'Q' size modifier
+// Each type modifier is either a single character, or a group surrounded by
+// parentheses.
+//
+// .: default
+// v: change to void category.
+// S: change to signed integer category.
+// U: change to unsigned integer category.
+// F: change to floating category.
+// P: change to polynomial category.
+// p: change polynomial to equivalent integer category. Otherwise nop.
+//
+// >: double element width (vector size unchanged).
+// <: half element width (vector size unchanged).
+//
+// 1: change to scalar.
+// 2: change to struct of two vectors.
+// 3: change to struct of three vectors.
+// 4: change to struct of four vectors.
+//
+// *: make a pointer argument.
+// c: make a constant argument (for pointers).
+//
+// Q: force 128-bit width.
+// q: force 64-bit width.
+//
+// I: make 32-bit signed scalar immediate
+// !: make this the key type passed to CGBuiltin.cpp in a polymorphic call.
+

 // Every intrinsic subclasses Inst.
 class Inst <string n, string p, string t, Operation o> {
--- a/clang/test/CodeGen/aarch64-neon-intrinsics.c
+++ b/clang/test/CodeGen/aarch64-neon-intrinsics.c
@ -17756,8 +17756,6 @@ float32_t test_vminnmv_f32(float32x2_t a) {
 }

 // CHECK-LABEL: @test_vpaddq_s64(
-// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
 // CHECK:   [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
 // CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
 // CHECK:   ret <2 x i64> [[VPADDQ_V2_I]]
@ -17766,8 +17764,6 @@ int64x2_t test_vpaddq_s64(int64x2_t a, int64x2_t b) {
 }

 // CHECK-LABEL: @test_vpaddq_u64(
-// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
 // CHECK:   [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
 // CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
 // CHECK:   ret <2 x i64> [[VPADDQ_V2_I]]
--- a/clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c
+++ b/clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c
@ -407,12 +407,10 @@ int64_t test_vqdmlsls_laneq_s32(int64_t a, int32_t b, int32x4_t c) {
 }

 // CHECK-LABEL: define <1 x double> @test_vmulx_lane_f64_0() #0 {
-// CHECK:   [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
-// CHECK:   [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
-// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0
-// CHECK:   [[VGET_LANE7:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
+// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> <double 0x3FD6304BC43AB5C2>, i32 0
+// CHECK:   [[VGET_LANE7:%.*]] = extractelement <1 x double> <double 0x3FEE211E215AEEF3>, i32 0
 // CHECK:   [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE7]])
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0
+// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x double> <double 0x3FD6304BC43AB5C2>, double [[VMULXD_F64_I]], i32 0
 // CHECK:   ret <1 x double> [[VSET_LANE]]
 float64x1_t test_vmulx_lane_f64_0() {
      float64x1_t arg1;
@ -426,13 +424,11 @@ float64x1_t test_vmulx_lane_f64_0() {
 }

 // CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_2() #1 {
-// CHECK:   [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
-// CHECK:   [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
-// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <1 x double> [[TMP0]], <1 x double> [[TMP1]], <2 x i32> <i32 0, i32 1>
-// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0
+// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <1 x double> <double 0x3FD6304BC43AB5C2>, <1 x double> <double 0x3FEE211E215AEEF3>, <2 x i32> <i32 0, i32 1>
+// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> <double 0x3FD6304BC43AB5C2>, i32 0
 // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[SHUFFLE_I]], i32 1
 // CHECK:   [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]])
-// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0
+// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x double> <double 0x3FD6304BC43AB5C2>, double [[VMULXD_F64_I]], i32 0
 // CHECK:   ret <1 x double> [[VSET_LANE]]
 float64x1_t test_vmulx_laneq_f64_2() {
      float64x1_t arg1;
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@ -161,11 +161,11 @@ public:
        Pointer(false), ScalarForMangling(false), NoManglingQ(false),
        Bitwidth(0), ElementBitwidth(0), NumVectors(0) {}

-  Type(TypeSpec TS, char CharMod)
+  Type(TypeSpec TS, StringRef CharMods)
      : TS(std::move(TS)), Kind(Void), Immediate(false),
        Constant(false), Pointer(false), ScalarForMangling(false),
        NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) {
-    applyModifier(CharMod);
+    applyModifiers(CharMods);
  }

  /// Returns a type representing "void".
@ -181,13 +181,15 @@ public:
  bool noManglingQ() const { return NoManglingQ; }

  bool isPointer() const { return Pointer; }
+  bool isValue() const { return !isVoid() && !isPointer(); }
+  bool isScalar() const { return isValue() && NumVectors == 0; }
+  bool isVector() const { return isValue() && NumVectors > 0; }
+  bool isConstPointer() const { return Constant; }
  bool isFloating() const { return Kind == Float; }
  bool isInteger() const { return Kind == SInt || Kind == UInt; }
  bool isPoly() const { return Kind == Poly; }
  bool isSigned() const { return Kind == SInt; }
  bool isImmediate() const { return Immediate; }
-  bool isScalar() const { return NumVectors == 0; }
-  bool isVector() const { return NumVectors > 0; }
  bool isFloat() const { return isFloating() && ElementBitwidth == 32; }
  bool isDouble() const { return isFloating() && ElementBitwidth == 64; }
  bool isHalf() const { return isFloating() && ElementBitwidth == 16; }
@ -205,11 +207,11 @@ public:
  // Mutator functions
  //
  void makeUnsigned() {
-    assert(isInteger() && "not a potentially signed type");
+    assert(!isVoid() && "not a potentially signed type");
    Kind = UInt;
  }
  void makeSigned() {
-    assert(isInteger() && "not a potentially signed type");
+    assert(!isVoid() && "not a potentially signed type");
    Kind = SInt;
  }

@ -267,8 +269,8 @@ private:
  /// seen. This is needed by applyModifier as some modifiers
  /// only take effect if the type size was changed by "Q" or "H".
  void applyTypespec(bool &Quad);
-  /// Applies a prototype modifiers to the type.
-  void applyModifier(char Mod);
+  /// Applies prototype modifiers to the type.
+  void applyModifiers(StringRef Mods);
 };

 //===----------------------------------------------------------------------===//
@ -299,8 +301,8 @@ class Intrinsic {

  /// The Record this intrinsic was created from.
  Record *R;
-  /// The unmangled name and prototype.
-  std::string Name, Proto;
+  /// The unmangled name.
+  std::string Name;
  /// The input and output typespecs. InTS == OutTS except when
  /// CartesianProductOfTypes is 1 - this is the case for vreinterpret.
  TypeSpec OutTS, InTS;
@ -323,6 +325,8 @@ class Intrinsic {

  /// The types of return value [0] and parameters [1..].
  std::vector<Type> Types;
+  /// The index of the key type passed to CGBuiltin.cpp for polymorphic calls.
+  int PolymorphicKeyType;
  /// The local variables defined.
  std::map<std::string, Variable> Variables;
  /// NeededEarly - set if any other intrinsic depends on this intrinsic.
@ -358,34 +362,39 @@ public:
  Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
            TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
            StringRef Guard, bool IsUnavailable, bool BigEndianSafe)
-      : R(R), Name(Name.str()), Proto(Proto.str()), OutTS(OutTS), InTS(InTS),
-        CK(CK), Body(Body), Guard(Guard.str()), IsUnavailable(IsUnavailable),
-        BigEndianSafe(BigEndianSafe), NeededEarly(false), UseMacro(false),
-        BaseType(OutTS, 'd'), InBaseType(InTS, 'd'), Emitter(Emitter) {
-    // If this builtin takes an immediate argument, we need to #define it rather
-    // than use a standard declaration, so that SemaChecking can range check
-    // the immediate passed by the user.
-    if (Proto.find('i') != std::string::npos)
-      UseMacro = true;
-
-    // Pointer arguments need to use macros to avoid hiding aligned attributes
-    // from the pointer type.
-    if (Proto.find('p') != std::string::npos ||
-        Proto.find('c') != std::string::npos)
-      UseMacro = true;
-
-    // It is not permitted to pass or return an __fp16 by value, so intrinsics
-    // taking a scalar float16_t must be implemented as macros.
-    if (OutTS.find('h') != std::string::npos &&
-        Proto.find('s') != std::string::npos)
-      UseMacro = true;
-
+      : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body),
+        Guard(Guard.str()), IsUnavailable(IsUnavailable),
+        BigEndianSafe(BigEndianSafe), PolymorphicKeyType(0), NeededEarly(false),
+        UseMacro(false), BaseType(OutTS, "."), InBaseType(InTS, "."),
+        Emitter(Emitter) {
    // Modify the TypeSpec per-argument to get a concrete Type, and create
    // known variables for each.
    // Types[0] is the return value.
-    Types.emplace_back(OutTS, Proto[0]);
-    for (unsigned I = 1; I < Proto.size(); ++I)
-      Types.emplace_back(InTS, Proto[I]);
+    unsigned Pos = 0;
+    Types.emplace_back(OutTS, getNextModifiers(Proto, Pos));
+    StringRef Mods = getNextModifiers(Proto, Pos);
+    while (!Mods.empty()) {
+      Types.emplace_back(InTS, Mods);
+      if (Mods.find("!") != StringRef::npos)
+        PolymorphicKeyType = Types.size() - 1;
+
+      Mods = getNextModifiers(Proto, Pos);
+    }
+
+    for (auto Type : Types) {
+      // If this builtin takes an immediate argument, we need to #define it rather
+      // than use a standard declaration, so that SemaChecking can range check
+      // the immediate passed by the user.
+
+      // Pointer arguments need to use macros to avoid hiding aligned attributes
+      // from the pointer type.
+
+      // It is not permitted to pass or return an __fp16 by value, so intrinsics
+      // taking a scalar float16_t must be implemented as macros.
+      if (Type.isImmediate() || Type.isPointer() ||
+          (Type.isScalar() && Type.isHalf()))
+        UseMacro = true;
+    }
  }

  /// Get the Record that this intrinsic is based off.
@ -401,23 +410,24 @@ public:

  /// Return true if the intrinsic takes an immediate operand.
  bool hasImmediate() const {
-    return Proto.find('i') != std::string::npos;
+    return std::any_of(Types.begin(), Types.end(),
+                       [](const Type &T) { return T.isImmediate(); });
  }

  /// Return the parameter index of the immediate operand.
  unsigned getImmediateIdx() const {
-    assert(hasImmediate());
-    unsigned Idx = Proto.find('i');
-    assert(Idx > 0 && "Can't return an immediate!");
-    return Idx - 1;
+    for (unsigned Idx = 0; Idx < Types.size(); ++Idx)
+      if (Types[Idx].isImmediate())
+        return Idx - 1;
+    llvm_unreachable("Intrinsic has no immediate");
  }

-  unsigned getNumParams() const { return Proto.size() - 1; }
+
+  unsigned getNumParams() const { return Types.size() - 1; }
  Type getReturnType() const { return Types[0]; }
  Type getParamType(unsigned I) const { return Types[I + 1]; }
  Type getBaseType() const { return BaseType; }
-  /// Return the raw prototype string.
-  std::string getProto() const { return Proto; }
+  Type getPolymorphicKeyType() const { return Types[PolymorphicKeyType]; }

  /// Return true if the prototype has a scalar argument.
  bool protoHasScalar() const;
@ -471,6 +481,8 @@ public:
  void indexBody();

 private:
+  StringRef getNextModifiers(StringRef Proto, unsigned &Pos) const;
+
  std::string mangleName(std::string Name, ClassKind CK) const;

  void initVariables();
@ -614,10 +626,14 @@ std::string Type::builtin_str() const {
  if (isVoid())
    return "v";

-  if (Pointer)
+  if (isPointer()) {
    // All pointers are void pointers.
-    S += "v";
-  else if (isInteger())
+    S = "v";
+    if (isConstPointer())
+      S += "C";
+    S += "*";
+    return S;
+  } else if (isInteger())
    switch (ElementBitwidth) {
    case 8: S += "c"; break;
    case 16: S += "s"; break;
@ -634,10 +650,11 @@ std::string Type::builtin_str() const {
    default: llvm_unreachable("Unhandled case!");
    }

+  // FIXME: NECESSARY???????????????????????????????????????????????????????????????????????
  if (isChar() && !isPointer() && isSigned())
    // Make chars explicitly signed.
    S = "S" + S;
-  else if (!isPointer() && isInteger() && !isSigned())
+  else if (isInteger() && !isSigned())
    S = "U" + S;

  // Constant indices are "int", but have the "constant expression" modifier.
@ -646,11 +663,8 @@ std::string Type::builtin_str() const {
    S = "I" + S;
  }

-  if (isScalar()) {
-    if (Constant) S += "C";
-    if (Pointer) S += "*";
+  if (isScalar())
    return S;
-  }

  std::string Ret;
  for (unsigned I = 0; I < NumVectors; ++I)
@ -812,202 +826,77 @@ void Type::applyTypespec(bool &Quad) {
  Bitwidth = Quad ? 128 : 64;
 }

-void Type::applyModifier(char Mod) {
+void Type::applyModifiers(StringRef Mods) {
  bool AppliedQuad = false;
  applyTypespec(AppliedQuad);

-  switch (Mod) {
-  case 'v':
-    Kind = Void;
-    break;
-  case 't':
-    if (isPoly())
+  for (char Mod : Mods) {
+    switch (Mod) {
+    case '.':
+      break;
+    case 'v':
+      Kind = Void;
+      break;
+    case 'S':
+      Kind = SInt;
+      break;
+    case 'U':
      Kind = UInt;
-    break;
-  case 'b':
-    Kind = UInt;
-    NumVectors = 0;
-    Bitwidth = ElementBitwidth;
-    break;
-  case '$':
-    Kind = SInt;
-    NumVectors = 0;
-    Bitwidth = ElementBitwidth;
-    break;
-  case 'u':
-    Kind = UInt;
-    break;
-  case 'x':
-    assert(!isPoly() && "'u' can't be used with poly types!");
-    Kind = SInt;
-    break;
-  case 'o':
-    Bitwidth = ElementBitwidth = 64;
-    NumVectors = 0;
-    Kind = Float;
-    break;
-  case 'y':
-    Bitwidth = ElementBitwidth = 32;
-    NumVectors = 0;
-    Kind = Float;
-    break;
-  case 'Y':
-    Bitwidth = ElementBitwidth = 16;
-    NumVectors = 0;
-    Kind = Float;
-    break;
-  case 'I':
-    Bitwidth = ElementBitwidth = 32;
-    NumVectors = 0;
-    Kind = SInt;
-    break;
-  case 'L':
-    Bitwidth = ElementBitwidth = 64;
-    NumVectors = 0;
-    Kind = SInt;
-    break;
-  case 'U':
-    Bitwidth = ElementBitwidth = 32;
-    NumVectors = 0;
-    Kind = UInt;
-    break;
-  case 'O':
-    Bitwidth = ElementBitwidth = 64;
-    NumVectors = 0;
-    Kind = UInt;
-    break;
-  case 'f':
-    Kind = Float;
-    ElementBitwidth = 32;
-    break;
-  case 'F':
-    Kind = Float;
-    ElementBitwidth = 64;
-    break;
-  case 'H':
-    Kind = Float;
-    ElementBitwidth = 16;
-    break;
-  case '0':
-    Kind = Float;
-    if (AppliedQuad)
-      Bitwidth /= 2;
-    ElementBitwidth = 16;
-    break;
-  case '1':
-    Kind = Float;
-    if (!AppliedQuad)
-      Bitwidth *= 2;
-    ElementBitwidth = 16;
-    break;
-  case 'g':
-    if (AppliedQuad)
-      Bitwidth /= 2;
-    break;
-  case 'j':
-    if (!AppliedQuad)
-      Bitwidth *= 2;
-    break;
-  case 'w':
-    ElementBitwidth *= 2;
-    Bitwidth *= 2;
-    break;
-  case 'n':
-    ElementBitwidth *= 2;
-    break;
-  case 'i':
-    Kind = SInt;
-    ElementBitwidth = Bitwidth = 32;
-    NumVectors = 0;
-    Immediate = true;
-    break;
-  case 'l':
-    Kind = UInt;
-    ElementBitwidth = Bitwidth = 64;
-    NumVectors = 0;
-    Immediate = true;
-    break;
-  case 'z':
-    ElementBitwidth /= 2;
-    Bitwidth = ElementBitwidth;
-    NumVectors = 0;
-    break;
-  case 'r':
-    ElementBitwidth *= 2;
-    Bitwidth = ElementBitwidth;
-    NumVectors = 0;
-    break;
-  case 's':
-    Bitwidth = ElementBitwidth;
-    NumVectors = 0;
-    break;
-  case 'k':
-    Bitwidth *= 2;
-    break;
-  case 'c':
-    Constant = true;
-    LLVM_FALLTHROUGH;
-  case 'p':
-    Pointer = true;
-    Bitwidth = ElementBitwidth;
-    NumVectors = 0;
-    break;
-  case 'h':
-    ElementBitwidth /= 2;
-    break;
-  case 'q':
-    ElementBitwidth /= 2;
-    Bitwidth *= 2;
-    break;
-  case 'e':
-    ElementBitwidth /= 2;
-    Kind = UInt;
-    break;
-  case 'm':
-    ElementBitwidth /= 2;
-    Bitwidth /= 2;
-    break;
-  case 'd':
-    break;
-  case '2':
-    NumVectors = 2;
-    break;
-  case '3':
-    NumVectors = 3;
-    break;
-  case '4':
-    NumVectors = 4;
-    break;
-  case 'B':
-    NumVectors = 2;
-    if (!AppliedQuad)
-      Bitwidth *= 2;
-    break;
-  case 'C':
-    NumVectors = 3;
-    if (!AppliedQuad)
-      Bitwidth *= 2;
-    break;
-  case 'D':
-    NumVectors = 4;
-    if (!AppliedQuad)
-      Bitwidth *= 2;
-    break;
-  case '7':
-    if (AppliedQuad)
-      Bitwidth /= 2;
-    ElementBitwidth = 8;
-    break;
-  case '8':
-    ElementBitwidth = 8;
-    break;
-  case '9':
-    if (!AppliedQuad)
-      Bitwidth *= 2;
-    ElementBitwidth = 8;
-    break;
-  default:
-    llvm_unreachable("Unhandled character!");
+      break;
+    case 'F':
+      Kind = Float;
+      break;
+    case 'P':
+      Kind = Poly;
+      break;
+    case '>':
+      assert(ElementBitwidth < 128);
+      ElementBitwidth *= 2;
+      break;
+    case '<':
+      assert(ElementBitwidth > 8);
+      ElementBitwidth /= 2;
+      break;
+    case '1':
+      NumVectors = 0;
+      break;
+    case '2':
+      NumVectors = 2;
+      break;
+    case '3':
+      NumVectors = 3;
+      break;
+    case '4':
+      NumVectors = 4;
+      break;
+    case '*':
+      Pointer = true;
+      break;
+    case 'c':
+      Constant = true;
+      break;
+    case 'Q':
+      Bitwidth = 128;
+      break;
+    case 'q':
+      Bitwidth = 64;
+      break;
+    case 'I':
+      Kind = SInt;
+      ElementBitwidth = Bitwidth = 32;
+      NumVectors = 0;
+      Immediate = true;
+      break;
+    case 'p':
+      if (isPoly())
+        Kind = UInt;
+      break;
+    case '!':
+      // Key type, handled elsewhere.
+      break;
+    default:
+      llvm_unreachable("Unhandled character!");
+    }
  }
 }

@ -1015,6 +904,19 @@ void Type::applyModifier(char Mod) {
 // Intrinsic implementation
 //===----------------------------------------------------------------------===//

+StringRef Intrinsic::getNextModifiers(StringRef Proto, unsigned &Pos) const {
+  if (Proto.size() == Pos)
+    return StringRef();
+  else if (Proto[Pos] != '(')
+    return Proto.substr(Pos++, 1);
+
+  size_t Start = Pos + 1;
+  size_t End = Proto.find(')', Start);
+  assert_with_loc(End != StringRef::npos, "unmatched modifier group paren");
+  Pos = End + 1;
+  return Proto.slice(Start, End);
+}
+
 std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const {
  char typeCode = '\0';
  bool printNumber = true;
@ -1053,17 +955,13 @@ std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const {
  return S;
 }

-static bool isFloatingPointProtoModifier(char Mod) {
-  return Mod == 'F' || Mod == 'f' || Mod == 'H' || Mod == 'Y' || Mod == 'I';
-}
-
 std::string Intrinsic::getBuiltinTypeStr() {
  ClassKind LocalCK = getClassKind(true);
  std::string S;

  Type RetT = getReturnType();
  if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() &&
-      !RetT.isFloating() && !RetT.isVoid())
+      !RetT.isFloating())
    RetT.makeInteger(RetT.getElementSizeInBits(), false);

  // Since the return value must be one type, return a vector type of the
@ -1078,7 +976,7 @@ std::string Intrinsic::getBuiltinTypeStr() {
    if (!RetT.isScalar() && RetT.isInteger() && !RetT.isSigned())
      RetT.makeSigned();

-    if (LocalCK == ClassB && !RetT.isVoid() && !RetT.isScalar())
+    if (LocalCK == ClassB && RetT.isValue() && !RetT.isScalar())
      // Cast to vector of 8-bit elements.
      RetT.makeInteger(8, true);

@ -1194,7 +1092,7 @@ void Intrinsic::initVariables() {

  // Modify the TypeSpec per-argument to get a concrete Type, and create
  // known variables for each.
-  for (unsigned I = 1; I < Proto.size(); ++I) {
+  for (unsigned I = 1; I < Types.size(); ++I) {
    char NameC = '0' + (I - 1);
    std::string Name = "p";
    Name.push_back(NameC);
@ -1315,7 +1213,7 @@ void Intrinsic::emitShadowedArgs() {
  for (unsigned I = 0; I < getNumParams(); ++I) {
    // Do not create a temporary for an immediate argument.
    // That would defeat the whole point of using a macro!
-    if (hasImmediate() && Proto[I+1] == 'i')
+    if (getParamType(I).isImmediate())
      continue;
    // Do not create a temporary for pointer arguments. The input
    // pointer may have an alignment hint.
@ -1339,13 +1237,9 @@ void Intrinsic::emitShadowedArgs() {
 }

 bool Intrinsic::protoHasScalar() const {
-  return (Proto.find('s') != std::string::npos ||
-          Proto.find('z') != std::string::npos ||
-          Proto.find('r') != std::string::npos ||
-          Proto.find('b') != std::string::npos ||
-          Proto.find('$') != std::string::npos ||
-          Proto.find('y') != std::string::npos ||
-          Proto.find('o') != std::string::npos);
+  return std::any_of(Types.begin(), Types.end(), [](const Type &T) {
+    return T.isScalar() && !T.isImmediate();
+  });
 }

 void Intrinsic::emitBodyAsBuiltinCall() {
@ -1408,13 +1302,7 @@ void Intrinsic::emitBodyAsBuiltinCall() {

  // Extra constant integer to hold type class enum for this function, e.g. s8
  if (getClassKind(true) == ClassB) {
-    Type ThisTy = getReturnType();
-    if (Proto[0] == 'v' || isFloatingPointProtoModifier(Proto[0]))
-      ThisTy = getParamType(0);
-    if (ThisTy.isPointer())
-      ThisTy = getParamType(1);
-
-    S += utostr(ThisTy.getNeonEnum());
+    S += utostr(getPolymorphicKeyType().getNeonEnum());
  } else {
    // Remove extraneous ", ".
    S.pop_back();
@ -2019,9 +1907,9 @@ void NeonEmitter::createIntrinsic(Record *R,
  std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs;
  for (auto TS : TypeSpecs) {
    if (CartesianProductOfTypes) {
-      Type DefaultT(TS, 'd');
+      Type DefaultT(TS, ".");
      for (auto SrcTS : TypeSpecs) {
-        Type DefaultSrcT(SrcTS, 'd');
+        Type DefaultSrcT(SrcTS, ".");
        if (TS == SrcTS ||
            DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits())
          continue;
@ -2101,31 +1989,19 @@ void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
      continue;

    uint64_t Mask = 0ULL;
-    Type Ty = Def->getReturnType();
-    if (Def->getProto()[0] == 'v' ||
-        isFloatingPointProtoModifier(Def->getProto()[0]))
-      Ty = Def->getParamType(0);
-    if (Ty.isPointer())
-      Ty = Def->getParamType(1);
-
-    Mask |= 1ULL << Ty.getNeonEnum();
+    Mask |= 1ULL << Def->getPolymorphicKeyType().getNeonEnum();

    // Check if the function has a pointer or const pointer argument.
-    std::string Proto = Def->getProto();
    int PtrArgNum = -1;
    bool HasConstPtr = false;
    for (unsigned I = 0; I < Def->getNumParams(); ++I) {
-      char ArgType = Proto[I + 1];
-      if (ArgType == 'c') {
-        HasConstPtr = true;
+      const auto &Type = Def->getParamType(I);
+      if (Type.isPointer()) {
        PtrArgNum = I;
-        break;
-      }
-      if (ArgType == 'p') {
-        PtrArgNum = I;
-        break;
+        HasConstPtr = Type.isConstPointer();
      }
    }
+
    // For sret builtins, adjust the pointer argument index.
    if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1)
      PtrArgNum += 1;
@ -2349,7 +2225,7 @@ void NeonEmitter::run(raw_ostream &OS) {
  bool InIfdef = false;
  for (auto &TS : TDTypeVec) {
    bool IsA64 = false;
-    Type T(TS, 'd');
+    Type T(TS, ".");
    if (T.isDouble() || (T.isPoly() && T.getElementSizeInBits() == 64))
      IsA64 = true;

@ -2382,7 +2258,7 @@ void NeonEmitter::run(raw_ostream &OS) {
  for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) {
    for (auto &TS : TDTypeVec) {
      bool IsA64 = false;
-      Type T(TS, 'd');
+      Type T(TS, ".");
      if (T.isDouble() || (T.isPoly() && T.getElementSizeInBits() == 64))
        IsA64 = true;

@ -2395,8 +2271,8 @@ void NeonEmitter::run(raw_ostream &OS) {
        InIfdef = true;
      }

-      char M = '2' + (NumMembers - 2);
-      Type VT(TS, M);
+      const char Mods[] = { static_cast<char>('2' + (NumMembers - 2)), 0};
+      Type VT(TS, Mods);
      OS << "typedef struct " << VT.str() << " {\n";
      OS << "  " << T.str() << " val";
      OS << "[" << NumMembers << "]";
--- a/clang/utils/convert_arm_neon.py
+++ b/clang/utils/convert_arm_neon.py
@ -0,0 +1,172 @@
+#!/usr/bin/env python3
+
+# This script was committed on 20/11/2019 and it would probably make sense to remove
+# it after the next release branches.
+
+# This script is pipe based and converts an arm_neon.td (or arm_fp16.td) file
+# using the old single-char type modifiers to an equivalent new-style form where
+# each modifier is orthogonal and they can be composed.
+#
+# It was used to directly generate the .td files on master, so if you have any
+# local additions I would suggest implementing any modifiers here, and running
+# it over your entire pre-merge .td files rather than trying to resolve any
+# conflicts manually.
+
+import re, sys
+MOD_MAP = {
+    'v': 'v',
+    'x': 'S',
+    'u': 'U',
+    'd': '.',
+    'g': 'q',
+    'j': 'Q',
+    'w': '>Q',
+    'n': '>',
+    'h': '<',
+    'q': '<Q',
+    'e': '<U',
+    'm': '<q',
+    'i': 'I',
+    'l': 'IU>',
+    's': '1',
+    'z': '1<',
+    'r': '1>',
+    'b': '1U',
+    '$': '1S',
+    'k': 'Q',
+    '2': '2',
+    '3': '3',
+    '4': '4',
+    'B': '2Q',
+    'C': '3Q',
+    'D': '4Q',
+    'p': '*',
+    'c': 'c*',
+    '7': '<<q',
+    '8': '<<',
+    '9': '<<Q',
+    't': 'p'
+    }
+
+
+def typespec_elt_size(typespec):
+    if 'c' in typespec:
+        return 8
+    elif 's' in typespec or 'h' in typespec:
+        return 16
+    elif 'i' in typespec or 'f' in typespec:
+        return 32
+    elif 'l' in typespec or 'd' in typespec:
+        return 64
+    elif 'k' in typespec:
+        return 128
+
+def get_resize(cur, desired):
+    res = ''
+    while cur < desired:
+        res += '>'
+        cur *= 2
+    while cur > desired:
+        res += '<'
+        cur /= 2
+    return res
+
+
+def remap_protocol(proto, typespec, name):
+    key_type = 0
+
+    # Conversions like to see the integer type so they know signedness.
+    if 'vcvt' in name and '_f' in name and name != 'vcvt_f32_f64' and name != 'vcvt_f64_f32':
+        key_type = 1
+    default_width = typespec_elt_size(typespec)
+    inconsistent_width = False
+    for elt in typespec:
+        new_width = typespec_elt_size(elt)
+        if new_width and new_width != default_width:
+            inconsistent_width = True
+
+    res = ''
+    for i, c in enumerate(proto):
+        # void and pointers make for bad discriminators in CGBuiltin.cpp.
+        if c in 'vcp':
+                key_type += 1
+
+        if c in MOD_MAP:
+            cur_mod = MOD_MAP[c]
+        elif inconsistent_width:
+            # Otherwise it's a fixed output width modifier.
+            sys.stderr.write(f'warning: {name} uses fixed output size but has inconsistent input widths: {proto} {typespec}\n')
+
+        if c == 'Y':
+            # y: scalar of half float
+            resize = get_resize(default_width, 16)
+            cur_mod = f'1F{resize}'
+        elif c == 'y':
+            # y: scalar of float
+            resize = get_resize(default_width, 32)
+            cur_mod = f'1F{resize}'
+        elif c == 'o':
+            # o: scalar of double
+            resize = get_resize(default_width, 64)
+            cur_mod = f'1F{resize}'
+        elif c == 'I':
+            # I: scalar of 32-bit signed
+            resize = get_resize(default_width, 32)
+            cur_mod = f'1S{resize}'
+        elif c == 'L':
+            # L: scalar of 64-bit signed
+            resize = get_resize(default_width, 64)
+            cur_mod = f'1S{resize}'
+        elif c == 'U':
+            # I: scalar of 32-bit unsigned
+            resize = get_resize(default_width, 32)
+            cur_mod = f'1U{resize}'
+        elif c == 'O':
+            # O: scalar of 64-bit unsigned
+            resize = get_resize(default_width, 64)
+            cur_mod = f'1U{resize}'
+        elif c == 'f':
+            # f: float (int args)
+            resize = get_resize(default_width, 32)
+            cur_mod = f'F{resize}'
+        elif c == 'F':
+            # F: double (int args)
+            resize = get_resize(default_width, 64)
+            cur_mod = f'F{resize}'
+        elif c == 'H':
+            # H: half (int args)
+            resize = get_resize(default_width, 16)
+            cur_mod = f'F{resize}'
+        elif c == '0':
+            # 0: half (int args), ignore 'Q' size modifier.
+            resize = get_resize(default_width, 16)
+            cur_mod = f'Fq{resize}'
+        elif c == '1':
+            # 1: half (int args), force 'Q' size modifier.
+            resize = get_resize(default_width, 16)
+            cur_mod = f'FQ{resize}'
+
+        if len(cur_mod) == 0:
+            raise Exception(f'WTF: {c} in {name}')
+
+        if key_type != 0 and key_type == i:
+            cur_mod += '!'
+
+        if len(cur_mod) == 1:
+            res += cur_mod
+        else:
+            res += '(' + cur_mod + ')'
+
+    return res
+
+def replace_insts(m):
+    start, end = m.span('proto')
+    start -= m.start()
+    end -= m.start()
+    new_proto = remap_protocol(m['proto'], m['kinds'], m['name'])
+    return m.group()[:start] + new_proto + m.group()[end:]
+
+INST = re.compile(r'Inst<"(?P<name>.*?)",\s*"(?P<proto>.*?)",\s*"(?P<kinds>.*?)"')
+
+new_td = INST.sub(replace_insts, sys.stdin.read())
+sys.stdout.write(new_td)