From 3f91705ca54bc76b50c35f9e0831ab356d653c5c Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Fri, 11 Oct 2019 13:46:47 +0100 Subject: [PATCH] ARM-NEON: make type modifiers orthogonal and allow multiple modifiers. The modifier system used to mutate types on NEON intrinsic definitions had a separate letter for all kinds of transformations that might be needed, and we were quite quickly running out of letters to use. This patch converts to a much smaller set of orthogonal modifiers that can be applied together to achieve the desired effect. When merging with downstream it is likely to cause a conflict with any local modifications to the .td files. There is a new script in utils/convert_arm_neon.py that was used to convert all .td definitions and I would suggest running it on the last downstream version of those files before this commit rather than resolving conflicts manually. --- clang/include/clang/Basic/arm_fp16.td | 166 +- clang/include/clang/Basic/arm_neon.td | 1416 ++++++++--------- clang/include/clang/Basic/arm_neon_incl.td | 69 +- clang/test/CodeGen/aarch64-neon-intrinsics.c | 4 - .../aarch64-neon-scalar-x-indexed-elem.c | 16 +- clang/utils/TableGen/NeonEmitter.cpp | 452 ++---- clang/utils/convert_arm_neon.py | 172 ++ 7 files changed, 1163 insertions(+), 1132 deletions(-) create mode 100644 clang/utils/convert_arm_neon.py diff --git a/clang/include/clang/Basic/arm_fp16.td b/clang/include/clang/Basic/arm_fp16.td index bb9873efac85..79cd16233c10 100644 --- a/clang/include/clang/Basic/arm_fp16.td +++ b/clang/include/clang/Basic/arm_fp16.td @@ -17,118 +17,118 @@ include "arm_neon_incl.td" let ArchGuard = "defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)" in { // Negate - def VNEGSH : SInst<"vneg", "ss", "Sh">; + def VNEGSH : SInst<"vneg", "11", "Sh">; // Reciprocal/Sqrt - def SCALAR_FRECPSH : IInst<"vrecps", "sss", "Sh">; - def FSQRTSH : SInst<"vsqrt", "ss", "Sh">; - def SCALAR_FRSQRTSH : IInst<"vrsqrts", "sss", "Sh">; + def SCALAR_FRECPSH : IInst<"vrecps", "111", "Sh">; + def FSQRTSH : SInst<"vsqrt", "11", "Sh">; + def SCALAR_FRSQRTSH : IInst<"vrsqrts", "111", "Sh">; // Reciprocal Estimate - def SCALAR_FRECPEH : IInst<"vrecpe", "ss", "Sh">; + def SCALAR_FRECPEH : IInst<"vrecpe", "11", "Sh">; // Reciprocal Exponent - def SCALAR_FRECPXH : IInst<"vrecpx", "ss", "Sh">; + def SCALAR_FRECPXH : IInst<"vrecpx", "11", "Sh">; // Reciprocal Square Root Estimate - def SCALAR_FRSQRTEH : IInst<"vrsqrte", "ss", "Sh">; + def SCALAR_FRSQRTEH : IInst<"vrsqrte", "11", "Sh">; // Rounding - def FRINTZ_S64H : SInst<"vrnd", "ss", "Sh">; - def FRINTA_S64H : SInst<"vrnda", "ss", "Sh">; - def FRINTI_S64H : SInst<"vrndi", "ss", "Sh">; - def FRINTM_S64H : SInst<"vrndm", "ss", "Sh">; - def FRINTN_S64H : SInst<"vrndn", "ss", "Sh">; - def FRINTP_S64H : SInst<"vrndp", "ss", "Sh">; - def FRINTX_S64H : SInst<"vrndx", "ss", "Sh">; + def FRINTZ_S64H : SInst<"vrnd", "11", "Sh">; + def FRINTA_S64H : SInst<"vrnda", "11", "Sh">; + def FRINTI_S64H : SInst<"vrndi", "11", "Sh">; + def FRINTM_S64H : SInst<"vrndm", "11", "Sh">; + def FRINTN_S64H : SInst<"vrndn", "11", "Sh">; + def FRINTP_S64H : SInst<"vrndp", "11", "Sh">; + def FRINTX_S64H : SInst<"vrndx", "11", "Sh">; // Conversion - def SCALAR_SCVTFSH : SInst<"vcvth_f16", "Ys", "sUs">; - def SCALAR_SCVTFSH1 : SInst<"vcvth_f16", "Ys", "iUi">; - def SCALAR_SCVTFSH2 : SInst<"vcvth_f16", "Ys", "lUl">; - def SCALAR_FCVTZSH : SInst<"vcvt_s16", "$s", "Sh">; - def SCALAR_FCVTZSH1 : SInst<"vcvt_s32", "Is", "Sh">; - def SCALAR_FCVTZSH2 : SInst<"vcvt_s64", "Ls", "Sh">; - def SCALAR_FCVTZUH : SInst<"vcvt_u16", "bs", "Sh">; - def SCALAR_FCVTZUH1 : SInst<"vcvt_u32", "Us", "Sh">; - def SCALAR_FCVTZUH2 : SInst<"vcvt_u64", "Os", "Sh">; - def SCALAR_FCVTASH : SInst<"vcvta_s16", "$s", "Sh">; - def SCALAR_FCVTASH1 : SInst<"vcvta_s32", "Is", "Sh">; - def SCALAR_FCVTASH2 : SInst<"vcvta_s64", "Ls", "Sh">; - def SCALAR_FCVTAUH : SInst<"vcvta_u16", "bs", "Sh">; - def SCALAR_FCVTAUH1 : SInst<"vcvta_u32", "Us", "Sh">; - def SCALAR_FCVTAUH2 : SInst<"vcvta_u64", "Os", "Sh">; - def SCALAR_FCVTMSH : SInst<"vcvtm_s16", "$s", "Sh">; - def SCALAR_FCVTMSH1 : SInst<"vcvtm_s32", "Is", "Sh">; - def SCALAR_FCVTMSH2 : SInst<"vcvtm_s64", "Ls", "Sh">; - def SCALAR_FCVTMUH : SInst<"vcvtm_u16", "bs", "Sh">; - def SCALAR_FCVTMUH1 : SInst<"vcvtm_u32", "Us", "Sh">; - def SCALAR_FCVTMUH2 : SInst<"vcvtm_u64", "Os", "Sh">; - def SCALAR_FCVTNSH : SInst<"vcvtn_s16", "$s", "Sh">; - def SCALAR_FCVTNSH1 : SInst<"vcvtn_s32", "Is", "Sh">; - def SCALAR_FCVTNSH2 : SInst<"vcvtn_s64", "Ls", "Sh">; - def SCALAR_FCVTNUH : SInst<"vcvtn_u16", "bs", "Sh">; - def SCALAR_FCVTNUH1 : SInst<"vcvtn_u32", "Us", "Sh">; - def SCALAR_FCVTNUH2 : SInst<"vcvtn_u64", "Os", "Sh">; - def SCALAR_FCVTPSH : SInst<"vcvtp_s16", "$s", "Sh">; - def SCALAR_FCVTPSH1 : SInst<"vcvtp_s32", "Is", "Sh">; - def SCALAR_FCVTPSH2 : SInst<"vcvtp_s64", "Ls", "Sh">; - def SCALAR_FCVTPUH : SInst<"vcvtp_u16", "bs", "Sh">; - def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "Us", "Sh">; - def SCALAR_FCVTPUH2 : SInst<"vcvtp_u64", "Os", "Sh">; + def SCALAR_SCVTFSH : SInst<"vcvth_f16", "(1F)(1!)", "sUs">; + def SCALAR_SCVTFSH1 : SInst<"vcvth_f16", "(1F<)(1!)", "iUi">; + def SCALAR_SCVTFSH2 : SInst<"vcvth_f16", "(1F<<)(1!)", "lUl">; + def SCALAR_FCVTZSH : SInst<"vcvt_s16", "(1S)1", "Sh">; + def SCALAR_FCVTZSH1 : SInst<"vcvt_s32", "(1S>)1", "Sh">; + def SCALAR_FCVTZSH2 : SInst<"vcvt_s64", "(1S>>)1", "Sh">; + def SCALAR_FCVTZUH : SInst<"vcvt_u16", "(1U)1", "Sh">; + def SCALAR_FCVTZUH1 : SInst<"vcvt_u32", "(1U>)1", "Sh">; + def SCALAR_FCVTZUH2 : SInst<"vcvt_u64", "(1U>>)1", "Sh">; + def SCALAR_FCVTASH : SInst<"vcvta_s16", "(1S)1", "Sh">; + def SCALAR_FCVTASH1 : SInst<"vcvta_s32", "(1S>)1", "Sh">; + def SCALAR_FCVTASH2 : SInst<"vcvta_s64", "(1S>>)1", "Sh">; + def SCALAR_FCVTAUH : SInst<"vcvta_u16", "(1U)1", "Sh">; + def SCALAR_FCVTAUH1 : SInst<"vcvta_u32", "(1U>)1", "Sh">; + def SCALAR_FCVTAUH2 : SInst<"vcvta_u64", "(1U>>)1", "Sh">; + def SCALAR_FCVTMSH : SInst<"vcvtm_s16", "(1S)1", "Sh">; + def SCALAR_FCVTMSH1 : SInst<"vcvtm_s32", "(1S>)1", "Sh">; + def SCALAR_FCVTMSH2 : SInst<"vcvtm_s64", "(1S>>)1", "Sh">; + def SCALAR_FCVTMUH : SInst<"vcvtm_u16", "(1U)1", "Sh">; + def SCALAR_FCVTMUH1 : SInst<"vcvtm_u32", "(1U>)1", "Sh">; + def SCALAR_FCVTMUH2 : SInst<"vcvtm_u64", "(1U>>)1", "Sh">; + def SCALAR_FCVTNSH : SInst<"vcvtn_s16", "(1S)1", "Sh">; + def SCALAR_FCVTNSH1 : SInst<"vcvtn_s32", "(1S>)1", "Sh">; + def SCALAR_FCVTNSH2 : SInst<"vcvtn_s64", "(1S>>)1", "Sh">; + def SCALAR_FCVTNUH : SInst<"vcvtn_u16", "(1U)1", "Sh">; + def SCALAR_FCVTNUH1 : SInst<"vcvtn_u32", "(1U>)1", "Sh">; + def SCALAR_FCVTNUH2 : SInst<"vcvtn_u64", "(1U>>)1", "Sh">; + def SCALAR_FCVTPSH : SInst<"vcvtp_s16", "(1S)1", "Sh">; + def SCALAR_FCVTPSH1 : SInst<"vcvtp_s32", "(1S>)1", "Sh">; + def SCALAR_FCVTPSH2 : SInst<"vcvtp_s64", "(1S>>)1", "Sh">; + def SCALAR_FCVTPUH : SInst<"vcvtp_u16", "(1U)1", "Sh">; + def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "(1U>)1", "Sh">; + def SCALAR_FCVTPUH2 : SInst<"vcvtp_u64", "(1U>>)1", "Sh">; let isVCVT_N = 1 in { - def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "Ysi", "sUs">; - def SCALAR_SCVTFSH1O: SInst<"vcvth_n_f16", "Ysi", "iUi">; - def SCALAR_SCVTFSH2O: SInst<"vcvth_n_f16", "Ysi", "lUl">; - def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "$si", "Sh">; - def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "Isi", "Sh">; - def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "Lsi", "Sh">; - def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "bsi", "Sh">; - def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "Usi", "Sh">; - def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "Osi", "Sh">; + def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "(1F)(1!)I", "sUs">; + def SCALAR_SCVTFSH1O: SInst<"vcvth_n_f16", "(1F<)(1!)I", "iUi">; + def SCALAR_SCVTFSH2O: SInst<"vcvth_n_f16", "(1F<<)(1!)I", "lUl">; + def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "(1S)1I", "Sh">; + def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "(1S>)1I", "Sh">; + def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "(1S>>)1I", "Sh">; + def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "(1U)1I", "Sh">; + def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "(1U>)1I", "Sh">; + def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "(1U>>)1I", "Sh">; } // Comparison - def SCALAR_CMEQRH : SInst<"vceq", "bss", "Sh">; - def SCALAR_CMEQZH : SInst<"vceqz", "bs", "Sh">; - def SCALAR_CMGERH : SInst<"vcge", "bss", "Sh">; - def SCALAR_CMGEZH : SInst<"vcgez", "bs", "Sh">; - def SCALAR_CMGTRH : SInst<"vcgt", "bss", "Sh">; - def SCALAR_CMGTZH : SInst<"vcgtz", "bs", "Sh">; - def SCALAR_CMLERH : SInst<"vcle", "bss", "Sh">; - def SCALAR_CMLEZH : SInst<"vclez", "bs", "Sh">; - def SCALAR_CMLTH : SInst<"vclt", "bss", "Sh">; - def SCALAR_CMLTZH : SInst<"vcltz", "bs", "Sh">; + def SCALAR_CMEQRH : SInst<"vceq", "(1U)11", "Sh">; + def SCALAR_CMEQZH : SInst<"vceqz", "(1U)1", "Sh">; + def SCALAR_CMGERH : SInst<"vcge", "(1U)11", "Sh">; + def SCALAR_CMGEZH : SInst<"vcgez", "(1U)1", "Sh">; + def SCALAR_CMGTRH : SInst<"vcgt", "(1U)11", "Sh">; + def SCALAR_CMGTZH : SInst<"vcgtz", "(1U)1", "Sh">; + def SCALAR_CMLERH : SInst<"vcle", "(1U)11", "Sh">; + def SCALAR_CMLEZH : SInst<"vclez", "(1U)1", "Sh">; + def SCALAR_CMLTH : SInst<"vclt", "(1U)11", "Sh">; + def SCALAR_CMLTZH : SInst<"vcltz", "(1U)1", "Sh">; // Absolute Compare Mask Greater Than Or Equal - def SCALAR_FACGEH : IInst<"vcage", "bss", "Sh">; - def SCALAR_FACLEH : IInst<"vcale", "bss", "Sh">; + def SCALAR_FACGEH : IInst<"vcage", "(1U)11", "Sh">; + def SCALAR_FACLEH : IInst<"vcale", "(1U)11", "Sh">; // Absolute Compare Mask Greater Than - def SCALAR_FACGT : IInst<"vcagt", "bss", "Sh">; - def SCALAR_FACLT : IInst<"vcalt", "bss", "Sh">; + def SCALAR_FACGT : IInst<"vcagt", "(1U)11", "Sh">; + def SCALAR_FACLT : IInst<"vcalt", "(1U)11", "Sh">; // Scalar Absolute Value - def SCALAR_ABSH : SInst<"vabs", "ss", "Sh">; + def SCALAR_ABSH : SInst<"vabs", "11", "Sh">; // Scalar Absolute Difference - def SCALAR_ABDH: IInst<"vabd", "sss", "Sh">; + def SCALAR_ABDH: IInst<"vabd", "111", "Sh">; // Add/Sub - def VADDSH : SInst<"vadd", "sss", "Sh">; - def VSUBHS : SInst<"vsub", "sss", "Sh">; + def VADDSH : SInst<"vadd", "111", "Sh">; + def VSUBHS : SInst<"vsub", "111", "Sh">; // Max/Min - def VMAXHS : SInst<"vmax", "sss", "Sh">; - def VMINHS : SInst<"vmin", "sss", "Sh">; - def FMAXNMHS : SInst<"vmaxnm", "sss", "Sh">; - def FMINNMHS : SInst<"vminnm", "sss", "Sh">; + def VMAXHS : SInst<"vmax", "111", "Sh">; + def VMINHS : SInst<"vmin", "111", "Sh">; + def FMAXNMHS : SInst<"vmaxnm", "111", "Sh">; + def FMINNMHS : SInst<"vminnm", "111", "Sh">; // Multiplication/Division - def VMULHS : SInst<"vmul", "sss", "Sh">; - def MULXHS : SInst<"vmulx", "sss", "Sh">; - def FDIVHS : SInst<"vdiv", "sss", "Sh">; + def VMULHS : SInst<"vmul", "111", "Sh">; + def MULXHS : SInst<"vmulx", "111", "Sh">; + def FDIVHS : SInst<"vdiv", "111", "Sh">; // Vector fused multiply-add operations - def VFMAHS : SInst<"vfma", "ssss", "Sh">; - def VFMSHS : SInst<"vfms", "ssss", "Sh">; + def VFMAHS : SInst<"vfma", "1111", "Sh">; + def VFMSHS : SInst<"vfms", "1111", "Sh">; } diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td index 127c5af97ce6..25db142624ee 100644 --- a/clang/include/clang/Basic/arm_neon.td +++ b/clang/include/clang/Basic/arm_neon.td @@ -226,240 +226,240 @@ def OP_FMLSL_LN_Hi : Op<(call "vfmlsl_high", $p0, $p1, //////////////////////////////////////////////////////////////////////////////// // E.3.1 Addition -def VADD : IOpInst<"vadd", "ddd", +def VADD : IOpInst<"vadd", "...", "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUl", OP_ADD>; -def VADDL : SOpInst<"vaddl", "wdd", "csiUcUsUi", OP_ADDL>; -def VADDW : SOpInst<"vaddw", "wwd", "csiUcUsUi", OP_ADDW>; -def VHADD : SInst<"vhadd", "ddd", "csiUcUsUiQcQsQiQUcQUsQUi">; -def VRHADD : SInst<"vrhadd", "ddd", "csiUcUsUiQcQsQiQUcQUsQUi">; -def VQADD : SInst<"vqadd", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VADDHN : IInst<"vaddhn", "hkk", "silUsUiUl">; -def VRADDHN : IInst<"vraddhn", "hkk", "silUsUiUl">; +def VADDL : SOpInst<"vaddl", "(>Q)..", "csiUcUsUi", OP_ADDL>; +def VADDW : SOpInst<"vaddw", "(>Q)(>Q).", "csiUcUsUi", OP_ADDW>; +def VHADD : SInst<"vhadd", "...", "csiUcUsUiQcQsQiQUcQUsQUi">; +def VRHADD : SInst<"vrhadd", "...", "csiUcUsUiQcQsQiQUcQUsQUi">; +def VQADD : SInst<"vqadd", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VADDHN : IInst<"vaddhn", "; +def VRADDHN : IInst<"vraddhn", "; //////////////////////////////////////////////////////////////////////////////// // E.3.2 Multiplication -def VMUL : IOpInst<"vmul", "ddd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MUL>; -def VMULP : SInst<"vmul", "ddd", "PcQPc">; -def VMLA : IOpInst<"vmla", "dddd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MLA>; -def VMLAL : SOpInst<"vmlal", "wwdd", "csiUcUsUi", OP_MLAL>; -def VMLS : IOpInst<"vmls", "dddd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MLS>; -def VMLSL : SOpInst<"vmlsl", "wwdd", "csiUcUsUi", OP_MLSL>; -def VQDMULH : SInst<"vqdmulh", "ddd", "siQsQi">; -def VQRDMULH : SInst<"vqrdmulh", "ddd", "siQsQi">; +def VMUL : IOpInst<"vmul", "...", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MUL>; +def VMULP : SInst<"vmul", "...", "PcQPc">; +def VMLA : IOpInst<"vmla", "....", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MLA>; +def VMLAL : SOpInst<"vmlal", "(>Q)(>Q)..", "csiUcUsUi", OP_MLAL>; +def VMLS : IOpInst<"vmls", "....", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MLS>; +def VMLSL : SOpInst<"vmlsl", "(>Q)(>Q)..", "csiUcUsUi", OP_MLSL>; +def VQDMULH : SInst<"vqdmulh", "...", "siQsQi">; +def VQRDMULH : SInst<"vqrdmulh", "...", "siQsQi">; let ArchGuard = "defined(__ARM_FEATURE_QRDMX)" in { -def VQRDMLAH : SOpInst<"vqrdmlah", "dddd", "siQsQi", OP_QRDMLAH>; -def VQRDMLSH : SOpInst<"vqrdmlsh", "dddd", "siQsQi", OP_QRDMLSH>; +def VQRDMLAH : SOpInst<"vqrdmlah", "....", "siQsQi", OP_QRDMLAH>; +def VQRDMLSH : SOpInst<"vqrdmlsh", "....", "siQsQi", OP_QRDMLSH>; } -def VQDMLAL : SInst<"vqdmlal", "wwdd", "si">; -def VQDMLSL : SInst<"vqdmlsl", "wwdd", "si">; -def VMULL : SInst<"vmull", "wdd", "csiUcUsUiPc">; -def VQDMULL : SInst<"vqdmull", "wdd", "si">; +def VQDMLAL : SInst<"vqdmlal", "(>Q)(>Q)..", "si">; +def VQDMLSL : SInst<"vqdmlsl", "(>Q)(>Q)..", "si">; +def VMULL : SInst<"vmull", "(>Q)..", "csiUcUsUiPc">; +def VQDMULL : SInst<"vqdmull", "(>Q)..", "si">; //////////////////////////////////////////////////////////////////////////////// // E.3.3 Subtraction -def VSUB : IOpInst<"vsub", "ddd", +def VSUB : IOpInst<"vsub", "...", "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUl", OP_SUB>; -def VSUBL : SOpInst<"vsubl", "wdd", "csiUcUsUi", OP_SUBL>; -def VSUBW : SOpInst<"vsubw", "wwd", "csiUcUsUi", OP_SUBW>; -def VQSUB : SInst<"vqsub", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VHSUB : SInst<"vhsub", "ddd", "csiUcUsUiQcQsQiQUcQUsQUi">; -def VSUBHN : IInst<"vsubhn", "hkk", "silUsUiUl">; -def VRSUBHN : IInst<"vrsubhn", "hkk", "silUsUiUl">; +def VSUBL : SOpInst<"vsubl", "(>Q)..", "csiUcUsUi", OP_SUBL>; +def VSUBW : SOpInst<"vsubw", "(>Q)(>Q).", "csiUcUsUi", OP_SUBW>; +def VQSUB : SInst<"vqsub", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VHSUB : SInst<"vhsub", "...", "csiUcUsUiQcQsQiQUcQUsQUi">; +def VSUBHN : IInst<"vsubhn", "; +def VRSUBHN : IInst<"vrsubhn", "; //////////////////////////////////////////////////////////////////////////////// // E.3.4 Comparison -def VCEQ : IOpInst<"vceq", "udd", "csifUcUsUiPcQcQsQiQfQUcQUsQUiQPc", OP_EQ>; -def VCGE : SOpInst<"vcge", "udd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_GE>; +def VCEQ : IOpInst<"vceq", "U..", "csifUcUsUiPcQcQsQiQfQUcQUsQUiQPc", OP_EQ>; +def VCGE : SOpInst<"vcge", "U..", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_GE>; let InstName = "vcge" in -def VCLE : SOpInst<"vcle", "udd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_LE>; -def VCGT : SOpInst<"vcgt", "udd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_GT>; +def VCLE : SOpInst<"vcle", "U..", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_LE>; +def VCGT : SOpInst<"vcgt", "U..", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_GT>; let InstName = "vcgt" in -def VCLT : SOpInst<"vclt", "udd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_LT>; +def VCLT : SOpInst<"vclt", "U..", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_LT>; let InstName = "vacge" in { -def VCAGE : IInst<"vcage", "udd", "fQf">; -def VCALE : IInst<"vcale", "udd", "fQf">; +def VCAGE : IInst<"vcage", "U..", "fQf">; +def VCALE : IInst<"vcale", "U..", "fQf">; } let InstName = "vacgt" in { -def VCAGT : IInst<"vcagt", "udd", "fQf">; -def VCALT : IInst<"vcalt", "udd", "fQf">; +def VCAGT : IInst<"vcagt", "U..", "fQf">; +def VCALT : IInst<"vcalt", "U..", "fQf">; } -def VTST : WInst<"vtst", "udd", "csiUcUsUiPcPsQcQsQiQUcQUsQUiQPcQPs">; +def VTST : WInst<"vtst", "U..", "csiUcUsUiPcPsQcQsQiQUcQUsQUiQPcQPs">; //////////////////////////////////////////////////////////////////////////////// // E.3.5 Absolute Difference -def VABD : SInst<"vabd", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQf">; -def VABDL : SOpInst<"vabdl", "wdd", "csiUcUsUi", OP_ABDL>; -def VABA : SOpInst<"vaba", "dddd", "csiUcUsUiQcQsQiQUcQUsQUi", OP_ABA>; -def VABAL : SOpInst<"vabal", "wwdd", "csiUcUsUi", OP_ABAL>; +def VABD : SInst<"vabd", "...", "csiUcUsUifQcQsQiQUcQUsQUiQf">; +def VABDL : SOpInst<"vabdl", "(>Q)..", "csiUcUsUi", OP_ABDL>; +def VABA : SOpInst<"vaba", "....", "csiUcUsUiQcQsQiQUcQUsQUi", OP_ABA>; +def VABAL : SOpInst<"vabal", "(>Q)(>Q)..", "csiUcUsUi", OP_ABAL>; //////////////////////////////////////////////////////////////////////////////// // E.3.6 Max/Min -def VMAX : SInst<"vmax", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQf">; -def VMIN : SInst<"vmin", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQf">; +def VMAX : SInst<"vmax", "...", "csiUcUsUifQcQsQiQUcQUsQUiQf">; +def VMIN : SInst<"vmin", "...", "csiUcUsUifQcQsQiQUcQUsQUiQf">; //////////////////////////////////////////////////////////////////////////////// // E.3.7 Pairwise Addition -def VPADD : IInst<"vpadd", "ddd", "csiUcUsUif">; -def VPADDL : SInst<"vpaddl", "nd", "csiUcUsUiQcQsQiQUcQUsQUi">; -def VPADAL : SInst<"vpadal", "nnd", "csiUcUsUiQcQsQiQUcQUsQUi">; +def VPADD : IInst<"vpadd", "...", "csiUcUsUif">; +def VPADDL : SInst<"vpaddl", ">.", "csiUcUsUiQcQsQiQUcQUsQUi">; +def VPADAL : SInst<"vpadal", ">>.", "csiUcUsUiQcQsQiQUcQUsQUi">; //////////////////////////////////////////////////////////////////////////////// // E.3.8-9 Folding Max/Min -def VPMAX : SInst<"vpmax", "ddd", "csiUcUsUif">; -def VPMIN : SInst<"vpmin", "ddd", "csiUcUsUif">; +def VPMAX : SInst<"vpmax", "...", "csiUcUsUif">; +def VPMIN : SInst<"vpmin", "...", "csiUcUsUif">; //////////////////////////////////////////////////////////////////////////////// // E.3.10 Reciprocal/Sqrt -def VRECPS : IInst<"vrecps", "ddd", "fQf">; -def VRSQRTS : IInst<"vrsqrts", "ddd", "fQf">; +def VRECPS : IInst<"vrecps", "...", "fQf">; +def VRSQRTS : IInst<"vrsqrts", "...", "fQf">; //////////////////////////////////////////////////////////////////////////////// // E.3.11 Shifts by signed variable -def VSHL : SInst<"vshl", "ddx", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VQSHL : SInst<"vqshl", "ddx", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VRSHL : SInst<"vrshl", "ddx", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VQRSHL : SInst<"vqrshl", "ddx", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VSHL : SInst<"vshl", "..S", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VQSHL : SInst<"vqshl", "..S", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VRSHL : SInst<"vrshl", "..S", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VQRSHL : SInst<"vqrshl", "..S", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; //////////////////////////////////////////////////////////////////////////////// // E.3.12 Shifts by constant let isShift = 1 in { -def VSHR_N : SInst<"vshr_n", "ddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VSHL_N : IInst<"vshl_n", "ddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VRSHR_N : SInst<"vrshr_n", "ddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VSRA_N : SInst<"vsra_n", "dddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VRSRA_N : SInst<"vrsra_n", "dddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VQSHL_N : SInst<"vqshl_n", "ddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VQSHLU_N : SInst<"vqshlu_n", "udi", "csilQcQsQiQl">; -def VSHRN_N : IInst<"vshrn_n", "hki", "silUsUiUl">; -def VQSHRUN_N : SInst<"vqshrun_n", "eki", "sil">; -def VQRSHRUN_N : SInst<"vqrshrun_n", "eki", "sil">; -def VQSHRN_N : SInst<"vqshrn_n", "hki", "silUsUiUl">; -def VRSHRN_N : IInst<"vrshrn_n", "hki", "silUsUiUl">; -def VQRSHRN_N : SInst<"vqrshrn_n", "hki", "silUsUiUl">; -def VSHLL_N : SInst<"vshll_n", "wdi", "csiUcUsUi">; +def VSHR_N : SInst<"vshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VSHL_N : IInst<"vshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VRSHR_N : SInst<"vrshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VSRA_N : SInst<"vsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VRSRA_N : SInst<"vrsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VQSHL_N : SInst<"vqshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VQSHLU_N : SInst<"vqshlu_n", "U.I", "csilQcQsQiQl">; +def VSHRN_N : IInst<"vshrn_n", "; +def VQSHRUN_N : SInst<"vqshrun_n", "(; +def VQRSHRUN_N : SInst<"vqrshrun_n", "(; +def VQSHRN_N : SInst<"vqshrn_n", "; +def VRSHRN_N : IInst<"vrshrn_n", "; +def VQRSHRN_N : SInst<"vqrshrn_n", "; +def VSHLL_N : SInst<"vshll_n", "(>Q).I", "csiUcUsUi">; //////////////////////////////////////////////////////////////////////////////// // E.3.13 Shifts with insert -def VSRI_N : WInst<"vsri_n", "dddi", +def VSRI_N : WInst<"vsri_n", "...I", "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs">; -def VSLI_N : WInst<"vsli_n", "dddi", +def VSLI_N : WInst<"vsli_n", "...I", "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs">; } //////////////////////////////////////////////////////////////////////////////// // E.3.14 Loads and stores of a single vector -def VLD1 : WInst<"vld1", "dc", +def VLD1 : WInst<"vld1", ".(c*!)", "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">; -def VLD1_X2 : WInst<"vld1_x2", "2c", +def VLD1_X2 : WInst<"vld1_x2", "2(c*!)", "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">; -def VLD1_X3 : WInst<"vld1_x3", "3c", +def VLD1_X3 : WInst<"vld1_x3", "3(c*!)", "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">; -def VLD1_X4 : WInst<"vld1_x4", "4c", +def VLD1_X4 : WInst<"vld1_x4", "4(c*!)", "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">; -def VLD1_LANE : WInst<"vld1_lane", "dcdi", +def VLD1_LANE : WInst<"vld1_lane", ".(c*!).I", "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">; -def VLD1_DUP : WInst<"vld1_dup", "dc", +def VLD1_DUP : WInst<"vld1_dup", ".(c*!)", "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">; -def VST1 : WInst<"vst1", "vpd", +def VST1 : WInst<"vst1", "v*(.!)", "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">; -def VST1_X2 : WInst<"vst1_x2", "vp2", +def VST1_X2 : WInst<"vst1_x2", "v*(2!)", "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">; -def VST1_X3 : WInst<"vst1_x3", "vp3", +def VST1_X3 : WInst<"vst1_x3", "v*(3!)", "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">; -def VST1_X4 : WInst<"vst1_x4", "vp4", +def VST1_X4 : WInst<"vst1_x4", "v*(4!)", "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">; -def VST1_LANE : WInst<"vst1_lane", "vpdi", +def VST1_LANE : WInst<"vst1_lane", "v*(.!)I", "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">; let ArchGuard = "(__ARM_FP & 2)" in { -def VLD1_F16 : WInst<"vld1", "dc", "hQh">; -def VLD1_X2_F16 : WInst<"vld1_x2", "2c", "hQh">; -def VLD1_X3_F16 : WInst<"vld1_x3", "3c", "hQh">; -def VLD1_X4_F16 : WInst<"vld1_x4", "4c", "hQh">; -def VLD1_LANE_F16 : WInst<"vld1_lane", "dcdi", "hQh">; -def VLD1_DUP_F16 : WInst<"vld1_dup", "dc", "hQh">; -def VST1_F16 : WInst<"vst1", "vpd", "hQh">; -def VST1_X2_F16 : WInst<"vst1_x2", "vp2", "hQh">; -def VST1_X3_F16 : WInst<"vst1_x3", "vp3", "hQh">; -def VST1_X4_F16 : WInst<"vst1_x4", "vp4", "hQh">; -def VST1_LANE_F16 : WInst<"vst1_lane", "vpdi", "hQh">; +def VLD1_F16 : WInst<"vld1", ".(c*!)", "hQh">; +def VLD1_X2_F16 : WInst<"vld1_x2", "2(c*!)", "hQh">; +def VLD1_X3_F16 : WInst<"vld1_x3", "3(c*!)", "hQh">; +def VLD1_X4_F16 : WInst<"vld1_x4", "4(c*!)", "hQh">; +def VLD1_LANE_F16 : WInst<"vld1_lane", ".(c*!).I", "hQh">; +def VLD1_DUP_F16 : WInst<"vld1_dup", ".(c*!)", "hQh">; +def VST1_F16 : WInst<"vst1", "v*(.!)", "hQh">; +def VST1_X2_F16 : WInst<"vst1_x2", "v*(2!)", "hQh">; +def VST1_X3_F16 : WInst<"vst1_x3", "v*(3!)", "hQh">; +def VST1_X4_F16 : WInst<"vst1_x4", "v*(4!)", "hQh">; +def VST1_LANE_F16 : WInst<"vst1_lane", "v*(.!)I", "hQh">; } //////////////////////////////////////////////////////////////////////////////// // E.3.15 Loads and stores of an N-element structure -def VLD2 : WInst<"vld2", "2c", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; -def VLD3 : WInst<"vld3", "3c", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; -def VLD4 : WInst<"vld4", "4c", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; -def VLD2_DUP : WInst<"vld2_dup", "2c", +def VLD2 : WInst<"vld2", "2(c*!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; +def VLD3 : WInst<"vld3", "3(c*!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; +def VLD4 : WInst<"vld4", "4(c*!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; +def VLD2_DUP : WInst<"vld2_dup", "2(c*!)", "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">; -def VLD3_DUP : WInst<"vld3_dup", "3c", +def VLD3_DUP : WInst<"vld3_dup", "3(c*!)", "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">; -def VLD4_DUP : WInst<"vld4_dup", "4c", +def VLD4_DUP : WInst<"vld4_dup", "4(c*!)", "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">; -def VLD2_LANE : WInst<"vld2_lane", "2c2i", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; -def VLD3_LANE : WInst<"vld3_lane", "3c3i", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; -def VLD4_LANE : WInst<"vld4_lane", "4c4i", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; -def VST2 : WInst<"vst2", "vp2", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; -def VST3 : WInst<"vst3", "vp3", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; -def VST4 : WInst<"vst4", "vp4", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; -def VST2_LANE : WInst<"vst2_lane", "vp2i", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; -def VST3_LANE : WInst<"vst3_lane", "vp3i", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; -def VST4_LANE : WInst<"vst4_lane", "vp4i", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; +def VLD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; +def VLD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; +def VLD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; +def VST2 : WInst<"vst2", "v*(2!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; +def VST3 : WInst<"vst3", "v*(3!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; +def VST4 : WInst<"vst4", "v*(4!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; +def VST2_LANE : WInst<"vst2_lane", "v*(2!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; +def VST3_LANE : WInst<"vst3_lane", "v*(3!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; +def VST4_LANE : WInst<"vst4_lane", "v*(4!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; let ArchGuard = "(__ARM_FP & 2)" in { -def VLD2_F16 : WInst<"vld2", "2c", "hQh">; -def VLD3_F16 : WInst<"vld3", "3c", "hQh">; -def VLD4_F16 : WInst<"vld4", "4c", "hQh">; -def VLD2_DUP_F16 : WInst<"vld2_dup", "2c", "hQh">; -def VLD3_DUP_F16 : WInst<"vld3_dup", "3c", "hQh">; -def VLD4_DUP_F16 : WInst<"vld4_dup", "4c", "hQh">; -def VLD2_LANE_F16 : WInst<"vld2_lane", "2c2i", "hQh">; -def VLD3_LANE_F16 : WInst<"vld3_lane", "3c3i", "hQh">; -def VLD4_LANE_F16 : WInst<"vld4_lane", "4c4i", "hQh">; -def VST2_F16 : WInst<"vst2", "vp2", "hQh">; -def VST3_F16 : WInst<"vst3", "vp3", "hQh">; -def VST4_F16 : WInst<"vst4", "vp4", "hQh">; -def VST2_LANE_F16 : WInst<"vst2_lane", "vp2i", "hQh">; -def VST3_LANE_F16 : WInst<"vst3_lane", "vp3i", "hQh">; -def VST4_LANE_F16 : WInst<"vst4_lane", "vp4i", "hQh">; +def VLD2_F16 : WInst<"vld2", "2(c*!)", "hQh">; +def VLD3_F16 : WInst<"vld3", "3(c*!)", "hQh">; +def VLD4_F16 : WInst<"vld4", "4(c*!)", "hQh">; +def VLD2_DUP_F16 : WInst<"vld2_dup", "2(c*!)", "hQh">; +def VLD3_DUP_F16 : WInst<"vld3_dup", "3(c*!)", "hQh">; +def VLD4_DUP_F16 : WInst<"vld4_dup", "4(c*!)", "hQh">; +def VLD2_LANE_F16 : WInst<"vld2_lane", "2(c*!)2I", "hQh">; +def VLD3_LANE_F16 : WInst<"vld3_lane", "3(c*!)3I", "hQh">; +def VLD4_LANE_F16 : WInst<"vld4_lane", "4(c*!)4I", "hQh">; +def VST2_F16 : WInst<"vst2", "v*(2!)", "hQh">; +def VST3_F16 : WInst<"vst3", "v*(3!)", "hQh">; +def VST4_F16 : WInst<"vst4", "v*(4!)", "hQh">; +def VST2_LANE_F16 : WInst<"vst2_lane", "v*(2!)I", "hQh">; +def VST3_LANE_F16 : WInst<"vst3_lane", "v*(3!)I", "hQh">; +def VST4_LANE_F16 : WInst<"vst4_lane", "v*(4!)I", "hQh">; } //////////////////////////////////////////////////////////////////////////////// // E.3.16 Extract lanes from a vector let InstName = "vmov" in -def VGET_LANE : IInst<"vget_lane", "sdi", +def VGET_LANE : IInst<"vget_lane", "1.I", "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl">; //////////////////////////////////////////////////////////////////////////////// // E.3.17 Set lanes within a vector let InstName = "vmov" in -def VSET_LANE : IInst<"vset_lane", "dsdi", +def VSET_LANE : IInst<"vset_lane", ".1.I", "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl">; //////////////////////////////////////////////////////////////////////////////// // E.3.18 Initialize a vector from bit pattern -def VCREATE : NoTestOpInst<"vcreate", "dl", "csihfUcUsUiUlPcPsl", OP_CAST> { +def VCREATE : NoTestOpInst<"vcreate", ".(IU>)", "csihfUcUsUiUlPcPsl", OP_CAST> { let BigEndianSafe = 1; } //////////////////////////////////////////////////////////////////////////////// // E.3.19 Set all lanes to same value let InstName = "vmov" in { -def VDUP_N : WOpInst<"vdup_n", "ds", +def VDUP_N : WOpInst<"vdup_n", ".1", "UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUl", OP_DUP>; -def VMOV_N : WOpInst<"vmov_n", "ds", +def VMOV_N : WOpInst<"vmov_n", ".1", "UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUl", OP_DUP>; } let InstName = "" in -def VDUP_LANE: WOpInst<"vdup_lane", "dgi", +def VDUP_LANE: WOpInst<"vdup_lane", ".qI", "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl", OP_DUP_LN>; //////////////////////////////////////////////////////////////////////////////// // E.3.20 Combining vectors -def VCOMBINE : NoTestOpInst<"vcombine", "kdd", "csilhfUcUsUiUlPcPs", OP_CONC>; +def VCOMBINE : NoTestOpInst<"vcombine", "Q..", "csilhfUcUsUiUlPcPs", OP_CONC>; //////////////////////////////////////////////////////////////////////////////// // E.3.21 Splitting vectors @@ -468,127 +468,127 @@ def VCOMBINE : NoTestOpInst<"vcombine", "kdd", "csilhfUcUsUiUlPcPs", OP_CONC>; // versions of these intrinsics in both AArch32 and AArch64 architectures. See // D45668 for more details. let InstName = "vmov" in { -def VGET_HIGH : NoTestOpInst<"vget_high", "dk", "csilhfUcUsUiUlPcPs", OP_HI>; -def VGET_LOW : NoTestOpInst<"vget_low", "dk", "csilhfUcUsUiUlPcPs", OP_LO>; +def VGET_HIGH : NoTestOpInst<"vget_high", ".Q", "csilhfUcUsUiUlPcPs", OP_HI>; +def VGET_LOW : NoTestOpInst<"vget_low", ".Q", "csilhfUcUsUiUlPcPs", OP_LO>; } //////////////////////////////////////////////////////////////////////////////// // E.3.22 Converting vectors let ArchGuard = "(__ARM_FP & 2)" in { - def VCVT_F16_F32 : SInst<"vcvt_f16_f32", "md", "Hf">; - def VCVT_F32_F16 : SInst<"vcvt_f32_f16", "wd", "h">; + def VCVT_F16_F32 : SInst<"vcvt_f16_f32", "(; + def VCVT_F32_F16 : SInst<"vcvt_f32_f16", "(>Q)(.!)", "h">; } -def VCVT_S32 : SInst<"vcvt_s32", "xd", "fQf">; -def VCVT_U32 : SInst<"vcvt_u32", "ud", "fQf">; -def VCVT_F32 : SInst<"vcvt_f32", "fd", "iUiQiQUi">; +def VCVT_S32 : SInst<"vcvt_s32", "S.", "fQf">; +def VCVT_U32 : SInst<"vcvt_u32", "U.", "fQf">; +def VCVT_F32 : SInst<"vcvt_f32", "F(.!)", "iUiQiQUi">; let isVCVT_N = 1 in { -def VCVT_N_S32 : SInst<"vcvt_n_s32", "xdi", "fQf">; -def VCVT_N_U32 : SInst<"vcvt_n_u32", "udi", "fQf">; -def VCVT_N_F32 : SInst<"vcvt_n_f32", "fdi", "iUiQiQUi">; +def VCVT_N_S32 : SInst<"vcvt_n_s32", "S.I", "fQf">; +def VCVT_N_U32 : SInst<"vcvt_n_u32", "U.I", "fQf">; +def VCVT_N_F32 : SInst<"vcvt_n_f32", "F(.!)I", "iUiQiQUi">; } -def VMOVN : IInst<"vmovn", "hk", "silUsUiUl">; -def VMOVL : SInst<"vmovl", "wd", "csiUcUsUi">; -def VQMOVN : SInst<"vqmovn", "hk", "silUsUiUl">; -def VQMOVUN : SInst<"vqmovun", "ek", "sil">; +def VMOVN : IInst<"vmovn", "; +def VMOVL : SInst<"vmovl", "(>Q).", "csiUcUsUi">; +def VQMOVN : SInst<"vqmovn", "; +def VQMOVUN : SInst<"vqmovun", "(; //////////////////////////////////////////////////////////////////////////////// // E.3.23-24 Table lookup, Extended table lookup let InstName = "vtbl" in { -def VTBL1 : WInst<"vtbl1", "ddt", "UccPc">; -def VTBL2 : WInst<"vtbl2", "d2t", "UccPc">; -def VTBL3 : WInst<"vtbl3", "d3t", "UccPc">; -def VTBL4 : WInst<"vtbl4", "d4t", "UccPc">; +def VTBL1 : WInst<"vtbl1", "..p", "UccPc">; +def VTBL2 : WInst<"vtbl2", ".2p", "UccPc">; +def VTBL3 : WInst<"vtbl3", ".3p", "UccPc">; +def VTBL4 : WInst<"vtbl4", ".4p", "UccPc">; } let InstName = "vtbx" in { -def VTBX1 : WInst<"vtbx1", "dddt", "UccPc">; -def VTBX2 : WInst<"vtbx2", "dd2t", "UccPc">; -def VTBX3 : WInst<"vtbx3", "dd3t", "UccPc">; -def VTBX4 : WInst<"vtbx4", "dd4t", "UccPc">; +def VTBX1 : WInst<"vtbx1", "...p", "UccPc">; +def VTBX2 : WInst<"vtbx2", "..2p", "UccPc">; +def VTBX3 : WInst<"vtbx3", "..3p", "UccPc">; +def VTBX4 : WInst<"vtbx4", "..4p", "UccPc">; } //////////////////////////////////////////////////////////////////////////////// // E.3.25 Operations with a scalar value -def VMLA_LANE : IOpInst<"vmla_lane", "dddgi", +def VMLA_LANE : IOpInst<"vmla_lane", "...qI", "siUsUifQsQiQUsQUiQf", OP_MLA_LN>; -def VMLAL_LANE : SOpInst<"vmlal_lane", "wwddi", "siUsUi", OP_MLAL_LN>; -def VQDMLAL_LANE : SOpInst<"vqdmlal_lane", "wwddi", "si", OP_QDMLAL_LN>; -def VMLS_LANE : IOpInst<"vmls_lane", "dddgi", +def VMLAL_LANE : SOpInst<"vmlal_lane", "(>Q)(>Q)..I", "siUsUi", OP_MLAL_LN>; +def VQDMLAL_LANE : SOpInst<"vqdmlal_lane", "(>Q)(>Q)..I", "si", OP_QDMLAL_LN>; +def VMLS_LANE : IOpInst<"vmls_lane", "...qI", "siUsUifQsQiQUsQUiQf", OP_MLS_LN>; -def VMLSL_LANE : SOpInst<"vmlsl_lane", "wwddi", "siUsUi", OP_MLSL_LN>; -def VQDMLSL_LANE : SOpInst<"vqdmlsl_lane", "wwddi", "si", OP_QDMLSL_LN>; -def VMUL_N : IOpInst<"vmul_n", "dds", "sifUsUiQsQiQfQUsQUi", OP_MUL_N>; -def VMUL_LANE : IOpInst<"vmul_lane", "ddgi", +def VMLSL_LANE : SOpInst<"vmlsl_lane", "(>Q)(>Q)..I", "siUsUi", OP_MLSL_LN>; +def VQDMLSL_LANE : SOpInst<"vqdmlsl_lane", "(>Q)(>Q)..I", "si", OP_QDMLSL_LN>; +def VMUL_N : IOpInst<"vmul_n", "..1", "sifUsUiQsQiQfQUsQUi", OP_MUL_N>; +def VMUL_LANE : IOpInst<"vmul_lane", "..qI", "sifUsUiQsQiQfQUsQUi", OP_MUL_LN>; -def VMULL_N : SOpInst<"vmull_n", "wds", "siUsUi", OP_MULL_N>; -def VMULL_LANE : SOpInst<"vmull_lane", "wddi", "siUsUi", OP_MULL_LN>; -def VQDMULL_N : SOpInst<"vqdmull_n", "wds", "si", OP_QDMULL_N>; -def VQDMULL_LANE : SOpInst<"vqdmull_lane", "wddi", "si", OP_QDMULL_LN>; -def VQDMULH_N : SOpInst<"vqdmulh_n", "dds", "siQsQi", OP_QDMULH_N>; -def VQDMULH_LANE : SOpInst<"vqdmulh_lane", "ddgi", "siQsQi", OP_QDMULH_LN>; -def VQRDMULH_N : SOpInst<"vqrdmulh_n", "dds", "siQsQi", OP_QRDMULH_N>; -def VQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "ddgi", "siQsQi", OP_QRDMULH_LN>; +def VMULL_N : SOpInst<"vmull_n", "(>Q).1", "siUsUi", OP_MULL_N>; +def VMULL_LANE : SOpInst<"vmull_lane", "(>Q)..I", "siUsUi", OP_MULL_LN>; +def VQDMULL_N : SOpInst<"vqdmull_n", "(>Q).1", "si", OP_QDMULL_N>; +def VQDMULL_LANE : SOpInst<"vqdmull_lane", "(>Q)..I", "si", OP_QDMULL_LN>; +def VQDMULH_N : SOpInst<"vqdmulh_n", "..1", "siQsQi", OP_QDMULH_N>; +def VQDMULH_LANE : SOpInst<"vqdmulh_lane", "..qI", "siQsQi", OP_QDMULH_LN>; +def VQRDMULH_N : SOpInst<"vqrdmulh_n", "..1", "siQsQi", OP_QRDMULH_N>; +def VQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "..qI", "siQsQi", OP_QRDMULH_LN>; let ArchGuard = "defined(__ARM_FEATURE_QRDMX)" in { -def VQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "dddgi", "siQsQi", OP_QRDMLAH_LN>; -def VQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "dddgi", "siQsQi", OP_QRDMLSH_LN>; +def VQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "...qI", "siQsQi", OP_QRDMLAH_LN>; +def VQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "...qI", "siQsQi", OP_QRDMLSH_LN>; } -def VMLA_N : IOpInst<"vmla_n", "ddds", "siUsUifQsQiQUsQUiQf", OP_MLA_N>; -def VMLAL_N : SOpInst<"vmlal_n", "wwds", "siUsUi", OP_MLAL_N>; -def VQDMLAL_N : SOpInst<"vqdmlal_n", "wwds", "si", OP_QDMLAL_N>; -def VMLS_N : IOpInst<"vmls_n", "ddds", "siUsUifQsQiQUsQUiQf", OP_MLS_N>; -def VMLSL_N : SOpInst<"vmlsl_n", "wwds", "siUsUi", OP_MLSL_N>; -def VQDMLSL_N : SOpInst<"vqdmlsl_n", "wwds", "si", OP_QDMLSL_N>; +def VMLA_N : IOpInst<"vmla_n", "...1", "siUsUifQsQiQUsQUiQf", OP_MLA_N>; +def VMLAL_N : SOpInst<"vmlal_n", "(>Q)(>Q).1", "siUsUi", OP_MLAL_N>; +def VQDMLAL_N : SOpInst<"vqdmlal_n", "(>Q)(>Q).1", "si", OP_QDMLAL_N>; +def VMLS_N : IOpInst<"vmls_n", "...1", "siUsUifQsQiQUsQUiQf", OP_MLS_N>; +def VMLSL_N : SOpInst<"vmlsl_n", "(>Q)(>Q).1", "siUsUi", OP_MLSL_N>; +def VQDMLSL_N : SOpInst<"vqdmlsl_n", "(>Q)(>Q).1", "si", OP_QDMLSL_N>; //////////////////////////////////////////////////////////////////////////////// // E.3.26 Vector Extract -def VEXT : WInst<"vext", "dddi", +def VEXT : WInst<"vext", "...I", "cUcPcsUsPsiUilUlfQcQUcQPcQsQUsQPsQiQUiQlQUlQf">; //////////////////////////////////////////////////////////////////////////////// // E.3.27 Reverse vector elements -def VREV64 : WOpInst<"vrev64", "dd", "csiUcUsUiPcPsfQcQsQiQUcQUsQUiQPcQPsQf", +def VREV64 : WOpInst<"vrev64", "..", "csiUcUsUiPcPsfQcQsQiQUcQUsQUiQPcQPsQf", OP_REV64>; -def VREV32 : WOpInst<"vrev32", "dd", "csUcUsPcPsQcQsQUcQUsQPcQPs", OP_REV32>; -def VREV16 : WOpInst<"vrev16", "dd", "cUcPcQcQUcQPc", OP_REV16>; +def VREV32 : WOpInst<"vrev32", "..", "csUcUsPcPsQcQsQUcQUsQPcQPs", OP_REV32>; +def VREV16 : WOpInst<"vrev16", "..", "cUcPcQcQUcQPc", OP_REV16>; //////////////////////////////////////////////////////////////////////////////// // E.3.28 Other single operand arithmetic -def VABS : SInst<"vabs", "dd", "csifQcQsQiQf">; -def VQABS : SInst<"vqabs", "dd", "csiQcQsQi">; -def VNEG : SOpInst<"vneg", "dd", "csifQcQsQiQf", OP_NEG>; -def VQNEG : SInst<"vqneg", "dd", "csiQcQsQi">; -def VCLS : SInst<"vcls", "dd", "csiQcQsQi">; -def VCLZ : IInst<"vclz", "dd", "csiUcUsUiQcQsQiQUcQUsQUi">; -def VCNT : WInst<"vcnt", "dd", "UccPcQUcQcQPc">; -def VRECPE : SInst<"vrecpe", "dd", "fUiQfQUi">; -def VRSQRTE : SInst<"vrsqrte", "dd", "fUiQfQUi">; +def VABS : SInst<"vabs", "..", "csifQcQsQiQf">; +def VQABS : SInst<"vqabs", "..", "csiQcQsQi">; +def VNEG : SOpInst<"vneg", "..", "csifQcQsQiQf", OP_NEG>; +def VQNEG : SInst<"vqneg", "..", "csiQcQsQi">; +def VCLS : SInst<"vcls", "..", "csiQcQsQi">; +def VCLZ : IInst<"vclz", "..", "csiUcUsUiQcQsQiQUcQUsQUi">; +def VCNT : WInst<"vcnt", "..", "UccPcQUcQcQPc">; +def VRECPE : SInst<"vrecpe", "..", "fUiQfQUi">; +def VRSQRTE : SInst<"vrsqrte", "..", "fUiQfQUi">; //////////////////////////////////////////////////////////////////////////////// // E.3.29 Logical operations -def VMVN : LOpInst<"vmvn", "dd", "csiUcUsUiPcQcQsQiQUcQUsQUiQPc", OP_NOT>; -def VAND : LOpInst<"vand", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_AND>; -def VORR : LOpInst<"vorr", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_OR>; -def VEOR : LOpInst<"veor", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_XOR>; -def VBIC : LOpInst<"vbic", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ANDN>; -def VORN : LOpInst<"vorn", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ORN>; +def VMVN : LOpInst<"vmvn", "..", "csiUcUsUiPcQcQsQiQUcQUsQUiQPc", OP_NOT>; +def VAND : LOpInst<"vand", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_AND>; +def VORR : LOpInst<"vorr", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_OR>; +def VEOR : LOpInst<"veor", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_XOR>; +def VBIC : LOpInst<"vbic", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ANDN>; +def VORN : LOpInst<"vorn", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ORN>; let isHiddenLInst = 1 in -def VBSL : SInst<"vbsl", "dudd", +def VBSL : SInst<"vbsl", ".U..", "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPs">; //////////////////////////////////////////////////////////////////////////////// // E.3.30 Transposition operations -def VTRN : WInst<"vtrn", "2dd", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; -def VZIP : WInst<"vzip", "2dd", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; -def VUZP : WInst<"vuzp", "2dd", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; +def VTRN : WInst<"vtrn", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; +def VZIP : WInst<"vzip", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; +def VUZP : WInst<"vuzp", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; //////////////////////////////////////////////////////////////////////////////// // E.3.31 Vector reinterpret cast operations def VREINTERPRET - : NoTestOpInst<"vreinterpret", "dd", + : NoTestOpInst<"vreinterpret", "..", "csilUcUsUiUlhfPcPsQcQsQiQlQUcQUsQUiQUlQhQfQPcQPs", OP_REINT> { let CartesianProductOfTypes = 1; let ArchGuard = "!defined(__aarch64__)"; @@ -599,17 +599,17 @@ def VREINTERPRET // Vector fused multiply-add operations let ArchGuard = "defined(__ARM_FEATURE_FMA)" in { - def VFMA : SInst<"vfma", "dddd", "fQf">; - def VFMS : SOpInst<"vfms", "dddd", "fQf", OP_FMLS>; - def FMLA_N_F32 : SOpInst<"vfma_n", "ddds", "fQf", OP_FMLA_N>; + def VFMA : SInst<"vfma", "....", "fQf">; + def VFMS : SOpInst<"vfms", "....", "fQf", OP_FMLS>; + def FMLA_N_F32 : SOpInst<"vfma_n", "...1", "fQf", OP_FMLA_N>; } //////////////////////////////////////////////////////////////////////////////// // fp16 vector operations -def SCALAR_HALF_GET_LANE : IOpInst<"vget_lane", "sdi", "h", OP_SCALAR_HALF_GET_LN>; -def SCALAR_HALF_SET_LANE : IOpInst<"vset_lane", "dsdi", "h", OP_SCALAR_HALF_SET_LN>; -def SCALAR_HALF_GET_LANEQ : IOpInst<"vget_lane", "sdi", "Qh", OP_SCALAR_HALF_GET_LNQ>; -def SCALAR_HALF_SET_LANEQ : IOpInst<"vset_lane", "dsdi", "Qh", OP_SCALAR_HALF_SET_LNQ>; +def SCALAR_HALF_GET_LANE : IOpInst<"vget_lane", "1.I", "h", OP_SCALAR_HALF_GET_LN>; +def SCALAR_HALF_SET_LANE : IOpInst<"vset_lane", ".1.I", "h", OP_SCALAR_HALF_SET_LN>; +def SCALAR_HALF_GET_LANEQ : IOpInst<"vget_lane", "1.I", "Qh", OP_SCALAR_HALF_GET_LNQ>; +def SCALAR_HALF_SET_LANEQ : IOpInst<"vset_lane", ".1.I", "Qh", OP_SCALAR_HALF_SET_LNQ>; //////////////////////////////////////////////////////////////////////////////// // AArch64 Intrinsics @@ -618,474 +618,474 @@ let ArchGuard = "defined(__aarch64__)" in { //////////////////////////////////////////////////////////////////////////////// // Load/Store -def LD1 : WInst<"vld1", "dc", "dQdPlQPl">; -def LD2 : WInst<"vld2", "2c", "QUlQldQdPlQPl">; -def LD3 : WInst<"vld3", "3c", "QUlQldQdPlQPl">; -def LD4 : WInst<"vld4", "4c", "QUlQldQdPlQPl">; -def ST1 : WInst<"vst1", "vpd", "dQdPlQPl">; -def ST2 : WInst<"vst2", "vp2", "QUlQldQdPlQPl">; -def ST3 : WInst<"vst3", "vp3", "QUlQldQdPlQPl">; -def ST4 : WInst<"vst4", "vp4", "QUlQldQdPlQPl">; +def LD1 : WInst<"vld1", ".(c*!)", "dQdPlQPl">; +def LD2 : WInst<"vld2", "2(c*!)", "QUlQldQdPlQPl">; +def LD3 : WInst<"vld3", "3(c*!)", "QUlQldQdPlQPl">; +def LD4 : WInst<"vld4", "4(c*!)", "QUlQldQdPlQPl">; +def ST1 : WInst<"vst1", "v*(.!)", "dQdPlQPl">; +def ST2 : WInst<"vst2", "v*(2!)", "QUlQldQdPlQPl">; +def ST3 : WInst<"vst3", "v*(3!)", "QUlQldQdPlQPl">; +def ST4 : WInst<"vst4", "v*(4!)", "QUlQldQdPlQPl">; -def LD1_X2 : WInst<"vld1_x2", "2c", +def LD1_X2 : WInst<"vld1_x2", "2(c*!)", "dQdPlQPl">; -def LD1_X3 : WInst<"vld1_x3", "3c", +def LD1_X3 : WInst<"vld1_x3", "3(c*!)", "dQdPlQPl">; -def LD1_X4 : WInst<"vld1_x4", "4c", +def LD1_X4 : WInst<"vld1_x4", "4(c*!)", "dQdPlQPl">; -def ST1_X2 : WInst<"vst1_x2", "vp2", "dQdPlQPl">; -def ST1_X3 : WInst<"vst1_x3", "vp3", "dQdPlQPl">; -def ST1_X4 : WInst<"vst1_x4", "vp4", "dQdPlQPl">; +def ST1_X2 : WInst<"vst1_x2", "v*(2!)", "dQdPlQPl">; +def ST1_X3 : WInst<"vst1_x3", "v*(3!)", "dQdPlQPl">; +def ST1_X4 : WInst<"vst1_x4", "v*(4!)", "dQdPlQPl">; -def LD1_LANE : WInst<"vld1_lane", "dcdi", "dQdPlQPl">; -def LD2_LANE : WInst<"vld2_lane", "2c2i", "lUlQcQUcQPcQlQUldQdPlQPl">; -def LD3_LANE : WInst<"vld3_lane", "3c3i", "lUlQcQUcQPcQlQUldQdPlQPl">; -def LD4_LANE : WInst<"vld4_lane", "4c4i", "lUlQcQUcQPcQlQUldQdPlQPl">; -def ST1_LANE : WInst<"vst1_lane", "vpdi", "dQdPlQPl">; -def ST2_LANE : WInst<"vst2_lane", "vp2i", "lUlQcQUcQPcQlQUldQdPlQPl">; -def ST3_LANE : WInst<"vst3_lane", "vp3i", "lUlQcQUcQPcQlQUldQdPlQPl">; -def ST4_LANE : WInst<"vst4_lane", "vp4i", "lUlQcQUcQPcQlQUldQdPlQPl">; +def LD1_LANE : WInst<"vld1_lane", ".(c*!).I", "dQdPlQPl">; +def LD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "lUlQcQUcQPcQlQUldQdPlQPl">; +def LD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "lUlQcQUcQPcQlQUldQdPlQPl">; +def LD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "lUlQcQUcQPcQlQUldQdPlQPl">; +def ST1_LANE : WInst<"vst1_lane", "v*(.!)I", "dQdPlQPl">; +def ST2_LANE : WInst<"vst2_lane", "v*(2!)I", "lUlQcQUcQPcQlQUldQdPlQPl">; +def ST3_LANE : WInst<"vst3_lane", "v*(3!)I", "lUlQcQUcQPcQlQUldQdPlQPl">; +def ST4_LANE : WInst<"vst4_lane", "v*(4!)I", "lUlQcQUcQPcQlQUldQdPlQPl">; -def LD1_DUP : WInst<"vld1_dup", "dc", "dQdPlQPl">; -def LD2_DUP : WInst<"vld2_dup", "2c", "dQdPlQPl">; -def LD3_DUP : WInst<"vld3_dup", "3c", "dQdPlQPl">; -def LD4_DUP : WInst<"vld4_dup", "4c", "dQdPlQPl">; +def LD1_DUP : WInst<"vld1_dup", ".(c*!)", "dQdPlQPl">; +def LD2_DUP : WInst<"vld2_dup", "2(c*!)", "dQdPlQPl">; +def LD3_DUP : WInst<"vld3_dup", "3(c*!)", "dQdPlQPl">; +def LD4_DUP : WInst<"vld4_dup", "4(c*!)", "dQdPlQPl">; -def VLDRQ : WInst<"vldrq", "sc", "Pk">; -def VSTRQ : WInst<"vstrq", "vps", "Pk">; +def VLDRQ : WInst<"vldrq", "1(c*!)", "Pk">; +def VSTRQ : WInst<"vstrq", "v*(1!)", "Pk">; //////////////////////////////////////////////////////////////////////////////// // Addition -def ADD : IOpInst<"vadd", "ddd", "dQd", OP_ADD>; +def ADD : IOpInst<"vadd", "...", "dQd", OP_ADD>; //////////////////////////////////////////////////////////////////////////////// // Subtraction -def SUB : IOpInst<"vsub", "ddd", "dQd", OP_SUB>; +def SUB : IOpInst<"vsub", "...", "dQd", OP_SUB>; //////////////////////////////////////////////////////////////////////////////// // Multiplication -def MUL : IOpInst<"vmul", "ddd", "dQd", OP_MUL>; -def MLA : IOpInst<"vmla", "dddd", "dQd", OP_MLA>; -def MLS : IOpInst<"vmls", "dddd", "dQd", OP_MLS>; +def MUL : IOpInst<"vmul", "...", "dQd", OP_MUL>; +def MLA : IOpInst<"vmla", "....", "dQd", OP_MLA>; +def MLS : IOpInst<"vmls", "....", "dQd", OP_MLS>; //////////////////////////////////////////////////////////////////////////////// // Multiplication Extended -def MULX : SInst<"vmulx", "ddd", "fdQfQd">; +def MULX : SInst<"vmulx", "...", "fdQfQd">; //////////////////////////////////////////////////////////////////////////////// // Division -def FDIV : IOpInst<"vdiv", "ddd", "fdQfQd", OP_DIV>; +def FDIV : IOpInst<"vdiv", "...", "fdQfQd", OP_DIV>; //////////////////////////////////////////////////////////////////////////////// // Vector fused multiply-add operations -def FMLA : SInst<"vfma", "dddd", "dQd">; -def FMLS : SOpInst<"vfms", "dddd", "dQd", OP_FMLS>; +def FMLA : SInst<"vfma", "....", "dQd">; +def FMLS : SOpInst<"vfms", "....", "dQd", OP_FMLS>; //////////////////////////////////////////////////////////////////////////////// // MUL, MLA, MLS, FMA, FMS definitions with scalar argument -def VMUL_N_A64 : IOpInst<"vmul_n", "dds", "Qd", OP_MUL_N>; +def VMUL_N_A64 : IOpInst<"vmul_n", "..1", "Qd", OP_MUL_N>; -def FMLA_N : SOpInst<"vfma_n", "ddds", "dQd", OP_FMLA_N>; -def FMLS_N : SOpInst<"vfms_n", "ddds", "fdQfQd", OP_FMLS_N>; +def FMLA_N : SOpInst<"vfma_n", "...1", "dQd", OP_FMLA_N>; +def FMLS_N : SOpInst<"vfms_n", "...1", "fdQfQd", OP_FMLS_N>; -def MLA_N : SOpInst<"vmla_n", "ddds", "Qd", OP_MLA_N>; -def MLS_N : SOpInst<"vmls_n", "ddds", "Qd", OP_MLS_N>; +def MLA_N : SOpInst<"vmla_n", "...1", "Qd", OP_MLA_N>; +def MLS_N : SOpInst<"vmls_n", "...1", "Qd", OP_MLS_N>; //////////////////////////////////////////////////////////////////////////////// // Logical operations -def BSL : SInst<"vbsl", "dudd", "dPlQdQPl">; +def BSL : SInst<"vbsl", ".U..", "dPlQdQPl">; //////////////////////////////////////////////////////////////////////////////// // Absolute Difference -def ABD : SInst<"vabd", "ddd", "dQd">; +def ABD : SInst<"vabd", "...", "dQd">; //////////////////////////////////////////////////////////////////////////////// // saturating absolute/negate -def ABS : SInst<"vabs", "dd", "dQdlQl">; -def QABS : SInst<"vqabs", "dd", "lQl">; -def NEG : SOpInst<"vneg", "dd", "dlQdQl", OP_NEG>; -def QNEG : SInst<"vqneg", "dd", "lQl">; +def ABS : SInst<"vabs", "..", "dQdlQl">; +def QABS : SInst<"vqabs", "..", "lQl">; +def NEG : SOpInst<"vneg", "..", "dlQdQl", OP_NEG>; +def QNEG : SInst<"vqneg", "..", "lQl">; //////////////////////////////////////////////////////////////////////////////// // Signed Saturating Accumulated of Unsigned Value -def SUQADD : SInst<"vuqadd", "ddu", "csilQcQsQiQl">; +def SUQADD : SInst<"vuqadd", "..U", "csilQcQsQiQl">; //////////////////////////////////////////////////////////////////////////////// // Unsigned Saturating Accumulated of Signed Value -def USQADD : SInst<"vsqadd", "ddx", "UcUsUiUlQUcQUsQUiQUl">; +def USQADD : SInst<"vsqadd", "..S", "UcUsUiUlQUcQUsQUiQUl">; //////////////////////////////////////////////////////////////////////////////// // Reciprocal/Sqrt -def FRECPS : IInst<"vrecps", "ddd", "dQd">; -def FRSQRTS : IInst<"vrsqrts", "ddd", "dQd">; -def FRECPE : SInst<"vrecpe", "dd", "dQd">; -def FRSQRTE : SInst<"vrsqrte", "dd", "dQd">; -def FSQRT : SInst<"vsqrt", "dd", "fdQfQd">; +def FRECPS : IInst<"vrecps", "...", "dQd">; +def FRSQRTS : IInst<"vrsqrts", "...", "dQd">; +def FRECPE : SInst<"vrecpe", "..", "dQd">; +def FRSQRTE : SInst<"vrsqrte", "..", "dQd">; +def FSQRT : SInst<"vsqrt", "..", "fdQfQd">; //////////////////////////////////////////////////////////////////////////////// // bitwise reverse -def RBIT : IInst<"vrbit", "dd", "cUcPcQcQUcQPc">; +def RBIT : IInst<"vrbit", "..", "cUcPcQcQUcQPc">; //////////////////////////////////////////////////////////////////////////////// // Integer extract and narrow to high -def XTN2 : SOpInst<"vmovn_high", "qhk", "silUsUiUl", OP_XTN>; +def XTN2 : SOpInst<"vmovn_high", "(; //////////////////////////////////////////////////////////////////////////////// // Signed integer saturating extract and unsigned narrow to high -def SQXTUN2 : SOpInst<"vqmovun_high", "emd", "HsHiHl", OP_SQXTUN>; +def SQXTUN2 : SOpInst<"vqmovun_high", "(; //////////////////////////////////////////////////////////////////////////////// // Integer saturating extract and narrow to high -def QXTN2 : SOpInst<"vqmovn_high", "qhk", "silUsUiUl", OP_QXTN>; +def QXTN2 : SOpInst<"vqmovn_high", "(; //////////////////////////////////////////////////////////////////////////////// // Converting vectors -def VCVT_F32_F64 : SInst<"vcvt_f32_f64", "md", "Qd">; -def VCVT_F64_F32 : SInst<"vcvt_f64_f32", "wd", "f">; +def VCVT_F32_F64 : SInst<"vcvt_f32_f64", "(; +def VCVT_F64_F32 : SInst<"vcvt_f64_f32", "(>Q).", "f">; -def VCVT_S64 : SInst<"vcvt_s64", "xd", "dQd">; -def VCVT_U64 : SInst<"vcvt_u64", "ud", "dQd">; -def VCVT_F64 : SInst<"vcvt_f64", "Fd", "lUlQlQUl">; +def VCVT_S64 : SInst<"vcvt_s64", "S.", "dQd">; +def VCVT_U64 : SInst<"vcvt_u64", "U.", "dQd">; +def VCVT_F64 : SInst<"vcvt_f64", "F(.!)", "lUlQlQUl">; -def VCVT_HIGH_F16_F32 : SOpInst<"vcvt_high_f16", "hmj", "Hf", OP_VCVT_NA_HI_F16>; -def VCVT_HIGH_F32_F16 : SOpInst<"vcvt_high_f32", "wk", "h", OP_VCVT_EX_HI_F32>; -def VCVT_HIGH_F32_F64 : SOpInst<"vcvt_high_f32", "qfj", "d", OP_VCVT_NA_HI_F32>; -def VCVT_HIGH_F64_F32 : SOpInst<"vcvt_high_f64", "wj", "f", OP_VCVT_EX_HI_F64>; +def VCVT_HIGH_F16_F32 : SOpInst<"vcvt_high_f16", "<(; +def VCVT_HIGH_F32_F16 : SOpInst<"vcvt_high_f32", "(>Q)(Q!)", "h", OP_VCVT_EX_HI_F32>; +def VCVT_HIGH_F32_F64 : SOpInst<"vcvt_high_f32", "(; +def VCVT_HIGH_F64_F32 : SOpInst<"vcvt_high_f64", "(>Q)(Q!)", "f", OP_VCVT_EX_HI_F64>; -def VCVTX_F32_F64 : SInst<"vcvtx_f32", "fj", "d">; -def VCVTX_HIGH_F32_F64 : SOpInst<"vcvtx_high_f32", "qfj", "d", OP_VCVTX_HI>; +def VCVTX_F32_F64 : SInst<"vcvtx_f32", "(F<)(Q!)", "d">; +def VCVTX_HIGH_F32_F64 : SOpInst<"vcvtx_high_f32", "(; //////////////////////////////////////////////////////////////////////////////// // Comparison -def FCAGE : IInst<"vcage", "udd", "dQd">; -def FCAGT : IInst<"vcagt", "udd", "dQd">; -def FCALE : IInst<"vcale", "udd", "dQd">; -def FCALT : IInst<"vcalt", "udd", "dQd">; -def CMTST : WInst<"vtst", "udd", "lUlPlQlQUlQPl">; -def CFMEQ : SOpInst<"vceq", "udd", "lUldQdQlQUlPlQPl", OP_EQ>; -def CFMGE : SOpInst<"vcge", "udd", "lUldQdQlQUl", OP_GE>; -def CFMLE : SOpInst<"vcle", "udd", "lUldQdQlQUl", OP_LE>; -def CFMGT : SOpInst<"vcgt", "udd", "lUldQdQlQUl", OP_GT>; -def CFMLT : SOpInst<"vclt", "udd", "lUldQdQlQUl", OP_LT>; +def FCAGE : IInst<"vcage", "U..", "dQd">; +def FCAGT : IInst<"vcagt", "U..", "dQd">; +def FCALE : IInst<"vcale", "U..", "dQd">; +def FCALT : IInst<"vcalt", "U..", "dQd">; +def CMTST : WInst<"vtst", "U..", "lUlPlQlQUlQPl">; +def CFMEQ : SOpInst<"vceq", "U..", "lUldQdQlQUlPlQPl", OP_EQ>; +def CFMGE : SOpInst<"vcge", "U..", "lUldQdQlQUl", OP_GE>; +def CFMLE : SOpInst<"vcle", "U..", "lUldQdQlQUl", OP_LE>; +def CFMGT : SOpInst<"vcgt", "U..", "lUldQdQlQUl", OP_GT>; +def CFMLT : SOpInst<"vclt", "U..", "lUldQdQlQUl", OP_LT>; -def CMEQ : SInst<"vceqz", "ud", +def CMEQ : SInst<"vceqz", "U.", "csilfUcUsUiUlPcPsPlQcQsQiQlQfQUcQUsQUiQUlQPcQPsdQdQPl">; -def CMGE : SInst<"vcgez", "ud", "csilfdQcQsQiQlQfQd">; -def CMLE : SInst<"vclez", "ud", "csilfdQcQsQiQlQfQd">; -def CMGT : SInst<"vcgtz", "ud", "csilfdQcQsQiQlQfQd">; -def CMLT : SInst<"vcltz", "ud", "csilfdQcQsQiQlQfQd">; +def CMGE : SInst<"vcgez", "U.", "csilfdQcQsQiQlQfQd">; +def CMLE : SInst<"vclez", "U.", "csilfdQcQsQiQlQfQd">; +def CMGT : SInst<"vcgtz", "U.", "csilfdQcQsQiQlQfQd">; +def CMLT : SInst<"vcltz", "U.", "csilfdQcQsQiQlQfQd">; //////////////////////////////////////////////////////////////////////////////// // Max/Min Integer -def MAX : SInst<"vmax", "ddd", "dQd">; -def MIN : SInst<"vmin", "ddd", "dQd">; +def MAX : SInst<"vmax", "...", "dQd">; +def MIN : SInst<"vmin", "...", "dQd">; //////////////////////////////////////////////////////////////////////////////// // Pairwise Max/Min -def MAXP : SInst<"vpmax", "ddd", "QcQsQiQUcQUsQUiQfQd">; -def MINP : SInst<"vpmin", "ddd", "QcQsQiQUcQUsQUiQfQd">; +def MAXP : SInst<"vpmax", "...", "QcQsQiQUcQUsQUiQfQd">; +def MINP : SInst<"vpmin", "...", "QcQsQiQUcQUsQUiQfQd">; //////////////////////////////////////////////////////////////////////////////// // Pairwise MaxNum/MinNum Floating Point -def FMAXNMP : SInst<"vpmaxnm", "ddd", "fQfQd">; -def FMINNMP : SInst<"vpminnm", "ddd", "fQfQd">; +def FMAXNMP : SInst<"vpmaxnm", "...", "fQfQd">; +def FMINNMP : SInst<"vpminnm", "...", "fQfQd">; //////////////////////////////////////////////////////////////////////////////// // Pairwise Addition -def ADDP : IInst<"vpadd", "ddd", "QcQsQiQlQUcQUsQUiQUlQfQd">; +def ADDP : IInst<"vpadd", "...", "QcQsQiQlQUcQUsQUiQUlQfQd">; //////////////////////////////////////////////////////////////////////////////// // Shifts by constant let isShift = 1 in { // Left shift long high -def SHLL_HIGH_N : SOpInst<"vshll_high_n", "ndi", "HcHsHiHUcHUsHUi", +def SHLL_HIGH_N : SOpInst<"vshll_high_n", ">.I", "HcHsHiHUcHUsHUi", OP_LONG_HI>; //////////////////////////////////////////////////////////////////////////////// -def SRI_N : WInst<"vsri_n", "dddi", "PlQPl">; -def SLI_N : WInst<"vsli_n", "dddi", "PlQPl">; +def SRI_N : WInst<"vsri_n", "...I", "PlQPl">; +def SLI_N : WInst<"vsli_n", "...I", "PlQPl">; // Right shift narrow high -def SHRN_HIGH_N : IOpInst<"vshrn_high_n", "hmdi", +def SHRN_HIGH_N : IOpInst<"vshrn_high_n", "<(; -def QSHRUN_HIGH_N : SOpInst<"vqshrun_high_n", "hmdi", +def QSHRUN_HIGH_N : SOpInst<"vqshrun_high_n", "<(; -def RSHRN_HIGH_N : IOpInst<"vrshrn_high_n", "hmdi", +def RSHRN_HIGH_N : IOpInst<"vrshrn_high_n", "<(; -def QRSHRUN_HIGH_N : SOpInst<"vqrshrun_high_n", "hmdi", +def QRSHRUN_HIGH_N : SOpInst<"vqrshrun_high_n", "<(; -def QSHRN_HIGH_N : SOpInst<"vqshrn_high_n", "hmdi", +def QSHRN_HIGH_N : SOpInst<"vqshrn_high_n", "<(; -def QRSHRN_HIGH_N : SOpInst<"vqrshrn_high_n", "hmdi", +def QRSHRN_HIGH_N : SOpInst<"vqrshrn_high_n", "<(; } //////////////////////////////////////////////////////////////////////////////// // Converting vectors -def VMOVL_HIGH : SOpInst<"vmovl_high", "nd", "HcHsHiHUcHUsHUi", OP_MOVL_HI>; +def VMOVL_HIGH : SOpInst<"vmovl_high", ">.", "HcHsHiHUcHUsHUi", OP_MOVL_HI>; let isVCVT_N = 1 in { -def CVTF_N_F64 : SInst<"vcvt_n_f64", "Fdi", "lUlQlQUl">; -def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "xdi", "dQd">; -def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "udi", "dQd">; +def CVTF_N_F64 : SInst<"vcvt_n_f64", "F(.!)I", "lUlQlQUl">; +def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "S.I", "dQd">; +def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "U.I", "dQd">; } //////////////////////////////////////////////////////////////////////////////// // 3VDiff class using high 64-bit in operands -def VADDL_HIGH : SOpInst<"vaddl_high", "wkk", "csiUcUsUi", OP_ADDLHi>; -def VADDW_HIGH : SOpInst<"vaddw_high", "wwk", "csiUcUsUi", OP_ADDWHi>; -def VSUBL_HIGH : SOpInst<"vsubl_high", "wkk", "csiUcUsUi", OP_SUBLHi>; -def VSUBW_HIGH : SOpInst<"vsubw_high", "wwk", "csiUcUsUi", OP_SUBWHi>; +def VADDL_HIGH : SOpInst<"vaddl_high", "(>Q)QQ", "csiUcUsUi", OP_ADDLHi>; +def VADDW_HIGH : SOpInst<"vaddw_high", "(>Q)(>Q)Q", "csiUcUsUi", OP_ADDWHi>; +def VSUBL_HIGH : SOpInst<"vsubl_high", "(>Q)QQ", "csiUcUsUi", OP_SUBLHi>; +def VSUBW_HIGH : SOpInst<"vsubw_high", "(>Q)(>Q)Q", "csiUcUsUi", OP_SUBWHi>; -def VABDL_HIGH : SOpInst<"vabdl_high", "wkk", "csiUcUsUi", OP_ABDLHi>; -def VABAL_HIGH : SOpInst<"vabal_high", "wwkk", "csiUcUsUi", OP_ABALHi>; +def VABDL_HIGH : SOpInst<"vabdl_high", "(>Q)QQ", "csiUcUsUi", OP_ABDLHi>; +def VABAL_HIGH : SOpInst<"vabal_high", "(>Q)(>Q)QQ", "csiUcUsUi", OP_ABALHi>; -def VMULL_HIGH : SOpInst<"vmull_high", "wkk", "csiUcUsUiPc", OP_MULLHi>; -def VMULL_HIGH_N : SOpInst<"vmull_high_n", "wks", "siUsUi", OP_MULLHi_N>; -def VMLAL_HIGH : SOpInst<"vmlal_high", "wwkk", "csiUcUsUi", OP_MLALHi>; -def VMLAL_HIGH_N : SOpInst<"vmlal_high_n", "wwks", "siUsUi", OP_MLALHi_N>; -def VMLSL_HIGH : SOpInst<"vmlsl_high", "wwkk", "csiUcUsUi", OP_MLSLHi>; -def VMLSL_HIGH_N : SOpInst<"vmlsl_high_n", "wwks", "siUsUi", OP_MLSLHi_N>; +def VMULL_HIGH : SOpInst<"vmull_high", "(>Q)QQ", "csiUcUsUiPc", OP_MULLHi>; +def VMULL_HIGH_N : SOpInst<"vmull_high_n", "(>Q)Q1", "siUsUi", OP_MULLHi_N>; +def VMLAL_HIGH : SOpInst<"vmlal_high", "(>Q)(>Q)QQ", "csiUcUsUi", OP_MLALHi>; +def VMLAL_HIGH_N : SOpInst<"vmlal_high_n", "(>Q)(>Q)Q1", "siUsUi", OP_MLALHi_N>; +def VMLSL_HIGH : SOpInst<"vmlsl_high", "(>Q)(>Q)QQ", "csiUcUsUi", OP_MLSLHi>; +def VMLSL_HIGH_N : SOpInst<"vmlsl_high_n", "(>Q)(>Q)Q1", "siUsUi", OP_MLSLHi_N>; -def VADDHN_HIGH : SOpInst<"vaddhn_high", "qhkk", "silUsUiUl", OP_ADDHNHi>; -def VRADDHN_HIGH : SOpInst<"vraddhn_high", "qhkk", "silUsUiUl", OP_RADDHNHi>; -def VSUBHN_HIGH : SOpInst<"vsubhn_high", "qhkk", "silUsUiUl", OP_SUBHNHi>; -def VRSUBHN_HIGH : SOpInst<"vrsubhn_high", "qhkk", "silUsUiUl", OP_RSUBHNHi>; +def VADDHN_HIGH : SOpInst<"vaddhn_high", "(; +def VRADDHN_HIGH : SOpInst<"vraddhn_high", "(; +def VSUBHN_HIGH : SOpInst<"vsubhn_high", "(; +def VRSUBHN_HIGH : SOpInst<"vrsubhn_high", "(; -def VQDMULL_HIGH : SOpInst<"vqdmull_high", "wkk", "si", OP_QDMULLHi>; -def VQDMULL_HIGH_N : SOpInst<"vqdmull_high_n", "wks", "si", OP_QDMULLHi_N>; -def VQDMLAL_HIGH : SOpInst<"vqdmlal_high", "wwkk", "si", OP_QDMLALHi>; -def VQDMLAL_HIGH_N : SOpInst<"vqdmlal_high_n", "wwks", "si", OP_QDMLALHi_N>; -def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high", "wwkk", "si", OP_QDMLSLHi>; -def VQDMLSL_HIGH_N : SOpInst<"vqdmlsl_high_n", "wwks", "si", OP_QDMLSLHi_N>; -def VMULL_P64 : SInst<"vmull", "rss", "Pl">; -def VMULL_HIGH_P64 : SOpInst<"vmull_high", "rdd", "HPl", OP_MULLHi_P64>; +def VQDMULL_HIGH : SOpInst<"vqdmull_high", "(>Q)QQ", "si", OP_QDMULLHi>; +def VQDMULL_HIGH_N : SOpInst<"vqdmull_high_n", "(>Q)Q1", "si", OP_QDMULLHi_N>; +def VQDMLAL_HIGH : SOpInst<"vqdmlal_high", "(>Q)(>Q)QQ", "si", OP_QDMLALHi>; +def VQDMLAL_HIGH_N : SOpInst<"vqdmlal_high_n", "(>Q)(>Q)Q1", "si", OP_QDMLALHi_N>; +def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high", "(>Q)(>Q)QQ", "si", OP_QDMLSLHi>; +def VQDMLSL_HIGH_N : SOpInst<"vqdmlsl_high_n", "(>Q)(>Q)Q1", "si", OP_QDMLSLHi_N>; +def VMULL_P64 : SInst<"vmull", "(1>)11", "Pl">; +def VMULL_HIGH_P64 : SOpInst<"vmull_high", "(1>)..", "HPl", OP_MULLHi_P64>; //////////////////////////////////////////////////////////////////////////////// // Extract or insert element from vector -def GET_LANE : IInst<"vget_lane", "sdi", "dQdPlQPl">; -def SET_LANE : IInst<"vset_lane", "dsdi", "dQdPlQPl">; -def COPY_LANE : IOpInst<"vcopy_lane", "ddidi", +def GET_LANE : IInst<"vget_lane", "1.I", "dQdPlQPl">; +def SET_LANE : IInst<"vset_lane", ".1.I", "dQdPlQPl">; +def COPY_LANE : IOpInst<"vcopy_lane", "..I.I", "csilUcUsUiUlPcPsPlfd", OP_COPY_LN>; -def COPYQ_LANE : IOpInst<"vcopy_lane", "ddigi", +def COPYQ_LANE : IOpInst<"vcopy_lane", "..IqI", "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN>; -def COPY_LANEQ : IOpInst<"vcopy_laneq", "ddiki", +def COPY_LANEQ : IOpInst<"vcopy_laneq", "..IQI", "csilPcPsPlUcUsUiUlfd", OP_COPY_LN>; -def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "ddidi", +def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "..I.I", "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN>; //////////////////////////////////////////////////////////////////////////////// // Set all lanes to same value -def VDUP_LANE1: WOpInst<"vdup_lane", "dgi", "hdQhQdPlQPl", OP_DUP_LN>; -def VDUP_LANE2: WOpInst<"vdup_laneq", "dji", +def VDUP_LANE1: WOpInst<"vdup_lane", ".qI", "hdQhQdPlQPl", OP_DUP_LN>; +def VDUP_LANE2: WOpInst<"vdup_laneq", ".QI", "csilUcUsUiUlPcPshfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl", OP_DUP_LN>; -def DUP_N : WOpInst<"vdup_n", "ds", "dQdPlQPl", OP_DUP>; -def MOV_N : WOpInst<"vmov_n", "ds", "dQdPlQPl", OP_DUP>; +def DUP_N : WOpInst<"vdup_n", ".1", "dQdPlQPl", OP_DUP>; +def MOV_N : WOpInst<"vmov_n", ".1", "dQdPlQPl", OP_DUP>; //////////////////////////////////////////////////////////////////////////////// -def COMBINE : NoTestOpInst<"vcombine", "kdd", "dPl", OP_CONC>; +def COMBINE : NoTestOpInst<"vcombine", "Q..", "dPl", OP_CONC>; //////////////////////////////////////////////////////////////////////////////// //Initialize a vector from bit pattern -def CREATE : NoTestOpInst<"vcreate", "dl", "dPl", OP_CAST> { +def CREATE : NoTestOpInst<"vcreate", ".(IU>)", "dPl", OP_CAST> { let BigEndianSafe = 1; } //////////////////////////////////////////////////////////////////////////////// -def VMLA_LANEQ : IOpInst<"vmla_laneq", "dddji", +def VMLA_LANEQ : IOpInst<"vmla_laneq", "...QI", "siUsUifQsQiQUsQUiQf", OP_MLA_LN>; -def VMLS_LANEQ : IOpInst<"vmls_laneq", "dddji", +def VMLS_LANEQ : IOpInst<"vmls_laneq", "...QI", "siUsUifQsQiQUsQUiQf", OP_MLS_LN>; -def VFMA_LANE : IInst<"vfma_lane", "dddgi", "fdQfQd">; -def VFMA_LANEQ : IInst<"vfma_laneq", "dddji", "fdQfQd"> { +def VFMA_LANE : IInst<"vfma_lane", "...qI", "fdQfQd">; +def VFMA_LANEQ : IInst<"vfma_laneq", "...QI", "fdQfQd"> { let isLaneQ = 1; } -def VFMS_LANE : IOpInst<"vfms_lane", "dddgi", "fdQfQd", OP_FMS_LN>; -def VFMS_LANEQ : IOpInst<"vfms_laneq", "dddji", "fdQfQd", OP_FMS_LNQ>; +def VFMS_LANE : IOpInst<"vfms_lane", "...qI", "fdQfQd", OP_FMS_LN>; +def VFMS_LANEQ : IOpInst<"vfms_laneq", "...QI", "fdQfQd", OP_FMS_LNQ>; -def VMLAL_LANEQ : SOpInst<"vmlal_laneq", "wwdki", "siUsUi", OP_MLAL_LN>; -def VMLAL_HIGH_LANE : SOpInst<"vmlal_high_lane", "wwkdi", "siUsUi", +def VMLAL_LANEQ : SOpInst<"vmlal_laneq", "(>Q)(>Q).QI", "siUsUi", OP_MLAL_LN>; +def VMLAL_HIGH_LANE : SOpInst<"vmlal_high_lane", "(>Q)(>Q)Q.I", "siUsUi", OP_MLALHi_LN>; -def VMLAL_HIGH_LANEQ : SOpInst<"vmlal_high_laneq", "wwkki", "siUsUi", +def VMLAL_HIGH_LANEQ : SOpInst<"vmlal_high_laneq", "(>Q)(>Q)QQI", "siUsUi", OP_MLALHi_LN>; -def VMLSL_LANEQ : SOpInst<"vmlsl_laneq", "wwdki", "siUsUi", OP_MLSL_LN>; -def VMLSL_HIGH_LANE : SOpInst<"vmlsl_high_lane", "wwkdi", "siUsUi", +def VMLSL_LANEQ : SOpInst<"vmlsl_laneq", "(>Q)(>Q).QI", "siUsUi", OP_MLSL_LN>; +def VMLSL_HIGH_LANE : SOpInst<"vmlsl_high_lane", "(>Q)(>Q)Q.I", "siUsUi", OP_MLSLHi_LN>; -def VMLSL_HIGH_LANEQ : SOpInst<"vmlsl_high_laneq", "wwkki", "siUsUi", +def VMLSL_HIGH_LANEQ : SOpInst<"vmlsl_high_laneq", "(>Q)(>Q)QQI", "siUsUi", OP_MLSLHi_LN>; -def VQDMLAL_LANEQ : SOpInst<"vqdmlal_laneq", "wwdki", "si", OP_QDMLAL_LN>; -def VQDMLAL_HIGH_LANE : SOpInst<"vqdmlal_high_lane", "wwkdi", "si", +def VQDMLAL_LANEQ : SOpInst<"vqdmlal_laneq", "(>Q)(>Q).QI", "si", OP_QDMLAL_LN>; +def VQDMLAL_HIGH_LANE : SOpInst<"vqdmlal_high_lane", "(>Q)(>Q)Q.I", "si", OP_QDMLALHi_LN>; -def VQDMLAL_HIGH_LANEQ : SOpInst<"vqdmlal_high_laneq", "wwkki", "si", +def VQDMLAL_HIGH_LANEQ : SOpInst<"vqdmlal_high_laneq", "(>Q)(>Q)QQI", "si", OP_QDMLALHi_LN>; -def VQDMLSL_LANEQ : SOpInst<"vqdmlsl_laneq", "wwdki", "si", OP_QDMLSL_LN>; -def VQDMLSL_HIGH_LANE : SOpInst<"vqdmlsl_high_lane", "wwkdi", "si", +def VQDMLSL_LANEQ : SOpInst<"vqdmlsl_laneq", "(>Q)(>Q).QI", "si", OP_QDMLSL_LN>; +def VQDMLSL_HIGH_LANE : SOpInst<"vqdmlsl_high_lane", "(>Q)(>Q)Q.I", "si", OP_QDMLSLHi_LN>; -def VQDMLSL_HIGH_LANEQ : SOpInst<"vqdmlsl_high_laneq", "wwkki", "si", +def VQDMLSL_HIGH_LANEQ : SOpInst<"vqdmlsl_high_laneq", "(>Q)(>Q)QQI", "si", OP_QDMLSLHi_LN>; // Newly add double parameter for vmul_lane in aarch64 // Note: d type is handled by SCALAR_VMUL_LANE -def VMUL_LANE_A64 : IOpInst<"vmul_lane", "ddgi", "Qd", OP_MUL_LN>; +def VMUL_LANE_A64 : IOpInst<"vmul_lane", "..qI", "Qd", OP_MUL_LN>; // Note: d type is handled by SCALAR_VMUL_LANEQ -def VMUL_LANEQ : IOpInst<"vmul_laneq", "ddji", +def VMUL_LANEQ : IOpInst<"vmul_laneq", "..QI", "sifUsUiQsQiQUsQUiQfQd", OP_MUL_LN>; -def VMULL_LANEQ : SOpInst<"vmull_laneq", "wdki", "siUsUi", OP_MULL_LN>; -def VMULL_HIGH_LANE : SOpInst<"vmull_high_lane", "wkdi", "siUsUi", +def VMULL_LANEQ : SOpInst<"vmull_laneq", "(>Q).QI", "siUsUi", OP_MULL_LN>; +def VMULL_HIGH_LANE : SOpInst<"vmull_high_lane", "(>Q)Q.I", "siUsUi", OP_MULLHi_LN>; -def VMULL_HIGH_LANEQ : SOpInst<"vmull_high_laneq", "wkki", "siUsUi", +def VMULL_HIGH_LANEQ : SOpInst<"vmull_high_laneq", "(>Q)QQI", "siUsUi", OP_MULLHi_LN>; -def VQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "wdki", "si", OP_QDMULL_LN>; -def VQDMULL_HIGH_LANE : SOpInst<"vqdmull_high_lane", "wkdi", "si", +def VQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "(>Q).QI", "si", OP_QDMULL_LN>; +def VQDMULL_HIGH_LANE : SOpInst<"vqdmull_high_lane", "(>Q)Q.I", "si", OP_QDMULLHi_LN>; -def VQDMULL_HIGH_LANEQ : SOpInst<"vqdmull_high_laneq", "wkki", "si", +def VQDMULL_HIGH_LANEQ : SOpInst<"vqdmull_high_laneq", "(>Q)QQI", "si", OP_QDMULLHi_LN>; -def VQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "ddji", "siQsQi", OP_QDMULH_LN>; -def VQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "ddji", "siQsQi", OP_QRDMULH_LN>; +def VQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "..QI", "siQsQi", OP_QDMULH_LN>; +def VQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "..QI", "siQsQi", OP_QRDMULH_LN>; let ArchGuard = "defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in { -def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "dddji", "siQsQi", OP_QRDMLAH_LN>; -def VQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "dddji", "siQsQi", OP_QRDMLSH_LN>; +def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "...QI", "siQsQi", OP_QRDMLAH_LN>; +def VQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "...QI", "siQsQi", OP_QRDMLSH_LN>; } // Note: d type implemented by SCALAR_VMULX_LANE -def VMULX_LANE : IOpInst<"vmulx_lane", "ddgi", "fQfQd", OP_MULX_LN>; +def VMULX_LANE : IOpInst<"vmulx_lane", "..qI", "fQfQd", OP_MULX_LN>; // Note: d type is implemented by SCALAR_VMULX_LANEQ -def VMULX_LANEQ : IOpInst<"vmulx_laneq", "ddji", "fQfQd", OP_MULX_LN>; +def VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "fQfQd", OP_MULX_LN>; //////////////////////////////////////////////////////////////////////////////// // Across vectors class -def VADDLV : SInst<"vaddlv", "rd", "csiUcUsUiQcQsQiQUcQUsQUi">; -def VMAXV : SInst<"vmaxv", "sd", "csifUcUsUiQcQsQiQUcQUsQUiQfQd">; -def VMINV : SInst<"vminv", "sd", "csifUcUsUiQcQsQiQUcQUsQUiQfQd">; -def VADDV : SInst<"vaddv", "sd", "csifUcUsUiQcQsQiQUcQUsQUiQfQdQlQUl">; -def FMAXNMV : SInst<"vmaxnmv", "sd", "fQfQd">; -def FMINNMV : SInst<"vminnmv", "sd", "fQfQd">; +def VADDLV : SInst<"vaddlv", "(1>).", "csiUcUsUiQcQsQiQUcQUsQUi">; +def VMAXV : SInst<"vmaxv", "1.", "csifUcUsUiQcQsQiQUcQUsQUiQfQd">; +def VMINV : SInst<"vminv", "1.", "csifUcUsUiQcQsQiQUcQUsQUiQfQd">; +def VADDV : SInst<"vaddv", "1.", "csifUcUsUiQcQsQiQUcQUsQUiQfQdQlQUl">; +def FMAXNMV : SInst<"vmaxnmv", "1.", "fQfQd">; +def FMINNMV : SInst<"vminnmv", "1.", "fQfQd">; //////////////////////////////////////////////////////////////////////////////// // Newly added Vector Extract for f64 -def VEXT_A64 : WInst<"vext", "dddi", "dQdPlQPl">; +def VEXT_A64 : WInst<"vext", "...I", "dQdPlQPl">; //////////////////////////////////////////////////////////////////////////////// // Crypto let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_CRYPTO)" in { -def AESE : SInst<"vaese", "ddd", "QUc">; -def AESD : SInst<"vaesd", "ddd", "QUc">; -def AESMC : SInst<"vaesmc", "dd", "QUc">; -def AESIMC : SInst<"vaesimc", "dd", "QUc">; +def AESE : SInst<"vaese", "...", "QUc">; +def AESD : SInst<"vaesd", "...", "QUc">; +def AESMC : SInst<"vaesmc", "..", "QUc">; +def AESIMC : SInst<"vaesimc", "..", "QUc">; -def SHA1H : SInst<"vsha1h", "ss", "Ui">; -def SHA1SU1 : SInst<"vsha1su1", "ddd", "QUi">; -def SHA256SU0 : SInst<"vsha256su0", "ddd", "QUi">; +def SHA1H : SInst<"vsha1h", "11", "Ui">; +def SHA1SU1 : SInst<"vsha1su1", "...", "QUi">; +def SHA256SU0 : SInst<"vsha256su0", "...", "QUi">; -def SHA1C : SInst<"vsha1c", "ddsd", "QUi">; -def SHA1P : SInst<"vsha1p", "ddsd", "QUi">; -def SHA1M : SInst<"vsha1m", "ddsd", "QUi">; -def SHA1SU0 : SInst<"vsha1su0", "dddd", "QUi">; -def SHA256H : SInst<"vsha256h", "dddd", "QUi">; -def SHA256H2 : SInst<"vsha256h2", "dddd", "QUi">; -def SHA256SU1 : SInst<"vsha256su1", "dddd", "QUi">; +def SHA1C : SInst<"vsha1c", "..1.", "QUi">; +def SHA1P : SInst<"vsha1p", "..1.", "QUi">; +def SHA1M : SInst<"vsha1m", "..1.", "QUi">; +def SHA1SU0 : SInst<"vsha1su0", "....", "QUi">; +def SHA256H : SInst<"vsha256h", "....", "QUi">; +def SHA256H2 : SInst<"vsha256h2", "....", "QUi">; +def SHA256SU1 : SInst<"vsha256su1", "....", "QUi">; } //////////////////////////////////////////////////////////////////////////////// // Float -> Int conversions with explicit rounding mode let ArchGuard = "__ARM_ARCH >= 8" in { -def FCVTNS_S32 : SInst<"vcvtn_s32", "xd", "fQf">; -def FCVTNU_S32 : SInst<"vcvtn_u32", "ud", "fQf">; -def FCVTPS_S32 : SInst<"vcvtp_s32", "xd", "fQf">; -def FCVTPU_S32 : SInst<"vcvtp_u32", "ud", "fQf">; -def FCVTMS_S32 : SInst<"vcvtm_s32", "xd", "fQf">; -def FCVTMU_S32 : SInst<"vcvtm_u32", "ud", "fQf">; -def FCVTAS_S32 : SInst<"vcvta_s32", "xd", "fQf">; -def FCVTAU_S32 : SInst<"vcvta_u32", "ud", "fQf">; +def FCVTNS_S32 : SInst<"vcvtn_s32", "S.", "fQf">; +def FCVTNU_S32 : SInst<"vcvtn_u32", "U.", "fQf">; +def FCVTPS_S32 : SInst<"vcvtp_s32", "S.", "fQf">; +def FCVTPU_S32 : SInst<"vcvtp_u32", "U.", "fQf">; +def FCVTMS_S32 : SInst<"vcvtm_s32", "S.", "fQf">; +def FCVTMU_S32 : SInst<"vcvtm_u32", "U.", "fQf">; +def FCVTAS_S32 : SInst<"vcvta_s32", "S.", "fQf">; +def FCVTAU_S32 : SInst<"vcvta_u32", "U.", "fQf">; } let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__)" in { -def FCVTNS_S64 : SInst<"vcvtn_s64", "xd", "dQd">; -def FCVTNU_S64 : SInst<"vcvtn_u64", "ud", "dQd">; -def FCVTPS_S64 : SInst<"vcvtp_s64", "xd", "dQd">; -def FCVTPU_S64 : SInst<"vcvtp_u64", "ud", "dQd">; -def FCVTMS_S64 : SInst<"vcvtm_s64", "xd", "dQd">; -def FCVTMU_S64 : SInst<"vcvtm_u64", "ud", "dQd">; -def FCVTAS_S64 : SInst<"vcvta_s64", "xd", "dQd">; -def FCVTAU_S64 : SInst<"vcvta_u64", "ud", "dQd">; +def FCVTNS_S64 : SInst<"vcvtn_s64", "S.", "dQd">; +def FCVTNU_S64 : SInst<"vcvtn_u64", "U.", "dQd">; +def FCVTPS_S64 : SInst<"vcvtp_s64", "S.", "dQd">; +def FCVTPU_S64 : SInst<"vcvtp_u64", "U.", "dQd">; +def FCVTMS_S64 : SInst<"vcvtm_s64", "S.", "dQd">; +def FCVTMU_S64 : SInst<"vcvtm_u64", "U.", "dQd">; +def FCVTAS_S64 : SInst<"vcvta_s64", "S.", "dQd">; +def FCVTAU_S64 : SInst<"vcvta_u64", "U.", "dQd">; } //////////////////////////////////////////////////////////////////////////////// // Round to Integral let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_DIRECTED_ROUNDING)" in { -def FRINTN_S32 : SInst<"vrndn", "dd", "fQf">; -def FRINTA_S32 : SInst<"vrnda", "dd", "fQf">; -def FRINTP_S32 : SInst<"vrndp", "dd", "fQf">; -def FRINTM_S32 : SInst<"vrndm", "dd", "fQf">; -def FRINTX_S32 : SInst<"vrndx", "dd", "fQf">; -def FRINTZ_S32 : SInst<"vrnd", "dd", "fQf">; -def FRINTI_S32 : SInst<"vrndi", "dd", "fQf">; +def FRINTN_S32 : SInst<"vrndn", "..", "fQf">; +def FRINTA_S32 : SInst<"vrnda", "..", "fQf">; +def FRINTP_S32 : SInst<"vrndp", "..", "fQf">; +def FRINTM_S32 : SInst<"vrndm", "..", "fQf">; +def FRINTX_S32 : SInst<"vrndx", "..", "fQf">; +def FRINTZ_S32 : SInst<"vrnd", "..", "fQf">; +def FRINTI_S32 : SInst<"vrndi", "..", "fQf">; } let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__) && defined(__ARM_FEATURE_DIRECTED_ROUNDING)" in { -def FRINTN_S64 : SInst<"vrndn", "dd", "dQd">; -def FRINTA_S64 : SInst<"vrnda", "dd", "dQd">; -def FRINTP_S64 : SInst<"vrndp", "dd", "dQd">; -def FRINTM_S64 : SInst<"vrndm", "dd", "dQd">; -def FRINTX_S64 : SInst<"vrndx", "dd", "dQd">; -def FRINTZ_S64 : SInst<"vrnd", "dd", "dQd">; -def FRINTI_S64 : SInst<"vrndi", "dd", "dQd">; +def FRINTN_S64 : SInst<"vrndn", "..", "dQd">; +def FRINTA_S64 : SInst<"vrnda", "..", "dQd">; +def FRINTP_S64 : SInst<"vrndp", "..", "dQd">; +def FRINTM_S64 : SInst<"vrndm", "..", "dQd">; +def FRINTX_S64 : SInst<"vrndx", "..", "dQd">; +def FRINTZ_S64 : SInst<"vrnd", "..", "dQd">; +def FRINTI_S64 : SInst<"vrndi", "..", "dQd">; } //////////////////////////////////////////////////////////////////////////////// // MaxNum/MinNum Floating Point let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_NUMERIC_MAXMIN)" in { -def FMAXNM_S32 : SInst<"vmaxnm", "ddd", "fQf">; -def FMINNM_S32 : SInst<"vminnm", "ddd", "fQf">; +def FMAXNM_S32 : SInst<"vmaxnm", "...", "fQf">; +def FMINNM_S32 : SInst<"vminnm", "...", "fQf">; } let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__) && defined(__ARM_FEATURE_NUMERIC_MAXMIN)" in { -def FMAXNM_S64 : SInst<"vmaxnm", "ddd", "dQd">; -def FMINNM_S64 : SInst<"vminnm", "ddd", "dQd">; +def FMAXNM_S64 : SInst<"vmaxnm", "...", "dQd">; +def FMINNM_S64 : SInst<"vminnm", "...", "dQd">; } //////////////////////////////////////////////////////////////////////////////// // Permutation -def VTRN1 : SOpInst<"vtrn1", "ddd", +def VTRN1 : SOpInst<"vtrn1", "...", "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_TRN1>; -def VZIP1 : SOpInst<"vzip1", "ddd", +def VZIP1 : SOpInst<"vzip1", "...", "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_ZIP1>; -def VUZP1 : SOpInst<"vuzp1", "ddd", +def VUZP1 : SOpInst<"vuzp1", "...", "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_UZP1>; -def VTRN2 : SOpInst<"vtrn2", "ddd", +def VTRN2 : SOpInst<"vtrn2", "...", "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_TRN2>; -def VZIP2 : SOpInst<"vzip2", "ddd", +def VZIP2 : SOpInst<"vzip2", "...", "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_ZIP2>; -def VUZP2 : SOpInst<"vuzp2", "ddd", +def VUZP2 : SOpInst<"vuzp2", "...", "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_UZP2>; //////////////////////////////////////////////////////////////////////////////// // Table lookup let InstName = "vtbl" in { -def VQTBL1_A64 : WInst<"vqtbl1", "dju", "UccPcQUcQcQPc">; -def VQTBL2_A64 : WInst<"vqtbl2", "dBu", "UccPcQUcQcQPc">; -def VQTBL3_A64 : WInst<"vqtbl3", "dCu", "UccPcQUcQcQPc">; -def VQTBL4_A64 : WInst<"vqtbl4", "dDu", "UccPcQUcQcQPc">; +def VQTBL1_A64 : WInst<"vqtbl1", ".QU", "UccPcQUcQcQPc">; +def VQTBL2_A64 : WInst<"vqtbl2", ".(2Q)U", "UccPcQUcQcQPc">; +def VQTBL3_A64 : WInst<"vqtbl3", ".(3Q)U", "UccPcQUcQcQPc">; +def VQTBL4_A64 : WInst<"vqtbl4", ".(4Q)U", "UccPcQUcQcQPc">; } let InstName = "vtbx" in { -def VQTBX1_A64 : WInst<"vqtbx1", "ddju", "UccPcQUcQcQPc">; -def VQTBX2_A64 : WInst<"vqtbx2", "ddBu", "UccPcQUcQcQPc">; -def VQTBX3_A64 : WInst<"vqtbx3", "ddCu", "UccPcQUcQcQPc">; -def VQTBX4_A64 : WInst<"vqtbx4", "ddDu", "UccPcQUcQcQPc">; +def VQTBX1_A64 : WInst<"vqtbx1", "..QU", "UccPcQUcQcQPc">; +def VQTBX2_A64 : WInst<"vqtbx2", "..(2Q)U", "UccPcQUcQcQPc">; +def VQTBX3_A64 : WInst<"vqtbx3", "..(3Q)U", "UccPcQUcQcQPc">; +def VQTBX4_A64 : WInst<"vqtbx4", "..(4Q)U", "UccPcQUcQcQPc">; } //////////////////////////////////////////////////////////////////////////////// @@ -1095,7 +1095,7 @@ def VQTBX4_A64 : WInst<"vqtbx4", "ddDu", "UccPcQUcQcQPc">; // itself during generation so, unlike all other intrinsics, this one should // include *all* types, not just additional ones. def VVREINTERPRET - : NoTestOpInst<"vreinterpret", "dd", + : NoTestOpInst<"vreinterpret", "..", "csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPlQPk", OP_REINT> { let CartesianProductOfTypes = 1; let BigEndianSafe = 1; @@ -1107,332 +1107,332 @@ def VVREINTERPRET // Scalar Arithmetic // Scalar Addition -def SCALAR_ADD : SInst<"vadd", "sss", "SlSUl">; +def SCALAR_ADD : SInst<"vadd", "111", "SlSUl">; // Scalar Saturating Add -def SCALAR_QADD : SInst<"vqadd", "sss", "ScSsSiSlSUcSUsSUiSUl">; +def SCALAR_QADD : SInst<"vqadd", "111", "ScSsSiSlSUcSUsSUiSUl">; // Scalar Subtraction -def SCALAR_SUB : SInst<"vsub", "sss", "SlSUl">; +def SCALAR_SUB : SInst<"vsub", "111", "SlSUl">; // Scalar Saturating Sub -def SCALAR_QSUB : SInst<"vqsub", "sss", "ScSsSiSlSUcSUsSUiSUl">; +def SCALAR_QSUB : SInst<"vqsub", "111", "ScSsSiSlSUcSUsSUiSUl">; let InstName = "vmov" in { -def VGET_HIGH_A64 : NoTestOpInst<"vget_high", "dk", "dPl", OP_HI>; -def VGET_LOW_A64 : NoTestOpInst<"vget_low", "dk", "dPl", OP_LO>; +def VGET_HIGH_A64 : NoTestOpInst<"vget_high", ".Q", "dPl", OP_HI>; +def VGET_LOW_A64 : NoTestOpInst<"vget_low", ".Q", "dPl", OP_LO>; } //////////////////////////////////////////////////////////////////////////////// // Scalar Shift // Scalar Shift Left -def SCALAR_SHL: SInst<"vshl", "sss", "SlSUl">; +def SCALAR_SHL: SInst<"vshl", "111", "SlSUl">; // Scalar Saturating Shift Left -def SCALAR_QSHL: SInst<"vqshl", "sss", "ScSsSiSlSUcSUsSUiSUl">; +def SCALAR_QSHL: SInst<"vqshl", "111", "ScSsSiSlSUcSUsSUiSUl">; // Scalar Saturating Rounding Shift Left -def SCALAR_QRSHL: SInst<"vqrshl", "sss", "ScSsSiSlSUcSUsSUiSUl">; +def SCALAR_QRSHL: SInst<"vqrshl", "111", "ScSsSiSlSUcSUsSUiSUl">; // Scalar Shift Rounding Left -def SCALAR_RSHL: SInst<"vrshl", "sss", "SlSUl">; +def SCALAR_RSHL: SInst<"vrshl", "111", "SlSUl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Shift (Immediate) let isScalarShift = 1 in { // Signed/Unsigned Shift Right (Immediate) -def SCALAR_SSHR_N: SInst<"vshr_n", "ssi", "SlSUl">; +def SCALAR_SSHR_N: SInst<"vshr_n", "11I", "SlSUl">; // Signed/Unsigned Rounding Shift Right (Immediate) -def SCALAR_SRSHR_N: SInst<"vrshr_n", "ssi", "SlSUl">; +def SCALAR_SRSHR_N: SInst<"vrshr_n", "11I", "SlSUl">; // Signed/Unsigned Shift Right and Accumulate (Immediate) -def SCALAR_SSRA_N: SInst<"vsra_n", "sssi", "SlSUl">; +def SCALAR_SSRA_N: SInst<"vsra_n", "111I", "SlSUl">; // Signed/Unsigned Rounding Shift Right and Accumulate (Immediate) -def SCALAR_SRSRA_N: SInst<"vrsra_n", "sssi", "SlSUl">; +def SCALAR_SRSRA_N: SInst<"vrsra_n", "111I", "SlSUl">; // Shift Left (Immediate) -def SCALAR_SHL_N: SInst<"vshl_n", "ssi", "SlSUl">; +def SCALAR_SHL_N: SInst<"vshl_n", "11I", "SlSUl">; // Signed/Unsigned Saturating Shift Left (Immediate) -def SCALAR_SQSHL_N: SInst<"vqshl_n", "ssi", "ScSsSiSlSUcSUsSUiSUl">; +def SCALAR_SQSHL_N: SInst<"vqshl_n", "11I", "ScSsSiSlSUcSUsSUiSUl">; // Signed Saturating Shift Left Unsigned (Immediate) -def SCALAR_SQSHLU_N: SInst<"vqshlu_n", "ssi", "ScSsSiSl">; +def SCALAR_SQSHLU_N: SInst<"vqshlu_n", "11I", "ScSsSiSl">; // Shift Right And Insert (Immediate) -def SCALAR_SRI_N: SInst<"vsri_n", "sssi", "SlSUl">; +def SCALAR_SRI_N: SInst<"vsri_n", "111I", "SlSUl">; // Shift Left And Insert (Immediate) -def SCALAR_SLI_N: SInst<"vsli_n", "sssi", "SlSUl">; +def SCALAR_SLI_N: SInst<"vsli_n", "111I", "SlSUl">; let isScalarNarrowShift = 1 in { // Signed/Unsigned Saturating Shift Right Narrow (Immediate) - def SCALAR_SQSHRN_N: SInst<"vqshrn_n", "zsi", "SsSiSlSUsSUiSUl">; + def SCALAR_SQSHRN_N: SInst<"vqshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl">; // Signed/Unsigned Saturating Rounded Shift Right Narrow (Immediate) - def SCALAR_SQRSHRN_N: SInst<"vqrshrn_n", "zsi", "SsSiSlSUsSUiSUl">; + def SCALAR_SQRSHRN_N: SInst<"vqrshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl">; // Signed Saturating Shift Right Unsigned Narrow (Immediate) - def SCALAR_SQSHRUN_N: SInst<"vqshrun_n", "zsi", "SsSiSl">; + def SCALAR_SQSHRUN_N: SInst<"vqshrun_n", "(1<)1I", "SsSiSl">; // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate) - def SCALAR_SQRSHRUN_N: SInst<"vqrshrun_n", "zsi", "SsSiSl">; + def SCALAR_SQRSHRUN_N: SInst<"vqrshrun_n", "(1<)1I", "SsSiSl">; } //////////////////////////////////////////////////////////////////////////////// // Scalar Signed/Unsigned Fixed-point Convert To Floating-Point (Immediate) -def SCALAR_SCVTF_N_F32: SInst<"vcvt_n_f32", "ysi", "SiSUi">; -def SCALAR_SCVTF_N_F64: SInst<"vcvt_n_f64", "osi", "SlSUl">; +def SCALAR_SCVTF_N_F32: SInst<"vcvt_n_f32", "(1F)(1!)I", "SiSUi">; +def SCALAR_SCVTF_N_F64: SInst<"vcvt_n_f64", "(1F)(1!)I", "SlSUl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Convert To Signed/Unsigned Fixed-point (Immediate) -def SCALAR_FCVTZS_N_S32 : SInst<"vcvt_n_s32", "$si", "Sf">; -def SCALAR_FCVTZU_N_U32 : SInst<"vcvt_n_u32", "bsi", "Sf">; -def SCALAR_FCVTZS_N_S64 : SInst<"vcvt_n_s64", "$si", "Sd">; -def SCALAR_FCVTZU_N_U64 : SInst<"vcvt_n_u64", "bsi", "Sd">; +def SCALAR_FCVTZS_N_S32 : SInst<"vcvt_n_s32", "(1S)1I", "Sf">; +def SCALAR_FCVTZU_N_U32 : SInst<"vcvt_n_u32", "(1U)1I", "Sf">; +def SCALAR_FCVTZS_N_S64 : SInst<"vcvt_n_s64", "(1S)1I", "Sd">; +def SCALAR_FCVTZU_N_U64 : SInst<"vcvt_n_u64", "(1U)1I", "Sd">; } //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Round to Integral let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_DIRECTED_ROUNDING)" in { -def SCALAR_FRINTN_S32 : SInst<"vrndn", "ss", "Sf">; +def SCALAR_FRINTN_S32 : SInst<"vrndn", "11", "Sf">; } //////////////////////////////////////////////////////////////////////////////// // Scalar Reduce Pairwise Addition (Scalar and Floating Point) -def SCALAR_ADDP : SInst<"vpadd", "sd", "SfSHlSHdSHUl">; +def SCALAR_ADDP : SInst<"vpadd", "1.", "SfSHlSHdSHUl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Reduce Floating Point Pairwise Max/Min -def SCALAR_FMAXP : SInst<"vpmax", "sd", "SfSQd">; +def SCALAR_FMAXP : SInst<"vpmax", "1.", "SfSQd">; -def SCALAR_FMINP : SInst<"vpmin", "sd", "SfSQd">; +def SCALAR_FMINP : SInst<"vpmin", "1.", "SfSQd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Reduce Floating Point Pairwise maxNum/minNum -def SCALAR_FMAXNMP : SInst<"vpmaxnm", "sd", "SfSQd">; -def SCALAR_FMINNMP : SInst<"vpminnm", "sd", "SfSQd">; +def SCALAR_FMAXNMP : SInst<"vpmaxnm", "1.", "SfSQd">; +def SCALAR_FMINNMP : SInst<"vpminnm", "1.", "SfSQd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Integer Saturating Doubling Multiply Half High -def SCALAR_SQDMULH : SInst<"vqdmulh", "sss", "SsSi">; +def SCALAR_SQDMULH : SInst<"vqdmulh", "111", "SsSi">; //////////////////////////////////////////////////////////////////////////////// // Scalar Integer Saturating Rounding Doubling Multiply Half High -def SCALAR_SQRDMULH : SInst<"vqrdmulh", "sss", "SsSi">; +def SCALAR_SQRDMULH : SInst<"vqrdmulh", "111", "SsSi">; let ArchGuard = "defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in { //////////////////////////////////////////////////////////////////////////////// // Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half -def SCALAR_SQRDMLAH : SOpInst<"vqrdmlah", "ssss", "SsSi", OP_QRDMLAH>; +def SCALAR_SQRDMLAH : SOpInst<"vqrdmlah", "1111", "SsSi", OP_QRDMLAH>; //////////////////////////////////////////////////////////////////////////////// // Signed Saturating Rounding Doubling Multiply Subtract Returning High Half -def SCALAR_SQRDMLSH : SOpInst<"vqrdmlsh", "ssss", "SsSi", OP_QRDMLSH>; +def SCALAR_SQRDMLSH : SOpInst<"vqrdmlsh", "1111", "SsSi", OP_QRDMLSH>; } //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Multiply Extended -def SCALAR_FMULX : IInst<"vmulx", "sss", "SfSd">; +def SCALAR_FMULX : IInst<"vmulx", "111", "SfSd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Reciprocal Step -def SCALAR_FRECPS : IInst<"vrecps", "sss", "SfSd">; +def SCALAR_FRECPS : IInst<"vrecps", "111", "SfSd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Reciprocal Square Root Step -def SCALAR_FRSQRTS : IInst<"vrsqrts", "sss", "SfSd">; +def SCALAR_FRSQRTS : IInst<"vrsqrts", "111", "SfSd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Signed Integer Convert To Floating-point -def SCALAR_SCVTFS : SInst<"vcvt_f32", "ys", "Si">; -def SCALAR_SCVTFD : SInst<"vcvt_f64", "os", "Sl">; +def SCALAR_SCVTFS : SInst<"vcvt_f32", "(1F)(1!)", "Si">; +def SCALAR_SCVTFD : SInst<"vcvt_f64", "(1F)(1!)", "Sl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Unsigned Integer Convert To Floating-point -def SCALAR_UCVTFS : SInst<"vcvt_f32", "ys", "SUi">; -def SCALAR_UCVTFD : SInst<"vcvt_f64", "os", "SUl">; +def SCALAR_UCVTFS : SInst<"vcvt_f32", "(1F)(1!)", "SUi">; +def SCALAR_UCVTFD : SInst<"vcvt_f64", "(1F)(1!)", "SUl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Converts -def SCALAR_FCVTXN : IInst<"vcvtx_f32", "ys", "Sd">; -def SCALAR_FCVTNSS : SInst<"vcvtn_s32", "$s", "Sf">; -def SCALAR_FCVTNUS : SInst<"vcvtn_u32", "bs", "Sf">; -def SCALAR_FCVTNSD : SInst<"vcvtn_s64", "$s", "Sd">; -def SCALAR_FCVTNUD : SInst<"vcvtn_u64", "bs", "Sd">; -def SCALAR_FCVTMSS : SInst<"vcvtm_s32", "$s", "Sf">; -def SCALAR_FCVTMUS : SInst<"vcvtm_u32", "bs", "Sf">; -def SCALAR_FCVTMSD : SInst<"vcvtm_s64", "$s", "Sd">; -def SCALAR_FCVTMUD : SInst<"vcvtm_u64", "bs", "Sd">; -def SCALAR_FCVTASS : SInst<"vcvta_s32", "$s", "Sf">; -def SCALAR_FCVTAUS : SInst<"vcvta_u32", "bs", "Sf">; -def SCALAR_FCVTASD : SInst<"vcvta_s64", "$s", "Sd">; -def SCALAR_FCVTAUD : SInst<"vcvta_u64", "bs", "Sd">; -def SCALAR_FCVTPSS : SInst<"vcvtp_s32", "$s", "Sf">; -def SCALAR_FCVTPUS : SInst<"vcvtp_u32", "bs", "Sf">; -def SCALAR_FCVTPSD : SInst<"vcvtp_s64", "$s", "Sd">; -def SCALAR_FCVTPUD : SInst<"vcvtp_u64", "bs", "Sd">; -def SCALAR_FCVTZSS : SInst<"vcvt_s32", "$s", "Sf">; -def SCALAR_FCVTZUS : SInst<"vcvt_u32", "bs", "Sf">; -def SCALAR_FCVTZSD : SInst<"vcvt_s64", "$s", "Sd">; -def SCALAR_FCVTZUD : SInst<"vcvt_u64", "bs", "Sd">; +def SCALAR_FCVTXN : IInst<"vcvtx_f32", "(1F<)(1!)", "Sd">; +def SCALAR_FCVTNSS : SInst<"vcvtn_s32", "(1S)1", "Sf">; +def SCALAR_FCVTNUS : SInst<"vcvtn_u32", "(1U)1", "Sf">; +def SCALAR_FCVTNSD : SInst<"vcvtn_s64", "(1S)1", "Sd">; +def SCALAR_FCVTNUD : SInst<"vcvtn_u64", "(1U)1", "Sd">; +def SCALAR_FCVTMSS : SInst<"vcvtm_s32", "(1S)1", "Sf">; +def SCALAR_FCVTMUS : SInst<"vcvtm_u32", "(1U)1", "Sf">; +def SCALAR_FCVTMSD : SInst<"vcvtm_s64", "(1S)1", "Sd">; +def SCALAR_FCVTMUD : SInst<"vcvtm_u64", "(1U)1", "Sd">; +def SCALAR_FCVTASS : SInst<"vcvta_s32", "(1S)1", "Sf">; +def SCALAR_FCVTAUS : SInst<"vcvta_u32", "(1U)1", "Sf">; +def SCALAR_FCVTASD : SInst<"vcvta_s64", "(1S)1", "Sd">; +def SCALAR_FCVTAUD : SInst<"vcvta_u64", "(1U)1", "Sd">; +def SCALAR_FCVTPSS : SInst<"vcvtp_s32", "(1S)1", "Sf">; +def SCALAR_FCVTPUS : SInst<"vcvtp_u32", "(1U)1", "Sf">; +def SCALAR_FCVTPSD : SInst<"vcvtp_s64", "(1S)1", "Sd">; +def SCALAR_FCVTPUD : SInst<"vcvtp_u64", "(1U)1", "Sd">; +def SCALAR_FCVTZSS : SInst<"vcvt_s32", "(1S)1", "Sf">; +def SCALAR_FCVTZUS : SInst<"vcvt_u32", "(1U)1", "Sf">; +def SCALAR_FCVTZSD : SInst<"vcvt_s64", "(1S)1", "Sd">; +def SCALAR_FCVTZUD : SInst<"vcvt_u64", "(1U)1", "Sd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Reciprocal Estimate -def SCALAR_FRECPE : IInst<"vrecpe", "ss", "SfSd">; +def SCALAR_FRECPE : IInst<"vrecpe", "11", "SfSd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Reciprocal Exponent -def SCALAR_FRECPX : IInst<"vrecpx", "ss", "SfSd">; +def SCALAR_FRECPX : IInst<"vrecpx", "11", "SfSd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Reciprocal Square Root Estimate -def SCALAR_FRSQRTE : IInst<"vrsqrte", "ss", "SfSd">; +def SCALAR_FRSQRTE : IInst<"vrsqrte", "11", "SfSd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Integer Comparison -def SCALAR_CMEQ : SInst<"vceq", "sss", "SlSUl">; -def SCALAR_CMEQZ : SInst<"vceqz", "ss", "SlSUl">; -def SCALAR_CMGE : SInst<"vcge", "sss", "Sl">; -def SCALAR_CMGEZ : SInst<"vcgez", "ss", "Sl">; -def SCALAR_CMHS : SInst<"vcge", "sss", "SUl">; -def SCALAR_CMLE : SInst<"vcle", "sss", "SlSUl">; -def SCALAR_CMLEZ : SInst<"vclez", "ss", "Sl">; -def SCALAR_CMLT : SInst<"vclt", "sss", "SlSUl">; -def SCALAR_CMLTZ : SInst<"vcltz", "ss", "Sl">; -def SCALAR_CMGT : SInst<"vcgt", "sss", "Sl">; -def SCALAR_CMGTZ : SInst<"vcgtz", "ss", "Sl">; -def SCALAR_CMHI : SInst<"vcgt", "sss", "SUl">; -def SCALAR_CMTST : SInst<"vtst", "sss", "SlSUl">; +def SCALAR_CMEQ : SInst<"vceq", "111", "SlSUl">; +def SCALAR_CMEQZ : SInst<"vceqz", "11", "SlSUl">; +def SCALAR_CMGE : SInst<"vcge", "111", "Sl">; +def SCALAR_CMGEZ : SInst<"vcgez", "11", "Sl">; +def SCALAR_CMHS : SInst<"vcge", "111", "SUl">; +def SCALAR_CMLE : SInst<"vcle", "111", "SlSUl">; +def SCALAR_CMLEZ : SInst<"vclez", "11", "Sl">; +def SCALAR_CMLT : SInst<"vclt", "111", "SlSUl">; +def SCALAR_CMLTZ : SInst<"vcltz", "11", "Sl">; +def SCALAR_CMGT : SInst<"vcgt", "111", "Sl">; +def SCALAR_CMGTZ : SInst<"vcgtz", "11", "Sl">; +def SCALAR_CMHI : SInst<"vcgt", "111", "SUl">; +def SCALAR_CMTST : SInst<"vtst", "111", "SlSUl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Comparison -def SCALAR_FCMEQ : IInst<"vceq", "bss", "SfSd">; -def SCALAR_FCMEQZ : IInst<"vceqz", "bs", "SfSd">; -def SCALAR_FCMGE : IInst<"vcge", "bss", "SfSd">; -def SCALAR_FCMGEZ : IInst<"vcgez", "bs", "SfSd">; -def SCALAR_FCMGT : IInst<"vcgt", "bss", "SfSd">; -def SCALAR_FCMGTZ : IInst<"vcgtz", "bs", "SfSd">; -def SCALAR_FCMLE : IInst<"vcle", "bss", "SfSd">; -def SCALAR_FCMLEZ : IInst<"vclez", "bs", "SfSd">; -def SCALAR_FCMLT : IInst<"vclt", "bss", "SfSd">; -def SCALAR_FCMLTZ : IInst<"vcltz", "bs", "SfSd">; +def SCALAR_FCMEQ : IInst<"vceq", "(1U)11", "SfSd">; +def SCALAR_FCMEQZ : IInst<"vceqz", "(1U)1", "SfSd">; +def SCALAR_FCMGE : IInst<"vcge", "(1U)11", "SfSd">; +def SCALAR_FCMGEZ : IInst<"vcgez", "(1U)1", "SfSd">; +def SCALAR_FCMGT : IInst<"vcgt", "(1U)11", "SfSd">; +def SCALAR_FCMGTZ : IInst<"vcgtz", "(1U)1", "SfSd">; +def SCALAR_FCMLE : IInst<"vcle", "(1U)11", "SfSd">; +def SCALAR_FCMLEZ : IInst<"vclez", "(1U)1", "SfSd">; +def SCALAR_FCMLT : IInst<"vclt", "(1U)11", "SfSd">; +def SCALAR_FCMLTZ : IInst<"vcltz", "(1U)1", "SfSd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Absolute Compare Mask Greater Than Or Equal -def SCALAR_FACGE : IInst<"vcage", "bss", "SfSd">; -def SCALAR_FACLE : IInst<"vcale", "bss", "SfSd">; +def SCALAR_FACGE : IInst<"vcage", "(1U)11", "SfSd">; +def SCALAR_FACLE : IInst<"vcale", "(1U)11", "SfSd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Absolute Compare Mask Greater Than -def SCALAR_FACGT : IInst<"vcagt", "bss", "SfSd">; -def SCALAR_FACLT : IInst<"vcalt", "bss", "SfSd">; +def SCALAR_FACGT : IInst<"vcagt", "(1U)11", "SfSd">; +def SCALAR_FACLT : IInst<"vcalt", "(1U)11", "SfSd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Absolute Value -def SCALAR_ABS : SInst<"vabs", "ss", "Sl">; +def SCALAR_ABS : SInst<"vabs", "11", "Sl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Absolute Difference -def SCALAR_ABD : IInst<"vabd", "sss", "SfSd">; +def SCALAR_ABD : IInst<"vabd", "111", "SfSd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Signed Saturating Absolute Value -def SCALAR_SQABS : SInst<"vqabs", "ss", "ScSsSiSl">; +def SCALAR_SQABS : SInst<"vqabs", "11", "ScSsSiSl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Negate -def SCALAR_NEG : SInst<"vneg", "ss", "Sl">; +def SCALAR_NEG : SInst<"vneg", "11", "Sl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Signed Saturating Negate -def SCALAR_SQNEG : SInst<"vqneg", "ss", "ScSsSiSl">; +def SCALAR_SQNEG : SInst<"vqneg", "11", "ScSsSiSl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Signed Saturating Accumulated of Unsigned Value -def SCALAR_SUQADD : SInst<"vuqadd", "ssb", "ScSsSiSl">; +def SCALAR_SUQADD : SInst<"vuqadd", "11(1U)", "ScSsSiSl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Unsigned Saturating Accumulated of Signed Value -def SCALAR_USQADD : SInst<"vsqadd", "ss$", "SUcSUsSUiSUl">; +def SCALAR_USQADD : SInst<"vsqadd", "11(1S)", "SUcSUsSUiSUl">; //////////////////////////////////////////////////////////////////////////////// // Signed Saturating Doubling Multiply-Add Long -def SCALAR_SQDMLAL : SInst<"vqdmlal", "rrss", "SsSi">; +def SCALAR_SQDMLAL : SInst<"vqdmlal", "(1>)(1>)11", "SsSi">; //////////////////////////////////////////////////////////////////////////////// // Signed Saturating Doubling Multiply-Subtract Long -def SCALAR_SQDMLSL : SInst<"vqdmlsl", "rrss", "SsSi">; +def SCALAR_SQDMLSL : SInst<"vqdmlsl", "(1>)(1>)11", "SsSi">; //////////////////////////////////////////////////////////////////////////////// // Signed Saturating Doubling Multiply Long -def SCALAR_SQDMULL : SInst<"vqdmull", "rss", "SsSi">; +def SCALAR_SQDMULL : SInst<"vqdmull", "(1>)11", "SsSi">; //////////////////////////////////////////////////////////////////////////////// // Scalar Signed Saturating Extract Unsigned Narrow -def SCALAR_SQXTUN : SInst<"vqmovun", "zs", "SsSiSl">; +def SCALAR_SQXTUN : SInst<"vqmovun", "(1<)1", "SsSiSl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Signed Saturating Extract Narrow -def SCALAR_SQXTN : SInst<"vqmovn", "zs", "SsSiSl">; +def SCALAR_SQXTN : SInst<"vqmovn", "(1<)1", "SsSiSl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Unsigned Saturating Extract Narrow -def SCALAR_UQXTN : SInst<"vqmovn", "zs", "SUsSUiSUl">; +def SCALAR_UQXTN : SInst<"vqmovn", "(1<)1", "SUsSUiSUl">; // Scalar Floating Point multiply (scalar, by element) -def SCALAR_FMUL_LANE : IOpInst<"vmul_lane", "ssdi", "SfSd", OP_SCALAR_MUL_LN>; -def SCALAR_FMUL_LANEQ : IOpInst<"vmul_laneq", "ssji", "SfSd", OP_SCALAR_MUL_LN>; +def SCALAR_FMUL_LANE : IOpInst<"vmul_lane", "11.I", "SfSd", OP_SCALAR_MUL_LN>; +def SCALAR_FMUL_LANEQ : IOpInst<"vmul_laneq", "11QI", "SfSd", OP_SCALAR_MUL_LN>; // Scalar Floating Point multiply extended (scalar, by element) -def SCALAR_FMULX_LANE : IOpInst<"vmulx_lane", "ssdi", "SfSd", OP_SCALAR_MULX_LN>; -def SCALAR_FMULX_LANEQ : IOpInst<"vmulx_laneq", "ssji", "SfSd", OP_SCALAR_MULX_LN>; +def SCALAR_FMULX_LANE : IOpInst<"vmulx_lane", "11.I", "SfSd", OP_SCALAR_MULX_LN>; +def SCALAR_FMULX_LANEQ : IOpInst<"vmulx_laneq", "11QI", "SfSd", OP_SCALAR_MULX_LN>; -def SCALAR_VMUL_N : IInst<"vmul_n", "dds", "d">; +def SCALAR_VMUL_N : IInst<"vmul_n", "..1", "d">; // VMUL_LANE_A64 d type implemented using scalar mul lane -def SCALAR_VMUL_LANE : IInst<"vmul_lane", "ddgi", "d">; +def SCALAR_VMUL_LANE : IInst<"vmul_lane", "..qI", "d">; // VMUL_LANEQ d type implemented using scalar mul lane -def SCALAR_VMUL_LANEQ : IInst<"vmul_laneq", "ddji", "d"> { +def SCALAR_VMUL_LANEQ : IInst<"vmul_laneq", "..QI", "d"> { let isLaneQ = 1; } // VMULX_LANE d type implemented using scalar vmulx_lane -def SCALAR_VMULX_LANE : IOpInst<"vmulx_lane", "ddgi", "d", OP_SCALAR_VMULX_LN>; +def SCALAR_VMULX_LANE : IOpInst<"vmulx_lane", "..qI", "d", OP_SCALAR_VMULX_LN>; // VMULX_LANEQ d type implemented using scalar vmulx_laneq -def SCALAR_VMULX_LANEQ : IOpInst<"vmulx_laneq", "ddji", "d", OP_SCALAR_VMULX_LNQ>; +def SCALAR_VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "d", OP_SCALAR_VMULX_LNQ>; // Scalar Floating Point fused multiply-add (scalar, by element) -def SCALAR_FMLA_LANE : IInst<"vfma_lane", "sssdi", "SfSd">; -def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "sssji", "SfSd">; +def SCALAR_FMLA_LANE : IInst<"vfma_lane", "111.I", "SfSd">; +def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "111QI", "SfSd">; // Scalar Floating Point fused multiply-subtract (scalar, by element) -def SCALAR_FMLS_LANE : IOpInst<"vfms_lane", "sssdi", "SfSd", OP_FMS_LN>; -def SCALAR_FMLS_LANEQ : IOpInst<"vfms_laneq", "sssji", "SfSd", OP_FMS_LNQ>; +def SCALAR_FMLS_LANE : IOpInst<"vfms_lane", "111.I", "SfSd", OP_FMS_LN>; +def SCALAR_FMLS_LANEQ : IOpInst<"vfms_laneq", "111QI", "SfSd", OP_FMS_LNQ>; // Signed Saturating Doubling Multiply Long (scalar by element) -def SCALAR_SQDMULL_LANE : SOpInst<"vqdmull_lane", "rsdi", "SsSi", OP_SCALAR_QDMULL_LN>; -def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "rsji", "SsSi", OP_SCALAR_QDMULL_LN>; +def SCALAR_SQDMULL_LANE : SOpInst<"vqdmull_lane", "(1>)1.I", "SsSi", OP_SCALAR_QDMULL_LN>; +def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "(1>)1QI", "SsSi", OP_SCALAR_QDMULL_LN>; // Signed Saturating Doubling Multiply-Add Long (scalar by element) -def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "rrsdi", "SsSi">; -def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "rrsji", "SsSi">; +def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "(1>)(1>)1.I", "SsSi">; +def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "(1>)(1>)1QI", "SsSi">; // Signed Saturating Doubling Multiply-Subtract Long (scalar by element) -def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "rrsdi", "SsSi">; -def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "rrsji", "SsSi">; +def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "(1>)(1>)1.I", "SsSi">; +def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "(1>)(1>)1QI", "SsSi">; // Scalar Integer Saturating Doubling Multiply Half High (scalar by element) -def SCALAR_SQDMULH_LANE : SOpInst<"vqdmulh_lane", "ssdi", "SsSi", OP_SCALAR_QDMULH_LN>; -def SCALAR_SQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "ssji", "SsSi", OP_SCALAR_QDMULH_LN>; +def SCALAR_SQDMULH_LANE : SOpInst<"vqdmulh_lane", "11.I", "SsSi", OP_SCALAR_QDMULH_LN>; +def SCALAR_SQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "11QI", "SsSi", OP_SCALAR_QDMULH_LN>; // Scalar Integer Saturating Rounding Doubling Multiply Half High -def SCALAR_SQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "ssdi", "SsSi", OP_SCALAR_QRDMULH_LN>; -def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "ssji", "SsSi", OP_SCALAR_QRDMULH_LN>; +def SCALAR_SQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "11.I", "SsSi", OP_SCALAR_QRDMULH_LN>; +def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "11QI", "SsSi", OP_SCALAR_QRDMULH_LN>; let ArchGuard = "defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in { // Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half -def SCALAR_SQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "sssdi", "SsSi", OP_SCALAR_QRDMLAH_LN>; -def SCALAR_SQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "sssji", "SsSi", OP_SCALAR_QRDMLAH_LN>; +def SCALAR_SQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "111.I", "SsSi", OP_SCALAR_QRDMLAH_LN>; +def SCALAR_SQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLAH_LN>; // Signed Saturating Rounding Doubling Multiply Subtract Returning High Half -def SCALAR_SQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "sssdi", "SsSi", OP_SCALAR_QRDMLSH_LN>; -def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "sssji", "SsSi", OP_SCALAR_QRDMLSH_LN>; +def SCALAR_SQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "111.I", "SsSi", OP_SCALAR_QRDMLSH_LN>; +def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLSH_LN>; } -def SCALAR_VDUP_LANE : IInst<"vdup_lane", "sdi", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">; -def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "sji", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">; +def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">; +def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">; } // ARMv8.2-A FP16 vector intrinsics for A32/A64. @@ -1441,234 +1441,234 @@ let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in { // ARMv8.2-A FP16 one-operand vector intrinsics. // Comparison - def CMEQH : SInst<"vceqz", "ud", "hQh">; - def CMGEH : SInst<"vcgez", "ud", "hQh">; - def CMGTH : SInst<"vcgtz", "ud", "hQh">; - def CMLEH : SInst<"vclez", "ud", "hQh">; - def CMLTH : SInst<"vcltz", "ud", "hQh">; + def CMEQH : SInst<"vceqz", "U.", "hQh">; + def CMGEH : SInst<"vcgez", "U.", "hQh">; + def CMGTH : SInst<"vcgtz", "U.", "hQh">; + def CMLEH : SInst<"vclez", "U.", "hQh">; + def CMLTH : SInst<"vcltz", "U.", "hQh">; // Vector conversion - def VCVT_F16 : SInst<"vcvt_f16", "Hd", "sUsQsQUs">; - def VCVT_S16 : SInst<"vcvt_s16", "xd", "hQh">; - def VCVT_U16 : SInst<"vcvt_u16", "ud", "hQh">; - def VCVTA_S16 : SInst<"vcvta_s16", "xd", "hQh">; - def VCVTA_U16 : SInst<"vcvta_u16", "ud", "hQh">; - def VCVTM_S16 : SInst<"vcvtm_s16", "xd", "hQh">; - def VCVTM_U16 : SInst<"vcvtm_u16", "ud", "hQh">; - def VCVTN_S16 : SInst<"vcvtn_s16", "xd", "hQh">; - def VCVTN_U16 : SInst<"vcvtn_u16", "ud", "hQh">; - def VCVTP_S16 : SInst<"vcvtp_s16", "xd", "hQh">; - def VCVTP_U16 : SInst<"vcvtp_u16", "ud", "hQh">; + def VCVT_F16 : SInst<"vcvt_f16", "F(.!)", "sUsQsQUs">; + def VCVT_S16 : SInst<"vcvt_s16", "S.", "hQh">; + def VCVT_U16 : SInst<"vcvt_u16", "U.", "hQh">; + def VCVTA_S16 : SInst<"vcvta_s16", "S.", "hQh">; + def VCVTA_U16 : SInst<"vcvta_u16", "U.", "hQh">; + def VCVTM_S16 : SInst<"vcvtm_s16", "S.", "hQh">; + def VCVTM_U16 : SInst<"vcvtm_u16", "U.", "hQh">; + def VCVTN_S16 : SInst<"vcvtn_s16", "S.", "hQh">; + def VCVTN_U16 : SInst<"vcvtn_u16", "U.", "hQh">; + def VCVTP_S16 : SInst<"vcvtp_s16", "S.", "hQh">; + def VCVTP_U16 : SInst<"vcvtp_u16", "U.", "hQh">; // Vector rounding let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_DIRECTED_ROUNDING) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in { - def FRINTZH : SInst<"vrnd", "dd", "hQh">; - def FRINTNH : SInst<"vrndn", "dd", "hQh">; - def FRINTAH : SInst<"vrnda", "dd", "hQh">; - def FRINTPH : SInst<"vrndp", "dd", "hQh">; - def FRINTMH : SInst<"vrndm", "dd", "hQh">; - def FRINTXH : SInst<"vrndx", "dd", "hQh">; + def FRINTZH : SInst<"vrnd", "..", "hQh">; + def FRINTNH : SInst<"vrndn", "..", "hQh">; + def FRINTAH : SInst<"vrnda", "..", "hQh">; + def FRINTPH : SInst<"vrndp", "..", "hQh">; + def FRINTMH : SInst<"vrndm", "..", "hQh">; + def FRINTXH : SInst<"vrndx", "..", "hQh">; } // Misc. - def VABSH : SInst<"vabs", "dd", "hQh">; - def VNEGH : SOpInst<"vneg", "dd", "hQh", OP_NEG>; - def VRECPEH : SInst<"vrecpe", "dd", "hQh">; - def FRSQRTEH : SInst<"vrsqrte", "dd", "hQh">; + def VABSH : SInst<"vabs", "..", "hQh">; + def VNEGH : SOpInst<"vneg", "..", "hQh", OP_NEG>; + def VRECPEH : SInst<"vrecpe", "..", "hQh">; + def FRSQRTEH : SInst<"vrsqrte", "..", "hQh">; // ARMv8.2-A FP16 two-operands vector intrinsics. // Misc. - def VADDH : SOpInst<"vadd", "ddd", "hQh", OP_ADD>; - def VABDH : SInst<"vabd", "ddd", "hQh">; - def VSUBH : SOpInst<"vsub", "ddd", "hQh", OP_SUB>; + def VADDH : SOpInst<"vadd", "...", "hQh", OP_ADD>; + def VABDH : SInst<"vabd", "...", "hQh">; + def VSUBH : SOpInst<"vsub", "...", "hQh", OP_SUB>; // Comparison let InstName = "vacge" in { - def VCAGEH : SInst<"vcage", "udd", "hQh">; - def VCALEH : SInst<"vcale", "udd", "hQh">; + def VCAGEH : SInst<"vcage", "U..", "hQh">; + def VCALEH : SInst<"vcale", "U..", "hQh">; } let InstName = "vacgt" in { - def VCAGTH : SInst<"vcagt", "udd", "hQh">; - def VCALTH : SInst<"vcalt", "udd", "hQh">; + def VCAGTH : SInst<"vcagt", "U..", "hQh">; + def VCALTH : SInst<"vcalt", "U..", "hQh">; } - def VCEQH : SOpInst<"vceq", "udd", "hQh", OP_EQ>; - def VCGEH : SOpInst<"vcge", "udd", "hQh", OP_GE>; - def VCGTH : SOpInst<"vcgt", "udd", "hQh", OP_GT>; + def VCEQH : SOpInst<"vceq", "U..", "hQh", OP_EQ>; + def VCGEH : SOpInst<"vcge", "U..", "hQh", OP_GE>; + def VCGTH : SOpInst<"vcgt", "U..", "hQh", OP_GT>; let InstName = "vcge" in - def VCLEH : SOpInst<"vcle", "udd", "hQh", OP_LE>; + def VCLEH : SOpInst<"vcle", "U..", "hQh", OP_LE>; let InstName = "vcgt" in - def VCLTH : SOpInst<"vclt", "udd", "hQh", OP_LT>; + def VCLTH : SOpInst<"vclt", "U..", "hQh", OP_LT>; // Vector conversion let isVCVT_N = 1 in { - def VCVT_N_F16 : SInst<"vcvt_n_f16", "Hdi", "sUsQsQUs">; - def VCVT_N_S16 : SInst<"vcvt_n_s16", "xdi", "hQh">; - def VCVT_N_U16 : SInst<"vcvt_n_u16", "udi", "hQh">; + def VCVT_N_F16 : SInst<"vcvt_n_f16", "F(.!)I", "sUsQsQUs">; + def VCVT_N_S16 : SInst<"vcvt_n_s16", "S.I", "hQh">; + def VCVT_N_U16 : SInst<"vcvt_n_u16", "U.I", "hQh">; } // Max/Min - def VMAXH : SInst<"vmax", "ddd", "hQh">; - def VMINH : SInst<"vmin", "ddd", "hQh">; + def VMAXH : SInst<"vmax", "...", "hQh">; + def VMINH : SInst<"vmin", "...", "hQh">; let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_NUMERIC_MAXMIN) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in { - def FMAXNMH : SInst<"vmaxnm", "ddd", "hQh">; - def FMINNMH : SInst<"vminnm", "ddd", "hQh">; + def FMAXNMH : SInst<"vmaxnm", "...", "hQh">; + def FMINNMH : SInst<"vminnm", "...", "hQh">; } // Multiplication/Division - def VMULH : SOpInst<"vmul", "ddd", "hQh", OP_MUL>; + def VMULH : SOpInst<"vmul", "...", "hQh", OP_MUL>; // Pairwise addition - def VPADDH : SInst<"vpadd", "ddd", "h">; + def VPADDH : SInst<"vpadd", "...", "h">; // Pairwise Max/Min - def VPMAXH : SInst<"vpmax", "ddd", "h">; - def VPMINH : SInst<"vpmin", "ddd", "h">; + def VPMAXH : SInst<"vpmax", "...", "h">; + def VPMINH : SInst<"vpmin", "...", "h">; // Reciprocal/Sqrt - def VRECPSH : SInst<"vrecps", "ddd", "hQh">; - def VRSQRTSH : SInst<"vrsqrts", "ddd", "hQh">; + def VRECPSH : SInst<"vrecps", "...", "hQh">; + def VRSQRTSH : SInst<"vrsqrts", "...", "hQh">; // ARMv8.2-A FP16 three-operands vector intrinsics. // Vector fused multiply-add operations - def VFMAH : SInst<"vfma", "dddd", "hQh">; - def VFMSH : SOpInst<"vfms", "dddd", "hQh", OP_FMLS>; + def VFMAH : SInst<"vfma", "....", "hQh">; + def VFMSH : SOpInst<"vfms", "....", "hQh", OP_FMLS>; // ARMv8.2-A FP16 lane vector intrinsics. // Mul lane - def VMUL_LANEH : IOpInst<"vmul_lane", "ddgi", "hQh", OP_MUL_LN>; - def VMUL_NH : IOpInst<"vmul_n", "dds", "hQh", OP_MUL_N>; + def VMUL_LANEH : IOpInst<"vmul_lane", "..qI", "hQh", OP_MUL_LN>; + def VMUL_NH : IOpInst<"vmul_n", "..1", "hQh", OP_MUL_N>; // Data processing intrinsics - section 5 // Logical operations let isHiddenLInst = 1 in - def VBSLH : SInst<"vbsl", "dudd", "hQh">; + def VBSLH : SInst<"vbsl", ".U..", "hQh">; // Transposition operations - def VZIPH : WInst<"vzip", "2dd", "hQh">; - def VUZPH : WInst<"vuzp", "2dd", "hQh">; - def VTRNH : WInst<"vtrn", "2dd", "hQh">; + def VZIPH : WInst<"vzip", "2..", "hQh">; + def VUZPH : WInst<"vuzp", "2..", "hQh">; + def VTRNH : WInst<"vtrn", "2..", "hQh">; let ArchGuard = "!defined(__aarch64__)" in { // Set all lanes to same value. // Already implemented prior to ARMv8.2-A. - def VMOV_NH : WOpInst<"vmov_n", "ds", "hQh", OP_DUP>; - def VDUP_NH : WOpInst<"vdup_n", "ds", "hQh", OP_DUP>; - def VDUP_LANE1H : WOpInst<"vdup_lane", "dgi", "hQh", OP_DUP_LN>; + def VMOV_NH : WOpInst<"vmov_n", ".1", "hQh", OP_DUP>; + def VDUP_NH : WOpInst<"vdup_n", ".1", "hQh", OP_DUP>; + def VDUP_LANE1H : WOpInst<"vdup_lane", ".qI", "hQh", OP_DUP_LN>; } // Vector Extract - def VEXTH : WInst<"vext", "dddi", "hQh">; + def VEXTH : WInst<"vext", "...I", "hQh">; // Reverse vector elements - def VREV64H : WOpInst<"vrev64", "dd", "hQh", OP_REV64>; + def VREV64H : WOpInst<"vrev64", "..", "hQh", OP_REV64>; } // ARMv8.2-A FP16 vector intrinsics for A64 only. let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(__aarch64__)" in { // Vector rounding - def FRINTIH : SInst<"vrndi", "dd", "hQh">; + def FRINTIH : SInst<"vrndi", "..", "hQh">; // Misc. - def FSQRTH : SInst<"vsqrt", "dd", "hQh">; + def FSQRTH : SInst<"vsqrt", "..", "hQh">; // Multiplication/Division - def MULXH : SInst<"vmulx", "ddd", "hQh">; - def FDIVH : IOpInst<"vdiv", "ddd", "hQh", OP_DIV>; + def MULXH : SInst<"vmulx", "...", "hQh">; + def FDIVH : IOpInst<"vdiv", "...", "hQh", OP_DIV>; // Pairwise addition - def VPADDH1 : SInst<"vpadd", "ddd", "Qh">; + def VPADDH1 : SInst<"vpadd", "...", "Qh">; // Pairwise Max/Min - def VPMAXH1 : SInst<"vpmax", "ddd", "Qh">; - def VPMINH1 : SInst<"vpmin", "ddd", "Qh">; + def VPMAXH1 : SInst<"vpmax", "...", "Qh">; + def VPMINH1 : SInst<"vpmin", "...", "Qh">; // Pairwise MaxNum/MinNum - def FMAXNMPH : SInst<"vpmaxnm", "ddd", "hQh">; - def FMINNMPH : SInst<"vpminnm", "ddd", "hQh">; + def FMAXNMPH : SInst<"vpmaxnm", "...", "hQh">; + def FMINNMPH : SInst<"vpminnm", "...", "hQh">; // ARMv8.2-A FP16 lane vector intrinsics. // FMA lane - def VFMA_LANEH : IInst<"vfma_lane", "dddgi", "hQh">; - def VFMA_LANEQH : IInst<"vfma_laneq", "dddji", "hQh">; + def VFMA_LANEH : IInst<"vfma_lane", "...qI", "hQh">; + def VFMA_LANEQH : IInst<"vfma_laneq", "...QI", "hQh">; // FMA lane with scalar argument - def FMLA_NH : SOpInst<"vfma_n", "ddds", "hQh", OP_FMLA_N>; + def FMLA_NH : SOpInst<"vfma_n", "...1", "hQh", OP_FMLA_N>; // Scalar floating point fused multiply-add (scalar, by element) - def SCALAR_FMLA_LANEH : IInst<"vfma_lane", "sssdi", "Sh">; - def SCALAR_FMLA_LANEQH : IInst<"vfma_laneq", "sssji", "Sh">; + def SCALAR_FMLA_LANEH : IInst<"vfma_lane", "111.I", "Sh">; + def SCALAR_FMLA_LANEQH : IInst<"vfma_laneq", "111QI", "Sh">; // FMS lane - def VFMS_LANEH : IOpInst<"vfms_lane", "dddgi", "hQh", OP_FMS_LN>; - def VFMS_LANEQH : IOpInst<"vfms_laneq", "dddji", "hQh", OP_FMS_LNQ>; + def VFMS_LANEH : IOpInst<"vfms_lane", "...qI", "hQh", OP_FMS_LN>; + def VFMS_LANEQH : IOpInst<"vfms_laneq", "...QI", "hQh", OP_FMS_LNQ>; // FMS lane with scalar argument - def FMLS_NH : SOpInst<"vfms_n", "ddds", "hQh", OP_FMLS_N>; + def FMLS_NH : SOpInst<"vfms_n", "...1", "hQh", OP_FMLS_N>; // Scalar floating foint fused multiply-subtract (scalar, by element) - def SCALAR_FMLS_LANEH : IOpInst<"vfms_lane", "sssdi", "Sh", OP_FMS_LN>; - def SCALAR_FMLS_LANEQH : IOpInst<"vfms_laneq", "sssji", "Sh", OP_FMS_LNQ>; + def SCALAR_FMLS_LANEH : IOpInst<"vfms_lane", "111.I", "Sh", OP_FMS_LN>; + def SCALAR_FMLS_LANEQH : IOpInst<"vfms_laneq", "111QI", "Sh", OP_FMS_LNQ>; // Mul lane - def VMUL_LANEQH : IOpInst<"vmul_laneq", "ddji", "hQh", OP_MUL_LN>; + def VMUL_LANEQH : IOpInst<"vmul_laneq", "..QI", "hQh", OP_MUL_LN>; // Scalar floating point multiply (scalar, by element) - def SCALAR_FMUL_LANEH : IOpInst<"vmul_lane", "ssdi", "Sh", OP_SCALAR_MUL_LN>; - def SCALAR_FMUL_LANEQH : IOpInst<"vmul_laneq", "ssji", "Sh", OP_SCALAR_MUL_LN>; + def SCALAR_FMUL_LANEH : IOpInst<"vmul_lane", "11.I", "Sh", OP_SCALAR_MUL_LN>; + def SCALAR_FMUL_LANEQH : IOpInst<"vmul_laneq", "11QI", "Sh", OP_SCALAR_MUL_LN>; // Mulx lane - def VMULX_LANEH : IOpInst<"vmulx_lane", "ddgi", "hQh", OP_MULX_LN>; - def VMULX_LANEQH : IOpInst<"vmulx_laneq", "ddji", "hQh", OP_MULX_LN>; - def VMULX_NH : IOpInst<"vmulx_n", "dds", "hQh", OP_MULX_N>; + def VMULX_LANEH : IOpInst<"vmulx_lane", "..qI", "hQh", OP_MULX_LN>; + def VMULX_LANEQH : IOpInst<"vmulx_laneq", "..QI", "hQh", OP_MULX_LN>; + def VMULX_NH : IOpInst<"vmulx_n", "..1", "hQh", OP_MULX_N>; // Scalar floating point mulx (scalar, by element) - def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "ssdi", "Sh">; - def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "ssji", "Sh">; + def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "11.I", "Sh">; + def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "11QI", "Sh">; // ARMv8.2-A FP16 reduction vector intrinsics. - def VMAXVH : SInst<"vmaxv", "sd", "hQh">; - def VMINVH : SInst<"vminv", "sd", "hQh">; - def FMAXNMVH : SInst<"vmaxnmv", "sd", "hQh">; - def FMINNMVH : SInst<"vminnmv", "sd", "hQh">; + def VMAXVH : SInst<"vmaxv", "1.", "hQh">; + def VMINVH : SInst<"vminv", "1.", "hQh">; + def FMAXNMVH : SInst<"vmaxnmv", "1.", "hQh">; + def FMINNMVH : SInst<"vminnmv", "1.", "hQh">; // Permutation - def VTRN1H : SOpInst<"vtrn1", "ddd", "hQh", OP_TRN1>; - def VZIP1H : SOpInst<"vzip1", "ddd", "hQh", OP_ZIP1>; - def VUZP1H : SOpInst<"vuzp1", "ddd", "hQh", OP_UZP1>; - def VTRN2H : SOpInst<"vtrn2", "ddd", "hQh", OP_TRN2>; - def VZIP2H : SOpInst<"vzip2", "ddd", "hQh", OP_ZIP2>; - def VUZP2H : SOpInst<"vuzp2", "ddd", "hQh", OP_UZP2>; + def VTRN1H : SOpInst<"vtrn1", "...", "hQh", OP_TRN1>; + def VZIP1H : SOpInst<"vzip1", "...", "hQh", OP_ZIP1>; + def VUZP1H : SOpInst<"vuzp1", "...", "hQh", OP_UZP1>; + def VTRN2H : SOpInst<"vtrn2", "...", "hQh", OP_TRN2>; + def VZIP2H : SOpInst<"vzip2", "...", "hQh", OP_ZIP2>; + def VUZP2H : SOpInst<"vuzp2", "...", "hQh", OP_UZP2>; - def SCALAR_VDUP_LANEH : IInst<"vdup_lane", "sdi", "Sh">; - def SCALAR_VDUP_LANEQH : IInst<"vdup_laneq", "sji", "Sh">; + def SCALAR_VDUP_LANEH : IInst<"vdup_lane", "1.I", "Sh">; + def SCALAR_VDUP_LANEQH : IInst<"vdup_laneq", "1QI", "Sh">; } // v8.2-A dot product instructions. let ArchGuard = "defined(__ARM_FEATURE_DOTPROD)" in { - def DOT : SInst<"vdot", "dd88", "iQiUiQUi">; - def DOT_LANE : SOpInst<"vdot_lane", "dd87i", "iUiQiQUi", OP_DOT_LN>; + def DOT : SInst<"vdot", "..(<<)(<<)", "iQiUiQUi">; + def DOT_LANE : SOpInst<"vdot_lane", "..(<<)(<; } let ArchGuard = "defined(__ARM_FEATURE_DOTPROD) && defined(__aarch64__)" in { // Variants indexing into a 128-bit vector are A64 only. - def UDOT_LANEQ : SOpInst<"vdot_laneq", "dd89i", "iUiQiQUi", OP_DOT_LNQ>; + def UDOT_LANEQ : SOpInst<"vdot_laneq", "..(<<)(<; } // v8.2-A FP16 fused multiply-add long instructions. let ArchGuard = "defined(__ARM_FEATURE_FP16FML) && defined(__aarch64__)" in { - def VFMLAL_LOW : SInst<"vfmlal_low", "nndd", "hQh">; - def VFMLSL_LOW : SInst<"vfmlsl_low", "nndd", "hQh">; - def VFMLAL_HIGH : SInst<"vfmlal_high", "nndd", "hQh">; - def VFMLSL_HIGH : SInst<"vfmlsl_high", "nndd", "hQh">; + def VFMLAL_LOW : SInst<"vfmlal_low", ">>..", "hQh">; + def VFMLSL_LOW : SInst<"vfmlsl_low", ">>..", "hQh">; + def VFMLAL_HIGH : SInst<"vfmlal_high", ">>..", "hQh">; + def VFMLSL_HIGH : SInst<"vfmlsl_high", ">>..", "hQh">; - def VFMLAL_LANE_LOW : SOpInst<"vfmlal_lane_low", "ffH0i", "hQh", OP_FMLAL_LN>; - def VFMLSL_LANE_LOW : SOpInst<"vfmlsl_lane_low", "ffH0i", "hQh", OP_FMLSL_LN>; - def VFMLAL_LANE_HIGH : SOpInst<"vfmlal_lane_high", "ffH0i", "hQh", OP_FMLAL_LN_Hi>; - def VFMLSL_LANE_HIGH : SOpInst<"vfmlsl_lane_high", "ffH0i", "hQh", OP_FMLSL_LN_Hi>; + def VFMLAL_LANE_LOW : SOpInst<"vfmlal_lane_low", "(F>)(F>)F(Fq)I", "hQh", OP_FMLAL_LN>; + def VFMLSL_LANE_LOW : SOpInst<"vfmlsl_lane_low", "(F>)(F>)F(Fq)I", "hQh", OP_FMLSL_LN>; + def VFMLAL_LANE_HIGH : SOpInst<"vfmlal_lane_high", "(F>)(F>)F(Fq)I", "hQh", OP_FMLAL_LN_Hi>; + def VFMLSL_LANE_HIGH : SOpInst<"vfmlsl_lane_high", "(F>)(F>)F(Fq)I", "hQh", OP_FMLSL_LN_Hi>; - def VFMLAL_LANEQ_LOW : SOpInst<"vfmlal_laneq_low", "ffH1i", "hQh", OP_FMLAL_LN>; - def VFMLSL_LANEQ_LOW : SOpInst<"vfmlsl_laneq_low", "ffH1i", "hQh", OP_FMLSL_LN>; - def VFMLAL_LANEQ_HIGH : SOpInst<"vfmlal_laneq_high", "ffH1i", "hQh", OP_FMLAL_LN_Hi>; - def VFMLSL_LANEQ_HIGH : SOpInst<"vfmlsl_laneq_high", "ffH1i", "hQh", OP_FMLSL_LN_Hi>; + def VFMLAL_LANEQ_LOW : SOpInst<"vfmlal_laneq_low", "(F>)(F>)F(FQ)I", "hQh", OP_FMLAL_LN>; + def VFMLSL_LANEQ_LOW : SOpInst<"vfmlsl_laneq_low", "(F>)(F>)F(FQ)I", "hQh", OP_FMLSL_LN>; + def VFMLAL_LANEQ_HIGH : SOpInst<"vfmlal_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLAL_LN_Hi>; + def VFMLSL_LANEQ_HIGH : SOpInst<"vfmlsl_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLSL_LN_Hi>; } diff --git a/clang/include/clang/Basic/arm_neon_incl.td b/clang/include/clang/Basic/arm_neon_incl.td index 984ed787037f..28b00d162a00 100644 --- a/clang/include/clang/Basic/arm_neon_incl.td +++ b/clang/include/clang/Basic/arm_neon_incl.td @@ -198,10 +198,8 @@ def OP_UNAVAILABLE : Operation { // // The prototype is a string that defines the return type of the intrinsic // and the type of each argument. The return type and every argument gets a -// "modifier" that can change in some way the "base type" of the intrinsic. -// -// The modifier 'd' means "default" and does not modify the base type in any -// way. The available modifiers are given below. +// set of "modifiers" that can change in some way the "base type" of the +// intrinsic. // // Typespecs // --------- @@ -226,41 +224,34 @@ def OP_UNAVAILABLE : Operation { // ------------------- // prototype: return (arg, arg, ...) // -// v: void -// t: best-fit integer (int/poly args) -// x: signed integer (int/float args) -// u: unsigned integer (int/float args) -// f: float (int args) -// F: double (int args) -// H: half (int args) -// 0: half (int args), ignore 'Q' size modifier. -// 1: half (int args), force 'Q' size modifier. -// d: default -// g: default, ignore 'Q' size modifier. -// j: default, force 'Q' size modifier. -// w: double width elements, same num elts -// n: double width elements, half num elts -// h: half width elements, double num elts -// q: half width elements, quad num elts -// e: half width elements, double num elts, unsigned -// m: half width elements, same num elts -// i: constant int -// l: constant uint64 -// s: scalar of element type -// z: scalar of half width element type, signed -// r: scalar of double width element type, signed -// b: scalar of unsigned integer/long type (int/float args) -// $: scalar of signed integer/long type (int/float args) -// y: scalar of float -// o: scalar of double -// k: default elt width, double num elts -// 2,3,4: array of default vectors -// B,C,D: array of default elts, force 'Q' size modifier. -// p: pointer type -// c: const pointer type -// 7: vector of 8-bit elements, ignore 'Q' size modifier -// 8: vector of 8-bit elements, same width as default type -// 9: vector of 8-bit elements, force 'Q' size modifier +// Each type modifier is either a single character, or a group surrounded by +// parentheses. +// +// .: default +// v: change to void category. +// S: change to signed integer category. +// U: change to unsigned integer category. +// F: change to floating category. +// P: change to polynomial category. +// p: change polynomial to equivalent integer category. Otherwise nop. +// +// >: double element width (vector size unchanged). +// <: half element width (vector size unchanged). +// +// 1: change to scalar. +// 2: change to struct of two vectors. +// 3: change to struct of three vectors. +// 4: change to struct of four vectors. +// +// *: make a pointer argument. +// c: make a constant argument (for pointers). +// +// Q: force 128-bit width. +// q: force 64-bit width. +// +// I: make 32-bit signed scalar immediate +// !: make this the key type passed to CGBuiltin.cpp in a polymorphic call. + // Every intrinsic subclasses Inst. class Inst { diff --git a/clang/test/CodeGen/aarch64-neon-intrinsics.c b/clang/test/CodeGen/aarch64-neon-intrinsics.c index b29d877dd8ec..7744b4f4a159 100644 --- a/clang/test/CodeGen/aarch64-neon-intrinsics.c +++ b/clang/test/CodeGen/aarch64-neon-intrinsics.c @@ -17756,8 +17756,6 @@ float32_t test_vminnmv_f32(float32x2_t a) { } // CHECK-LABEL: @test_vpaddq_s64( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VPADDQ_V2_I]] @@ -17766,8 +17764,6 @@ int64x2_t test_vpaddq_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vpaddq_u64( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VPADDQ_V2_I]] diff --git a/clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c b/clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c index 836e4dbd9917..d2e3bec4e001 100644 --- a/clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c +++ b/clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c @@ -407,12 +407,10 @@ int64_t test_vqdmlsls_laneq_s32(int64_t a, int32_t b, int32x4_t c) { } // CHECK-LABEL: define <1 x double> @test_vmulx_lane_f64_0() #0 { -// CHECK: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double> -// CHECK: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double> -// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0 -// CHECK: [[VGET_LANE7:%.*]] = extractelement <1 x double> [[TMP1]], i32 0 +// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> , i32 0 +// CHECK: [[VGET_LANE7:%.*]] = extractelement <1 x double> , i32 0 // CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE7]]) -// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0 +// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> , double [[VMULXD_F64_I]], i32 0 // CHECK: ret <1 x double> [[VSET_LANE]] float64x1_t test_vmulx_lane_f64_0() { float64x1_t arg1; @@ -426,13 +424,11 @@ float64x1_t test_vmulx_lane_f64_0() { } // CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_2() #1 { -// CHECK: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double> -// CHECK: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double> -// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x double> [[TMP0]], <1 x double> [[TMP1]], <2 x i32> -// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0 +// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x double> , <1 x double> , <2 x i32> +// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> , i32 0 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[SHUFFLE_I]], i32 1 // CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]]) -// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0 +// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> , double [[VMULXD_F64_I]], i32 0 // CHECK: ret <1 x double> [[VSET_LANE]] float64x1_t test_vmulx_laneq_f64_2() { float64x1_t arg1; diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp index cdf761b00c61..a0f3fb2ddc08 100644 --- a/clang/utils/TableGen/NeonEmitter.cpp +++ b/clang/utils/TableGen/NeonEmitter.cpp @@ -161,11 +161,11 @@ public: Pointer(false), ScalarForMangling(false), NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) {} - Type(TypeSpec TS, char CharMod) + Type(TypeSpec TS, StringRef CharMods) : TS(std::move(TS)), Kind(Void), Immediate(false), Constant(false), Pointer(false), ScalarForMangling(false), NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) { - applyModifier(CharMod); + applyModifiers(CharMods); } /// Returns a type representing "void". @@ -181,13 +181,15 @@ public: bool noManglingQ() const { return NoManglingQ; } bool isPointer() const { return Pointer; } + bool isValue() const { return !isVoid() && !isPointer(); } + bool isScalar() const { return isValue() && NumVectors == 0; } + bool isVector() const { return isValue() && NumVectors > 0; } + bool isConstPointer() const { return Constant; } bool isFloating() const { return Kind == Float; } bool isInteger() const { return Kind == SInt || Kind == UInt; } bool isPoly() const { return Kind == Poly; } bool isSigned() const { return Kind == SInt; } bool isImmediate() const { return Immediate; } - bool isScalar() const { return NumVectors == 0; } - bool isVector() const { return NumVectors > 0; } bool isFloat() const { return isFloating() && ElementBitwidth == 32; } bool isDouble() const { return isFloating() && ElementBitwidth == 64; } bool isHalf() const { return isFloating() && ElementBitwidth == 16; } @@ -205,11 +207,11 @@ public: // Mutator functions // void makeUnsigned() { - assert(isInteger() && "not a potentially signed type"); + assert(!isVoid() && "not a potentially signed type"); Kind = UInt; } void makeSigned() { - assert(isInteger() && "not a potentially signed type"); + assert(!isVoid() && "not a potentially signed type"); Kind = SInt; } @@ -267,8 +269,8 @@ private: /// seen. This is needed by applyModifier as some modifiers /// only take effect if the type size was changed by "Q" or "H". void applyTypespec(bool &Quad); - /// Applies a prototype modifiers to the type. - void applyModifier(char Mod); + /// Applies prototype modifiers to the type. + void applyModifiers(StringRef Mods); }; //===----------------------------------------------------------------------===// @@ -299,8 +301,8 @@ class Intrinsic { /// The Record this intrinsic was created from. Record *R; - /// The unmangled name and prototype. - std::string Name, Proto; + /// The unmangled name. + std::string Name; /// The input and output typespecs. InTS == OutTS except when /// CartesianProductOfTypes is 1 - this is the case for vreinterpret. TypeSpec OutTS, InTS; @@ -323,6 +325,8 @@ class Intrinsic { /// The types of return value [0] and parameters [1..]. std::vector Types; + /// The index of the key type passed to CGBuiltin.cpp for polymorphic calls. + int PolymorphicKeyType; /// The local variables defined. std::map Variables; /// NeededEarly - set if any other intrinsic depends on this intrinsic. @@ -358,34 +362,39 @@ public: Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS, TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter, StringRef Guard, bool IsUnavailable, bool BigEndianSafe) - : R(R), Name(Name.str()), Proto(Proto.str()), OutTS(OutTS), InTS(InTS), - CK(CK), Body(Body), Guard(Guard.str()), IsUnavailable(IsUnavailable), - BigEndianSafe(BigEndianSafe), NeededEarly(false), UseMacro(false), - BaseType(OutTS, 'd'), InBaseType(InTS, 'd'), Emitter(Emitter) { - // If this builtin takes an immediate argument, we need to #define it rather - // than use a standard declaration, so that SemaChecking can range check - // the immediate passed by the user. - if (Proto.find('i') != std::string::npos) - UseMacro = true; - - // Pointer arguments need to use macros to avoid hiding aligned attributes - // from the pointer type. - if (Proto.find('p') != std::string::npos || - Proto.find('c') != std::string::npos) - UseMacro = true; - - // It is not permitted to pass or return an __fp16 by value, so intrinsics - // taking a scalar float16_t must be implemented as macros. - if (OutTS.find('h') != std::string::npos && - Proto.find('s') != std::string::npos) - UseMacro = true; - + : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body), + Guard(Guard.str()), IsUnavailable(IsUnavailable), + BigEndianSafe(BigEndianSafe), PolymorphicKeyType(0), NeededEarly(false), + UseMacro(false), BaseType(OutTS, "."), InBaseType(InTS, "."), + Emitter(Emitter) { // Modify the TypeSpec per-argument to get a concrete Type, and create // known variables for each. // Types[0] is the return value. - Types.emplace_back(OutTS, Proto[0]); - for (unsigned I = 1; I < Proto.size(); ++I) - Types.emplace_back(InTS, Proto[I]); + unsigned Pos = 0; + Types.emplace_back(OutTS, getNextModifiers(Proto, Pos)); + StringRef Mods = getNextModifiers(Proto, Pos); + while (!Mods.empty()) { + Types.emplace_back(InTS, Mods); + if (Mods.find("!") != StringRef::npos) + PolymorphicKeyType = Types.size() - 1; + + Mods = getNextModifiers(Proto, Pos); + } + + for (auto Type : Types) { + // If this builtin takes an immediate argument, we need to #define it rather + // than use a standard declaration, so that SemaChecking can range check + // the immediate passed by the user. + + // Pointer arguments need to use macros to avoid hiding aligned attributes + // from the pointer type. + + // It is not permitted to pass or return an __fp16 by value, so intrinsics + // taking a scalar float16_t must be implemented as macros. + if (Type.isImmediate() || Type.isPointer() || + (Type.isScalar() && Type.isHalf())) + UseMacro = true; + } } /// Get the Record that this intrinsic is based off. @@ -401,23 +410,24 @@ public: /// Return true if the intrinsic takes an immediate operand. bool hasImmediate() const { - return Proto.find('i') != std::string::npos; + return std::any_of(Types.begin(), Types.end(), + [](const Type &T) { return T.isImmediate(); }); } /// Return the parameter index of the immediate operand. unsigned getImmediateIdx() const { - assert(hasImmediate()); - unsigned Idx = Proto.find('i'); - assert(Idx > 0 && "Can't return an immediate!"); - return Idx - 1; + for (unsigned Idx = 0; Idx < Types.size(); ++Idx) + if (Types[Idx].isImmediate()) + return Idx - 1; + llvm_unreachable("Intrinsic has no immediate"); } - unsigned getNumParams() const { return Proto.size() - 1; } + + unsigned getNumParams() const { return Types.size() - 1; } Type getReturnType() const { return Types[0]; } Type getParamType(unsigned I) const { return Types[I + 1]; } Type getBaseType() const { return BaseType; } - /// Return the raw prototype string. - std::string getProto() const { return Proto; } + Type getPolymorphicKeyType() const { return Types[PolymorphicKeyType]; } /// Return true if the prototype has a scalar argument. bool protoHasScalar() const; @@ -471,6 +481,8 @@ public: void indexBody(); private: + StringRef getNextModifiers(StringRef Proto, unsigned &Pos) const; + std::string mangleName(std::string Name, ClassKind CK) const; void initVariables(); @@ -614,10 +626,14 @@ std::string Type::builtin_str() const { if (isVoid()) return "v"; - if (Pointer) + if (isPointer()) { // All pointers are void pointers. - S += "v"; - else if (isInteger()) + S = "v"; + if (isConstPointer()) + S += "C"; + S += "*"; + return S; + } else if (isInteger()) switch (ElementBitwidth) { case 8: S += "c"; break; case 16: S += "s"; break; @@ -634,10 +650,11 @@ std::string Type::builtin_str() const { default: llvm_unreachable("Unhandled case!"); } + // FIXME: NECESSARY??????????????????????????????????????????????????????????????????????? if (isChar() && !isPointer() && isSigned()) // Make chars explicitly signed. S = "S" + S; - else if (!isPointer() && isInteger() && !isSigned()) + else if (isInteger() && !isSigned()) S = "U" + S; // Constant indices are "int", but have the "constant expression" modifier. @@ -646,11 +663,8 @@ std::string Type::builtin_str() const { S = "I" + S; } - if (isScalar()) { - if (Constant) S += "C"; - if (Pointer) S += "*"; + if (isScalar()) return S; - } std::string Ret; for (unsigned I = 0; I < NumVectors; ++I) @@ -812,202 +826,77 @@ void Type::applyTypespec(bool &Quad) { Bitwidth = Quad ? 128 : 64; } -void Type::applyModifier(char Mod) { +void Type::applyModifiers(StringRef Mods) { bool AppliedQuad = false; applyTypespec(AppliedQuad); - switch (Mod) { - case 'v': - Kind = Void; - break; - case 't': - if (isPoly()) + for (char Mod : Mods) { + switch (Mod) { + case '.': + break; + case 'v': + Kind = Void; + break; + case 'S': + Kind = SInt; + break; + case 'U': Kind = UInt; - break; - case 'b': - Kind = UInt; - NumVectors = 0; - Bitwidth = ElementBitwidth; - break; - case '$': - Kind = SInt; - NumVectors = 0; - Bitwidth = ElementBitwidth; - break; - case 'u': - Kind = UInt; - break; - case 'x': - assert(!isPoly() && "'u' can't be used with poly types!"); - Kind = SInt; - break; - case 'o': - Bitwidth = ElementBitwidth = 64; - NumVectors = 0; - Kind = Float; - break; - case 'y': - Bitwidth = ElementBitwidth = 32; - NumVectors = 0; - Kind = Float; - break; - case 'Y': - Bitwidth = ElementBitwidth = 16; - NumVectors = 0; - Kind = Float; - break; - case 'I': - Bitwidth = ElementBitwidth = 32; - NumVectors = 0; - Kind = SInt; - break; - case 'L': - Bitwidth = ElementBitwidth = 64; - NumVectors = 0; - Kind = SInt; - break; - case 'U': - Bitwidth = ElementBitwidth = 32; - NumVectors = 0; - Kind = UInt; - break; - case 'O': - Bitwidth = ElementBitwidth = 64; - NumVectors = 0; - Kind = UInt; - break; - case 'f': - Kind = Float; - ElementBitwidth = 32; - break; - case 'F': - Kind = Float; - ElementBitwidth = 64; - break; - case 'H': - Kind = Float; - ElementBitwidth = 16; - break; - case '0': - Kind = Float; - if (AppliedQuad) - Bitwidth /= 2; - ElementBitwidth = 16; - break; - case '1': - Kind = Float; - if (!AppliedQuad) - Bitwidth *= 2; - ElementBitwidth = 16; - break; - case 'g': - if (AppliedQuad) - Bitwidth /= 2; - break; - case 'j': - if (!AppliedQuad) - Bitwidth *= 2; - break; - case 'w': - ElementBitwidth *= 2; - Bitwidth *= 2; - break; - case 'n': - ElementBitwidth *= 2; - break; - case 'i': - Kind = SInt; - ElementBitwidth = Bitwidth = 32; - NumVectors = 0; - Immediate = true; - break; - case 'l': - Kind = UInt; - ElementBitwidth = Bitwidth = 64; - NumVectors = 0; - Immediate = true; - break; - case 'z': - ElementBitwidth /= 2; - Bitwidth = ElementBitwidth; - NumVectors = 0; - break; - case 'r': - ElementBitwidth *= 2; - Bitwidth = ElementBitwidth; - NumVectors = 0; - break; - case 's': - Bitwidth = ElementBitwidth; - NumVectors = 0; - break; - case 'k': - Bitwidth *= 2; - break; - case 'c': - Constant = true; - LLVM_FALLTHROUGH; - case 'p': - Pointer = true; - Bitwidth = ElementBitwidth; - NumVectors = 0; - break; - case 'h': - ElementBitwidth /= 2; - break; - case 'q': - ElementBitwidth /= 2; - Bitwidth *= 2; - break; - case 'e': - ElementBitwidth /= 2; - Kind = UInt; - break; - case 'm': - ElementBitwidth /= 2; - Bitwidth /= 2; - break; - case 'd': - break; - case '2': - NumVectors = 2; - break; - case '3': - NumVectors = 3; - break; - case '4': - NumVectors = 4; - break; - case 'B': - NumVectors = 2; - if (!AppliedQuad) - Bitwidth *= 2; - break; - case 'C': - NumVectors = 3; - if (!AppliedQuad) - Bitwidth *= 2; - break; - case 'D': - NumVectors = 4; - if (!AppliedQuad) - Bitwidth *= 2; - break; - case '7': - if (AppliedQuad) - Bitwidth /= 2; - ElementBitwidth = 8; - break; - case '8': - ElementBitwidth = 8; - break; - case '9': - if (!AppliedQuad) - Bitwidth *= 2; - ElementBitwidth = 8; - break; - default: - llvm_unreachable("Unhandled character!"); + break; + case 'F': + Kind = Float; + break; + case 'P': + Kind = Poly; + break; + case '>': + assert(ElementBitwidth < 128); + ElementBitwidth *= 2; + break; + case '<': + assert(ElementBitwidth > 8); + ElementBitwidth /= 2; + break; + case '1': + NumVectors = 0; + break; + case '2': + NumVectors = 2; + break; + case '3': + NumVectors = 3; + break; + case '4': + NumVectors = 4; + break; + case '*': + Pointer = true; + break; + case 'c': + Constant = true; + break; + case 'Q': + Bitwidth = 128; + break; + case 'q': + Bitwidth = 64; + break; + case 'I': + Kind = SInt; + ElementBitwidth = Bitwidth = 32; + NumVectors = 0; + Immediate = true; + break; + case 'p': + if (isPoly()) + Kind = UInt; + break; + case '!': + // Key type, handled elsewhere. + break; + default: + llvm_unreachable("Unhandled character!"); + } } } @@ -1015,6 +904,19 @@ void Type::applyModifier(char Mod) { // Intrinsic implementation //===----------------------------------------------------------------------===// +StringRef Intrinsic::getNextModifiers(StringRef Proto, unsigned &Pos) const { + if (Proto.size() == Pos) + return StringRef(); + else if (Proto[Pos] != '(') + return Proto.substr(Pos++, 1); + + size_t Start = Pos + 1; + size_t End = Proto.find(')', Start); + assert_with_loc(End != StringRef::npos, "unmatched modifier group paren"); + Pos = End + 1; + return Proto.slice(Start, End); +} + std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const { char typeCode = '\0'; bool printNumber = true; @@ -1053,17 +955,13 @@ std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const { return S; } -static bool isFloatingPointProtoModifier(char Mod) { - return Mod == 'F' || Mod == 'f' || Mod == 'H' || Mod == 'Y' || Mod == 'I'; -} - std::string Intrinsic::getBuiltinTypeStr() { ClassKind LocalCK = getClassKind(true); std::string S; Type RetT = getReturnType(); if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() && - !RetT.isFloating() && !RetT.isVoid()) + !RetT.isFloating()) RetT.makeInteger(RetT.getElementSizeInBits(), false); // Since the return value must be one type, return a vector type of the @@ -1078,7 +976,7 @@ std::string Intrinsic::getBuiltinTypeStr() { if (!RetT.isScalar() && RetT.isInteger() && !RetT.isSigned()) RetT.makeSigned(); - if (LocalCK == ClassB && !RetT.isVoid() && !RetT.isScalar()) + if (LocalCK == ClassB && RetT.isValue() && !RetT.isScalar()) // Cast to vector of 8-bit elements. RetT.makeInteger(8, true); @@ -1194,7 +1092,7 @@ void Intrinsic::initVariables() { // Modify the TypeSpec per-argument to get a concrete Type, and create // known variables for each. - for (unsigned I = 1; I < Proto.size(); ++I) { + for (unsigned I = 1; I < Types.size(); ++I) { char NameC = '0' + (I - 1); std::string Name = "p"; Name.push_back(NameC); @@ -1315,7 +1213,7 @@ void Intrinsic::emitShadowedArgs() { for (unsigned I = 0; I < getNumParams(); ++I) { // Do not create a temporary for an immediate argument. // That would defeat the whole point of using a macro! - if (hasImmediate() && Proto[I+1] == 'i') + if (getParamType(I).isImmediate()) continue; // Do not create a temporary for pointer arguments. The input // pointer may have an alignment hint. @@ -1339,13 +1237,9 @@ void Intrinsic::emitShadowedArgs() { } bool Intrinsic::protoHasScalar() const { - return (Proto.find('s') != std::string::npos || - Proto.find('z') != std::string::npos || - Proto.find('r') != std::string::npos || - Proto.find('b') != std::string::npos || - Proto.find('$') != std::string::npos || - Proto.find('y') != std::string::npos || - Proto.find('o') != std::string::npos); + return std::any_of(Types.begin(), Types.end(), [](const Type &T) { + return T.isScalar() && !T.isImmediate(); + }); } void Intrinsic::emitBodyAsBuiltinCall() { @@ -1408,13 +1302,7 @@ void Intrinsic::emitBodyAsBuiltinCall() { // Extra constant integer to hold type class enum for this function, e.g. s8 if (getClassKind(true) == ClassB) { - Type ThisTy = getReturnType(); - if (Proto[0] == 'v' || isFloatingPointProtoModifier(Proto[0])) - ThisTy = getParamType(0); - if (ThisTy.isPointer()) - ThisTy = getParamType(1); - - S += utostr(ThisTy.getNeonEnum()); + S += utostr(getPolymorphicKeyType().getNeonEnum()); } else { // Remove extraneous ", ". S.pop_back(); @@ -2019,9 +1907,9 @@ void NeonEmitter::createIntrinsic(Record *R, std::vector> NewTypeSpecs; for (auto TS : TypeSpecs) { if (CartesianProductOfTypes) { - Type DefaultT(TS, 'd'); + Type DefaultT(TS, "."); for (auto SrcTS : TypeSpecs) { - Type DefaultSrcT(SrcTS, 'd'); + Type DefaultSrcT(SrcTS, "."); if (TS == SrcTS || DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits()) continue; @@ -2101,31 +1989,19 @@ void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS, continue; uint64_t Mask = 0ULL; - Type Ty = Def->getReturnType(); - if (Def->getProto()[0] == 'v' || - isFloatingPointProtoModifier(Def->getProto()[0])) - Ty = Def->getParamType(0); - if (Ty.isPointer()) - Ty = Def->getParamType(1); - - Mask |= 1ULL << Ty.getNeonEnum(); + Mask |= 1ULL << Def->getPolymorphicKeyType().getNeonEnum(); // Check if the function has a pointer or const pointer argument. - std::string Proto = Def->getProto(); int PtrArgNum = -1; bool HasConstPtr = false; for (unsigned I = 0; I < Def->getNumParams(); ++I) { - char ArgType = Proto[I + 1]; - if (ArgType == 'c') { - HasConstPtr = true; + const auto &Type = Def->getParamType(I); + if (Type.isPointer()) { PtrArgNum = I; - break; - } - if (ArgType == 'p') { - PtrArgNum = I; - break; + HasConstPtr = Type.isConstPointer(); } } + // For sret builtins, adjust the pointer argument index. if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1) PtrArgNum += 1; @@ -2349,7 +2225,7 @@ void NeonEmitter::run(raw_ostream &OS) { bool InIfdef = false; for (auto &TS : TDTypeVec) { bool IsA64 = false; - Type T(TS, 'd'); + Type T(TS, "."); if (T.isDouble() || (T.isPoly() && T.getElementSizeInBits() == 64)) IsA64 = true; @@ -2382,7 +2258,7 @@ void NeonEmitter::run(raw_ostream &OS) { for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) { for (auto &TS : TDTypeVec) { bool IsA64 = false; - Type T(TS, 'd'); + Type T(TS, "."); if (T.isDouble() || (T.isPoly() && T.getElementSizeInBits() == 64)) IsA64 = true; @@ -2395,8 +2271,8 @@ void NeonEmitter::run(raw_ostream &OS) { InIfdef = true; } - char M = '2' + (NumMembers - 2); - Type VT(TS, M); + const char Mods[] = { static_cast('2' + (NumMembers - 2)), 0}; + Type VT(TS, Mods); OS << "typedef struct " << VT.str() << " {\n"; OS << " " << T.str() << " val"; OS << "[" << NumMembers << "]"; diff --git a/clang/utils/convert_arm_neon.py b/clang/utils/convert_arm_neon.py new file mode 100644 index 000000000000..c4b364529457 --- /dev/null +++ b/clang/utils/convert_arm_neon.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 + +# This script was committed on 20/11/2019 and it would probably make sense to remove +# it after the next release branches. + +# This script is pipe based and converts an arm_neon.td (or arm_fp16.td) file +# using the old single-char type modifiers to an equivalent new-style form where +# each modifier is orthogonal and they can be composed. +# +# It was used to directly generate the .td files on master, so if you have any +# local additions I would suggest implementing any modifiers here, and running +# it over your entire pre-merge .td files rather than trying to resolve any +# conflicts manually. + +import re, sys +MOD_MAP = { + 'v': 'v', + 'x': 'S', + 'u': 'U', + 'd': '.', + 'g': 'q', + 'j': 'Q', + 'w': '>Q', + 'n': '>', + 'h': '<', + 'q': '', + 's': '1', + 'z': '1<', + 'r': '1>', + 'b': '1U', + '$': '1S', + 'k': 'Q', + '2': '2', + '3': '3', + '4': '4', + 'B': '2Q', + 'C': '3Q', + 'D': '4Q', + 'p': '*', + 'c': 'c*', + '7': '< desired: + res += '<' + cur /= 2 + return res + + +def remap_protocol(proto, typespec, name): + key_type = 0 + + # Conversions like to see the integer type so they know signedness. + if 'vcvt' in name and '_f' in name and name != 'vcvt_f32_f64' and name != 'vcvt_f64_f32': + key_type = 1 + default_width = typespec_elt_size(typespec) + inconsistent_width = False + for elt in typespec: + new_width = typespec_elt_size(elt) + if new_width and new_width != default_width: + inconsistent_width = True + + res = '' + for i, c in enumerate(proto): + # void and pointers make for bad discriminators in CGBuiltin.cpp. + if c in 'vcp': + key_type += 1 + + if c in MOD_MAP: + cur_mod = MOD_MAP[c] + elif inconsistent_width: + # Otherwise it's a fixed output width modifier. + sys.stderr.write(f'warning: {name} uses fixed output size but has inconsistent input widths: {proto} {typespec}\n') + + if c == 'Y': + # y: scalar of half float + resize = get_resize(default_width, 16) + cur_mod = f'1F{resize}' + elif c == 'y': + # y: scalar of float + resize = get_resize(default_width, 32) + cur_mod = f'1F{resize}' + elif c == 'o': + # o: scalar of double + resize = get_resize(default_width, 64) + cur_mod = f'1F{resize}' + elif c == 'I': + # I: scalar of 32-bit signed + resize = get_resize(default_width, 32) + cur_mod = f'1S{resize}' + elif c == 'L': + # L: scalar of 64-bit signed + resize = get_resize(default_width, 64) + cur_mod = f'1S{resize}' + elif c == 'U': + # I: scalar of 32-bit unsigned + resize = get_resize(default_width, 32) + cur_mod = f'1U{resize}' + elif c == 'O': + # O: scalar of 64-bit unsigned + resize = get_resize(default_width, 64) + cur_mod = f'1U{resize}' + elif c == 'f': + # f: float (int args) + resize = get_resize(default_width, 32) + cur_mod = f'F{resize}' + elif c == 'F': + # F: double (int args) + resize = get_resize(default_width, 64) + cur_mod = f'F{resize}' + elif c == 'H': + # H: half (int args) + resize = get_resize(default_width, 16) + cur_mod = f'F{resize}' + elif c == '0': + # 0: half (int args), ignore 'Q' size modifier. + resize = get_resize(default_width, 16) + cur_mod = f'Fq{resize}' + elif c == '1': + # 1: half (int args), force 'Q' size modifier. + resize = get_resize(default_width, 16) + cur_mod = f'FQ{resize}' + + if len(cur_mod) == 0: + raise Exception(f'WTF: {c} in {name}') + + if key_type != 0 and key_type == i: + cur_mod += '!' + + if len(cur_mod) == 1: + res += cur_mod + else: + res += '(' + cur_mod + ')' + + return res + +def replace_insts(m): + start, end = m.span('proto') + start -= m.start() + end -= m.start() + new_proto = remap_protocol(m['proto'], m['kinds'], m['name']) + return m.group()[:start] + new_proto + m.group()[end:] + +INST = re.compile(r'Inst<"(?P.*?)",\s*"(?P.*?)",\s*"(?P.*?)"') + +new_td = INST.sub(replace_insts, sys.stdin.read()) +sys.stdout.write(new_td)