[RISCV] Support Xsfvfwmaccqqq extensions (#68296)

Bfloat16 Matrix Multiply Accumulate Instruction https://sifive.cdn.prismic.io/sifive/c391d53e-ffcf-4091-82f6-c37bf3e883ed_xsfvfwmaccqqq-spec.pdf
2024-11-27 23:51:56 +00:00 · 2023-11-03 10:08:26 +08:00 · 2023-11-03 10:08:26 +08:00 · 945d2e6e60
commit 945d2e6e60
parent 801a30aa8f
19 changed files with 523 additions and 12 deletions
--- a/clang/include/clang/Basic/riscv_sifive_vector.td
+++ b/clang/include/clang/Basic/riscv_sifive_vector.td
@ -104,6 +104,14 @@ let SupportOverloading = false in {
  }
 }

+multiclass RVVVFWMACCBuiltinSet<list<list<string>> suffixes_prototypes> {
+  let OverloadedName = NAME,
+      Name = NAME,
+      HasMasked = false,
+      Log2LMUL = [-2, -1, 0, 1, 2] in
+    defm NAME : RVVOutOp1Op2BuiltinSet<NAME, "x", suffixes_prototypes>;
+}
+
 multiclass RVVVQMACCBuiltinSet<list<list<string>> suffixes_prototypes> {
  let OverloadedName = NAME,
      Name = NAME,
@ -127,3 +135,7 @@ let UnMaskedPolicyScheme = HasPolicyOperand in
    defm sf_vqmaccus_4x8x4 : RVVVQMACCBuiltinSet<[["", "v", "vv(FixedSEW:8)SUv(FixedSEW:8)v"]]>;
    defm sf_vqmaccsu_4x8x4 : RVVVQMACCBuiltinSet<[["", "v", "vv(FixedSEW:8)Sv(FixedSEW:8)Uv"]]>;
  }
+
+let UnMaskedPolicyScheme = HasPolicyOperand in
+  let RequiredFeatures = ["Xsfvfwmaccqqq"] in
+    defm sf_vfwmacc_4x4x4 : RVVVFWMACCBuiltinSet<[["", "w", "wwSvv"]]>;
--- a/clang/include/clang/Support/RISCVVIntrinsicUtils.h
+++ b/clang/include/clang/Support/RISCVVIntrinsicUtils.h
@ -485,17 +485,18 @@ enum RVVRequire : uint16_t {
  RVV_REQ_RV64 = 1 << 0,
  RVV_REQ_ZvfhminOrZvfh = 1 << 1,
  RVV_REQ_Xsfvcp = 1 << 2,
-  RVV_REQ_Xsfvqmaccdod = 1 << 3,
-  RVV_REQ_Xsfvqmaccqoq = 1 << 4,
-  RVV_REQ_Zvbb = 1 << 5,
-  RVV_REQ_Zvbc = 1 << 6,
-  RVV_REQ_Zvkb = 1 << 7,
-  RVV_REQ_Zvkg = 1 << 8,
-  RVV_REQ_Zvkned = 1 << 9,
-  RVV_REQ_Zvknha = 1 << 10,
-  RVV_REQ_Zvknhb = 1 << 11,
-  RVV_REQ_Zvksed = 1 << 12,
-  RVV_REQ_Zvksh = 1 << 13,
+  RVV_REQ_Xsfvfwmaccqqq = 1 << 3,
+  RVV_REQ_Xsfvqmaccdod = 1 << 4,
+  RVV_REQ_Xsfvqmaccqoq = 1 << 5,
+  RVV_REQ_Zvbb = 1 << 6,
+  RVV_REQ_Zvbc = 1 << 7,
+  RVV_REQ_Zvkb = 1 << 8,
+  RVV_REQ_Zvkg = 1 << 9,
+  RVV_REQ_Zvkned = 1 << 10,
+  RVV_REQ_Zvknha = 1 << 11,
+  RVV_REQ_Zvknhb = 1 << 12,
+  RVV_REQ_Zvksed = 1 << 13,
+  RVV_REQ_Zvksh = 1 << 14,

  LLVM_MARK_AS_BITMASK_ENUM(RVV_REQ_Zvksh)
 };
--- a/clang/lib/Sema/SemaRISCVVectorLookup.cpp
+++ b/clang/lib/Sema/SemaRISCVVectorLookup.cpp
@ -205,6 +205,7 @@ void RISCVIntrinsicManagerImpl::ConstructRVVIntrinsics(
  static const std::pair<const char *, RVVRequire> FeatureCheckList[] = {
      {"64bit", RVV_REQ_RV64},
      {"xsfvcp", RVV_REQ_Xsfvcp},
+      {"xsfvfwmaccqqq", RVV_REQ_Xsfvfwmaccqqq},
      {"xsfvqmaccdod", RVV_REQ_Xsfvqmaccdod},
      {"xsfvqmaccqoq", RVV_REQ_Xsfvqmaccqoq},
      {"experimental-zvbb", RVV_REQ_Zvbb},
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vfwmacc_4x4x4.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vfwmacc_4x4x4.c
@ -0,0 +1,57 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zvfh -target-feature +xsfvfwmaccqqq \
+// RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN:   FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <sifive_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x float> @test_sf_vfwmacc_4x4x4_f32mf2
+// CHECK-RV64-SAME: (<vscale x 1 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv1f32.nxv4f16.nxv1f16.i64(<vscale x 1 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 1 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
+//
+vfloat32mf2_t test_sf_vfwmacc_4x4x4_f32mf2(vfloat32mf2_t vd, vfloat16m1_t vs1, vfloat16mf4_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32mf2(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x float> @test_sf_vfwmacc_4x4x4_f32m1
+// CHECK-RV64-SAME: (<vscale x 2 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv2f32.nxv4f16.nxv2f16.i64(<vscale x 2 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 2 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
+//
+vfloat32m1_t test_sf_vfwmacc_4x4x4_f32m1(vfloat32m1_t vd, vfloat16m1_t vs1, vfloat16mf2_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32m1(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x float> @test_sf_vfwmacc_4x4x4_f32m2
+// CHECK-RV64-SAME: (<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 4 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+vfloat32m2_t test_sf_vfwmacc_4x4x4_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32m2(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x float> @test_sf_vfwmacc_4x4x4_f32m4
+// CHECK-RV64-SAME: (<vscale x 8 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv8f32.nxv4f16.nxv8f16.i64(<vscale x 8 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 8 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
+//
+vfloat32m4_t test_sf_vfwmacc_4x4x4_f32m4(vfloat32m4_t vd, vfloat16m1_t vs1, vfloat16m2_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32m4(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x float> @test_sf_vfwmacc_4x4x4_f32m8
+// CHECK-RV64-SAME: (<vscale x 16 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv16f32.nxv4f16.nxv16f16.i64(<vscale x 16 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 16 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
+//
+vfloat32m8_t test_sf_vfwmacc_4x4x4_f32m8(vfloat32m8_t vd, vfloat16m1_t vs1, vfloat16m4_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32m8(vd, vs1, vs2, vl);
+}
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/sf_vfwmacc_4x4x4.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/sf_vfwmacc_4x4x4.c
@ -0,0 +1,57 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zvfh -target-feature +xsfvfwmaccqqq \
+// RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN:   FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <sifive_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x float> @test_sf_vfwmacc_4x4x4_f32mf2
+// CHECK-RV64-SAME: (<vscale x 1 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv1f32.nxv4f16.nxv1f16.i64(<vscale x 1 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 1 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
+//
+vfloat32mf2_t test_sf_vfwmacc_4x4x4_f32mf2(vfloat32mf2_t vd, vfloat16m1_t vs1, vfloat16mf4_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x float> @test_sf_vfwmacc_4x4x4_f32m1
+// CHECK-RV64-SAME: (<vscale x 2 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv2f32.nxv4f16.nxv2f16.i64(<vscale x 2 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 2 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
+//
+vfloat32m1_t test_sf_vfwmacc_4x4x4_f32m1(vfloat32m1_t vd, vfloat16m1_t vs1, vfloat16mf2_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x float> @test_sf_vfwmacc_4x4x4_f32m2
+// CHECK-RV64-SAME: (<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 4 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+vfloat32m2_t test_sf_vfwmacc_4x4x4_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x float> @test_sf_vfwmacc_4x4x4_f32m4
+// CHECK-RV64-SAME: (<vscale x 8 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv8f32.nxv4f16.nxv8f16.i64(<vscale x 8 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 8 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
+//
+vfloat32m4_t test_sf_vfwmacc_4x4x4_f32m4(vfloat32m4_t vd, vfloat16m1_t vs1, vfloat16m2_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x float> @test_sf_vfwmacc_4x4x4_f32m8
+// CHECK-RV64-SAME: (<vscale x 16 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv16f32.nxv4f16.nxv16f16.i64(<vscale x 16 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 16 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
+//
+vfloat32m8_t test_sf_vfwmacc_4x4x4_f32m8(vfloat32m8_t vd, vfloat16m1_t vs1, vfloat16m4_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4(vd, vs1, vs2, vl);
+}
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/sf_vfwmacc_4x4x4.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/sf_vfwmacc_4x4x4.c
@ -0,0 +1,57 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zvfh -target-feature +xsfvfwmaccqqq \
+// RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN:   FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <sifive_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x float> @test_sf_vfwmacc_4x4x4_f32mf2_tu
+// CHECK-RV64-SAME: (<vscale x 1 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv1f32.nxv4f16.nxv1f16.i64(<vscale x 1 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 1 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
+//
+vfloat32mf2_t test_sf_vfwmacc_4x4x4_f32mf2_tu(vfloat32mf2_t vd, vfloat16m1_t vs1, vfloat16mf4_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32mf2_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x float> @test_sf_vfwmacc_4x4x4_f32m1
+// CHECK-RV64-SAME: (<vscale x 2 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv2f32.nxv4f16.nxv2f16.i64(<vscale x 2 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 2 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
+//
+vfloat32m1_t test_sf_vfwmacc_4x4x4_f32m1(vfloat32m1_t vd, vfloat16m1_t vs1, vfloat16mf2_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32m1_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x float> @test_sf_vfwmacc_4x4x4_f32m2
+// CHECK-RV64-SAME: (<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 4 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+vfloat32m2_t test_sf_vfwmacc_4x4x4_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32m2_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x float> @test_sf_vfwmacc_4x4x4_f32m4
+// CHECK-RV64-SAME: (<vscale x 8 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv8f32.nxv4f16.nxv8f16.i64(<vscale x 8 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 8 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
+//
+vfloat32m4_t test_sf_vfwmacc_4x4x4_f32m4(vfloat32m4_t vd, vfloat16m1_t vs1, vfloat16m2_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32m4_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x float> @test_sf_vfwmacc_4x4x4_f32m8
+// CHECK-RV64-SAME: (<vscale x 16 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv16f32.nxv4f16.nxv16f16.i64(<vscale x 16 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 16 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
+//
+vfloat32m8_t test_sf_vfwmacc_4x4x4_f32m8(vfloat32m8_t vd, vfloat16m1_t vs1, vfloat16m4_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32m8_tu(vd, vs1, vs2, vl);
+}
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/sf_vfwmacc_4x4x4.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/sf_vfwmacc_4x4x4.c
@ -0,0 +1,57 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zvfh -target-feature +xsfvfwmaccqqq \
+// RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN:   FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <sifive_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x float> @test_sf_vfwmacc_4x4x4_f32mf2_tu
+// CHECK-RV64-SAME: (<vscale x 1 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv1f32.nxv4f16.nxv1f16.i64(<vscale x 1 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 1 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
+//
+vfloat32mf2_t test_sf_vfwmacc_4x4x4_f32mf2_tu(vfloat32mf2_t vd, vfloat16m1_t vs1, vfloat16mf4_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x float> @test_sf_vfwmacc_4x4x4_f32m1
+// CHECK-RV64-SAME: (<vscale x 2 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv2f32.nxv4f16.nxv2f16.i64(<vscale x 2 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 2 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
+//
+vfloat32m1_t test_sf_vfwmacc_4x4x4_f32m1(vfloat32m1_t vd, vfloat16m1_t vs1, vfloat16mf2_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x float> @test_sf_vfwmacc_4x4x4_f32m2
+// CHECK-RV64-SAME: (<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 4 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+vfloat32m2_t test_sf_vfwmacc_4x4x4_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x float> @test_sf_vfwmacc_4x4x4_f32m4
+// CHECK-RV64-SAME: (<vscale x 8 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv8f32.nxv4f16.nxv8f16.i64(<vscale x 8 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 8 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
+//
+vfloat32m4_t test_sf_vfwmacc_4x4x4_f32m4(vfloat32m4_t vd, vfloat16m1_t vs1, vfloat16m2_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x float> @test_sf_vfwmacc_4x4x4_f32m8
+// CHECK-RV64-SAME: (<vscale x 16 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv16f32.nxv4f16.nxv16f16.i64(<vscale x 16 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 16 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
+//
+vfloat32m8_t test_sf_vfwmacc_4x4x4_f32m8(vfloat32m8_t vd, vfloat16m1_t vs1, vfloat16m4_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_tu(vd, vs1, vs2, vl);
+}
--- a/clang/test/Preprocessor/riscv-target-features.c
+++ b/clang/test/Preprocessor/riscv-target-features.c
@ -33,6 +33,7 @@
 // CHECK-NOT: __riscv_xcvsimd {{.*$}}
 // CHECK-NOT: __riscv_xsfcie {{.*$}}
 // CHECK-NOT: __riscv_xsfvcp {{.*$}}
+// CHECK-NOT: __riscv_xsfvfwmaccqqq {{.*$}}
 // CHECK-NOT: __riscv_xsfqmaccdod {{.*$}}
 // CHECK-NOT: __riscv_xsfvqmaccqoq {{.*$}}
 // CHECK-NOT: __riscv_xtheadba {{.*$}}
@ -325,6 +326,14 @@
 // RUN: -o - | FileCheck --check-prefix=CHECK-XSFVCP-EXT %s
 // CHECK-XSFVCP-EXT: __riscv_xsfvcp 1000000{{$}}

+// RUN: %clang --target=riscv32-unknown-linux-gnu \
+// RUN: -march=rv32ixsfvfwmaccqqq -x c -E -dM %s \
+// RUN: -o - | FileCheck --check-prefix=CHECK-XSFVFWMACCQQQ-EXT %s
+// RUN: %clang --target=riscv64-unknown-linux-gnu \
+// RUN: -march=rv64ixsfvfwmaccqqq -x c -E -dM %s \
+// RUN: -o - | FileCheck --check-prefix=CHECK-XSFVFWMACCQQQ-EXT %s
+// CHECK-XSFVFWMACCQQQ-EXT: __riscv_xsfvfwmaccqqq 1000000{{$}}
+
 // RUN: %clang --target=riscv32-unknown-linux-gnu \
 // RUN: -march=rv32ixsfvqmaccdod -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-XSFVQMACCDOD-EXT %s
--- a/clang/test/Sema/rvv-required-features-invalid.c
+++ b/clang/test/Sema/rvv-required-features-invalid.c
@ -23,3 +23,7 @@ void test_xsfvqmaccdod() {
 void test_xsfvqmaccqoq() {
  __riscv_sf_vqmacc_4x8x4(); // expected-error {{call to undeclared function '__riscv_sf_vqmacc_4x8x4'}}
 }
+
+void test_xsfvfwmaccqqq() {
+  __riscv_sf_vfwmacc_4x4x4(); // expected-error {{call to undeclared function '__riscv_sf_vfwmacc_4x4x4'}}
+}
--- a/clang/test/Sema/rvv-required-features.c
+++ b/clang/test/Sema/rvv-required-features.c
@ -1,6 +1,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +xsfvcp \
-// RUN:     -target-feature +xsfvqmaccdod -target-feature +xsfvqmaccqoq %s -fsyntax-only -verify
+// RUN:     -target-feature +xsfvqmaccdod -target-feature +xsfvqmaccqoq \
+// RUN:     -target-feature +zvfh -target-feature +xsfvfwmaccqqq %s -fsyntax-only -verify

 // expected-no-diagnostics

@ -26,3 +27,7 @@ void test_xsfvqmaccdod(vint32m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
 void test_xsfvqmaccqoq(vint32m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
  __riscv_sf_vqmacc_4x8x4(vd, vs1, vs2, vl);
 }
+
+void test_xsfvfwmaccqqq(vfloat32m1_t vd, vfloat16m1_t vs1, vfloat16mf2_t vs2, size_t vl) {
+  __riscv_sf_vfwmacc_4x4x4(vd, vs1, vs2, vl);
+}
--- a/clang/utils/TableGen/RISCVVEmitter.cpp
+++ b/clang/utils/TableGen/RISCVVEmitter.cpp
@ -656,6 +656,7 @@ void RVVEmitter::createRVVIntrinsics(
                                  .Case("RV64", RVV_REQ_RV64)
                                  .Case("ZvfhminOrZvfh", RVV_REQ_ZvfhminOrZvfh)
                                  .Case("Xsfvcp", RVV_REQ_Xsfvcp)
+                                  .Case("Xsfvfwmaccqqq", RVV_REQ_Xsfvfwmaccqqq)
                                  .Case("Xsfvqmaccdod", RVV_REQ_Xsfvqmaccdod)
                                  .Case("Xsfvqmaccqoq", RVV_REQ_Xsfvqmaccqoq)
                                  .Case("Zvbb", RVV_REQ_Zvbb)
--- a/llvm/include/llvm/IR/IntrinsicsRISCVXsf.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCVXsf.td
@ -152,4 +152,7 @@ let TargetPrefix = "riscv" in {
  def int_riscv_sf_vqmacc_4x8x4   : RISCVSFCustomVMACC;
  def int_riscv_sf_vqmaccus_4x8x4 : RISCVSFCustomVMACC;
  def int_riscv_sf_vqmaccsu_4x8x4 : RISCVSFCustomVMACC;
+
+  // XSfvfwmaccqqq
+  def int_riscv_sf_vfwmacc_4x4x4 : RISCVSFCustomVMACC;
 } // TargetPrefix = "riscv"
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@ -73,6 +73,7 @@ static const RISCVSupportedExtension SupportedExtensions[] = {
    {"xcvsimd", RISCVExtensionVersion{1, 0}},
    {"xsfcie", RISCVExtensionVersion{1, 0}},
    {"xsfvcp", RISCVExtensionVersion{1, 0}},
+    {"xsfvfwmaccqqq", RISCVExtensionVersion{1, 0}},
    {"xsfvqmaccdod", RISCVExtensionVersion{1, 0}},
    {"xsfvqmaccqoq", RISCVExtensionVersion{1, 0}},
    {"xtheadba", RISCVExtensionVersion{1, 0}},
@ -993,6 +994,7 @@ static const char *ImpliedExtsF[] = {"zicsr"};
 static const char *ImpliedExtsV[] = {"zvl128b", "zve64d"};
 static const char *ImpliedExtsXTHeadVdot[] = {"v"};
 static const char *ImpliedExtsXsfvcp[] = {"zve32x"};
+static const char *ImpliedExtsXsfvfwmaccqqq[] = {"zve32f"};
 static const char *ImpliedExtsXsfvqmaccdod[] = {"zve32x"};
 static const char *ImpliedExtsXsfvqmaccqoq[] = {"zve32x"};
 static const char *ImpliedExtsZacas[] = {"a"};
@ -1062,6 +1064,7 @@ static constexpr ImpliedExtsEntry ImpliedExts[] = {
    {{"f"}, {ImpliedExtsF}},
    {{"v"}, {ImpliedExtsV}},
    {{"xsfvcp"}, {ImpliedExtsXsfvcp}},
+    {{"xsfvfwmaccqqq"}, {ImpliedExtsXsfvfwmaccqqq}},
    {{"xsfvqmaccdod"}, {ImpliedExtsXsfvqmaccdod}},
    {{"xsfvqmaccqoq"}, {ImpliedExtsXsfvqmaccqoq}},
    {{"xtheadvdot"}, {ImpliedExtsXTHeadVdot}},
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@ -550,6 +550,9 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
    TRY_TO_DECODE_FEATURE(
        RISCV::FeatureVendorXSfvqmaccqoq, DecoderTableXSfvqmaccqoq32,
        "SiFive Matrix Multiplication (4x8 and 8x4) Instruction opcode table");
+    TRY_TO_DECODE_FEATURE(
+        RISCV::FeatureVendorXSfvfwmaccqqq, DecoderTableXSfvfwmaccqqq32,
+        "SiFive Matrix Multiplication Instruction opcode table");
    TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXSfcie, DecoderTableXSfcie32,
                          "Sifive CIE custom opcode table");
    TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCVbitmanip,
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@ -836,6 +836,14 @@ def HasVendorXSfvqmaccqoq : Predicate<"Subtarget->hasVendorXSfvqmaccqoq()">,
                         AssemblerPredicate<(all_of FeatureVendorXSfvqmaccqoq),
                         "'XSfvqmaccqoq' (SiFive Int8 Matrix Multiplication Instructions (4-by-8 and 8-by-4))">;

+def FeatureVendorXSfvfwmaccqqq
+    : SubtargetFeature<"xsfvfwmaccqqq", "HasVendorXSfvfwmaccqqq", "true",
+                       "'XSfvfwmaccqqq' (SiFive Matrix Multiply Accumulate Instruction and 4-by-4))",
+                       [FeatureStdExtZve32f]>;
+def HasVendorXSfvfwmaccqqq : Predicate<"Subtarget->hasVendorXSfvfwmaccqqq()">,
+                         AssemblerPredicate<(all_of FeatureVendorXSfvfwmaccqqq),
+                         "'XSfvfwmaccqqq' (SiFive Matrix Multiply Accumulate Instruction and 4-by-4))">;
+
 def FeatureVendorXCVbitmanip
    : SubtargetFeature<"xcvbitmanip", "HasVendorXCVbitmanip", "true",
                       "'XCVbitmanip' (CORE-V Bit Manipulation)">;
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
@ -208,6 +208,10 @@ let Predicates = [HasVendorXSfvqmaccqoq], DecoderNamespace = "XSfvqmaccqoq" in {
  def VQMACCSU_4x8x4 : CustomSiFiveVMACC<0b111111, OPMVV, "sf.vqmaccsu.4x8x4">;
 }

+let Predicates = [HasVendorXSfvfwmaccqqq], DecoderNamespace = "XSfvfwmaccqqq" in {
+  def VFWMACC_4x4x4 : CustomSiFiveVMACC<0b111100, OPFVV, "sf.vfwmacc.4x4x4">;
+}
+
 class VPseudoVC_X<Operand OpClass, DAGOperand RS1Class,
                  bit HasSideEffect = 1> :
      Pseudo<(outs),
@ -341,6 +345,11 @@ multiclass VPseudoSiFiveVQMACC<string Constraint = ""> {
    defm NAME : VPseudoSiFiveVMACC<m.MX, m.vrclass, m.vrclass, Constraint>;
 }

+multiclass VPseudoSiFiveVFWMACC<string Constraint = ""> {
+  foreach m = MxListFW in
+    defm NAME : VPseudoSiFiveVMACC<m.MX, m.wvrclass, m.vrclass, Constraint>;
+}
+
 let Predicates = [HasVendorXSfvcp] in {
  foreach m = MxList in {
    defm X : VPseudoVC_X<m, GPR>;
@ -383,6 +392,10 @@ let Predicates = [HasVendorXSfvqmaccqoq], DecoderNamespace = "XSfvqmaccqoq" in {
  defm VQMACCSU_4x8x4 : VPseudoSiFiveVQMACC;
 }

+let Predicates = [HasVendorXSfvfwmaccqqq], DecoderNamespace = "XSfvfwmaccqqq" in {
+  defm VFWMACC_4x4x4 : VPseudoSiFiveVFWMACC;
+}
+
 class VPatVC_OP4<string intrinsic_name,
                 string inst,
                 ValueType op2_type,
@ -537,6 +550,11 @@ defset list<VTypeInfoToWide> VQMACCInfoPairs = {
 multiclass VPatVQMACC<string intrinsic, string instruction, string kind>
    : VPatVMACC<intrinsic, instruction, kind, VQMACCInfoPairs, vint8m1_t>;

+
+multiclass VPatVFWMACC<string intrinsic, string instruction, string kind>
+    : VPatVMACC<intrinsic, instruction, kind, AllWidenableBFloatToFloatVectors,
+                vbfloat16m1_t>;
+
 let Predicates = [HasVendorXSfvcp] in {
  foreach vti = AllVectors in {
    defm : VPatVC_X<"x", "X", vti, XLenVT, GPR>;
@ -586,6 +604,10 @@ let Predicates = [HasVendorXSfvqmaccqoq] in {
  defm : VPatVQMACC<"vqmaccsu_4x8x4", "VQMACCSU", "4x8x4">;
 }

+let Predicates = [HasVendorXSfvfwmaccqqq] in {
+  defm : VPatVFWMACC<"vfwmacc_4x4x4", "VFWMACC", "4x4x4">;
+}
+
 let Predicates = [HasVendorXSfcie] in {
 let hasSideEffects = 1, mayLoad = 0, mayStore = 0, DecoderNamespace = "XSfcie" in {
 def SF_CFLUSH_D_L1 : RVInstI<0b000, OPC_SYSTEM, (outs), (ins GPR:$rs1), "cflush.d.l1","$rs1">,
--- a/llvm/test/CodeGen/RISCV/rvv/sf_vfwmacc_4x4x4.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sf_vfwmacc_4x4x4.ll
@ -0,0 +1,195 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfmin,+xsfvfwmaccqqq \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfmin,+xsfvfwmaccqqq \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+declare <vscale x 1 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv1f32.nxv4f16.nxv1f16(
+  <vscale x 1 x float>,
+  <vscale x 4 x bfloat>,
+  <vscale x 1 x bfloat>,
+  iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmacc_4x4x4_tu_f32mf2(<vscale x 1 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmacc_4x4x4_tu_f32mf2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT:    sf.vfwmacc.4x4x4 v8, v9, v10
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv1f32.nxv4f16.nxv1f16(
+    <vscale x 1 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 1 x bfloat> %2,
+    iXLen %3, iXLen 2)
+
+  ret <vscale x 1 x float> %a
+}
+
+define <vscale x 1 x float> @intrinsic_vfwmacc_4x4x4_ta_f32mf2(<vscale x 1 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmacc_4x4x4_ta_f32mf2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    sf.vfwmacc.4x4x4 v8, v9, v10
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv1f32.nxv4f16.nxv1f16(
+    <vscale x 1 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 1 x bfloat> %2,
+    iXLen %3, iXLen 3)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv2f32.nxv4f16.nxv2f16(
+  <vscale x 2 x float>,
+  <vscale x 4 x bfloat>,
+  <vscale x 2 x bfloat>,
+  iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmacc_4x4x4_tu_f32m1(<vscale x 2 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmacc_4x4x4_tu_f32m1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT:    sf.vfwmacc.4x4x4 v8, v9, v10
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv2f32.nxv4f16.nxv2f16(
+    <vscale x 2 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 2 x bfloat> %2,
+    iXLen %3, iXLen 2)
+
+  ret <vscale x 2 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwmacc_4x4x4_ta_f32m1(<vscale x 2 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmacc_4x4x4_ta_f32m1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    sf.vfwmacc.4x4x4 v8, v9, v10
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv2f32.nxv4f16.nxv2f16(
+    <vscale x 2 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 2 x bfloat> %2,
+    iXLen %3, iXLen 3)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv4f32.nxv4f16.nxv4f16(
+  <vscale x 4 x float>,
+  <vscale x 4 x bfloat>,
+  <vscale x 4 x bfloat>,
+  iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmacc_4x4x4_tu_f32m2(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmacc_4x4x4_tu_f32m2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT:    sf.vfwmacc.4x4x4 v8, v10, v11
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv4f32.nxv4f16.nxv4f16(
+    <vscale x 4 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 4 x bfloat> %2,
+    iXLen %3, iXLen 2)
+
+  ret <vscale x 4 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwmacc_4x4x4_ta_f32m2(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmacc_4x4x4_ta_f32m2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    sf.vfwmacc.4x4x4 v8, v10, v11
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv4f32.nxv4f16.nxv4f16(
+    <vscale x 4 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 4 x bfloat> %2,
+    iXLen %3, iXLen 3)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv8f32.nxv4f16.nxv8f16(
+  <vscale x 8 x float>,
+  <vscale x 4 x bfloat>,
+  <vscale x 8 x bfloat>,
+  iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmacc_4x4x4_tu_f32m4(<vscale x 8 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmacc_4x4x4_tu_f32m4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT:    sf.vfwmacc.4x4x4 v8, v12, v14
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv8f32.nxv4f16.nxv8f16(
+    <vscale x 8 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 8 x bfloat> %2,
+    iXLen %3, iXLen 2)
+
+  ret <vscale x 8 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwmacc_4x4x4_ta_f32m4(<vscale x 8 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmacc_4x4x4_ta_f32m4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    sf.vfwmacc.4x4x4 v8, v12, v14
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv8f32.nxv4f16.nxv8f16(
+    <vscale x 8 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 8 x bfloat> %2,
+    iXLen %3, iXLen 3)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv16f32.nxv4f16.nxv16f16(
+  <vscale x 16 x float>,
+  <vscale x 4 x bfloat>,
+  <vscale x 16 x bfloat>,
+  iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmacc_4x4x4_tu_f32m8(<vscale x 16 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmacc_4x4x4_tu_f32m8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT:    sf.vfwmacc.4x4x4 v8, v16, v20
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv16f32.nxv4f16.nxv16f16(
+    <vscale x 16 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 16 x bfloat> %2,
+    iXLen %3, iXLen 2)
+
+  ret <vscale x 16 x float> %a
+}
+
+define <vscale x 16 x float> @intrinsic_vfwmacc_4x4x4_ta_f32m8(<vscale x 16 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmacc_4x4x4_ta_f32m8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    sf.vfwmacc.4x4x4 v8, v16, v20
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv16f32.nxv4f16.nxv16f16(
+    <vscale x 16 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 16 x bfloat> %2,
+    iXLen %3, iXLen 3)
+
+  ret <vscale x 16 x float> %a
+}
--- a/llvm/test/MC/RISCV/rvv/xsfvfwmacc.s
+++ b/llvm/test/MC/RISCV/rvv/xsfvfwmacc.s
@ -0,0 +1,15 @@
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v,+xsfvfwmaccqqq %s \
+# RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+# RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
+# RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v,+xsfvfwmaccqqq %s \
+# RUN:        | llvm-objdump -d --mattr=+v,+xsfvfwmaccqqq - \
+# RUN:        | FileCheck %s --check-prefix=CHECK-INST
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v,+xsfvfwmaccqqq %s \
+# RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+sf.vfwmacc.4x4x4 v8, v4, v20
+# CHECK-INST: sf.vfwmacc.4x4x4 v8, v4, v20
+# CHECK-ENCODING: [0x5b,0x14,0x42,0xf3]
+# CHECK-ERROR: instruction requires the following: 'XSfvfwmaccqqq' (SiFive Matrix Multiply Accumulate Instruction and 4-by-4))
+# CHECK-UNKNOWN: 5b 14 42 f3 <unknown>
--- a/llvm/unittests/Support/RISCVISAInfoTest.cpp
+++ b/llvm/unittests/Support/RISCVISAInfoTest.cpp
@ -714,6 +714,7 @@ R"(All available -march extensions for RISC-V
    xcvsimd             1.0
    xsfcie              1.0
    xsfvcp              1.0
+    xsfvfwmaccqqq       1.0
    xsfvqmaccdod        1.0
    xsfvqmaccqoq        1.0
    xtheadba            1.0