[flang] Add PowerPC vec_lxv, vec_lvsl, vec_lvsr, vec_xl, vec_xl_be and vec_xlds intrinsics

Differential Revision: https://reviews.llvm.org/D157920
2025-02-19 17:31:55 +00:00 · 2023-07-31 23:49:20 -04:00 · 2023-07-31 23:49:20 -04:00 · 2de024ef31
commit 2de024ef31
parent f0221fb1d7
5 changed files with 1629 additions and 7 deletions
--- a/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h
@ -32,6 +32,9 @@ enum class VecOp {
  Ld,
  Lde,
  Ldl,
+  Lvsl,
+  Lvsr,
+  Lxv,
  Lxvp,
  Mergeh,
  Mergel,
@ -57,6 +60,8 @@ enum class VecOp {
  Stxv,
  Stxvp,
  Sub,
+  Xl,
+  Xlbe,
  Xld2,
  Xlw4,
  Xor,
@ -275,10 +280,21 @@ struct PPCIntrinsicLibrary : IntrinsicLibrary {
  fir::ExtendedValue genVecPerm(mlir::Type resultType,
                                llvm::ArrayRef<fir::ExtendedValue> args);

+  fir::ExtendedValue genVecXlGrp(mlir::Type resultType,
+                                 llvm::ArrayRef<fir::ExtendedValue> args);
+
  template <VecOp>
  fir::ExtendedValue genVecLdCallGrp(mlir::Type resultType,
                                     llvm::ArrayRef<fir::ExtendedValue> args);

+  template <VecOp>
+  fir::ExtendedValue genVecLdNoCallGrp(mlir::Type resultType,
+                                       llvm::ArrayRef<fir::ExtendedValue> args);
+
+  template <VecOp>
+  fir::ExtendedValue genVecLvsGrp(mlir::Type resultType,
+                                  llvm::ArrayRef<fir::ExtendedValue> args);
+
  template <VecOp>
  fir::ExtendedValue genVecNmaddMsub(mlir::Type resultType,
                                     llvm::ArrayRef<fir::ExtendedValue> args);
@ -299,6 +315,9 @@ struct PPCIntrinsicLibrary : IntrinsicLibrary {
  template <VecOp vop>
  fir::ExtendedValue genVecSplat(mlir::Type resultType,
                                 llvm::ArrayRef<fir::ExtendedValue> args);
+
+  fir::ExtendedValue genVecXlds(mlir::Type resultType,
+                                llvm::ArrayRef<fir::ExtendedValue> args);
 };

 const IntrinsicHandler *findPPCIntrinsicHandler(llvm::StringRef name);
--- a/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
@ -589,6 +589,21 @@ static constexpr IntrinsicHandler ppcHandlers[]{
         &PI::genVecLdCallGrp<VecOp::Ldl>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
+    {"__ppc_vec_lvsl",
+     static_cast<IntrinsicLibrary::ExtendedGenerator>(
+         &PI::genVecLvsGrp<VecOp::Lvsl>),
+     {{{"arg1", asValue}, {"arg2", asAddr}}},
+     /*isElemental=*/false},
+    {"__ppc_vec_lvsr",
+     static_cast<IntrinsicLibrary::ExtendedGenerator>(
+         &PI::genVecLvsGrp<VecOp::Lvsr>),
+     {{{"arg1", asValue}, {"arg2", asAddr}}},
+     /*isElemental=*/false},
+    {"__ppc_vec_lxv",
+     static_cast<IntrinsicLibrary::ExtendedGenerator>(
+         &PI::genVecLdNoCallGrp<VecOp::Lxv>),
+     {{{"arg1", asValue}, {"arg2", asAddr}}},
+     /*isElemental=*/false},
    {"__ppc_vec_lxvp",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdCallGrp<VecOp::Lxvp>),
@ -713,11 +728,24 @@ static constexpr IntrinsicHandler ppcHandlers[]{
         &PI::genVecAddAndMulSubXor<VecOp::Sub>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
+    {"__ppc_vec_xl",
+     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecXlGrp),
+     {{{"arg1", asValue}, {"arg2", asAddr}}},
+     /*isElemental=*/false},
+    {"__ppc_vec_xl_be",
+     static_cast<IntrinsicLibrary::ExtendedGenerator>(
+         &PI::genVecLdNoCallGrp<VecOp::Xlbe>),
+     {{{"arg1", asValue}, {"arg2", asAddr}}},
+     /*isElemental=*/false},
    {"__ppc_vec_xld2_",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdCallGrp<VecOp::Xld2>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
+    {"__ppc_vec_xlds",
+     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecXlds),
+     {{{"arg1", asValue}, {"arg2", asAddr}}},
+     /*isElemental=*/false},
    {"__ppc_vec_xlw4_",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdCallGrp<VecOp::Xlw4>),
@ -1797,6 +1825,62 @@ static mlir::Value reverseVectorElements(fir::FirOpBuilder &builder,
  return builder.create<mlir::vector::ShuffleOp>(loc, v, undefVec, mask);
 }

+static mlir::NamedAttribute getAlignmentAttr(fir::FirOpBuilder &builder,
+                                             const int val) {
+  auto i64ty{mlir::IntegerType::get(builder.getContext(), 64)};
+  auto alignAttr{mlir::IntegerAttr::get(i64ty, val)}; // val as an i64 attr
+  return builder.getNamedAttr("alignment", alignAttr);
+}
+
+fir::ExtendedValue
+PPCIntrinsicLibrary::genVecXlGrp(mlir::Type resultType,
+                                 llvm::ArrayRef<fir::ExtendedValue> args) {
+  VecTypeInfo vecTyInfo{getVecTypeFromFirType(resultType)};
+  switch (vecTyInfo.eleTy.getIntOrFloatBitWidth()) {
+  case 8:
+    // 8-bit elements (vec_xlb1): lowered as VecOp::Xl by genVecLdNoCallGrp
+    return genVecLdNoCallGrp<VecOp::Xl>(resultType, args);
+  case 16:
+    // 16-bit elements (vec_xlh8): lowered as VecOp::Xl by genVecLdNoCallGrp
+    return genVecLdNoCallGrp<VecOp::Xl>(resultType, args);
+  case 32:
+    // 32-bit elements: same lowering as vec_xlw4
+    return genVecLdCallGrp<VecOp::Xlw4>(resultType, args);
+  case 64:
+    // 64-bit elements: same lowering as vec_xld2
+    return genVecLdCallGrp<VecOp::Xld2>(resultType, args);
+  default:
+    llvm_unreachable("invalid kind");
+  }
+  llvm_unreachable("invalid vector operation for generator");
+}
+
+template <VecOp vop>
+fir::ExtendedValue PPCIntrinsicLibrary::genVecLdNoCallGrp(
+    mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args) {
+  assert(args.size() == 2);
+  auto arg0{getBase(args[0])};
+  auto arg1{getBase(args[1])};
+
+  auto vecTyInfo{getVecTypeFromFirType(resultType)};
+  auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())};
+  auto firTy{vecTyInfo.toFirVectorType()};
+
+  // Effective address: the %addr of arg1 offset by the %val of arg0
+  auto addr{addOffsetToAddress(builder, loc, arg1, arg0)};
+
+  const auto triple{fir::getTargetTriple(builder.getModule())};
+  // Emit the load with an explicit alignment attribute of 1.
+  auto result{builder.create<fir::LoadOp>(loc, mlirTy, addr,
+                                          getAlignmentAttr(builder, 1))};
+  if ((vop == VecOp::Xl && isBEVecElemOrderOnLE()) ||
+      (vop == VecOp::Xlbe && triple.isLittleEndian()))
+    return builder.createConvert(
+        loc, firTy, reverseVectorElements(builder, loc, result, vecTyInfo.len));
+
+  return builder.createConvert(loc, firTy, result);
+}
+
 // VEC_LD, VEC_LDE, VEC_LDL, VEC_LXVP, VEC_XLD2, VEC_XLW4
 template <VecOp vop>
 fir::ExtendedValue
@ -1897,6 +1981,58 @@ PPCIntrinsicLibrary::genVecLdCallGrp(mlir::Type resultType,
  return builder.createConvert(loc, firTy, result);
 }

+// VEC_LVSL, VEC_LVSR: lowered to llvm.ppc.altivec.lvsl / lvsr calls
+template <VecOp vop>
+fir::ExtendedValue
+PPCIntrinsicLibrary::genVecLvsGrp(mlir::Type resultType,
+                                  llvm::ArrayRef<fir::ExtendedValue> args) {
+  assert(args.size() == 2);
+  auto context{builder.getContext()};
+  auto arg0{getBase(args[0])};
+  auto arg1{getBase(args[1])};
+
+  auto vecTyInfo{getVecTypeFromFirType(resultType)};
+  auto mlirTy{vecTyInfo.toMlirVectorType(context)};
+  auto firTy{vecTyInfo.toFirVectorType()};
+
+  // Convert the offset (arg0) to i64 unless it already has that type
+  auto i64ty{mlir::IntegerType::get(context, 64)};
+  if (arg0.getType() != i64ty)
+    arg0 = builder.create<fir::ConvertOp>(loc, i64ty, arg0);
+
+  // Offset is taken modulo 16 per the original note; shl 56 followed by an
+  //   arithmetic shr 56 sign-extends the low 8 bits of the i64 offset
+  auto shiftVal{builder.createIntegerConstant(loc, i64ty, 56)};
+  auto offset{builder.create<mlir::arith::ShLIOp>(loc, arg0, shiftVal)};
+  auto offset2{builder.create<mlir::arith::ShRSIOp>(loc, offset, shiftVal)};
+
+  // Add the sign-extended offset to the %addr of arg1
+  auto addr{addOffsetToAddress(builder, loc, arg1, offset2)};
+  llvm::SmallVector<mlir::Value, 4> parsedArgs{addr};
+
+  llvm::StringRef fname{};
+  switch (vop) {
+  case VecOp::Lvsl:
+    fname = "llvm.ppc.altivec.lvsl";
+    break;
+  case VecOp::Lvsr:
+    fname = "llvm.ppc.altivec.lvsr";
+    break;
+  default:
+    llvm_unreachable("invalid vector operation for generator");
+  }
+  auto funcType{mlir::FunctionType::get(context, {addr.getType()}, {mlirTy})};
+  auto funcOp{builder.addNamedFunction(loc, fname, funcType)};
+  auto result{
+      builder.create<fir::CallOp>(loc, funcOp, parsedArgs).getResult(0)};
+
+  if (isNativeVecElemOrderOnLE())
+    return builder.createConvert(
+        loc, firTy, reverseVectorElements(builder, loc, result, vecTyInfo.len));
+
+  return builder.createConvert(loc, firTy, result);
+}
+
 // VEC_NMADD, VEC_MSUB
 template <VecOp vop>
 fir::ExtendedValue
@ -2281,6 +2417,38 @@ PPCIntrinsicLibrary::genVecSplat(mlir::Type resultType,
  return builder.createConvert(loc, retTy, splatOp);
 }

+fir::ExtendedValue
+PPCIntrinsicLibrary::genVecXlds(mlir::Type resultType,
+                                llvm::ArrayRef<fir::ExtendedValue> args) {
+  assert(args.size() == 2);
+  auto arg0{getBase(args[0])};
+  auto arg1{getBase(args[1])};
+
+  // Derive the MLIR and FIR vector types from the requested result type.
+  auto vecTyInfo{getVecTypeFromFirType(resultType)};
+  auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())};
+  auto firTy{vecTyInfo.toFirVectorType()};
+
+  // Effective address: the %addr of arg1 offset by the %val of arg0
+  auto addr{addOffsetToAddress(builder, loc, arg1, arg0)};
+
+  auto i64Ty{mlir::IntegerType::get(builder.getContext(), 64)};
+  auto i64VecTy{mlir::VectorType::get(2, i64Ty)};
+  auto i64RefTy{builder.getRefType(i64Ty)};
+  auto addrConv{builder.create<fir::ConvertOp>(loc, i64RefTy, addr)};
+
+  auto addrVal{builder.create<fir::LoadOp>(loc, addrConv)}; // loaded i64
+  auto splatRes{builder.create<mlir::vector::SplatOp>(loc, addrVal, i64VecTy)};
+
+  mlir::Value result{nullptr};
+  if (mlirTy != splatRes.getType()) {
+    result = builder.create<mlir::vector::BitCastOp>(loc, mlirTy, splatRes);
+  } else
+    result = splatRes;
+
+  return builder.createConvert(loc, firTy, result);
+}
+
 const char *getMmaIrIntrName(MMAOp mmaOp) {
  switch (mmaOp) {
  case MMAOp::AssembleAcc:
@ -2755,13 +2923,6 @@ void PPCIntrinsicLibrary::genVecStore(llvm::ArrayRef<fir::ExtendedValue> args) {
  builder.create<fir::CallOp>(loc, funcOp, biArgs);
 }

-static mlir::NamedAttribute getAlignmentAttr(fir::FirOpBuilder &builder,
-                                             const int val) {
-  auto i64ty{mlir::IntegerType::get(builder.getContext(), 64)};
-  auto alignAttr{mlir::IntegerAttr::get(i64ty, val)};
-  return builder.getNamedAttr("alignment", alignAttr);
-}
-
 // VEC_XST, VEC_XST_BE, VEC_STXV, VEC_XSTD2, VEC_XSTW4
 template <VecOp vop>
 void PPCIntrinsicLibrary::genVecXStore(
--- a/flang/module/__ppc_intrinsics.f90
+++ b/flang/module/__ppc_intrinsics.f90
@ -238,6 +238,24 @@ module __ppc_intrinsics
    !dir$ ignore_tkr(r) arg2; \
  end function ;

+! vector(u(1)) function f(i, i)
+#define FUNC_VU1I0I(KIND) \
+  vector(unsigned(1)) function func_vu1i0i##KIND(arg1, arg2); \
+    integer(8), intent(in) :: arg1; \
+    !dir$ ignore_tkr(k) arg1; \
+    integer(KIND), intent(in) :: arg2; \
+    !dir$ ignore_tkr(r) arg2; \
+  end function ;
+
+! vector(u(1)) function f(i, r)
+#define FUNC_VU1I0R(KIND) \
+  vector(unsigned(1)) function func_vu1i0r##KIND(arg1, arg2); \
+    integer(8), intent(in) :: arg1; \
+    !dir$ ignore_tkr(k) arg1; \
+    real(KIND), intent(in) :: arg2; \
+    !dir$ ignore_tkr(r) arg2; \
+  end function ;
+
 ! __vector_pair function f(i, vector(i))
 #define FUNC_VPI0VI(VKIND) \
  pure __vector_pair function func_vpi0vi##VKIND(arg1, arg2); \
@ -339,6 +357,8 @@ module __ppc_intrinsics
  FUNC_VPI0VU(1) FUNC_VPI0VU(2) FUNC_VPI0VU(4) FUNC_VPI0VU(8)
  FUNC_VPI0VR(4) FUNC_VPI0VR(8)
  FUNC_VPI0VP
+  FUNC_VU1I0I(1) FUNC_VU1I0I(2) FUNC_VU1I0I(4)
+  FUNC_VU1I0R(4)

 #undef FUNC_VEC_CONVERT_VRVIVR
 #undef FUNC_VEC_CONVERT_VUVIVU
@ -347,6 +367,8 @@ module __ppc_intrinsics
 #undef FUNC_VPI0VR
 #undef FUNC_VPI0VU
 #undef FUNC_VPI0VI
+#undef FUNC_VU1I0R
+#undef FUNC_VU1I0I
 #undef FUNC_VRI0VR
 #undef FUNC_VUI0VU
 #undef FUNC_VII0VI
@ -1154,6 +1176,40 @@ module __ppc_intrinsics
 #undef VU_VI_VI
 #undef VI_VI_VI

+!-------------------------------------------------------
+! vector(unsigned(1)) function(integer, i/r)
+!-------------------------------------------------------
+#define VU1_I0_I(NAME, KIND) __ppc_##NAME##_vu1i0i##KIND
+#define VU1_I0_R(NAME, KIND) __ppc_##NAME##_vu1i0r##KIND
+
+#define VEC_VU1_I0_I(NAME, KIND) \
+  procedure(func_vu1i0i##KIND) :: VU1_I0_I(NAME, KIND);
+#define VEC_VU1_I0_R(NAME, KIND) \
+  procedure(func_vu1i0r##KIND) :: VU1_I0_R(NAME, KIND);
+
+! vec_lvsl
+  VEC_VU1_I0_I(vec_lvsl,1) VEC_VU1_I0_I(vec_lvsl,2) VEC_VU1_I0_I(vec_lvsl,4)
+  VEC_VU1_I0_R(vec_lvsl,4)
+  interface vec_lvsl
+    procedure :: VU1_I0_I(vec_lvsl,1), VU1_I0_I(vec_lvsl,2), VU1_I0_I(vec_lvsl,4)
+    procedure :: VU1_I0_R(vec_lvsl,4)
+  end interface
+  public :: vec_lvsl
+
+! vec_lvsr
+  VEC_VU1_I0_I(vec_lvsr,1) VEC_VU1_I0_I(vec_lvsr,2) VEC_VU1_I0_I(vec_lvsr,4)
+  VEC_VU1_I0_R(vec_lvsr,4)
+  interface vec_lvsr
+    procedure :: VU1_I0_I(vec_lvsr,1), VU1_I0_I(vec_lvsr,2), VU1_I0_I(vec_lvsr,4)
+    procedure :: VU1_I0_R(vec_lvsr,4)
+  end interface
+  public :: vec_lvsr
+
+#undef VEC_VU1_I0_R
+#undef VEC_VU1_I0_I
+#undef VU1_I0_R
+#undef VU1_I0_I
+
 !-------------------------------------------------------
 ! vector function(integer, i/u/r/vector)
 !-------------------------------------------------------
@ -1214,6 +1270,51 @@ module __ppc_intrinsics
  end interface
  public :: vec_ldl

+! vec_lxv
+  VEC_VI_I0_VI(vec_lxv,1) VEC_VI_I0_VI(vec_lxv,2) VEC_VI_I0_VI(vec_lxv,4) VEC_VI_I0_VI(vec_lxv,8)
+  VEC_VU_I0_VU(vec_lxv,1) VEC_VU_I0_VU(vec_lxv,2) VEC_VU_I0_VU(vec_lxv,4) VEC_VU_I0_VU(vec_lxv,8)
+  VEC_VR_I0_VR(vec_lxv,4) VEC_VR_I0_VR(vec_lxv,8)
+  VEC_VI_I0_I(vec_lxv,1) VEC_VI_I0_I(vec_lxv,2) VEC_VI_I0_I(vec_lxv,4) VEC_VI_I0_I(vec_lxv,8)
+  VEC_VR_I0_R(vec_lxv,4) VEC_VR_I0_R(vec_lxv,8)
+  interface vec_lxv
+    procedure :: VI_I0_VI(vec_lxv,1), VI_I0_VI(vec_lxv,2), VI_I0_VI(vec_lxv,4), VI_I0_VI(vec_lxv,8)
+    procedure :: VU_I0_VU(vec_lxv,1), VU_I0_VU(vec_lxv,2), VU_I0_VU(vec_lxv,4), VU_I0_VU(vec_lxv,8)
+    procedure :: VR_I0_VR(vec_lxv,4), VR_I0_VR(vec_lxv,8)
+    procedure :: VI_I0_I(vec_lxv,1), VI_I0_I(vec_lxv,2), VI_I0_I(vec_lxv,4), VI_I0_I(vec_lxv,8)
+    procedure :: VR_I0_R(vec_lxv,4), VR_I0_R(vec_lxv,8)
+  end interface
+  public :: vec_lxv
+
+! vec_xl
+  VEC_VI_I0_VI(vec_xl,1) VEC_VI_I0_VI(vec_xl,2) VEC_VI_I0_VI(vec_xl,4) VEC_VI_I0_VI(vec_xl,8)
+  VEC_VU_I0_VU(vec_xl,1) VEC_VU_I0_VU(vec_xl,2) VEC_VU_I0_VU(vec_xl,4) VEC_VU_I0_VU(vec_xl,8)
+  VEC_VR_I0_VR(vec_xl,4) VEC_VR_I0_VR(vec_xl,8)
+  VEC_VI_I0_I(vec_xl,1) VEC_VI_I0_I(vec_xl,2) VEC_VI_I0_I(vec_xl,4) VEC_VI_I0_I(vec_xl,8)
+  VEC_VR_I0_R(vec_xl,4) VEC_VR_I0_R(vec_xl,8)
+  interface vec_xl
+    procedure :: VI_I0_VI(vec_xl,1), VI_I0_VI(vec_xl,2), VI_I0_VI(vec_xl,4), VI_I0_VI(vec_xl,8)
+    procedure :: VU_I0_VU(vec_xl,1), VU_I0_VU(vec_xl,2), VU_I0_VU(vec_xl,4), VU_I0_VU(vec_xl,8)
+    procedure :: VR_I0_VR(vec_xl,4), VR_I0_VR(vec_xl,8)
+    procedure :: VI_I0_I(vec_xl,1), VI_I0_I(vec_xl,2), VI_I0_I(vec_xl,4), VI_I0_I(vec_xl,8)
+    procedure :: VR_I0_R(vec_xl,4), VR_I0_R(vec_xl,8)
+  end interface
+  public :: vec_xl
+
+! vec_xl_be
+  VEC_VI_I0_VI(vec_xl_be,1) VEC_VI_I0_VI(vec_xl_be,2) VEC_VI_I0_VI(vec_xl_be,4) VEC_VI_I0_VI(vec_xl_be,8)
+  VEC_VU_I0_VU(vec_xl_be,1) VEC_VU_I0_VU(vec_xl_be,2) VEC_VU_I0_VU(vec_xl_be,4) VEC_VU_I0_VU(vec_xl_be,8)
+  VEC_VR_I0_VR(vec_xl_be,4) VEC_VR_I0_VR(vec_xl_be,8)
+  VEC_VI_I0_I(vec_xl_be,1) VEC_VI_I0_I(vec_xl_be,2) VEC_VI_I0_I(vec_xl_be,4) VEC_VI_I0_I(vec_xl_be,8)
+  VEC_VR_I0_R(vec_xl_be,4) VEC_VR_I0_R(vec_xl_be,8)
+  interface vec_xl_be
+    procedure :: VI_I0_VI(vec_xl_be,1), VI_I0_VI(vec_xl_be,2), VI_I0_VI(vec_xl_be,4), VI_I0_VI(vec_xl_be,8)
+    procedure :: VU_I0_VU(vec_xl_be,1), VU_I0_VU(vec_xl_be,2), VU_I0_VU(vec_xl_be,4), VU_I0_VU(vec_xl_be,8)
+    procedure :: VR_I0_VR(vec_xl_be,4), VR_I0_VR(vec_xl_be,8)
+    procedure :: VI_I0_I(vec_xl_be,1), VI_I0_I(vec_xl_be,2), VI_I0_I(vec_xl_be,4) , VI_I0_I(vec_xl_be,8)
+    procedure :: VR_I0_R(vec_xl_be,4), VR_I0_R(vec_xl_be,8)
+  end interface
+  public :: vec_xl_be
+
 ! vec_xld2
  VEC_VI_I0_VI(vec_xld2_,1) VEC_VI_I0_VI(vec_xld2_,2) VEC_VI_I0_VI(vec_xld2_,4) VEC_VI_I0_VI(vec_xld2_,8)
  VEC_VU_I0_VU(vec_xld2_,1) VEC_VU_I0_VU(vec_xld2_,2) VEC_VU_I0_VU(vec_xld2_,4) VEC_VU_I0_VU(vec_xld2_,8)
@ -1229,6 +1330,21 @@ module __ppc_intrinsics
  end interface
  public :: vec_xld2

+! vec_xlds
+  VEC_VI_I0_VI(vec_xlds,8)
+  VEC_VU_I0_VU(vec_xlds,8)
+  VEC_VR_I0_VR(vec_xlds,8)
+  VEC_VI_I0_I(vec_xlds,8)
+  VEC_VR_I0_R(vec_xlds,8)
+  interface vec_xlds
+    procedure :: VI_I0_VI(vec_xlds,8)
+    procedure :: VU_I0_VU(vec_xlds,8)
+    procedure :: VR_I0_VR(vec_xlds,8)
+    procedure :: VI_I0_I(vec_xlds,8)
+    procedure :: VR_I0_R(vec_xlds,8)
+  end interface
+  public :: vec_xlds
+
 ! vec_xlw4
  VEC_VI_I0_VI(vec_xlw4_,1) VEC_VI_I0_VI(vec_xlw4_,2)
  VEC_VU_I0_VU(vec_xlw4_,1) VEC_VU_I0_VU(vec_xlw4_,2) VEC_VU_I0_VU(vec_xlw4_,4)
--- a/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90
@ -297,6 +297,606 @@ subroutine vec_lde_testf32a(arg1, arg2, res)
 ! LLVMIR: store <4 x float> %[[shflv]], ptr %2, align 16
 end subroutine vec_lde_testf32a

+!-------------------
+! vec_lvsl
+!-------------------
+
+! CHECK-LABEL: @vec_lvsl_testi8s
+subroutine vec_lvsl_testi8s(arg1, arg2, res)
+  integer(1) :: arg1
+  integer(1) :: arg2
+  vector(unsigned(1)) :: res
+  res = vec_lvsl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[arg1i64:.*]] = fir.convert %[[arg1]] : (i8) -> i64
+! FIR: %[[fiveSix:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1i64]], %[[fiveSix]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[fiveSix]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<i8>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[iext:.*]] = sext i8 %[[arg1]] to i64
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[iext]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsl(ptr %[[addr]])
+! LLVMIR: store <16 x i8> %[[ld]], ptr %2, align 16
+end subroutine vec_lvsl_testi8s
+
+! CHECK-LABEL: @vec_lvsl_testi16a
+subroutine vec_lvsl_testi16a(arg1, arg2, res)
+  integer(2) :: arg1
+  integer(2) :: arg2(4)
+  vector(unsigned(1)) :: res
+  res = vec_lvsl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[arg1i64:.*]] = fir.convert %[[arg1]] : (i16) -> i64
+! FIR: %[[fiveSix:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1i64]], %[[fiveSix]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[fiveSix]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xi16>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[iext:.*]] = sext i16 %[[arg1]] to i64
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[iext]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsl(ptr %[[addr]])
+! LLVMIR: store <16 x i8> %[[ld]], ptr %2, align 16
+end subroutine vec_lvsl_testi16a
+
+! CHECK-LABEL: @vec_lvsl_testi32a
+subroutine vec_lvsl_testi32a(arg1, arg2, res)
+  integer(4) :: arg1
+  integer(4) :: arg2(11, 3, 4)
+  vector(unsigned(1)) :: res
+  res = vec_lvsl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[arg1i64:.*]] = fir.convert %[[arg1]] : (i32) -> i64
+! FIR: %[[fiveSix:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1i64]], %[[fiveSix]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[fiveSix]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<11x3x4xi32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[iext:.*]] = sext i32 %[[arg1]] to i64
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[iext]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsl(ptr %[[addr]])
+! LLVMIR: store <16 x i8> %[[ld]], ptr %2, align 16
+end subroutine vec_lvsl_testi32a
+
+! CHECK-LABEL: @vec_lvsl_testf32a
+subroutine vec_lvsl_testf32a(arg1, arg2, res)
+  integer(8) :: arg1
+  real(4) :: arg2(51)
+  vector(unsigned(1)) :: res
+  res = vec_lvsl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[fiveSix:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1]], %[[fiveSix]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[fiveSix]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<51xf32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[arg1]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsl(ptr %[[addr]])
+! LLVMIR: store <16 x i8> %[[ld]], ptr %2, align 16
+end subroutine vec_lvsl_testf32a
+
+!-------------------
+! vec_lvsr
+!-------------------
+
+! CHECK-LABEL: @vec_lvsr_testi8s
+subroutine vec_lvsr_testi8s(arg1, arg2, res)
+  integer(1) :: arg1
+  integer(1) :: arg2
+  vector(unsigned(1)) :: res
+  res = vec_lvsr(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[arg1i64:.*]] = fir.convert %[[arg1]] : (i8) -> i64
+! FIR: %[[fiveSix:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1i64]], %[[fiveSix]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[fiveSix]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<i8>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsr(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[iext:.*]] = sext i8 %[[arg1]] to i64
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[iext]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsr(ptr %[[addr]])
+! LLVMIR: store <16 x i8> %[[ld]], ptr %2, align 16
+end subroutine vec_lvsr_testi8s
+
+! CHECK-LABEL: @vec_lvsr_testi16a
+subroutine vec_lvsr_testi16a(arg1, arg2, res)
+  integer(2) :: arg1
+  integer(2) :: arg2(41)
+  vector(unsigned(1)) :: res
+  res = vec_lvsr(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[arg1i64:.*]] = fir.convert %[[arg1]] : (i16) -> i64
+! FIR: %[[fiveSix:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1i64]], %[[fiveSix]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[fiveSix]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<41xi16>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsr(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[iext:.*]] = sext i16 %[[arg1]] to i64
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[iext]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsr(ptr %[[addr]])
+! LLVMIR: store <16 x i8> %[[ld]], ptr %2, align 16
+end subroutine vec_lvsr_testi16a
+
+! CHECK-LABEL: @vec_lvsr_testi32a
+subroutine vec_lvsr_testi32a(arg1, arg2, res)
+  integer(4) :: arg1
+  integer(4) :: arg2(23, 31, 47)
+  vector(unsigned(1)) :: res
+  res = vec_lvsr(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[arg1i64:.*]] = fir.convert %[[arg1]] : (i32) -> i64
+! FIR: %[[fiveSix:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1i64]], %[[fiveSix]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[fiveSix]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<23x31x47xi32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsr(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[iext:.*]] = sext i32 %[[arg1]] to i64
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[iext]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsr(ptr %[[addr]])
+! LLVMIR: store <16 x i8> %[[ld]], ptr %2, align 16
+end subroutine vec_lvsr_testi32a
+
+! CHECK-LABEL: @vec_lvsr_testf32a
+subroutine vec_lvsr_testf32a(arg1, arg2, res)
+  integer(8) :: arg1
+  real(4) :: arg2
+  vector(unsigned(1)) :: res
+  res = vec_lvsr(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[fiveSix:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1]], %[[fiveSix]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[fiveSix]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<f32>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsr(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[arg1]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsr(ptr %[[addr]])
+! LLVMIR: store <16 x i8> %[[ld]], ptr %2, align 16
+end subroutine vec_lvsr_testf32a
+
+!-------------------
+! vec_lxv
+!-------------------
+
+! CHECK-LABEL: @vec_lxv_testi8a
+subroutine vec_lxv_testi8a(arg1, arg2, res)
+  integer(1) :: arg1
+  integer(1) :: arg2(4)
+  vector(integer(1)) :: res
+  res = vec_lxv(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xi8>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:i8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
+
+! LLVMIR: %[[offset:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[offset]]
+! LLVMIR: %[[res:.*]] = load <16 x i8>, ptr %[[addr]], align 1
+! LLVMIR: store <16 x i8> %[[res]], ptr %2, align 16
+end subroutine vec_lxv_testi8a
+
+! CHECK-LABEL: @vec_lxv_testi16a
+subroutine vec_lxv_testi16a(arg1, arg2, res)
+  integer(2) :: arg1
+  integer(2) :: arg2(2, 4, 8)
+  vector(integer(2)) :: res
+  res = vec_lxv(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi16>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<8xi16>) -> !fir.vector<8:i16>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
+
+! LLVMIR: %[[offset:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[offset]]
+! LLVMIR: %[[res:.*]] = load <8 x i16>, ptr %[[addr]], align 1
+! LLVMIR: store <8 x i16> %[[res]], ptr %2, align 16
+end subroutine vec_lxv_testi16a
+
+! CHECK-LABEL: @vec_lxv_testi32a
+subroutine vec_lxv_testi32a(arg1, arg2, res)
+  integer(4) :: arg1
+  integer(4) :: arg2(2, 4, 8)
+  vector(integer(4)) :: res
+  res = vec_lxv(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
+
+! LLVMIR: %[[offset:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[offset]]
+! LLVMIR: %[[res:.*]] = load <4 x i32>, ptr %[[addr]], align 1
+! LLVMIR: store <4 x i32> %[[res]], ptr %2, align 16
+end subroutine vec_lxv_testi32a
+
+! CHECK-LABEL: @vec_lxv_testf32a
+subroutine vec_lxv_testf32a(arg1, arg2, res)
+  ! Lowering test for vec_lxv with an integer(2) offset and a real(4) array.
+  ! The checks below expect a byte-offset address computed from arg1 and one
+  ! alignment-1 <4 x float> load whose value is stored to res unchanged.
+  integer(2) :: arg1
+  real(4) :: arg2(4)
+  vector(real(4)) :: res
+  res = vec_lxv(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR: %[[offset:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[offset]]
+! LLVMIR: %[[res:.*]] = load <4 x float>, ptr %[[addr]], align 1
+! LLVMIR: store <4 x float> %[[res]], ptr %2, align 16
+end subroutine vec_lxv_testf32a
+
+! CHECK-LABEL: @vec_lxv_testf64a
+subroutine vec_lxv_testf64a(arg1, arg2, res)
+  ! Lowering test for vec_lxv with an integer(8) offset and a real(8) array.
+  ! The checks below expect a byte-offset address computed from arg1 and one
+  ! alignment-1 <2 x double> load whose value is stored to res unchanged.
+  integer(8) :: arg1
+  real(8) :: arg2(4)
+  vector(real(8)) :: res
+  res = vec_lxv(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf64>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<2xf64>) -> !fir.vector<2:f64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
+
+! LLVMIR: %[[offset:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[offset]]
+! LLVMIR: %[[res:.*]] = load <2 x double>, ptr %[[addr]], align 1
+! LLVMIR: store <2 x double> %[[res]], ptr %2, align 16
+end subroutine vec_lxv_testf64a
+
+!-------------------
+! vec_xl
+!-------------------
+
+! CHECK-LABEL: @vec_xl_testi8a
+subroutine vec_xl_testi8a(arg1, arg2, res)
+  ! Lowering test for vec_xl with an integer(1) offset and a scalar integer(1)
+  ! base. The checks below expect an alignment-1 <16 x i8> load from the
+  ! byte-offset address, followed by a shuffle that reverses all 16 elements.
+  integer(1) :: arg1
+  integer(1) :: arg2
+  vector(integer(1)) :: res
+  res = vec_xl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<i8>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ref2:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[undefv:.*]] = fir.undefined vector<16xi8>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[ref2]], %[[undefv]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<16xi8>) -> !fir.vector<16:i8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
+  
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <16 x i8>, ptr %[[addr]], align 1
+! LLVMIR: %[[shflv:.*]] = shufflevector <16 x i8> %[[ld]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <16 x i8> %[[shflv]], ptr %2, align 16
+end subroutine vec_xl_testi8a
+
+! CHECK-LABEL: @vec_xl_testi16a
+subroutine vec_xl_testi16a(arg1, arg2, res)
+  ! Lowering test for vec_xl with an integer(2) offset and a rank-2 integer(2)
+  ! array. The checks below expect an alignment-1 <8 x i16> load from the
+  ! byte-offset address, followed by a shuffle that reverses all 8 elements.
+  integer(2) :: arg1
+  integer(2) :: arg2(2, 8)
+  vector(integer(2)) :: res
+  res = vec_xl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x8xi16>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ref2:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[undefv:.*]] = fir.undefined vector<8xi16>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[ref2]], %[[undefv]] [7, 6, 5, 4, 3, 2, 1, 0] : vector<8xi16>, vector<8xi16>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<8xi16>) -> !fir.vector<8:i16>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <8 x i16>, ptr %[[addr]], align 1
+! LLVMIR: %[[shflv:.*]] = shufflevector <8 x i16> %[[ld]], <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <8 x i16> %[[shflv]], ptr %2, align 16
+end subroutine vec_xl_testi16a
+
+! CHECK-LABEL: @vec_xl_testi32a
+subroutine vec_xl_testi32a(arg1, arg2, res)
+  ! Lowering test for vec_xl with an integer(4) offset and a rank-3 integer(4)
+  ! array. The checks below expect the byte-offset address to be passed to the
+  ! llvm.ppc.vsx.lxvw4x.be intrinsic, whose <4 x i32> result is stored directly.
+  integer(4) :: arg1
+  integer(4) :: arg2(2, 4, 8)
+  vector(integer(4)) :: res
+  res = vec_xl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(ptr %[[addr]])
+! LLVMIR: store <4 x i32> %[[ld]], ptr %2, align 16
+end subroutine vec_xl_testi32a
+
+! CHECK-LABEL: @vec_xl_testi64a
+subroutine vec_xl_testi64a(arg1, arg2, res)
+  ! Lowering test for vec_xl with an integer(8) offset and a rank-3 integer(8)
+  ! array. The checks below expect the byte-offset address to be passed to the
+  ! llvm.ppc.vsx.lxvd2x.be intrinsic; its <2 x double> result is bitcast to
+  ! <2 x i64> before being stored.
+  integer(8) :: arg1
+  integer(8) :: arg2(2, 4, 1)
+  vector(integer(8)) :: res
+  res = vec_xl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x1xi64>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<2xi64>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<2xi64>) -> !fir.vector<2:i64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:i64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[bc]], ptr %2, align 16
+end subroutine vec_xl_testi64a
+
+! CHECK-LABEL: @vec_xl_testf32a
+subroutine vec_xl_testf32a(arg1, arg2, res)
+  ! Lowering test for vec_xl with an integer(2) offset and a real(4) array.
+  ! The checks below expect the byte-offset address to be passed to the
+  ! llvm.ppc.vsx.lxvw4x.be intrinsic; its <4 x i32> result is bitcast to
+  ! <4 x float> before being stored.
+  integer(2) :: arg1
+  real(4) :: arg2(4)
+  vector(real(4)) :: res
+  res = vec_xl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <4 x float>
+! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16
+end subroutine vec_xl_testf32a
+
+! CHECK-LABEL: @vec_xl_testf64a
+subroutine vec_xl_testf64a(arg1, arg2, res)
+  ! Lowering test for vec_xl with an integer(8) offset and a real(8) array.
+  ! The checks below expect the byte-offset address to be passed to the
+  ! llvm.ppc.vsx.lxvd2x.be intrinsic, whose <2 x double> result is stored
+  ! directly (no bitcast is expected for this element type).
+  integer(8) :: arg1
+  real(8) :: arg2(2)
+  vector(real(8)) :: res
+  res = vec_xl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2xf64>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<2xf64>) -> !fir.vector<2:f64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]])
+! LLVMIR: store <2 x double> %[[ld]], ptr %2, align 16
+end subroutine vec_xl_testf64a
+
+!-------------------
+! vec_xl_be
+!-------------------
+
+! CHECK-LABEL: @vec_xl_be_testi8a
+subroutine vec_xl_be_testi8a(arg1, arg2, res)
+  ! Lowering test for vec_xl_be with an integer(1) offset and a rank-3
+  ! integer(1) array. The checks below expect an alignment-1 <16 x i8> load
+  ! from the byte-offset address, followed by a shuffle that reverses all 16
+  ! elements. The LLVM IR checks capture SSA names instead of hard-coding
+  ! value numbers, so unrelated changes in instruction numbering do not break
+  ! the test.
+  integer(1) :: arg1
+  integer(1) :: arg2(2, 4, 8)
+  vector(integer(1)) :: res
+  res = vec_xl_be(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi8>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ref2:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[undefv:.*]] = fir.undefined vector<16xi8>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[ref2]], %[[undefv]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<16xi8>) -> !fir.vector<16:i8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
+
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <16 x i8>, ptr %[[addr]], align 1
+! LLVMIR: %[[shflv:.*]] = shufflevector <16 x i8> %[[ld]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <16 x i8> %[[shflv]], ptr %2, align 16
+end subroutine vec_xl_be_testi8a
+
+! CHECK-LABEL: @vec_xl_be_testi16a
+subroutine vec_xl_be_testi16a(arg1, arg2, res)
+  ! Lowering test for vec_xl_be with an integer(2) offset and a rank-2
+  ! integer(2) array. The checks below expect an alignment-1 <8 x i16> load
+  ! from the byte-offset address, followed by a shuffle that reverses all 8
+  ! elements. The LLVM IR checks capture SSA names instead of hard-coding
+  ! value numbers.
+  integer(2) :: arg1
+  integer(2) :: arg2(8,2)
+  vector(integer(2)) :: res
+  res = vec_xl_be(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<8x2xi16>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ref2:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[undefv:.*]] = fir.undefined vector<8xi16>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[ref2]], %[[undefv]] [7, 6, 5, 4, 3, 2, 1, 0] : vector<8xi16>, vector<8xi16>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<8xi16>) -> !fir.vector<8:i16>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <8 x i16>, ptr %[[addr]], align 1
+! LLVMIR: %[[shflv:.*]] = shufflevector <8 x i16> %[[ld]], <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <8 x i16> %[[shflv]], ptr %2, align 16
+end subroutine vec_xl_be_testi16a
+
+! CHECK-LABEL: @vec_xl_be_testi32a
+subroutine vec_xl_be_testi32a(arg1, arg2, res)
+  ! Lowering test for vec_xl_be with an integer(4) offset and a rank-2
+  ! integer(4) array. The checks below expect an alignment-1 <4 x i32> load
+  ! from the byte-offset address, followed by a shuffle that reverses all 4
+  ! elements. The LLVM IR checks capture SSA names instead of hard-coding
+  ! value numbers.
+  integer(4) :: arg1
+  integer(4) :: arg2(2, 4)
+  vector(integer(4)) :: res
+  res = vec_xl_be(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4xi32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ref2:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[undefv:.*]] = fir.undefined vector<4xi32>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[ref2]], %[[undefv]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <4 x i32>, ptr %[[addr]], align 1
+! LLVMIR: %[[shflv:.*]] = shufflevector <4 x i32> %[[ld]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <4 x i32> %[[shflv]], ptr %2, align 16
+end subroutine vec_xl_be_testi32a
+
+! CHECK-LABEL: @vec_xl_be_testi64a
+subroutine vec_xl_be_testi64a(arg1, arg2, res)
+  ! Lowering test for vec_xl_be with an integer(8) offset and a rank-3
+  ! integer(8) array. The checks below expect an alignment-1 <2 x i64> load
+  ! from the byte-offset address, followed by a shuffle that swaps the two
+  ! elements. The LLVM IR checks capture SSA names instead of hard-coding
+  ! value numbers.
+  integer(8) :: arg1
+  integer(8) :: arg2(2, 4, 8)
+  vector(integer(8)) :: res
+  res = vec_xl_be(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi64>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ref2:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[undefv:.*]] = fir.undefined vector<2xi64>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[ref2]], %[[undefv]] [1, 0] : vector<2xi64>, vector<2xi64>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<2xi64>) -> !fir.vector<2:i64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:i64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <2 x i64>, ptr %[[addr]], align 1
+! LLVMIR: %[[shflv:.*]] = shufflevector <2 x i64> %[[ld]], <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+! LLVMIR: store <2 x i64> %[[shflv]], ptr %2, align 16
+end subroutine vec_xl_be_testi64a
+
+! CHECK-LABEL: @vec_xl_be_testf32a
+subroutine vec_xl_be_testf32a(arg1, arg2, res)
+  ! Lowering test for vec_xl_be with an integer(2) offset and a real(4) array.
+  ! The checks below expect an alignment-1 <4 x float> load from the
+  ! byte-offset address, followed by a shuffle that reverses all 4 elements.
+  ! The LLVM IR checks capture SSA names instead of hard-coding value numbers.
+  integer(2) :: arg1
+  real(4) :: arg2(4)
+  vector(real(4)) :: res
+  res = vec_xl_be(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ref2:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[undefv:.*]] = fir.undefined vector<4xf32>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[ref2]], %[[undefv]] [3, 2, 1, 0] : vector<4xf32>, vector<4xf32>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <4 x float>, ptr %[[addr]], align 1
+! LLVMIR: %[[shflv:.*]] = shufflevector <4 x float> %[[ld]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <4 x float> %[[shflv]], ptr %2, align 16
+end subroutine vec_xl_be_testf32a
+
+! CHECK-LABEL: @vec_xl_be_testf64a
+subroutine vec_xl_be_testf64a(arg1, arg2, res)
+  ! Lowering test for vec_xl_be with an integer(8) offset and a real(8) array.
+  ! The checks below expect an alignment-1 <2 x double> load from the
+  ! byte-offset address, followed by a shuffle that swaps the two elements.
+  ! The LLVM IR checks capture SSA names instead of hard-coding value numbers.
+  integer(8) :: arg1
+  real(8) :: arg2(4)
+  vector(real(8)) :: res
+  res = vec_xl_be(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf64>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ref2:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[undefv:.*]] = fir.undefined vector<2xf64>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[ref2]], %[[undefv]] [1, 0] : vector<2xf64>, vector<2xf64>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<2xf64>) -> !fir.vector<2:f64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <2 x double>, ptr %[[addr]], align 1
+! LLVMIR: %[[shflv:.*]] = shufflevector <2 x double> %[[ld]], <2 x double> undef, <2 x i32> <i32 1, i32 0>
+! LLVMIR: store <2 x double> %[[shflv]], ptr %2, align 16
+end subroutine vec_xl_be_testf64a
+
 !-------------------
 ! vec_xld2
 !-------------------
@ -520,3 +1120,57 @@ subroutine vec_xlw4_testf32a(arg1, arg2, res)
 ! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <4 x float>
 ! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16
 end subroutine vec_xlw4_testf32a
+
+!-------------------
+! vec_xlds
+!-------------------
+
+! CHECK-LABEL: @vec_xlds_testi64a
+subroutine vec_xlds_testi64a(arg1, arg2, res)
+  ! Lowering test for vec_xlds with an integer(8) offset and an array of
+  ! vector(integer(8)). The checks below expect a single i64 to be loaded from
+  ! the byte-offset address and splatted into both lanes of a <2 x i64>.
+  integer(8) :: arg1
+  vector(integer(8)) :: arg2(4)
+  vector(integer(8)) :: res
+  res = vec_xlds(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[aryref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<2:i64>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[aryref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ref:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<i64>
+! FIR: %[[val:.*]] = fir.load %[[ref]] : !fir.ref<i64>
+! FIR: %[[vsplt:.*]] = vector.splat %[[val]] : vector<2xi64>
+! FIR: %[[res:.*]] = fir.convert %[[vsplt]] : (vector<2xi64>) -> !fir.vector<2:i64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:i64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load i64, ptr %[[addr]], align 8
+! LLVMIR: %[[insrt:.*]] = insertelement <2 x i64> undef, i64 %[[ld]], i32 0
+! LLVMIR: %[[shflv:.*]] = shufflevector <2 x i64> %[[insrt]], <2 x i64> undef, <2 x i32> zeroinitializer
+! LLVMIR: store <2 x i64> %[[shflv]], ptr %2, align 16
+end subroutine vec_xlds_testi64a
+
+! CHECK-LABEL: @vec_xlds_testf64a
+subroutine vec_xlds_testf64a(arg1, arg2, res)
+  ! Lowering test for vec_xlds with an integer(8) offset and an array of
+  ! vector(real(8)). The checks below expect the element to be loaded as an
+  ! i64 from the byte-offset address, splatted into a <2 x i64>, and then
+  ! bitcast to <2 x double> before being stored.
+  integer(8) :: arg1
+  vector(real(8)) :: arg2(4)
+  vector(real(8)) :: res
+  res = vec_xlds(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[aryref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<2:f64>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[aryref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ref:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<i64>
+! FIR: %[[val:.*]] = fir.load %[[ref]] : !fir.ref<i64>
+! FIR: %[[vsplt:.*]] = vector.splat %[[val]] : vector<2xi64>
+! FIR: %[[bc:.*]] = vector.bitcast %[[vsplt]] : vector<2xi64> to vector<2xf64>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<2xf64>) -> !fir.vector<2:f64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load i64, ptr %[[addr]], align 8
+! LLVMIR: %[[insrt:.*]] = insertelement <2 x i64> undef, i64 %[[ld]], i32 0
+! LLVMIR: %[[shflv:.*]] = shufflevector <2 x i64> %[[insrt]], <2 x i64> undef, <2 x i32> zeroinitializer
+! LLVMIR: %[[bc:.*]] = bitcast <2 x i64> %[[shflv]] to <2 x double>
+! LLVMIR: store <2 x double> %[[bc]], ptr %2, align 16
+end subroutine vec_xlds_testf64a
--- a/flang/test/Lower/PowerPC/ppc-vec-load.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-load.f90
@ -430,6 +430,354 @@ subroutine vec_ldl_testi32s(arg1, arg2, res)
 ! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16
 end subroutine vec_ldl_testi32s

+!----------------------
+! vec_lvsl
+!----------------------
+
+! CHECK-LABEL: @vec_lvsl_testi8s
+subroutine vec_lvsl_testi8s(arg1, arg2, res)
+  ! Lowering test for vec_lvsl with an integer(1) offset and a scalar
+  ! integer(1) base. The checks below expect the offset to be converted to
+  ! i64, shifted left then arithmetically right by 56, used as a byte offset
+  ! for the llvm.ppc.altivec.lvsl call, and the 16-byte result reversed by a
+  ! shuffle.
+  integer(1) :: arg1
+  integer(1) :: arg2
+  vector(unsigned(1)) :: res
+  res = vec_lvsl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[arg1ext:.*]] = fir.convert %[[arg1]] : (i8) -> i64
+! FIR: %[[c56:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1ext]], %[[c56]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[c56]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<i8>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[vundef:.*]] = fir.undefined vector<16xi8>
+! FIR: %[[sv:.*]] = vector.shuffle %[[ld]], %[[vundef]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[sv]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[ext:.*]] = sext i8 %[[arg1]] to i64
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[ext]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsl(ptr %[[addr]])
+! LLVMIR: %[[sv:.*]] = shufflevector <16 x i8> %[[ld]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <16 x i8> %[[sv]], ptr %2, align 16
+end subroutine vec_lvsl_testi8s
+
+! CHECK-LABEL: @vec_lvsl_testi16a
+subroutine vec_lvsl_testi16a(arg1, arg2, res)
+  ! Lowering test for vec_lvsl with an integer(2) offset and an integer(2)
+  ! array. The checks below expect the offset to be converted to i64, shifted
+  ! left then arithmetically right by 56, used as a byte offset for the
+  ! llvm.ppc.altivec.lvsl call, and the 16-byte result reversed by a shuffle.
+  integer(2) :: arg1
+  integer(2) :: arg2(4)
+  vector(unsigned(1)) :: res
+  res = vec_lvsl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[arg1ext:.*]] = fir.convert %[[arg1]] : (i16) -> i64
+! FIR: %[[c56:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1ext]], %[[c56]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[c56]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xi16>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[vundef:.*]] = fir.undefined vector<16xi8>
+! FIR: %[[sv:.*]] = vector.shuffle %[[ld]], %[[vundef]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[sv]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[ext:.*]] = sext i16 %[[arg1]] to i64
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[ext]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsl(ptr %[[addr]])
+! LLVMIR: %[[sv:.*]] = shufflevector <16 x i8> %[[ld]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR:  store <16 x i8> %[[sv]], ptr %2, align 16
+end subroutine vec_lvsl_testi16a
+
+! CHECK-LABEL: @vec_lvsl_testi32a
+subroutine vec_lvsl_testi32a(arg1, arg2, res)
+  ! Lowering test for vec_lvsl with an integer(4) offset and a rank-3
+  ! integer(4) array. The checks below expect the offset to be converted to
+  ! i64, shifted left then arithmetically right by 56, used as a byte offset
+  ! for the llvm.ppc.altivec.lvsl call, and the 16-byte result reversed by a
+  ! shuffle.
+  integer(4) :: arg1
+  integer(4) :: arg2(2, 3, 4)
+  vector(unsigned(1)) :: res
+  res = vec_lvsl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[arg1ext:.*]] = fir.convert %[[arg1]] : (i32) -> i64
+! FIR: %[[c56:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1ext]], %[[c56]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[c56]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x3x4xi32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[vundef:.*]] = fir.undefined vector<16xi8>
+! FIR: %[[sv:.*]] = vector.shuffle %[[ld]], %[[vundef]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[sv]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[ext:.*]] = sext i32 %[[arg1]] to i64
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[ext]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsl(ptr %[[addr]])
+! LLVMIR: %[[sv:.*]] = shufflevector <16 x i8> %[[ld]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR:  store <16 x i8> %[[sv]], ptr %2, align 16
+end subroutine vec_lvsl_testi32a
+
+! CHECK-LABEL: @vec_lvsl_testf32a
+subroutine vec_lvsl_testf32a(arg1, arg2, res)
+  ! Lowering test for vec_lvsl with an integer(8) offset and a real(4) array.
+  ! The offset is already i64, so the checks below expect no widening
+  ! conversion: only the shift left / arithmetic shift right by 56, the byte
+  ! offset, the llvm.ppc.altivec.lvsl call, and the reversing shuffle.
+  integer(8) :: arg1
+  real(4) :: arg2(4)
+  vector(unsigned(1)) :: res
+  res = vec_lvsl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[c56:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1]], %[[c56]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[c56]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[vundef:.*]] = fir.undefined vector<16xi8>
+! FIR: %[[sv:.*]] = vector.shuffle %[[ld]], %[[vundef]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[sv]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[arg1]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsl(ptr %[[addr]])
+! LLVMIR: %[[sv:.*]] = shufflevector <16 x i8> %[[ld]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR:  store <16 x i8> %[[sv]], ptr %2, align 16
+end subroutine vec_lvsl_testf32a
+
+!----------------------
+! vec_lvsr
+!----------------------
+
+! CHECK-LABEL: @vec_lvsr_testi8s
+subroutine vec_lvsr_testi8s(arg1, arg2, res)
+  ! Lowering test for vec_lvsr with an integer(1) offset and a scalar
+  ! integer(1) base. The checks below expect the offset to be converted to
+  ! i64, shifted left then arithmetically right by 56, used as a byte offset
+  ! for the llvm.ppc.altivec.lvsr call, and the 16-byte result reversed by a
+  ! shuffle. In the LLVM IR checks, "addr" names the computed address and
+  ! "ld" names the intrinsic result, matching the FIR checks.
+  integer(1) :: arg1
+  integer(1) :: arg2
+  vector(unsigned(1)) :: res
+  res = vec_lvsr(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[arg1ext:.*]] = fir.convert %[[arg1]] : (i8) -> i64
+! FIR: %[[c56:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1ext]], %[[c56]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[c56]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<i8>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsr(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[vundef:.*]] = fir.undefined vector<16xi8>
+! FIR: %[[sv:.*]] = vector.shuffle %[[ld]], %[[vundef]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[sv]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[ext:.*]] = sext i8 %[[arg1]] to i64
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[ext]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsr(ptr %[[addr]])
+! LLVMIR: %[[sv:.*]] = shufflevector <16 x i8> %[[ld]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <16 x i8> %[[sv]], ptr %2, align 16
+end subroutine vec_lvsr_testi8s
+
+! CHECK-LABEL: @vec_lvsr_testi16a
+subroutine vec_lvsr_testi16a(arg1, arg2, res)
+  ! Lowering test for vec_lvsr with an integer(2) offset and an integer(2)
+  ! array. The checks below expect the offset to be converted to i64, shifted
+  ! left then arithmetically right by 56, used as a byte offset for the
+  ! llvm.ppc.altivec.lvsr call, and the 16-byte result reversed by a shuffle.
+  ! In the LLVM IR checks, "addr" names the computed address and "ld" names
+  ! the intrinsic result, matching the FIR checks.
+  integer(2) :: arg1
+  integer(2) :: arg2(4)
+  vector(unsigned(1)) :: res
+  res = vec_lvsr(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[arg1ext:.*]] = fir.convert %[[arg1]] : (i16) -> i64
+! FIR: %[[c56:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1ext]], %[[c56]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[c56]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xi16>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsr(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[vundef:.*]] = fir.undefined vector<16xi8>
+! FIR: %[[sv:.*]] = vector.shuffle %[[ld]], %[[vundef]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[sv]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[ext:.*]] = sext i16 %[[arg1]] to i64
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[ext]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsr(ptr %[[addr]])
+! LLVMIR: %[[sv:.*]] = shufflevector <16 x i8> %[[ld]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <16 x i8> %[[sv]], ptr %2, align 16
+end subroutine vec_lvsr_testi16a
+
+! CHECK-LABEL: @vec_lvsr_testi32a
+subroutine vec_lvsr_testi32a(arg1, arg2, res)
+  ! Lowering test for vec_lvsr with an integer(4) offset and a rank-3
+  ! integer(4) array. The checks below expect the offset to be converted to
+  ! i64, shifted left then arithmetically right by 56, used as a byte offset
+  ! for the llvm.ppc.altivec.lvsr call, and the 16-byte result reversed by a
+  ! shuffle. In the LLVM IR checks, "addr" names the computed address and
+  ! "ld" names the intrinsic result, matching the FIR checks.
+  integer(4) :: arg1
+  integer(4) :: arg2(2, 3, 4)
+  vector(unsigned(1)) :: res
+  res = vec_lvsr(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[arg1ext:.*]] = fir.convert %[[arg1]] : (i32) -> i64
+! FIR: %[[c56:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1ext]], %[[c56]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[c56]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x3x4xi32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsr(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[vundef:.*]] = fir.undefined vector<16xi8>
+! FIR: %[[sv:.*]] = vector.shuffle %[[ld]], %[[vundef]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[sv]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[ext:.*]] = sext i32 %[[arg1]] to i64
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[ext]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsr(ptr %[[addr]])
+! LLVMIR: %[[sv:.*]] = shufflevector <16 x i8> %[[ld]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <16 x i8> %[[sv]], ptr %2, align 16
+end subroutine vec_lvsr_testi32a
+
+! vec_lvsr with an integer(8) offset and a real(4) array of 4 elements.
+! The offset is already 64-bit, so no widening is expected ahead of the
+! shift-left / arithmetic-shift-right by 56 pair; the rest of the sequence is
+! the byte-offset address, the llvm.ppc.altivec.lvsr call, and the 16-byte
+! reversing shuffle.
+! CHECK-LABEL: @vec_lvsr_testf32a
+subroutine vec_lvsr_testf32a(arg1, arg2, res)
+  integer(8) :: arg1
+  real(4) :: arg2(4)
+  vector(unsigned(1)) :: res
+  res = vec_lvsr(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[c56:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1]], %[[c56]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[c56]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsr(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[vundef:.*]] = fir.undefined vector<16xi8>
+! FIR: %[[sv:.*]] = vector.shuffle %[[ld]], %[[vundef]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[sv]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[arg1]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsr(ptr %[[addr]])
+! LLVMIR: %[[sv:.*]] = shufflevector <16 x i8> %[[ld]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <16 x i8> %[[sv]], ptr %2, align 16
+end subroutine vec_lvsr_testf32a
+
+!----------------------
+! vec_lxv
+!----------------------
+
+! vec_lxv with an integer(1) offset and an integer(1) array.
+! Expected lowering: no intrinsic call, just a 16 x i8 vector load with
+! alignment 1 from the array base plus the offset in bytes (the offset keeps
+! its i8 width), stored to res.
+! CHECK-LABEL: @vec_lxv_testi8a
+subroutine vec_lxv_testi8a(arg1, arg2, res)
+  integer(1) :: arg1
+  integer(1) :: arg2(4)
+  vector(integer(1)) :: res
+  res = vec_lxv(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xi8>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:i8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
+
+! LLVMIR_P9: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR_P9: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
+! LLVMIR_P9: %[[ld:.*]] = load <16 x i8>, ptr %[[addr]], align 1
+! LLVMIR_P9: store <16 x i8> %[[ld]], ptr %2, align 16
+end subroutine vec_lxv_testi8a
+
+! vec_lxv with an integer(2) offset and a rank-3 integer(2) array.
+! Expected lowering: an 8 x i16 vector load with alignment 1 from the array
+! base plus the offset in bytes (offset kept as i16), with no intrinsic call.
+! CHECK-LABEL: @vec_lxv_testi16a
+subroutine vec_lxv_testi16a(arg1, arg2, res)
+  integer(2) :: arg1
+  integer(2) :: arg2(2, 4, 8)
+  vector(integer(2)) :: res
+  res = vec_lxv(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi16>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<8xi16>) -> !fir.vector<8:i16>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
+
+! LLVMIR_P9: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR_P9: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR_P9: %[[ld:.*]] = load <8 x i16>, ptr %[[addr]], align 1
+! LLVMIR_P9: store <8 x i16> %[[ld]], ptr %2, align 16
+end subroutine vec_lxv_testi16a
+
+! vec_lxv with an integer(4) offset and a rank-3 integer(4) array.
+! Expected lowering: a 4 x i32 vector load with alignment 1 from the array
+! base plus the offset in bytes (offset kept as i32), with no intrinsic call.
+! CHECK-LABEL: @vec_lxv_testi32a
+subroutine vec_lxv_testi32a(arg1, arg2, res)
+  integer(4) :: arg1
+  integer(4) :: arg2(2, 4, 8)
+  vector(integer(4)) :: res
+  res = vec_lxv(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
+
+! LLVMIR_P9: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR_P9: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
+! LLVMIR_P9: %[[ld:.*]] = load <4 x i32>, ptr %[[addr]], align 1
+! LLVMIR_P9: store <4 x i32> %[[ld]], ptr %2, align 16
+end subroutine vec_lxv_testi32a
+
+! vec_lxv with an integer(2) offset and a real(4) array of 4 elements.
+! Expected lowering: a 4 x float vector load with alignment 1 from the array
+! base plus the offset in bytes (offset kept as i16), with no intrinsic call.
+! CHECK-LABEL: @vec_lxv_testf32a
+subroutine vec_lxv_testf32a(arg1, arg2, res)
+  integer(2) :: arg1
+  real(4) :: arg2(4)
+  vector(real(4)) :: res
+  res = vec_lxv(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR_P9: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR_P9: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR_P9: %[[ld:.*]] = load <4 x float>, ptr %[[addr]], align 1
+! LLVMIR_P9: store <4 x float> %[[ld]], ptr %2, align 16
+end subroutine vec_lxv_testf32a
+
+! vec_lxv with an integer(8) offset and a real(8) array of 4 elements.
+! Expected lowering: a 2 x double vector load with alignment 1 from the array
+! base plus the offset in bytes, with no intrinsic call.
+! CHECK-LABEL: @vec_lxv_testf64a
+subroutine vec_lxv_testf64a(arg1, arg2, res)
+  integer(8) :: arg1
+  real(8) :: arg2(4)
+  vector(real(8)) :: res
+  res = vec_lxv(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf64>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<2xf64>) -> !fir.vector<2:f64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
+
+! LLVMIR_P9: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR_P9: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR_P9: %[[ld:.*]] = load <2 x double>, ptr %[[addr]], align 1
+! LLVMIR_P9: store <2 x double> %[[ld]], ptr %2, align 16
+end subroutine vec_lxv_testf64a
+
 !----------------------
 ! vec_xld2
 !----------------------
@ -564,6 +912,330 @@ subroutine vec_xld2_testf64a(arg1, arg2, res)
 ! LLVMIR: store <2 x double> %[[ld]], ptr %2, align 16
 end subroutine vec_xld2_testf64a

+!----------------------
+! vec_xl
+!----------------------
+
+! vec_xl with an integer(1) offset and an integer(1) array.
+! Expected lowering: a 16 x i8 vector load with alignment 1 from the array
+! base plus the offset in bytes; no intrinsic call is expected for this
+! element type.
+! CHECK-LABEL: @vec_xl_testi8a
+subroutine vec_xl_testi8a(arg1, arg2, res)
+  integer(1) :: arg1
+  integer(1) :: arg2(4)
+  vector(integer(1)) :: res
+  res = vec_xl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xi8>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:i8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
+
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <16 x i8>, ptr %[[addr]], align 1
+! LLVMIR: store <16 x i8> %[[ld]], ptr %2, align 16
+end subroutine vec_xl_testi8a
+
+! vec_xl with an integer(2) offset and a rank-3 integer(2) array.
+! Expected lowering: an 8 x i16 vector load with alignment 1 from the array
+! base plus the offset in bytes; no intrinsic call is expected for this
+! element type.
+! CHECK-LABEL: @vec_xl_testi16a
+subroutine vec_xl_testi16a(arg1, arg2, res)
+  integer(2) :: arg1
+  integer(2) :: arg2(2, 4, 8)
+  vector(integer(2)) :: res
+  res = vec_xl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi16>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<8xi16>) -> !fir.vector<8:i16>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <8 x i16>, ptr %[[addr]], align 1
+! LLVMIR: store <8 x i16> %[[ld]], ptr %2, align 16
+end subroutine vec_xl_testi16a
+
+! vec_xl with an integer(4) offset and a rank-3 integer(4) array.
+! Expected lowering: the array base plus the offset in bytes is passed to
+! llvm.ppc.vsx.lxvw4x, whose 4 x i32 result is stored to res directly.
+! CHECK-LABEL: @vec_xl_testi32a
+subroutine vec_xl_testi32a(arg1, arg2, res)
+  integer(4) :: arg1
+  integer(4) :: arg2(2, 4, 8)
+  vector(integer(4)) :: res
+  res = vec_xl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x(ptr %[[addr]])
+! LLVMIR: store <4 x i32> %[[ld]], ptr %2, align 16
+end subroutine vec_xl_testi32a
+
+! vec_xl with an integer(8) offset and a rank-3 integer(8) array.
+! Expected lowering: the array base plus the offset in bytes is passed to
+! llvm.ppc.vsx.lxvd2x, which returns 2 x double; that result is bitcast to
+! 2 x i64 before being stored to res.
+! CHECK-LABEL: @vec_xl_testi64a
+subroutine vec_xl_testi64a(arg1, arg2, res)
+  integer(8) :: arg1
+  integer(8) :: arg2(2, 4, 8)
+  vector(integer(8)) :: res
+  res = vec_xl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi64>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<2xi64>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<2xi64>) -> !fir.vector<2:i64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:i64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[bc]], ptr %2, align 16
+end subroutine vec_xl_testi64a
+
+! vec_xl with an integer(2) offset and a real(4) array of 4 elements.
+! Expected lowering: the array base plus the offset in bytes is passed to
+! llvm.ppc.vsx.lxvw4x, which returns 4 x i32; that result is bitcast to
+! 4 x float before being stored to res.
+! CHECK-LABEL: @vec_xl_testf32a
+subroutine vec_xl_testf32a(arg1, arg2, res)
+  integer(2) :: arg1
+  real(4) :: arg2(4)
+  vector(real(4)) :: res
+  res = vec_xl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <4 x float>
+! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16
+end subroutine vec_xl_testf32a
+
+! vec_xl with an integer(8) offset and a scalar (non-array) real(8) second
+! argument.
+! Expected lowering: the scalar's address plus the offset in bytes is passed
+! to llvm.ppc.vsx.lxvd2x, whose 2 x double result is stored to res without
+! any bitcast.
+! CHECK-LABEL: @vec_xl_testf64a
+subroutine vec_xl_testf64a(arg1, arg2, res)
+  integer(8) :: arg1
+  real(8) :: arg2
+  vector(real(8)) :: res
+  res = vec_xl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<f64>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<2xf64>) -> !fir.vector<2:f64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]])
+! LLVMIR: store <2 x double> %[[ld]], ptr %2, align 16
+end subroutine vec_xl_testf64a
+
+!----------------------
+! vec_xlds
+!----------------------
+
+! vec_xlds with an integer(8) offset and an array of 4 vector(integer(8)).
+! Expected lowering: a single i64 is loaded from the array base plus the
+! offset in bytes and splatted into both lanes of a 2 x i64 vector, which is
+! stored to res.
+! CHECK-LABEL: @vec_xlds_testi64a
+subroutine vec_xlds_testi64a(arg1, arg2, res)
+  integer(8) :: arg1
+  vector(integer(8)) :: arg2(4)
+  vector(integer(8)) :: res
+  res = vec_xlds(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<2:i64>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[cnv:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<i64>
+! FIR: %[[ld:.*]] = fir.load %[[cnv]] : !fir.ref<i64>
+! FIR: %[[vsplt:.*]] = vector.splat %[[ld]] : vector<2xi64>
+! FIR: %[[res:.*]] = fir.convert %[[vsplt]] : (vector<2xi64>) -> !fir.vector<2:i64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:i64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load i64, ptr %[[addr]], align 8
+! LLVMIR: %[[insrt:.*]] = insertelement <2 x i64> undef, i64 %[[ld]], i32 0
+! LLVMIR: %[[shfl:.*]] = shufflevector <2 x i64> %[[insrt]], <2 x i64> undef, <2 x i32> zeroinitializer
+! LLVMIR: store <2 x i64> %[[shfl]], ptr %2, align 16
+end subroutine vec_xlds_testi64a
+
+! vec_xlds with an integer(8) offset and an array of 4 vector(real(8)).
+! Expected lowering: the element is still loaded as an i64 from the array
+! base plus the offset in bytes and splatted into a 2 x i64 vector; the
+! splat is then bitcast to 2 x double before being stored to res.
+! CHECK-LABEL: @vec_xlds_testf64a
+subroutine vec_xlds_testf64a(arg1, arg2, res)
+  integer(8) :: arg1
+  vector(real(8)) :: arg2(4)
+  vector(real(8)) :: res
+  res = vec_xlds(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<2:f64>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[cnv:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<i64>
+! FIR: %[[ld:.*]] = fir.load %[[cnv]] : !fir.ref<i64>
+! FIR: %[[vsplt:.*]] = vector.splat %[[ld]] : vector<2xi64>
+! FIR: %[[bc:.*]] = vector.bitcast %[[vsplt]] : vector<2xi64> to vector<2xf64>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<2xf64>) -> !fir.vector<2:f64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load i64, ptr %[[addr]], align 8
+! LLVMIR: %[[insrt:.*]] = insertelement <2 x i64> undef, i64 %[[ld]], i32 0
+! LLVMIR: %[[shfl:.*]] = shufflevector <2 x i64> %[[insrt]], <2 x i64> undef, <2 x i32> zeroinitializer
+! LLVMIR: %[[bc:.*]] = bitcast <2 x i64> %[[shfl]] to <2 x double>
+! LLVMIR: store <2 x double> %[[bc]], ptr %2, align 16
+end subroutine vec_xlds_testf64a
+
+!----------------------
+! vec_xl_be
+!----------------------
+
+! vec_xl_be with an integer(1) offset and a rank-3 integer(1) array.
+! Expected lowering: a 16 x i8 vector load with alignment 1 from the array
+! base plus the offset in bytes, followed by a shuffle that reverses the
+! order of all 16 elements before the store to res.
+! CHECK-LABEL: @vec_xl_be_testi8a
+subroutine vec_xl_be_testi8a(arg1, arg2, res)
+  integer(1) :: arg1
+  integer(1) :: arg2(2, 4, 8)
+  vector(integer(1)) :: res
+  res = vec_xl_be(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[uarr:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi8>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[uarr]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[uv:.*]] = fir.undefined vector<16xi8>
+! FIR: %[[shff:.*]] = vector.shuffle %[[ld]], %[[uv]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[shff]] : (vector<16xi8>) -> !fir.vector<16:i8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
+
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <16 x i8>, ptr %[[addr]], align 1
+! LLVMIR: %[[shff:.*]] = shufflevector <16 x i8> %[[ld]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <16 x i8> %[[shff]], ptr %2, align 16
+end subroutine vec_xl_be_testi8a
+
+! vec_xl_be with an integer(2) offset and a rank-3 integer(2) array.
+! Expected lowering: an 8 x i16 vector load with alignment 1 from the array
+! base plus the offset in bytes, followed by a shuffle that reverses the
+! order of the 8 elements before the store to res.
+! CHECK-LABEL: @vec_xl_be_testi16a
+subroutine vec_xl_be_testi16a(arg1, arg2, res)
+  integer(2) :: arg1
+  integer(2) :: arg2(2, 4, 8)
+  vector(integer(2)) :: res
+  res = vec_xl_be(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[uarr:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi16>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[uarr]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[uv:.*]] = fir.undefined vector<8xi16>
+! FIR: %[[shff:.*]] = vector.shuffle %[[ld]], %[[uv]] [7, 6, 5, 4, 3, 2, 1, 0] : vector<8xi16>, vector<8xi16>
+! FIR: %[[res:.*]] = fir.convert %[[shff]] : (vector<8xi16>) -> !fir.vector<8:i16>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <8 x i16>, ptr %[[addr]], align 1
+! LLVMIR: %[[shff:.*]] = shufflevector <8 x i16> %[[ld]], <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <8 x i16> %[[shff]], ptr %2, align 16
+end subroutine vec_xl_be_testi16a
+
+! vec_xl_be with an integer(4) offset and a rank-3 integer(4) array.
+! Expected lowering: a 4 x i32 vector load with alignment 1 from the array
+! base plus the offset in bytes, followed by a shuffle that reverses the
+! order of the 4 elements before the store to res.
+! CHECK-LABEL: @vec_xl_be_testi32a
+subroutine vec_xl_be_testi32a(arg1, arg2, res)
+  integer(4) :: arg1
+  integer(4) :: arg2(2, 4, 8)
+  vector(integer(4)) :: res
+  res = vec_xl_be(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[uarr:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[uarr]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[uv:.*]] = fir.undefined vector<4xi32>
+! FIR: %[[shff:.*]] = vector.shuffle %[[ld]], %[[uv]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32>
+! FIR: %[[res:.*]] = fir.convert %[[shff]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <4 x i32>, ptr %[[addr]], align 1
+! LLVMIR: %[[shff:.*]] = shufflevector <4 x i32> %[[ld]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <4 x i32> %[[shff]], ptr %2, align 16
+end subroutine vec_xl_be_testi32a
+
+! vec_xl_be with an integer(8) offset and a rank-3 integer(8) array.
+! Expected lowering: a 2 x i64 vector load with alignment 1 from the array
+! base plus the offset in bytes, followed by a shuffle that swaps the two
+! elements before the store to res.
+! CHECK-LABEL: @vec_xl_be_testi64a
+subroutine vec_xl_be_testi64a(arg1, arg2, res)
+  integer(8) :: arg1
+  integer(8) :: arg2(2, 4, 8)
+  vector(integer(8)) :: res
+  res = vec_xl_be(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[uarr:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi64>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[uarr]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[uv:.*]] = fir.undefined vector<2xi64>
+! FIR: %[[shff:.*]] = vector.shuffle %[[ld]], %[[uv]] [1, 0] : vector<2xi64>, vector<2xi64>
+! FIR: %[[res:.*]] = fir.convert %[[shff]] : (vector<2xi64>) -> !fir.vector<2:i64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:i64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <2 x i64>, ptr %[[addr]], align 1
+! LLVMIR: %[[shff:.*]] = shufflevector <2 x i64> %[[ld]], <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+! LLVMIR: store <2 x i64> %[[shff]], ptr %2, align 16
+end subroutine vec_xl_be_testi64a
+
+! vec_xl_be with an integer(2) offset and a real(4) array of 4 elements.
+! Expected lowering: a 4 x float vector load with alignment 1 from the array
+! base plus the offset in bytes, followed by a shuffle that reverses the
+! order of the 4 elements before the store to res.
+! CHECK-LABEL: @vec_xl_be_testf32a
+subroutine vec_xl_be_testf32a(arg1, arg2, res)
+  integer(2) :: arg1
+  real(4) :: arg2(4)
+  vector(real(4)) :: res
+  res = vec_xl_be(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[uarr:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[uarr]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[uv:.*]] = fir.undefined vector<4xf32>
+! FIR: %[[shff:.*]] = vector.shuffle %[[ld]], %[[uv]] [3, 2, 1, 0] : vector<4xf32>, vector<4xf32>
+! FIR: %[[res:.*]] = fir.convert %[[shff]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <4 x float>, ptr %[[addr]], align 1
+! LLVMIR: %[[shff:.*]] = shufflevector <4 x float> %[[ld]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <4 x float> %[[shff]], ptr %2, align 16
+end subroutine vec_xl_be_testf32a
+
+! vec_xl_be with an integer(8) offset and a real(8) array of 7 elements.
+! Expected lowering: a 2 x double vector load with alignment 1 from the array
+! base plus the offset in bytes, followed by a shuffle that swaps the two
+! elements before the store to res.
+! CHECK-LABEL: @vec_xl_be_testf64a
+subroutine vec_xl_be_testf64a(arg1, arg2, res)
+  integer(8) :: arg1
+  real(8) :: arg2(7)
+  vector(real(8)) :: res
+  res = vec_xl_be(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[uarr:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<7xf64>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[uarr]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[uv:.*]] = fir.undefined vector<2xf64>
+! FIR: %[[shff:.*]] = vector.shuffle %[[ld]], %[[uv]] [1, 0] : vector<2xf64>, vector<2xf64>
+! FIR: %[[res:.*]] = fir.convert %[[shff]] : (vector<2xf64>) -> !fir.vector<2:f64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <2 x double>, ptr %[[addr]], align 1
+! LLVMIR: %[[shff:.*]] = shufflevector <2 x double> %[[ld]], <2 x double> undef, <2 x i32> <i32 1, i32 0>
+! LLVMIR: store <2 x double> %[[shff]], ptr %2, align 16
+end subroutine vec_xl_be_testf64a
+
 !----------------------
 ! vec_xlw4
 !----------------------