[LoongArch] Define ual feature and override allowsMisalignedMemoryAccesses

Some CPUs do not allow unaligned memory accesses, e.g. the 2k1000la,
which uses the la264 core; on it, a misaligned access triggers an
exception.

In this patch, a backend feature called `ual` is defined to describe
whether the CPU supports unaligned memory accesses. This feature can
be toggled by the clang options `-m[no-]unaligned-access` or their
aliases `-m[no-]strict-align`. When the feature is enabled,
`allowsMisalignedMemoryAccesses` sets the speed number to 1 and
returns true, which allows codegen to emit unaligned memory access
instructions.
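
To illustrate the effect (a hypothetical user-level example, not part
of this patch; the expected instruction sequences are taken from the
ual.ll test added below):

  // misaligned.cpp - hypothetical illustration only.
  // Build: clang++ --target=loongarch64 -O2 [-mno-unaligned-access] misaligned.cpp
  #include <cstdint>
  #include <cstring>

  // Load a 32-bit value through a pointer that is only 2-byte aligned;
  // clang lowers the memcpy to a `load i32, align 2` in IR.
  int32_t load32(const uint16_t *p) {
    int32_t v;
    std::memcpy(&v, p, sizeof(v));
    return v;
  }
  // With +ual (default for generic-la64): a single ld.w.
  // With -ual: two half-word loads combined with slli.d and or.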

The clang options `-m[no-]unaligned-access` are moved from
`m_arm_Features_Group` to `m_Group` because more than one target now
uses them, and a test is added to show that they remain unused on a
target that does not support them. In addition, to stay compatible
with GCC, a new alias `-mno-strict-align` is added, which is
equivalent to `-munaligned-access`.
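
For example (illustrative command lines, echoing the RUN lines of the
new driver tests below; the cc1 feature strings match those tests):

  clang --target=loongarch64 -mno-unaligned-access c.c  # cc1: "-target-feature" "-ual"
  clang --target=loongarch64 -mno-strict-align c.c      # cc1: "-target-feature" "+ual"
  clang --target=x86_64 -mstrict-align c.c              # warning: argument unused during compilation

When a flag and its negation are both given, the last one on the
command line wins.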

The feature name `ual` is consistent with the Linux kernel [1] and
with the output of `lscpu` and `/proc/cpuinfo` [2].

There is also an `LLT` variant of `allowsMisalignedMemoryAccesses`,
but it currently appears to be used only by GlobalISel, which
LoongArch does not support yet, so that variant is not implemented in
this patch.
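
If GlobalISel support is added later, that variant could presumably
be overridden in the same way. A sketch only, mirroring the EVT
implementation in this patch (deliberately not part of this commit):

  // Hypothetical future LLT override, not included in this patch.
  bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
      LLT Ty, unsigned AddrSpace, Align Alignment,
      MachineMemOperand::Flags Flags, unsigned *Fast) const {
    if (!Subtarget.hasUAL())
      return false;
    if (Fast)
      *Fast = 1; // TODO: same placeholder speed number as the EVT variant.
    return true;
  }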

[1]: https://github.com/torvalds/linux/blob/master/arch/loongarch/include/asm/cpu.h#L77
[2]: https://github.com/torvalds/linux/blob/master/arch/loongarch/kernel/proc.c#L75

Reviewed By: xen0n

Differential Revision: https://reviews.llvm.org/D149946
commit 47601815ec (parent 8dd28c5682)
Author: Weining Lu
Date:   2023-06-07 11:20:30 +08:00

21 changed files with 314 additions and 82 deletions


@@ -3697,12 +3697,14 @@ def mrvv_vector_bits_EQ : Joined<["-"], "mrvv-vector-bits=">, Group<m_Group>,
            "to use the value implied by -march/-mcpu. Value will be reflected "
            "in __riscv_v_fixed_vlen preprocessor define (RISC-V only)">;
-def munaligned_access : Flag<["-"], "munaligned-access">, Group<m_arm_Features_Group>,
-  HelpText<"Allow memory accesses to be unaligned (AArch32/AArch64 only)">;
-def mno_unaligned_access : Flag<["-"], "mno-unaligned-access">, Group<m_arm_Features_Group>,
-  HelpText<"Force all memory accesses to be aligned (AArch32/AArch64 only)">;
+def munaligned_access : Flag<["-"], "munaligned-access">, Group<m_Group>,
+  HelpText<"Allow memory accesses to be unaligned (AArch32/AArch64/LoongArch only)">;
+def mno_unaligned_access : Flag<["-"], "mno-unaligned-access">, Group<m_Group>,
+  HelpText<"Force all memory accesses to be aligned (AArch32/AArch64/LoongArch only)">;
 def mstrict_align : Flag<["-"], "mstrict-align">, Alias<mno_unaligned_access>, Flags<[CC1Option,HelpHidden]>,
   HelpText<"Force all memory accesses to be aligned (same as mno-unaligned-access)">;
+def mno_strict_align : Flag<["-"], "mno-strict-align">, Alias<munaligned_access>, Flags<[CC1Option,HelpHidden]>,
+  HelpText<"Allow memory accesses to be unaligned (same as munaligned-access)">;
 def mno_thumb : Flag<["-"], "mno-thumb">, Group<m_arm_Features_Group>;
 def mrestrict_it: Flag<["-"], "mrestrict-it">, Group<m_arm_Features_Group>,
   HelpText<"Disallow generation of complex IT blocks.">;


@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//

 #include "LoongArch.h"
 #include "ToolChains/CommonArgs.h"
+#include "clang/Basic/DiagnosticDriver.h"
 #include "clang/Driver/Driver.h"
 #include "clang/Driver/DriverDiagnostic.h"
@@ -133,4 +134,9 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D,
       D.Diag(diag::err_drv_loongarch_invalid_mfpu_EQ) << FPU;
     }
   }
+
+  // Select the `ual` feature determined by -m[no-]unaligned-access
+  // or the alias -m[no-]strict-align.
+  AddTargetFeature(Args, Features, options::OPT_munaligned_access,
+                   options::OPT_mno_unaligned_access, "ual");
 }


@@ -2,7 +2,7 @@
 // RUN: %clang --target=loongarch64 -S -emit-llvm %s -o - | FileCheck %s --check-prefix=LA64

 // LA32: "target-features"="+32bit"
-// LA64: "target-features"="+64bit,+d,+f"
+// LA64: "target-features"="+64bit,+d,+f,+ual"

 int foo(void) {
   return 3;

@@ -8,17 +8,17 @@
 // RUN:   FileCheck %s --check-prefix=IR-LA464

 // CC1-LOONGARCH64-NOT: "-target-feature"
-// CC1-LOONGARCH64: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d"
+// CC1-LOONGARCH64: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+ual"
 // CC1-LOONGARCH64-NOT: "-target-feature"
 // CC1-LOONGARCH64: "-target-abi" "lp64d"

 // CC1-LA464-NOT: "-target-feature"
-// CC1-LA464: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+lasx"
+// CC1-LA464: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+lasx" "-target-feature" "+ual"
 // CC1-LA464-NOT: "-target-feature"
 // CC1-LA464: "-target-abi" "lp64d"

-// IR-LOONGARCH64: attributes #[[#]] ={{.*}}"target-features"="+64bit,+d,+f"
-// IR-LA464: attributes #[[#]] ={{.*}}"target-features"="+64bit,+d,+f,+lasx,+lsx"
+// IR-LOONGARCH64: attributes #[[#]] ={{.*}}"target-features"="+64bit,+d,+f,+ual"
+// IR-LA464: attributes #[[#]] ={{.*}}"target-features"="+64bit,+d,+f,+lasx,+lsx,+ual"

 int foo(void) {
   return 3;


@@ -8,12 +8,10 @@
 // WARN: warning: argument unused during compilation: '-mfpu=0'
 // WARN: warning: argument unused during compilation: '-mabi=lp64s'

-// CC1-NOT: "-target-feature"
-// CC1: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d"
-// CC1-NOT: "-target-feature"
+// CC1: "-target-feature" "+f"{{.*}} "-target-feature" "+d"
 // CC1: "-target-abi" "lp64d"

-// IR: attributes #[[#]] ={{.*}}"target-features"="+64bit,+d,+f"
+// IR: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+d,{{(.*,)?}}+f{{(,.*)?}}"

 int foo(void) {
   return 3;


@@ -16,24 +16,18 @@
 // RUN: %clang --target=loongarch64 -mfpu=none -S -emit-llvm %s -o - | \
 // RUN:   FileCheck %s --check-prefix=IR-FPU0

-// CC1-FPU64-NOT: "-target-feature"
-// CC1-FPU64: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d"
-// CC1-FPU64-NOT: "-target-feature"
+// CC1-FPU64: "-target-feature" "+f"{{.*}} "-target-feature" "+d"
 // CC1-FPU64: "-target-abi" "lp64d"

-// CC1-FPU32-NOT: "-target-feature"
-// CC1-FPU32: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "-d"
-// CC1-FPU32-NOT: "-target-feature"
+// CC1-FPU32: "-target-feature" "+f"{{.*}} "-target-feature" "-d"
 // CC1-FPU32: "-target-abi" "lp64f"

-// CC1-FPU0-NOT: "-target-feature"
-// CC1-FPU0: "-target-feature" "+64bit" "-target-feature" "-f" "-target-feature" "-d"
-// CC1-FPU0-NOT: "-target-feature"
+// CC1-FPU0: "-target-feature" "-f"{{.*}} "-target-feature" "-d"
 // CC1-FPU0: "-target-abi" "lp64s"

-// IR-FPU64: attributes #[[#]] ={{.*}}"target-features"="+64bit,+d,+f"
-// IR-FPU32: attributes #[[#]] ={{.*}}"target-features"="+64bit,+f,-d"
-// IR-FPU0: attributes #[[#]] ={{.*}}"target-features"="+64bit,-d,-f"
+// IR-FPU64: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+d,{{(.*,)?}}+f{{(,.*)?}}"
+// IR-FPU32: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+f,{{(.*,)?}}-d{{(,.*)?}}"
+// IR-FPU0: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-d,{{(.*,)?}}-f{{(,.*)?}}"

 int foo(void) {
   return 3;


@@ -8,12 +8,10 @@
 // WARN: warning: argument unused during compilation: '-mfpu=0'
 // WARN: warning: argument unused during compilation: '-mabi=lp64s'

-// CC1-NOT: "-target-feature"
-// CC1: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "-d"
-// CC1-NOT: "-target-feature"
+// CC1: "-target-feature" "+f"{{.*}} "-target-feature" "-d"
 // CC1: "-target-abi" "lp64f"

-// IR: attributes #[[#]] ={{.*}}"target-features"="+64bit,+f,-d"
+// IR: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+f,{{(.*,)?}}-d"

 int foo(void) {
   return 3;


@@ -8,12 +8,10 @@
 // WARN: warning: argument unused during compilation: '-mfpu=64'
 // WARN: warning: argument unused during compilation: '-mabi=lp64d'

-// CC1-NOT: "-target-feature"
-// CC1: "-target-feature" "+64bit" "-target-feature" "-f" "-target-feature" "-d"
-// CC1-NOT: "-target-feature"
+// CC1: "-target-feature" "-f"{{.*}} "-target-feature" "-d"
 // CC1: "-target-abi" "lp64s"

-// IR: attributes #[[#]] ={{.*}}"target-features"="+64bit,-d,-f"
+// IR: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-d,{{(.*,)?}}-f{{(,.*)?}}"

 int foo(void) {
   return 3;


@@ -0,0 +1,61 @@
/// Test -m[no-]unaligned-access and -m[no-]strict-align options.
// RUN: %clang --target=loongarch64 -munaligned-access -fsyntax-only %s -### 2>&1 | \
// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED
// RUN: %clang --target=loongarch64 -mno-unaligned-access -fsyntax-only %s -### 2>&1 | \
// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED
// RUN: %clang --target=loongarch64 -mstrict-align -fsyntax-only %s -### 2>&1 | \
// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED
// RUN: %clang --target=loongarch64 -mno-strict-align -fsyntax-only %s -### 2>&1 | \
// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED
// RUN: %clang --target=loongarch64 -munaligned-access -mno-unaligned-access -fsyntax-only %s -### 2>&1 | \
// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED
// RUN: %clang --target=loongarch64 -mno-unaligned-access -munaligned-access -fsyntax-only %s -### 2>&1 | \
// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED
// RUN: %clang --target=loongarch64 -mstrict-align -mno-strict-align -fsyntax-only %s -### 2>&1 | \
// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED
// RUN: %clang --target=loongarch64 -mno-strict-align -mstrict-align -fsyntax-only %s -### 2>&1 | \
// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED
// RUN: %clang --target=loongarch64 -munaligned-access -mstrict-align -fsyntax-only %s -### 2>&1 | \
// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED
// RUN: %clang --target=loongarch64 -mstrict-align -munaligned-access -fsyntax-only %s -### 2>&1 | \
// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED
// RUN: %clang --target=loongarch64 -mno-unaligned-access -mno-strict-align -fsyntax-only %s -### 2>&1 | \
// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED
// RUN: %clang --target=loongarch64 -mno-strict-align -mno-unaligned-access -fsyntax-only %s -### 2>&1 | \
// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED
// RUN: %clang --target=loongarch64 -munaligned-access -S -emit-llvm %s -o - | \
// RUN: FileCheck %s --check-prefix=IR-UNALIGNED
// RUN: %clang --target=loongarch64 -mno-unaligned-access -S -emit-llvm %s -o - | \
// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED
// RUN: %clang --target=loongarch64 -mstrict-align -S -emit-llvm %s -o - | \
// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED
// RUN: %clang --target=loongarch64 -mno-strict-align -S -emit-llvm %s -o - | \
// RUN: FileCheck %s --check-prefix=IR-UNALIGNED
// RUN: %clang --target=loongarch64 -munaligned-access -mno-unaligned-access -S -emit-llvm %s -o - | \
// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED
// RUN: %clang --target=loongarch64 -mno-unaligned-access -munaligned-access -S -emit-llvm %s -o - | \
// RUN: FileCheck %s --check-prefix=IR-UNALIGNED
// RUN: %clang --target=loongarch64 -mstrict-align -mno-strict-align -S -emit-llvm %s -o - | \
// RUN: FileCheck %s --check-prefix=IR-UNALIGNED
// RUN: %clang --target=loongarch64 -mno-strict-align -mstrict-align -S -emit-llvm %s -o - | \
// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED
// RUN: %clang --target=loongarch64 -munaligned-access -mstrict-align -S -emit-llvm %s -o - | \
// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED
// RUN: %clang --target=loongarch64 -mstrict-align -munaligned-access -S -emit-llvm %s -o - | \
// RUN: FileCheck %s --check-prefix=IR-UNALIGNED
// RUN: %clang --target=loongarch64 -mno-unaligned-access -mno-strict-align -S -emit-llvm %s -o - | \
// RUN: FileCheck %s --check-prefix=IR-UNALIGNED
// RUN: %clang --target=loongarch64 -mno-strict-align -mno-unaligned-access -S -emit-llvm %s -o - | \
// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED
// CC1-UNALIGNED: "-target-feature" "+ual"
// CC1-NO-UNALIGNED: "-target-feature" "-ual"
// IR-UNALIGNED: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+ual{{(,.*)?}}"
// IR-NO-UNALIGNED: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-ual{{(,.*)?}}"
int foo(void) {
return 3;
}


@@ -0,0 +1,8 @@
/// Check -m[no-]unaligned-access and -m[no-]strict-align are warned unused on a target that does not support them.
// RUN: %clang --target=x86_64 -munaligned-access -fsyntax-only %s -### 2>&1 | FileCheck %s -DOPTION=unaligned-access
// RUN: %clang --target=x86_64 -mno-unaligned-access -fsyntax-only %s -### 2>&1 | FileCheck %s -DOPTION=no-unaligned-access
// RUN: %clang --target=x86_64 -mstrict-align -fsyntax-only %s -### 2>&1 | FileCheck %s -DOPTION=strict-align
// RUN: %clang --target=x86_64 -mno-strict-align -fsyntax-only %s -### 2>&1 | FileCheck %s -DOPTION=no-strict-align
// CHECK: clang: warning: argument unused during compilation: '-m[[OPTION]]' [-Wunused-command-line-argument]


@@ -11,6 +11,7 @@ LOONGARCH_FEATURE("+lsx", FK_LSX)
 LOONGARCH_FEATURE("+lasx", FK_LASX)
 LOONGARCH_FEATURE("+lbt", FK_LBT)
 LOONGARCH_FEATURE("+lvz", FK_LVZ)
+LOONGARCH_FEATURE("+ual", FK_UAL)

 #undef LOONGARCH_FEATURE

@@ -19,7 +20,7 @@ LOONGARCH_FEATURE("+lvz", FK_LVZ)
 #endif

 LOONGARCH_ARCH("invalid", AK_INVALID, FK_INVALID)
-LOONGARCH_ARCH("loongarch64", AK_LOONGARCH64, FK_64BIT | FK_FP32 | FK_FP64)
-LOONGARCH_ARCH("la464", AK_LA464, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX)
+LOONGARCH_ARCH("loongarch64", AK_LOONGARCH64, FK_64BIT | FK_FP32 | FK_FP64 | FK_UAL)
+LOONGARCH_ARCH("la464", AK_LA464, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL)

 #undef LOONGARCH_ARCH


@@ -46,6 +46,9 @@ enum FeatureKind : uint32_t {

   // Loongson Virtualization Extension is available.
   FK_LVZ = 1 << 7,
+
+  // Allow memory accesses to be unaligned.
+  FK_UAL = 1 << 8,
 };

 struct FeatureInfo {


@@ -115,6 +115,11 @@ def HasLaLocalWithAbs
       AssemblerPredicate<(all_of LaLocalWithAbs),
                          "Expand la.local as la.abs">;

+// Unaligned memory access
+def FeatureUAL
+    : SubtargetFeature<"ual", "HasUAL", "true",
+                       "Allow memory accesses to be unaligned">;
+
 //===----------------------------------------------------------------------===//
 // Registers, instruction descriptions ...
 //===----------------------------------------------------------------------===//
@@ -128,13 +133,14 @@ include "LoongArchInstrInfo.td"
 //===----------------------------------------------------------------------===//

 def : ProcessorModel<"generic-la32", NoSchedModel, [Feature32Bit]>;
-def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit]>;
+def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit, FeatureUAL]>;

 // Support generic for compatibility with other targets. The triple will be used
 // to change to the appropriate la32/la64 version.
 def : ProcessorModel<"generic", NoSchedModel, []>;

 def : ProcessorModel<"la464", NoSchedModel, [Feature64Bit,
+                                             FeatureUAL,
                                              FeatureExtLASX,
                                              FeatureExtLVZ,
                                              FeatureExtLBT]>;


@@ -1785,6 +1785,18 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
   }
 }

+bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
+    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
+    unsigned *Fast) const {
+  if (!Subtarget.hasUAL())
+    return false;
+
+  // TODO: set reasonable speed number.
+  if (Fast)
+    *Fast = 1;
+  return true;
+}
+
 const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch ((LoongArchISD::NodeType)Opcode) {
   case LoongArchISD::FIRST_NUMBER:


@@ -191,6 +191,11 @@ public:

   bool convertSelectOfConstantsToMath(EVT VT) const override { return true; }

+  bool allowsMisalignedMemoryAccesses(
+      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
+      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+      unsigned *Fast = nullptr) const override;
+
 private:
   /// Target-specific function used to lower LoongArch calling conventions.
   typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI,


@@ -42,6 +42,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
   bool HasLaGlobalWithPcrel = false;
   bool HasLaGlobalWithAbs = false;
   bool HasLaLocalWithAbs = false;
+  bool HasUAL = false;
   unsigned GRLen = 32;
   MVT GRLenVT = MVT::i32;
   LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown;
@@ -91,6 +92,7 @@ public:
   bool hasLaGlobalWithPcrel() const { return HasLaGlobalWithPcrel; }
   bool hasLaGlobalWithAbs() const { return HasLaGlobalWithAbs; }
   bool hasLaLocalWithAbs() const { return HasLaLocalWithAbs; }
+  bool hasUAL() const { return HasUAL; }
   MVT getGRLenVT() const { return GRLenVT; }
   unsigned getGRLen() const { return GRLen; }
   LoongArchABI::ABI getTargetABI() const { return TargetABI; }


@@ -360,17 +360,13 @@ define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result
 ; CHECK-LABEL: callee_large_struct_ret:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    ori $a1, $zero, 4
-; CHECK-NEXT:    st.w $a1, $a0, 24
+; CHECK-NEXT:    st.d $a1, $a0, 24
 ; CHECK-NEXT:    ori $a1, $zero, 3
-; CHECK-NEXT:    st.w $a1, $a0, 16
+; CHECK-NEXT:    st.d $a1, $a0, 16
 ; CHECK-NEXT:    ori $a1, $zero, 2
-; CHECK-NEXT:    st.w $a1, $a0, 8
-; CHECK-NEXT:    st.w $zero, $a0, 28
-; CHECK-NEXT:    st.w $zero, $a0, 20
-; CHECK-NEXT:    st.w $zero, $a0, 12
-; CHECK-NEXT:    st.w $zero, $a0, 4
+; CHECK-NEXT:    st.d $a1, $a0, 8
 ; CHECK-NEXT:    ori $a1, $zero, 1
-; CHECK-NEXT:    st.w $a1, $a0, 0
+; CHECK-NEXT:    st.d $a1, $a0, 0
 ; CHECK-NEXT:    ret
   %a = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 0
   store i64 1, ptr %a, align 4


@@ -315,10 +315,7 @@ define double @double_fadd_acquire(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $s2, $sp, 24 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s3, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.wu $a0, $a0, 0
-; LA64F-NEXT:    ld.wu $a1, $fp, 4
-; LA64F-NEXT:    slli.d $a1, $a1, 32
-; LA64F-NEXT:    or $a0, $a1, $a0
+; LA64F-NEXT:    ld.d $a0, $a0, 0
 ; LA64F-NEXT:    ori $s0, $zero, 8
 ; LA64F-NEXT:    addi.d $s1, $sp, 8
 ; LA64F-NEXT:    addi.d $s2, $sp, 0
@@ -360,11 +357,7 @@ define double @double_fadd_acquire(ptr %p) nounwind {
 ; LA64D-NEXT:    st.d $s3, $sp, 32 # 8-byte Folded Spill
 ; LA64D-NEXT:    fst.d $fs0, $sp, 24 # 8-byte Folded Spill
 ; LA64D-NEXT:    move $fp, $a0
-; LA64D-NEXT:    ld.wu $a0, $a0, 0
-; LA64D-NEXT:    ld.wu $a1, $fp, 4
-; LA64D-NEXT:    slli.d $a1, $a1, 32
-; LA64D-NEXT:    or $a0, $a1, $a0
-; LA64D-NEXT:    movgr2fr.d $fa0, $a0
+; LA64D-NEXT:    fld.d $fa0, $a0, 0
 ; LA64D-NEXT:    addi.d $a0, $zero, 1
 ; LA64D-NEXT:    movgr2fr.d $fs0, $a0
 ; LA64D-NEXT:    ori $s0, $zero, 8
@@ -411,10 +404,7 @@ define double @double_fsub_acquire(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $s2, $sp, 24 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s3, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.wu $a0, $a0, 0
-; LA64F-NEXT:    ld.wu $a1, $fp, 4
-; LA64F-NEXT:    slli.d $a1, $a1, 32
-; LA64F-NEXT:    or $a0, $a1, $a0
+; LA64F-NEXT:    ld.d $a0, $a0, 0
 ; LA64F-NEXT:    ori $s0, $zero, 8
 ; LA64F-NEXT:    addi.d $s1, $sp, 8
 ; LA64F-NEXT:    addi.d $s2, $sp, 0
@@ -456,11 +446,7 @@ define double @double_fsub_acquire(ptr %p) nounwind {
 ; LA64D-NEXT:    st.d $s3, $sp, 32 # 8-byte Folded Spill
 ; LA64D-NEXT:    fst.d $fs0, $sp, 24 # 8-byte Folded Spill
 ; LA64D-NEXT:    move $fp, $a0
-; LA64D-NEXT:    ld.wu $a0, $a0, 0
-; LA64D-NEXT:    ld.wu $a1, $fp, 4
-; LA64D-NEXT:    slli.d $a1, $a1, 32
-; LA64D-NEXT:    or $a0, $a1, $a0
-; LA64D-NEXT:    movgr2fr.d $fa0, $a0
+; LA64D-NEXT:    fld.d $fa0, $a0, 0
 ; LA64D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI5_0)
 ; LA64D-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI5_0)
 ; LA64D-NEXT:    fld.d $fs0, $a0, 0
@@ -507,10 +493,7 @@ define double @double_fmin_acquire(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $s2, $sp, 24 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s3, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.wu $a0, $a0, 0
-; LA64F-NEXT:    ld.wu $a1, $fp, 4
-; LA64F-NEXT:    slli.d $a1, $a1, 32
-; LA64F-NEXT:    or $a0, $a1, $a0
+; LA64F-NEXT:    ld.d $a0, $a0, 0
 ; LA64F-NEXT:    ori $s0, $zero, 8
 ; LA64F-NEXT:    addi.d $s1, $sp, 8
 ; LA64F-NEXT:    addi.d $s2, $sp, 0
@@ -552,11 +535,7 @@ define double @double_fmin_acquire(ptr %p) nounwind {
 ; LA64D-NEXT:    st.d $s3, $sp, 32 # 8-byte Folded Spill
 ; LA64D-NEXT:    fst.d $fs0, $sp, 24 # 8-byte Folded Spill
 ; LA64D-NEXT:    move $fp, $a0
-; LA64D-NEXT:    ld.wu $a0, $a0, 0
-; LA64D-NEXT:    ld.wu $a1, $fp, 4
-; LA64D-NEXT:    slli.d $a1, $a1, 32
-; LA64D-NEXT:    or $a0, $a1, $a0
-; LA64D-NEXT:    movgr2fr.d $fa0, $a0
+; LA64D-NEXT:    fld.d $fa0, $a0, 0
 ; LA64D-NEXT:    addi.d $a0, $zero, 1
 ; LA64D-NEXT:    movgr2fr.d $fs0, $a0
 ; LA64D-NEXT:    ori $s0, $zero, 8
@@ -604,10 +583,7 @@ define double @double_fmax_acquire(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $s2, $sp, 24 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s3, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.wu $a0, $a0, 0
-; LA64F-NEXT:    ld.wu $a1, $fp, 4
-; LA64F-NEXT:    slli.d $a1, $a1, 32
-; LA64F-NEXT:    or $a0, $a1, $a0
+; LA64F-NEXT:    ld.d $a0, $a0, 0
 ; LA64F-NEXT:    ori $s0, $zero, 8
 ; LA64F-NEXT:    addi.d $s1, $sp, 8
 ; LA64F-NEXT:    addi.d $s2, $sp, 0
@@ -649,11 +625,7 @@ define double @double_fmax_acquire(ptr %p) nounwind {
 ; LA64D-NEXT:    st.d $s3, $sp, 32 # 8-byte Folded Spill
 ; LA64D-NEXT:    fst.d $fs0, $sp, 24 # 8-byte Folded Spill
 ; LA64D-NEXT:    move $fp, $a0
-; LA64D-NEXT:    ld.wu $a0, $a0, 0
-; LA64D-NEXT:    ld.wu $a1, $fp, 4
-; LA64D-NEXT:    slli.d $a1, $a1, 32
-; LA64D-NEXT:    or $a0, $a1, $a0
-; LA64D-NEXT:    movgr2fr.d $fa0, $a0
+; LA64D-NEXT:    fld.d $fa0, $a0, 0
 ; LA64D-NEXT:    addi.d $a0, $zero, 1
 ; LA64D-NEXT:    movgr2fr.d $fs0, $a0
 ; LA64D-NEXT:    ori $s0, $zero, 8


@@ -13,6 +13,7 @@ entry:
 }

 ;; Perform tail call optimization for external symbol.
+;; Bytes copied should be large enough, otherwise the memcpy call would be optimized to multiple ld/st insns.
 @dest = global [2 x i8] zeroinitializer
 declare void @llvm.memcpy.p0i8.p0i8.i32(ptr, ptr, i32, i1)
 define void @caller_extern(ptr %src) optsize {
@@ -21,10 +22,10 @@ define void @caller_extern(ptr %src) optsize {
 ; CHECK-NEXT:    move $a1, $a0
 ; CHECK-NEXT:    pcalau12i $a0, %got_pc_hi20(dest)
 ; CHECK-NEXT:    ld.d $a0, $a0, %got_pc_lo12(dest)
-; CHECK-NEXT:    ori $a2, $zero, 7
+; CHECK-NEXT:    ori $a2, $zero, 33
 ; CHECK-NEXT:    b %plt(memcpy)
 entry:
-  tail call void @llvm.memcpy.p0i8.p0i8.i32(ptr getelementptr inbounds ([2 x i8], ptr @dest, i32 0, i32 0), ptr %src, i32 7, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(ptr getelementptr inbounds ([2 x i8], ptr @dest, i32 0, i32 0), ptr %src, i32 33, i1 false)
   ret void
 }


@@ -0,0 +1,72 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
;; Test the ual feature which is similar to AArch64/arm64-strict-align.ll.
; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32-ALIGNED
; RUN: llc --mtriple=loongarch32 --mattr=+ual < %s | FileCheck %s --check-prefix=LA32-UNALIGNED
; RUN: llc --mtriple=loongarch32 --mattr=-ual < %s | FileCheck %s --check-prefix=LA32-ALIGNED
; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64-UNALIGNED
; RUN: llc --mtriple=loongarch64 --mattr=+ual < %s | FileCheck %s --check-prefix=LA64-UNALIGNED
; RUN: llc --mtriple=loongarch64 --mattr=-ual < %s | FileCheck %s --check-prefix=LA64-ALIGNED
define i32 @f0(ptr %p) nounwind {
; LA32-ALIGNED-LABEL: f0:
; LA32-ALIGNED: # %bb.0:
; LA32-ALIGNED-NEXT: ld.hu $a1, $a0, 0
; LA32-ALIGNED-NEXT: ld.hu $a0, $a0, 2
; LA32-ALIGNED-NEXT: slli.w $a0, $a0, 16
; LA32-ALIGNED-NEXT: or $a0, $a0, $a1
; LA32-ALIGNED-NEXT: ret
;
; LA32-UNALIGNED-LABEL: f0:
; LA32-UNALIGNED: # %bb.0:
; LA32-UNALIGNED-NEXT: ld.w $a0, $a0, 0
; LA32-UNALIGNED-NEXT: ret
;
; LA64-UNALIGNED-LABEL: f0:
; LA64-UNALIGNED: # %bb.0:
; LA64-UNALIGNED-NEXT: ld.w $a0, $a0, 0
; LA64-UNALIGNED-NEXT: ret
;
; LA64-ALIGNED-LABEL: f0:
; LA64-ALIGNED: # %bb.0:
; LA64-ALIGNED-NEXT: ld.hu $a1, $a0, 0
; LA64-ALIGNED-NEXT: ld.h $a0, $a0, 2
; LA64-ALIGNED-NEXT: slli.d $a0, $a0, 16
; LA64-ALIGNED-NEXT: or $a0, $a0, $a1
; LA64-ALIGNED-NEXT: ret
%tmp = load i32, ptr %p, align 2
ret i32 %tmp
}
define i64 @f1(ptr %p) nounwind {
; LA32-ALIGNED-LABEL: f1:
; LA32-ALIGNED: # %bb.0:
; LA32-ALIGNED-NEXT: ld.w $a2, $a0, 0
; LA32-ALIGNED-NEXT: ld.w $a1, $a0, 4
; LA32-ALIGNED-NEXT: move $a0, $a2
; LA32-ALIGNED-NEXT: ret
;
; LA32-UNALIGNED-LABEL: f1:
; LA32-UNALIGNED: # %bb.0:
; LA32-UNALIGNED-NEXT: ld.w $a2, $a0, 0
; LA32-UNALIGNED-NEXT: ld.w $a1, $a0, 4
; LA32-UNALIGNED-NEXT: move $a0, $a2
; LA32-UNALIGNED-NEXT: ret
;
; LA64-UNALIGNED-LABEL: f1:
; LA64-UNALIGNED: # %bb.0:
; LA64-UNALIGNED-NEXT: ld.d $a0, $a0, 0
; LA64-UNALIGNED-NEXT: ret
;
; LA64-ALIGNED-LABEL: f1:
; LA64-ALIGNED: # %bb.0:
; LA64-ALIGNED-NEXT: ld.wu $a1, $a0, 0
; LA64-ALIGNED-NEXT: ld.wu $a0, $a0, 4
; LA64-ALIGNED-NEXT: slli.d $a0, $a0, 32
; LA64-ALIGNED-NEXT: or $a0, $a0, $a1
; LA64-ALIGNED-NEXT: ret
%tmp = load i64, ptr %p, align 4
ret i64 %tmp
}


@@ -0,0 +1,97 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
;; Test how memcpy is optimized when ual is turned off which is similar to AArch64/arm64-misaligned-memcpy-inline.ll.
; RUN: llc --mtriple=loongarch32 --mattr=-ual < %s | FileCheck %s --check-prefix=LA32
; RUN: llc --mtriple=loongarch64 --mattr=-ual < %s | FileCheck %s --check-prefix=LA64
;; Small (16 bytes here) unaligned memcpy() should be a function call if
;; ual is turned off.
define void @t0(ptr %out, ptr %in) {
; LA32-LABEL: t0:
; LA32: # %bb.0: # %entry
; LA32-NEXT: addi.w $sp, $sp, -16
; LA32-NEXT: .cfi_def_cfa_offset 16
; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT: .cfi_offset 1, -4
; LA32-NEXT: ori $a2, $zero, 16
; LA32-NEXT: bl %plt(memcpy)
; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT: addi.w $sp, $sp, 16
; LA32-NEXT: ret
;
; LA64-LABEL: t0:
; LA64: # %bb.0: # %entry
; LA64-NEXT: addi.d $sp, $sp, -16
; LA64-NEXT: .cfi_def_cfa_offset 16
; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT: .cfi_offset 1, -8
; LA64-NEXT: ori $a2, $zero, 16
; LA64-NEXT: bl %plt(memcpy)
; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT: addi.d $sp, $sp, 16
; LA64-NEXT: ret
entry:
call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %in, i64 16, i1 false)
ret void
}
;; Small (16 bytes here) aligned memcpy() should be inlined even if
;; ual is turned off.
define void @t1(ptr align 8 %out, ptr align 8 %in) {
; LA32-LABEL: t1:
; LA32: # %bb.0: # %entry
; LA32-NEXT: ld.w $a2, $a1, 12
; LA32-NEXT: st.w $a2, $a0, 12
; LA32-NEXT: ld.w $a2, $a1, 8
; LA32-NEXT: st.w $a2, $a0, 8
; LA32-NEXT: ld.w $a2, $a1, 4
; LA32-NEXT: st.w $a2, $a0, 4
; LA32-NEXT: ld.w $a1, $a1, 0
; LA32-NEXT: st.w $a1, $a0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: t1:
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a2, $a1, 8
; LA64-NEXT: st.d $a2, $a0, 8
; LA64-NEXT: ld.d $a1, $a1, 0
; LA64-NEXT: st.d $a1, $a0, 0
; LA64-NEXT: ret
entry:
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %out, ptr align 8 %in, i64 16, i1 false)
ret void
}
;; Tiny (4 bytes here) unaligned memcpy() should be inlined with byte sized
;; loads and stores if ual is turned off.
define void @t2(ptr %out, ptr %in) {
; LA32-LABEL: t2:
; LA32: # %bb.0: # %entry
; LA32-NEXT: ld.b $a2, $a1, 3
; LA32-NEXT: st.b $a2, $a0, 3
; LA32-NEXT: ld.b $a2, $a1, 2
; LA32-NEXT: st.b $a2, $a0, 2
; LA32-NEXT: ld.b $a2, $a1, 1
; LA32-NEXT: st.b $a2, $a0, 1
; LA32-NEXT: ld.b $a1, $a1, 0
; LA32-NEXT: st.b $a1, $a0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: t2:
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.b $a2, $a1, 3
; LA64-NEXT: st.b $a2, $a0, 3
; LA64-NEXT: ld.b $a2, $a1, 2
; LA64-NEXT: st.b $a2, $a0, 2
; LA64-NEXT: ld.b $a2, $a1, 1
; LA64-NEXT: st.b $a2, $a0, 1
; LA64-NEXT: ld.b $a1, $a1, 0
; LA64-NEXT: st.b $a1, $a0, 0
; LA64-NEXT: ret
entry:
call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %in, i64 4, i1 false)
ret void
}
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1)