mirror of
https://github.com/RPCS3/llvm.git
synced 2026-01-31 01:25:19 +01:00
Tim Northover remarked that the added patterns for fmls fp16
produce wrong code in case the fsub instruction has a
multiplication as its first operand, i.e., all the patterns FMLSv*_OP1:
> define <8 x half> @test_FMLSv8f16_OP1(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
> ; CHECK-LABEL: test_FMLSv8f16_OP1:
> ; CHECK: fmls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
> entry:
>
> %mul = fmul fast <8 x half> %c, %b
> %sub = fsub fast <8 x half> %mul, %a
> ret <8 x half> %sub
> }
>
> This doesn't look right to me. The exact instruction produced is "fmls
> v0.8h, v2.8h, v1.8h", which I think calculates "v0 - v2*v1", but the
> IR is calculating "v2*v1-v0". The equivalent <4 x float> code also
> doesn't emit an fmls.
This patch instead generates an fmla and negates the second operand of the fsub.
Inspecting the pattern match, I found that there was another mistake in the
opcode to be selected: matching FMULv4*16 should generate FMLSv4*16
and not FMLSv2*32.
Tested on aarch64-linux with make check-all.
Differential Revision: https://reviews.llvm.org/D67990
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@374044 91177308-0d34-0410-b5e6-96231b3b80d8
108 lines
2.5 KiB
C++
108 lines
2.5 KiB
C++
//===-- llvm/CodeGen/MachineCombinerPattern.h - Instruction pattern supported by
// combiner ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the instruction patterns supported by the combiner.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGEN_MACHINECOMBINERPATTERN_H
#define LLVM_CODEGEN_MACHINECOMBINERPATTERN_H

namespace llvm {

/// These are instruction patterns matched by the machine combiner pass.
///
/// No enumerator is given an explicit value, so each one's underlying value
/// is simply its position in this list (REASSOC_AX_BY == 0). The order below
/// is therefore preserved exactly as it was, including FMLAv2f32_OP2 being
/// listed ahead of FMLAv2f32_OP1.
///
/// NOTE(review): the _OP1/_OP2 suffix presumably names which operand of the
/// add/sub is the multiply -- confirm against the target's pattern matcher.
enum class MachineCombinerPattern {
  // These are commutative variants for reassociating a computation chain. See
  // the comments before getMachineCombinerPatterns() in TargetInstrInfo.cpp.
  REASSOC_AX_BY,
  REASSOC_AX_YB,
  REASSOC_XA_BY,
  REASSOC_XA_YB,

  // These are multiply-add patterns matched by the AArch64 machine combiner.
  MULADDW_OP1,
  MULADDW_OP2,
  MULSUBW_OP1,
  MULSUBW_OP2,
  MULADDWI_OP1,
  MULSUBWI_OP1,
  MULADDX_OP1,
  MULADDX_OP2,
  MULSUBX_OP1,
  MULSUBX_OP2,
  MULADDXI_OP1,
  MULSUBXI_OP1,

  // Floating Point
  FMULADDH_OP1,
  FMULADDH_OP2,
  FMULSUBH_OP1,
  FMULSUBH_OP2,
  FMULADDS_OP1,
  FMULADDS_OP2,
  FMULSUBS_OP1,
  FMULSUBS_OP2,
  FMULADDD_OP1,
  FMULADDD_OP2,
  FMULSUBD_OP1,
  FMULSUBD_OP2,
  FNMULSUBH_OP1,
  FNMULSUBS_OP1,
  FNMULSUBD_OP1,
  FMLAv1i32_indexed_OP1,
  FMLAv1i32_indexed_OP2,
  FMLAv1i64_indexed_OP1,
  FMLAv1i64_indexed_OP2,
  FMLAv4f16_OP1,
  FMLAv4f16_OP2,
  FMLAv8f16_OP1,
  FMLAv8f16_OP2,
  FMLAv2f32_OP2,
  FMLAv2f32_OP1,
  FMLAv2f64_OP1,
  FMLAv2f64_OP2,
  FMLAv4i16_indexed_OP1,
  FMLAv4i16_indexed_OP2,
  FMLAv8i16_indexed_OP1,
  FMLAv8i16_indexed_OP2,
  FMLAv2i32_indexed_OP1,
  FMLAv2i32_indexed_OP2,
  FMLAv2i64_indexed_OP1,
  FMLAv2i64_indexed_OP2,
  FMLAv4f32_OP1,
  FMLAv4f32_OP2,
  FMLAv4i32_indexed_OP1,
  FMLAv4i32_indexed_OP2,
  FMLSv1i32_indexed_OP2,
  FMLSv1i64_indexed_OP2,
  FMLSv4f16_OP1,
  FMLSv4f16_OP2,
  FMLSv8f16_OP1,
  FMLSv8f16_OP2,
  FMLSv2f32_OP1,
  FMLSv2f32_OP2,
  FMLSv2f64_OP1,
  FMLSv2f64_OP2,
  FMLSv4i16_indexed_OP1,
  FMLSv4i16_indexed_OP2,
  FMLSv8i16_indexed_OP1,
  FMLSv8i16_indexed_OP2,
  FMLSv2i32_indexed_OP1,
  FMLSv2i32_indexed_OP2,
  FMLSv2i64_indexed_OP1,
  FMLSv2i64_indexed_OP2,
  FMLSv4f32_OP1,
  FMLSv4f32_OP2,
  FMLSv4i32_indexed_OP1,
  FMLSv4i32_indexed_OP2
};

} // end namespace llvm

#endif