mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-26 23:21:11 +00:00
[AArch64][compiler-rt] Add memcpy, memset, memmove, memchr builtins. (#77496)
Add naive implementation of memcpy, memset, memmove, memchr for SME targets. Co-authored-by: David Sherwood <david.sherwood@arm.com>
This commit is contained in:
parent
13c6f1ea2e
commit
3112578597
@ -35,10 +35,12 @@ asm(\".arch armv8-a+lse\");
|
||||
asm(\"cas w0, w1, [x2]\");
|
||||
")
|
||||
|
||||
builtin_check_c_compiler_source(COMPILER_RT_HAS_ASM_SME
|
||||
builtin_check_c_compiler_source(COMPILER_RT_HAS_AARCH64_SME
|
||||
"
|
||||
asm(\".arch armv9-a+sme\");
|
||||
asm(\"smstart\");
|
||||
void foo(void) __arm_streaming_compatible {
|
||||
asm(\".arch armv9-a+sme\");
|
||||
asm(\"smstart\");
|
||||
}
|
||||
")
|
||||
|
||||
if(ANDROID)
|
||||
|
@ -560,9 +560,10 @@ set(aarch64_SOURCES
|
||||
aarch64/fp_mode.c
|
||||
)
|
||||
|
||||
if(COMPILER_RT_HAS_ASM_SME AND (COMPILER_RT_HAS_AUXV OR COMPILER_RT_BAREMETAL_BUILD))
|
||||
list(APPEND aarch64_SOURCES aarch64/sme-abi.S aarch64/sme-abi-init.c)
|
||||
if(COMPILER_RT_HAS_AARCH64_SME AND COMPILER_RT_HAS_FNO_BUILTIN_FLAG AND (COMPILER_RT_HAS_AUXV OR COMPILER_RT_BAREMETAL_BUILD))
|
||||
list(APPEND aarch64_SOURCES aarch64/sme-abi.S aarch64/sme-abi-init.c aarch64/sme-libc-routines.c)
|
||||
message(STATUS "AArch64 SME ABI routines enabled")
|
||||
set_source_files_properties(aarch64/sme-libc-routines.c PROPERTIES COMPILE_FLAGS "-fno-builtin")
|
||||
else()
|
||||
message(STATUS "AArch64 SME ABI routines disabled")
|
||||
endif()
|
||||
|
87
compiler-rt/lib/builtins/aarch64/sme-libc-routines.c
Normal file
87
compiler-rt/lib/builtins/aarch64/sme-libc-routines.c
Normal file
@ -0,0 +1,87 @@
|
||||
#include <stdint.h>
#include <stdlib.h>
|
||||
|
||||
// WARNING: When building the scalar versions of these functions you need to
|
||||
// use the compiler flag "-mllvm -disable-loop-idiom-all" to prevent clang
|
||||
// from recognising a loop idiom and planting calls to memcpy!
|
||||
|
||||
// Copies n bytes from src to dest one byte at a time, starting at the lowest
// address and working upwards. Returns dest.
static void *__arm_sc_memcpy_fwd(void *dest, const void *src,
                                 size_t n) __arm_streaming_compatible {
  unsigned char *out = dest;
  const unsigned char *in = src;

  // Count the remaining bytes down rather than indexing, so no pointer
  // arithmetic happens at all when n is zero.
  for (size_t remaining = n; remaining != 0; --remaining)
    *out++ = *in++;

  return dest;
}
|
||||
|
||||
// Copying between overlapping regions is undefined for memcpy, which is what
// allows both pointer parameters to be restrict-qualified. This mirrors the
// libc memcpy prototype shown in the man page. Returns dest.
void *__arm_sc_memcpy(void *__restrict__ dest, const void *__restrict__ src,
                      size_t n) __arm_streaming_compatible {
  // Without overlap the copy direction is irrelevant; use the forward copy.
  void *const copied_to = __arm_sc_memcpy_fwd(dest, src, n);
  return copied_to;
}
|
||||
|
||||
// Fills the first n bytes at dest with c converted to unsigned char.
// Returns dest.
void *__arm_sc_memset(void *dest, int c, size_t n) __arm_streaming_compatible {
  const unsigned char fill = (unsigned char)c;
  unsigned char *out = dest;

  // Count the remaining bytes down so that nothing is touched when n is zero.
  for (size_t remaining = n; remaining != 0; --remaining)
    *out++ = fill;

  return dest;
}
|
||||
|
||||
// Copies n bytes from src to dest one byte at a time, starting at the highest
// address and working downwards. Returns dest.
static void *__arm_sc_memcpy_rev(void *dest, const void *src,
                                 size_t n) __arm_streaming_compatible {
  unsigned char *out = dest;
  const unsigned char *in = src;

  // TODO: Improve performance by copying larger chunks in reverse, or by
  // using SVE.
  // idx is tested before it is decremented, so the body sees n-1 down to 0.
  for (size_t idx = n; idx-- > 0;)
    out[idx] = in[idx];

  return dest;
}
|
||||
|
||||
// Semantically a memmove is equivalent to the following:
//   1. Copy the entire contents of src to a temporary array that does not
//      overlap with src or dest.
//   2. Copy the contents of the temporary array into dest.
// No temporary is actually used; instead the copy direction is picked so that
// no source byte is overwritten before it has been read. Returns dest.
void *__arm_sc_memmove(void *dest, const void *src,
                       size_t n) __arm_streaming_compatible {
  // Distance from src up to dest, computed on integers. Relational comparison
  // of pointers into different objects is undefined in C, whereas unsigned
  // subtraction is always well defined. When dest is below src the result
  // wraps around to a value that is at least n for any valid pair of regions.
  const uintptr_t distance = (uintptr_t)dest - (uintptr_t)src;

  // Forward copy is safe in both of these situations:
  //   * dest is at a lower address than src (overlapping or not): each byte
  //     is read from src before the write cursor can reach it.
  //       dest: Low |------->|       High
  //       src:  Low     |------->|   High
  //   * dest starts n or more bytes above src: the regions do not overlap.
  if (distance >= n)
    return __arm_sc_memcpy_fwd(dest, src, n);

  // Otherwise dest starts inside [src, src + n):
  //       src:  Low |------->|       High
  //       dest: Low     |------->|   High
  // A forward copy would overwrite source bytes that have not been read yet,
  // so copy in reverse, starting at the highest address.
  return __arm_sc_memcpy_rev(dest, src, n);
}
|
||||
|
||||
// Scans the first n bytes at src, lowest address first, for c converted to
// unsigned char. Returns a pointer to the first matching byte, or NULL when
// none of the n bytes matches.
const void *__arm_sc_memchr(const void *src, int c,
                            size_t n) __arm_streaming_compatible {
  const unsigned char needle = (unsigned char)c;
  const unsigned char *cursor = src;

  for (size_t remaining = n; remaining != 0; --remaining, ++cursor) {
    if (*cursor == needle)
      return cursor;
  }

  return NULL;
}
|
@ -18,6 +18,8 @@ pythonize_bool(COMPILER_RT_BUILD_STANDALONE_LIBATOMIC)
|
||||
|
||||
pythonize_bool(COMPILER_RT_ENABLE_INTERNAL_SYMBOLIZER)
|
||||
|
||||
pythonize_bool(COMPILER_RT_HAS_AARCH64_SME)
|
||||
|
||||
configure_compiler_rt_lit_site_cfg(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/lit.common.configured.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/lit.common.configured)
|
||||
|
120
compiler-rt/test/builtins/Unit/sme-string-test.cpp
Normal file
120
compiler-rt/test/builtins/Unit/sme-string-test.cpp
Normal file
@ -0,0 +1,120 @@
|
||||
// REQUIRES: aarch64-target-arch, aarch64-sme-available
|
||||
// RUN: %clangxx_builtins %s %librt -o %t && %run %t
|
||||
|
||||
#include <cassert>
|
||||
#include <initializer_list>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// Prototypes for the routines under test, declared with C linkage.
extern "C" {
void *__arm_sc_memcpy(void *dest, const void *src, size_t n);
void *__arm_sc_memset(void *dest, int c, size_t n);
void *__arm_sc_memmove(void *dest, const void *src, size_t n);
void *__arm_sc_memchr(const void *src, int c, size_t n);
}
|
||||
|
||||
// Fixed-size byte buffer of N elements with assertion helpers for the tests.
template <unsigned N> class Memory {
public:
  uint8_t ptr[N];
  unsigned size;

  // With a non-zero stride, element i is set to i * stride (truncated to
  // uint8_t). With the default stride of 0 the bytes are left uninitialised.
  Memory(unsigned stride = 0) : size(N) {
    if (stride != 0) {
      for (unsigned idx = 0; idx != N; ++idx)
        ptr[idx] = idx * stride;
    }
  }

  // Asserts that both buffers have the same size and identical contents.
  void assert_equal(const Memory &other) {
    assert(N == other.size);
    assert(memcmp(ptr, other.ptr, N) == 0);
  }

  // Asserts that the buffer matches the listed bytes, element by element.
  void assert_equal(std::initializer_list<uint8_t> s) {
    assert(N == s.size());
    const uint8_t *expected = s.begin();
    for (unsigned idx = 0; idx < N; ++idx)
      assert(ptr[idx] == expected[idx]);
  }

  // Asserts that the byte stored at index I equals elem.
  void assert_elemt_equal_at(unsigned I, uint8_t elem) {
    assert(ptr[I] == elem);
  }
};
|
||||
|
||||
int main() {
|
||||
|
||||
// Testing memcpy from src to dst.
|
||||
{
|
||||
Memory<8> src(1);
|
||||
Memory<8> dst;
|
||||
if (!__arm_sc_memcpy(dst.ptr, src.ptr, 8))
|
||||
abort();
|
||||
dst.assert_equal(src);
|
||||
dst.assert_equal({0, 1, 2, 3, 4, 5, 6, 7});
|
||||
}
|
||||
|
||||
// Testing memcpy from src to dst with pointer offset.
|
||||
{
|
||||
Memory<8> src(1);
|
||||
Memory<8> dst(1);
|
||||
if (!__arm_sc_memcpy(dst.ptr + 1, src.ptr, 6))
|
||||
abort();
|
||||
dst.assert_equal({0, 0, 1, 2, 3, 4, 5, 7});
|
||||
}
|
||||
|
||||
// Testing memchr.
|
||||
{
|
||||
Memory<8> src(4);
|
||||
for (unsigned i = 0; i < 8; ++i) {
|
||||
uint8_t e = src.ptr[i];
|
||||
uint8_t *elem = (uint8_t *)memchr(src.ptr, e, 8);
|
||||
if (!elem)
|
||||
abort();
|
||||
src.assert_elemt_equal_at(elem - src.ptr, *elem);
|
||||
for (unsigned i = 0; i < 8; ++i)
|
||||
assert(__arm_sc_memchr(src.ptr, src.ptr[i], 8) ==
|
||||
memchr(src.ptr, src.ptr[i], 8));
|
||||
}
|
||||
}
|
||||
|
||||
// Testing memset.
|
||||
{
|
||||
Memory<8> array;
|
||||
if (!__arm_sc_memset(array.ptr, 2, 8))
|
||||
abort();
|
||||
array.assert_equal({2, 2, 2, 2, 2, 2, 2, 2});
|
||||
}
|
||||
|
||||
// Testing memset with pointer offset.
|
||||
{
|
||||
Memory<8> array(1);
|
||||
if (!__arm_sc_memset(array.ptr + 1, 2, 6))
|
||||
abort();
|
||||
array.assert_equal({0, 2, 2, 2, 2, 2, 2, 7});
|
||||
}
|
||||
|
||||
// Testing memmove with a simple non-overlap case.
|
||||
{
|
||||
Memory<8> src(1);
|
||||
Memory<8> dst(1);
|
||||
if (!__arm_sc_memmove(dst.ptr + 1, src.ptr, 6))
|
||||
abort();
|
||||
dst.assert_equal({0, 0, 1, 2, 3, 4, 5, 7});
|
||||
}
|
||||
|
||||
// Testing memove with overlap pointers dst > src, dst < src.
|
||||
{
|
||||
Memory<8> srcdst(1);
|
||||
if (!__arm_sc_memmove(srcdst.ptr + 1, srcdst.ptr, 6))
|
||||
abort();
|
||||
srcdst.assert_equal({0, 0, 1, 2, 3, 4, 5, 7});
|
||||
if (!__arm_sc_memmove(srcdst.ptr, srcdst.ptr + 1, 6))
|
||||
abort();
|
||||
srcdst.assert_equal({0, 1, 2, 3, 4, 5, 5, 7});
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@ -454,6 +454,9 @@ if not getattr(config, "sanitizer_uses_static_unwind", False):
|
||||
if config.has_lld:
|
||||
config.available_features.add("lld-available")
|
||||
|
||||
if config.aarch64_sme:
|
||||
config.available_features.add("aarch64-sme-available")
|
||||
|
||||
if config.use_lld:
|
||||
config.available_features.add("lld")
|
||||
|
||||
|
@ -50,6 +50,7 @@ set_default("gwp_asan", @COMPILER_RT_HAS_GWP_ASAN_PYBOOL@)
|
||||
set_default("expensive_checks", @LLVM_ENABLE_EXPENSIVE_CHECKS_PYBOOL@)
|
||||
set_default("test_standalone_build_libs", @COMPILER_RT_TEST_STANDALONE_BUILD_LIBS_PYBOOL@)
|
||||
set_default("has_compiler_rt_libatomic", @COMPILER_RT_BUILD_STANDALONE_LIBATOMIC_PYBOOL@)
|
||||
set_default("aarch64_sme", @COMPILER_RT_HAS_AARCH64_SME@)
|
||||
# True iff the test suite supports ignoring the test compiler's runtime library path
|
||||
# and using `config.compiler_rt_libdir` instead. This only matters when the runtime
|
||||
# library paths differ.
|
||||
|
@ -7,6 +7,7 @@ config.llvm_obj_root = "@LLVM_BINARY_DIR@"
|
||||
config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@")
|
||||
config.compiler_rt_src_root = "@COMPILER_RT_SOURCE_DIR@"
|
||||
config.compiler_rt_libdir = lit_config.substitute("@COMPILER_RT_RESOLVED_LIBRARY_OUTPUT_DIR@")
|
||||
config.aarch64_sme = @COMPILER_RT_HAS_AARCH64_SME@
|
||||
config.enable_per_target_runtime_dir = @LLVM_ENABLE_PER_TARGET_RUNTIME_DIR_PYBOOL@
|
||||
config.llvm_build_mode = lit_config.substitute("@LLVM_BUILD_MODE@")
|
||||
config.host_arch = "@HOST_ARCH@"
|
||||
|
Loading…
Reference in New Issue
Block a user