[libc] add basic wide char functions

This patch adds the wchar.h header, as well as the functions that convert
between single-byte characters and wide characters (btowc and wctob). The
header also sets up the definitions of the wint_t and wchar_t types.

Reviewed By: lntue

Differential Revision: https://reviews.llvm.org/D145995
This commit is contained in:
Michael Jones 2023-03-13 15:30:36 -07:00
parent c600b99e5e
commit 46b5087227
25 changed files with 354 additions and 1 deletions

View File

@ -178,6 +178,14 @@ def UniStdAPI : PublicAPI<"unistd.h"> {
"ssize_t", "uid_t", "__getoptargv_t"];
}
// Public API entry for wchar.h; Types names the types associated with the
// header.
def WCharAPI : PublicAPI<"wchar.h"> {
let Types = [
"wchar_t",
"wint_t",
"size_t",
];
}
def SysRandomAPI : PublicAPI<"sys/random.h"> {
let Types = ["size_t", "ssize_t"];
}

View File

@ -193,6 +193,9 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.unistd.unlink
libc.src.unistd.unlinkat
libc.src.unistd.write
# wchar.h entrypoints
libc.src.wchar.wctob
)
set(TARGET_LIBM_ENTRYPOINTS

View File

@ -20,6 +20,7 @@ set(TARGET_PUBLIC_HEADERS
libc.include.threads
libc.include.time
libc.include.unistd
libc.include.wchar
libc.include.arpa_inet

View File

@ -468,6 +468,15 @@ add_gen_header(
.llvm-libc-types.tcflag_t
)
# Generated public header wchar.h, produced from wchar.h.def.
# The public API for wchar.h lists size_t, wchar_t and wint_t, so the
# corresponding type headers are declared as dependencies alongside the common
# header and the wchar macros header.
add_gen_header(
  wchar
  DEF_FILE wchar.h.def
  GEN_HDR wchar.h
  DEPENDS
    .llvm_libc_common_h
    .llvm-libc-macros.wchar_macros
    .llvm-libc-types.size_t
    .llvm-libc-types.wchar_t
    .llvm-libc-types.wint_t
)
if(NOT LLVM_LIBC_FULL_BUILD)
# We don't install headers in non-fullbuild mode.
return()

View File

@ -166,3 +166,9 @@ add_header(
DEPENDS
.linux.unistd_macros
)
# Registers wchar-macros.h as the wchar_macros header target.
add_header(
wchar_macros
HDR
wchar-macros.h
)

View File

@ -0,0 +1,16 @@
//===-- Macros defined in wchar.h header file -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef __LLVM_LIBC_MACROS_WCHAR_MACROS_H
#define __LLVM_LIBC_MACROS_WCHAR_MACROS_H
// WEOF is defined here only when no earlier header has already provided it.
#ifndef WEOF
#define WEOF 0xffffffffu
#endif
#endif // __LLVM_LIBC_MACROS_WCHAR_MACROS_H

View File

@ -84,3 +84,5 @@ add_header(speed_t HDR speed_t.h)
add_header(tcflag_t HDR tcflag_t.h)
add_header(struct_termios HDR struct_termios.h DEPENDS .cc_t .speed_t .tcflag_t)
add_header(__getoptargv_t HDR __getoptargv_t.h)
# Type headers for wchar_t and wint_t.
add_header(wchar_t HDR wchar_t.h)
add_header(wint_t HDR wint_t.h)

View File

@ -11,8 +11,9 @@
// Since __need_size_t is defined, we get the definition of size_t from the
// standalone C header stddef.h. Also, because __need_size_t is defined,
// including stddef.h will pull only the type size_t and nothing else.a
// including stddef.h will pull only the type size_t and nothing else.
#define __need_size_t
#include <stddef.h>
#undef __need_size_t
#endif // __LLVM_LIBC_TYPES_SIZE_T_H__

View File

@ -0,0 +1,19 @@
//===-- Definition of wchar_t types ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef __LLVM_LIBC_TYPES_WCHAR_T_H__
#define __LLVM_LIBC_TYPES_WCHAR_T_H__
// Since __need_wchar_t is defined, we get the definition of wchar_t from the
// standalone C header stddef.h. Also, because __need_wchar_t is defined,
// including stddef.h will pull only the type wchar_t and nothing else.
#define __need_wchar_t
#include <stddef.h>
#undef __need_wchar_t
#endif // __LLVM_LIBC_TYPES_WCHAR_T_H__

View File

@ -0,0 +1,19 @@
//===-- Definition of wint_t types ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef __LLVM_LIBC_TYPES_WINT_T_H__
#define __LLVM_LIBC_TYPES_WINT_T_H__
// Since __need_wint_t is defined, we get the definition of wint_t from the
// standalone C header stddef.h. Also, because __need_wint_t is defined,
// including stddef.h will pull only the type wint_t and nothing else.
#define __need_wint_t
#include <stddef.h>
#undef __need_wint_t
#endif // __LLVM_LIBC_TYPES_WINT_T_H__

17
libc/include/wchar.h.def Normal file
View File

@ -0,0 +1,17 @@
//===-- C standard library header wchar.h ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_WCHAR_H
#define LLVM_LIBC_WCHAR_H
#include <__llvm-libc-common.h>
#include <llvm-libc-macros/wchar-macros.h>
// NOTE(review): presumably a placeholder that the header generator replaces
// with the wchar.h public API when producing wchar.h — confirm.
%%public_api()
#endif // LLVM_LIBC_WCHAR_H

View File

@ -59,6 +59,10 @@ def SizeTType : NamedType<"size_t">;
def SizeTPtr : PtrType<SizeTType>;
def RestrictedSizeTPtr : RestrictedPtrType<SizeTType>;
// Named types referenced by the wchar.h header specification.
def WCharType : NamedType<"wchar_t">;
def WIntType : NamedType<"wint_t">;
def MBStateType : NamedType<"mbstate_t">;
def LongDoublePtr : PtrType<LongDoubleType>;
def IntMaxTType : NamedType<"intmax_t">;

View File

@ -1069,6 +1069,29 @@ def StdC : StandardSpec<"stdc"> {
]
>;
// Specification of wchar.h: the WEOF macro, the types the header uses, and
// the wctob function.
// NOTE(review): MBStateType and StructTmType are listed here, but the
// WCharAPI type list for wchar.h only names wchar_t, wint_t and size_t —
// confirm the mismatch is intended.
// NOTE(review): btowc has sources under src/wchar but no FunctionSpec here.
HeaderSpec WChar = HeaderSpec<
"wchar.h",
[ // Macros
Macro<"WEOF">,
],
[ //Types
SizeTType,
WIntType,
WCharType,
MBStateType,
StructTmType,
],
[], // Enumerations
[
FunctionSpec<
"wctob",
RetValSpec<IntType>,
[ArgSpec<WIntType>]
>,
]
>;
let Headers = [
Assert,
CType,
@ -1083,5 +1106,6 @@ def StdC : StandardSpec<"stdc"> {
Signal,
Threads,
Time,
WChar,
];
}

View File

@ -8,6 +8,7 @@ add_subdirectory(math)
add_subdirectory(string)
add_subdirectory(stdlib)
add_subdirectory(stdio)
add_subdirectory(wchar)
if(${LIBC_TARGET_OS} STREQUAL "linux")
add_subdirectory(dirent)

View File

@ -51,6 +51,12 @@ add_header_library(
ctype_utils.h
)
# Header-only library exposing wctype_utils.h.
add_header_library(
wctype_utils
HDRS
wctype_utils.h
)
add_header_library(
str_to_num_result
HDRS

View File

@ -0,0 +1,45 @@
//===-- Collection of utils for implementing wide char functions --*-C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_SUPPORT_WCTYPE_UTILS_H
#define LLVM_LIBC_SRC_SUPPORT_WCTYPE_UTILS_H
#include "src/__support/CPP/optional.h"
#include "src/__support/macros/attributes.h" // LIBC_INLINE
#define __need_wint_t
#define __need_wchar_t
#include <stddef.h> // needed for wint_t and wchar_t
namespace __llvm_libc {
namespace internal {
// ------------------------------------------------------
// Rationale: Since these conversion helpers are called
// from other functions, they are defined inline here to
// avoid the overhead of a function call.
// ------------------------------------------------------
// Maps the wide character `c` to its single-byte value. Only values in
// [0, 127] are converted; for anything else an empty optional is returned.
// The call site is responsible for turning the empty optional into EOF, which
// keeps stdio.h out of this header.
LIBC_INLINE cpp::optional<int> wctob(wint_t c) {
  if (c >= 0 && c <= 127)
    return static_cast<int>(c);
  return cpp::nullopt;
}
// Maps the single-byte value `c` to a wide character. Only values in
// [0, 127] are converted; for anything else an empty optional is returned and
// the call site decides which sentinel to report.
LIBC_INLINE cpp::optional<wint_t> btowc(int c) {
  if (c >= 0 && c <= 127)
    return static_cast<wint_t>(c);
  return cpp::nullopt;
}
} // namespace internal
} // namespace __llvm_libc
#endif // LLVM_LIBC_SRC_SUPPORT_WCTYPE_UTILS_H

View File

@ -0,0 +1,10 @@
# Entrypoint object for wctob, built from wctob.cpp and wctob.h.
# NOTE(review): there is no matching entrypoint for btowc, even though
# btowc.cpp/btowc.h exist and the unit tests depend on libc.src.wchar.btowc.
add_entrypoint_object(
wctob
SRCS
wctob.cpp
HDRS
wctob.h
DEPENDS
libc.src.__support.wctype_utils
)

26
libc/src/wchar/btowc.cpp Normal file
View File

@ -0,0 +1,26 @@
//===-- Implementation of btowc -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "src/wchar/btowc.h"
#include "src/__support/common.h"
#include "src/__support/wctype_utils.h"
#include <stdio.h> // for EOF.
namespace __llvm_libc {
// Public entrypoint for btowc: converts the single-byte character `c` to its
// wide-character equivalent. Returns WEOF when the internal helper reports
// that `c` has no wide-character representation.
// The signature takes an int and returns a wint_t so that it matches the
// declaration in src/wchar/btowc.h.
LLVM_LIBC_FUNCTION(wint_t, btowc, (int c)) {
  auto result = internal::btowc(c);
  if (result.has_value())
    return result.value();
  return WEOF;
}
} // namespace __llvm_libc

20
libc/src/wchar/btowc.h Normal file
View File

@ -0,0 +1,20 @@
//===-- Implementation header for btowc -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_WCHAR_BTOWC_H
#define LLVM_LIBC_SRC_WCHAR_BTOWC_H
#include <wchar.h>
namespace __llvm_libc {
// Converts the single-byte character `c` to a wide character.
wint_t btowc(int c);
} // namespace __llvm_libc
#endif // LLVM_LIBC_SRC_WCHAR_BTOWC_H

26
libc/src/wchar/wctob.cpp Normal file
View File

@ -0,0 +1,26 @@
//===-- Implementation of wctob -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "src/wchar/wctob.h"
#include "src/__support/common.h"
#include "src/__support/wctype_utils.h"
#include <stdio.h> // for EOF.
namespace __llvm_libc {
// Public entrypoint for wctob: converts the wide character `c` to its
// single-byte equivalent, or returns EOF when the internal helper reports that
// no such byte exists.
LLVM_LIBC_FUNCTION(int, wctob, (wint_t c)) {
  auto narrowed = internal::wctob(c);
  if (!narrowed.has_value())
    return EOF;
  return narrowed.value();
}
} // namespace __llvm_libc

20
libc/src/wchar/wctob.h Normal file
View File

@ -0,0 +1,20 @@
//===-- Implementation header for wctob -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_WCHAR_WCTOB_H
#define LLVM_LIBC_SRC_WCHAR_WCTOB_H
#include <wchar.h>
namespace __llvm_libc {
// Converts the wide character `c` to a single-byte character, returning EOF
// when the conversion is not possible.
int wctob(wint_t c);
} // namespace __llvm_libc
#endif // LLVM_LIBC_SRC_WCHAR_WCTOB_H

View File

@ -35,6 +35,7 @@ add_subdirectory(string)
add_subdirectory(stdlib)
add_subdirectory(inttypes)
add_subdirectory(stdio)
add_subdirectory(wchar)
if(${LIBC_TARGET_OS} STREQUAL "linux")
add_subdirectory(fcntl)

View File

@ -0,0 +1,21 @@
# Test suite grouping the wchar unit tests.
add_libc_testsuite(libc_wchar_unittests)
# Unit test for btowc; depends on the libc.src.wchar.btowc target.
add_libc_unittest(
btowc_test
SUITE
libc_wchar_unittests
SRCS
btowc_test.cpp
DEPENDS
libc.src.wchar.btowc
)
# Unit test for wctob; depends on the libc.src.wchar.wctob target.
add_libc_unittest(
wctob_test
SUITE
libc_wchar_unittests
SRCS
wctob_test.cpp
DEPENDS
libc.src.wchar.wctob
)

View File

@ -0,0 +1,24 @@
//===-- Unittests for btowc ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <wchar.h> //for WEOF
#include "src/wchar/btowc.h"
#include "test/UnitTest/Test.h"
TEST(LlvmLibcBtowc, DefaultLocale) {
  // Checks every value from 0 through 255 inclusive: ASCII values map to
  // themselves and everything else yields WEOF. The bound is inclusive so that
  // 255 itself is exercised.
  for (int c = 0; c <= 255; ++c) {
    if (c < 128)
      EXPECT_EQ(__llvm_libc::btowc(c), static_cast<wint_t>(c));
    else
      EXPECT_EQ(__llvm_libc::btowc(c), WEOF);
  }
}

View File

@ -0,0 +1,24 @@
//===-- Unittests for wctob ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <stdio.h> //for EOF
#include "src/wchar/wctob.h"
#include "test/UnitTest/Test.h"
TEST(LlvmLibcWctob, DefaultLocale) {
  // Walks a subset of the wide characters in two phases, sharing one counter.
  wint_t c = 0;
  // Phase 1: ASCII wide characters convert to the identical byte value.
  for (; c < 128; ++c)
    EXPECT_EQ(__llvm_libc::wctob(c), static_cast<int>(c));
  // Phase 2: the remaining sampled values (below 32767) must produce EOF.
  for (; c < 32767; ++c)
    EXPECT_EQ(__llvm_libc::wctob(c), EOF);
}