[SystemZ][z/OS] Complete EBCDIC I/O support (#75212)

This patch completes EBCDIC I/O support on z/OS using the autoconversion functions.
This commit is contained in:
Abhina Sree 2023-12-13 07:46:02 -05:00 committed by GitHub
parent fd8fa31c55
commit ab380c287a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 169 additions and 16 deletions

View File

@ -1,8 +1,9 @@
/* c-arcmt-test.c */
#include "clang-c/Index.h"
#include <stdlib.h>
#include "llvm/Support/AutoConvert.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#if defined(_WIN32)
#include <io.h>
@ -107,6 +108,14 @@ static void flush_atexit(void) {
}
int main(int argc, const char **argv) {
#ifdef __MVS__
if (enableAutoConversion(fileno(stdout)) == -1)
fprintf(stderr, "Setting conversion on stdout failed\n");
if (enableAutoConversion(fileno(stderr)) == -1)
fprintf(stderr, "Setting conversion on stderr failed\n");
#endif
thread_info client_data;
atexit(flush_atexit);

View File

@ -8,6 +8,7 @@
#include "clang-c/Documentation.h"
#include "clang-c/Index.h"
#include "clang/Config/config.h"
#include "llvm/Support/AutoConvert.h"
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
@ -5150,6 +5151,14 @@ static void flush_atexit(void) {
int main(int argc, const char **argv) {
thread_info client_data;
#ifdef __MVS__
if (enableAutoConversion(fileno(stdout)) == -1)
fprintf(stderr, "Setting conversion on stdout failed\n");
if (enableAutoConversion(fileno(stderr)) == -1)
fprintf(stderr, "Setting conversion on stderr failed\n");
#endif
atexit(flush_atexit);
#ifdef CLANG_HAVE_LIBXML

View File

@ -15,10 +15,27 @@
#define LLVM_SUPPORT_AUTOCONVERT_H
#ifdef __MVS__
#include <_Ccsid.h>
#ifdef __cplusplus
#include <system_error>
#endif // __cplusplus
#define CCSID_IBM_1047 1047
#define CCSID_UTF_8 1208
#include <system_error>
#define CCSID_ISO8859_1 819
// C-linkage entry points so that plain C tools can use the auto-conversion
// helpers as well as C++ code. Each function reports failure by returning -1.
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
// Turn on auto-conversion for the file descriptor FD. Returns -1 on failure.
int enableAutoConversion(int FD);
// Turn off auto-conversion for the file descriptor FD. Returns -1 on failure.
int disableAutoConversion(int FD);
// Put back the conversion mode that was recorded for a standard handle before
// enableAutoConversion() changed it. FD must be stdin, stdout or stderr.
// Returns 0 when nothing was recorded for FD, -1 on failure.
int restoreStdHandleAutoConversion(int FD);
// NOTE(review): no definition of this function is visible alongside this
// declaration; the meaning of Filetag and the return value need confirming.
int overrideAutoConversion(int FD, char *Filetag);
#ifdef __cplusplus
}
#endif // __cplusplus
#ifdef __cplusplus
namespace llvm {
/// \brief Disable the z/OS enhanced ASCII auto-conversion for the file
@ -30,10 +47,14 @@ std::error_code disableAutoConversion(int FD);
/// codepage.
std::error_code enableAutoConversion(int FD);
/// Restore the z/OS enhanced ASCII auto-conversion for the std handle.
std::error_code restoreStdHandleAutoConversion(int FD);
/// \brief Set the tag information for a file descriptor.
std::error_code setFileTag(int FD, int CCSID, bool Text);
} // namespace llvm
#endif // __cplusplus
#endif // __MVS__

View File

@ -14,21 +14,36 @@
#ifdef __MVS__
#include "llvm/Support/AutoConvert.h"
#include <cassert>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
std::error_code llvm::disableAutoConversion(int FD) {
static int savedStdHandleAutoConversionMode[3] = {-1, -1, -1};
int disableAutoConversion(int FD) {
static const struct f_cnvrt Convert = {
SETCVTOFF, // cvtcmd
0, // pccsid
(short)FT_BINARY, // fccsid
SETCVTOFF, // cvtcmd
0, // pccsid
0, // fccsid
};
if (fcntl(FD, F_CONTROL_CVT, &Convert) == -1)
return std::error_code(errno, std::generic_category());
return std::error_code();
return fcntl(FD, F_CONTROL_CVT, &Convert);
}
std::error_code llvm::enableAutoConversion(int FD) {
int restoreStdHandleAutoConversion(int FD) {
assert(FD == STDIN_FILENO || FD == STDOUT_FILENO || FD == STDERR_FILENO);
if (savedStdHandleAutoConversionMode[FD] == -1)
return 0;
struct f_cnvrt Cvt = {
savedStdHandleAutoConversionMode[FD], // cvtcmd
0, // pccsid
0, // fccsid
};
return (fcntl(FD, F_CONTROL_CVT, &Cvt));
}
int enableAutoConversion(int FD) {
struct f_cnvrt Query = {
QUERYCVT, // cvtcmd
0, // pccsid
@ -36,17 +51,53 @@ std::error_code llvm::enableAutoConversion(int FD) {
};
if (fcntl(FD, F_CONTROL_CVT, &Query) == -1)
return std::error_code(errno, std::generic_category());
return -1;
// We don't need conversion for UTF-8 tagged files.
// TODO: Remove the assumption of ISO8859-1 = UTF-8 here when we fully resolve
// problems related to UTF-8 tagged source files.
// When the pccsid is not ISO8859-1, autoconversion is still needed.
if (Query.pccsid == CCSID_ISO8859_1 &&
(Query.fccsid == CCSID_UTF_8 || Query.fccsid == CCSID_ISO8859_1))
return 0;
// Save the state of std handles before we make changes to it.
if ((FD == STDIN_FILENO || FD == STDOUT_FILENO || FD == STDERR_FILENO) &&
savedStdHandleAutoConversionMode[FD] == -1)
savedStdHandleAutoConversionMode[FD] = Query.cvtcmd;
if (FD == STDOUT_FILENO || FD == STDERR_FILENO)
Query.cvtcmd = SETCVTON;
else
Query.cvtcmd = SETCVTALL;
Query.cvtcmd = SETCVTALL;
Query.pccsid =
(FD == STDIN_FILENO || FD == STDOUT_FILENO || FD == STDERR_FILENO)
? 0
: CCSID_UTF_8;
// Assume untagged files to be IBM-1047 encoded.
Query.fccsid = (Query.fccsid == FT_UNTAGGED) ? CCSID_IBM_1047 : Query.fccsid;
if (fcntl(FD, F_CONTROL_CVT, &Query) == -1)
return fcntl(FD, F_CONTROL_CVT, &Query);
}
/// C++ wrapper over the C-linkage ::disableAutoConversion(): a -1 result is
/// reported as the current errno, any other result as success.
std::error_code llvm::disableAutoConversion(int FD) {
  const bool Failed = ::disableAutoConversion(FD) == -1;
  return Failed ? std::error_code(errno, std::generic_category())
                : std::error_code();
}
/// C++ wrapper over the C-linkage ::enableAutoConversion(): a -1 result is
/// reported as the current errno, any other result as success.
std::error_code llvm::enableAutoConversion(int FD) {
  const bool Failed = ::enableAutoConversion(FD) == -1;
  return Failed ? std::error_code(errno, std::generic_category())
                : std::error_code();
}
/// C++ wrapper over the C-linkage ::restoreStdHandleAutoConversion(): a -1
/// result is reported as the current errno, any other result as success.
std::error_code llvm::restoreStdHandleAutoConversion(int FD) {
  const bool Failed = ::restoreStdHandleAutoConversion(FD) == -1;
  return Failed ? std::error_code(errno, std::generic_category())
                : std::error_code();
}

View File

@ -8,6 +8,8 @@
#include "llvm/Support/InitLLVM.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/AutoConvert.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PrettyStackTrace.h"
@ -15,15 +17,31 @@
#include "llvm/Support/SwapByteOrder.h"
#ifdef _WIN32
#include "llvm/Support/Error.h"
#include "llvm/Support/Windows/WindowsSupport.h"
#endif
#ifdef __MVS__
#include <unistd.h>
void CleanupStdHandles(void *Cookie) {
llvm::raw_ostream *Outs = &llvm::outs(), *Errs = &llvm::errs();
Outs->flush();
Errs->flush();
llvm::restoreStdHandleAutoConversion(STDIN_FILENO);
llvm::restoreStdHandleAutoConversion(STDOUT_FILENO);
llvm::restoreStdHandleAutoConversion(STDERR_FILENO);
}
#endif
using namespace llvm;
using namespace llvm::sys;
InitLLVM::InitLLVM(int &Argc, const char **&Argv,
bool InstallPipeSignalExitHandler) {
#ifdef __MVS__
// Bring stdin/stdout/stderr into a known state.
sys::AddSignalHandler(CleanupStdHandles, nullptr);
#endif
if (InstallPipeSignalExitHandler)
// The pipe signal handler must be installed before any other handlers are
// registered. This is because the Unix \ref RegisterHandlers function does
@ -37,6 +55,20 @@ InitLLVM::InitLLVM(int &Argc, const char **&Argv,
sys::PrintStackTraceOnErrorSignal(Argv[0]);
install_out_of_memory_new_handler();
#ifdef __MVS__
// We use UTF-8 as the internal character encoding. On z/OS, all external
// output is encoded in EBCDIC. In order to be able to read all
// error messages, we turn conversion to EBCDIC on for stderr fd.
std::string Banner = std::string(Argv[0]) + ": ";
ExitOnError ExitOnErr(Banner);
// If turning on conversion for stderr fails then the error message
// may be garbled. There is no solution to this problem.
ExitOnErr(errorCodeToError(llvm::enableAutoConversion(STDERR_FILENO)));
ExitOnErr(errorCodeToError(llvm::enableAutoConversion(STDOUT_FILENO)));
#endif
#ifdef _WIN32
// We use UTF-8 as the internal character encoding. On Windows,
// arguments passed to main() may not be encoded in UTF-8. In order
@ -61,4 +93,9 @@ InitLLVM::InitLLVM(int &Argc, const char **&Argv,
#endif
}
InitLLVM::~InitLLVM() { llvm_shutdown(); }
// Tear down LLVM's global state when the InitLLVM object goes out of scope.
InitLLVM::~InitLLVM() {
#ifdef __MVS__
// On z/OS, flush the output streams and put the standard handles' conversion
// mode back before llvm_shutdown() runs.
CleanupStdHandles(nullptr);
#endif
llvm_shutdown();
}

View File

@ -20,6 +20,7 @@
#include "Unix.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/config.h"
#include "llvm/Support/AutoConvert.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
@ -521,8 +522,12 @@ std::error_code llvm::sys::ChangeStdoutMode(fs::OpenFlags Flags) {
}
/// Switch standard input to binary mode.
///
/// \returns the result of disabling auto-conversion on stdin when built for
/// z/OS; a default (success) error code everywhere else.
std::error_code llvm::sys::ChangeStdinToBinary() {
#ifndef __MVS__
  // Do nothing, as Unix doesn't differentiate between text and binary.
  return std::error_code();
#else
  // z/OS: reading binary data means turning auto-conversion off for stdin.
  return disableAutoConversion(STDIN_FILENO);
#endif
}
std::error_code llvm::sys::ChangeStdoutToBinary() {

View File

@ -13,6 +13,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/config.h"
#include "llvm/Support/AutoConvert.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Duration.h"
#include "llvm/Support/ErrorHandling.h"
@ -895,6 +896,10 @@ void raw_fd_ostream::anchor() {}
raw_fd_ostream &llvm::outs() {
// Set buffer settings to model stdout behavior.
std::error_code EC;
#ifdef __MVS__
EC = enableAutoConversion(STDOUT_FILENO);
assert(!EC);
#endif
static raw_fd_ostream S("-", EC, sys::fs::OF_None);
assert(!EC);
return S;
@ -902,6 +907,10 @@ raw_fd_ostream &llvm::outs() {
/// Returns the process-wide stream that writes to standard error.
///
/// On z/OS, auto-conversion is enabled on the standard error descriptor
/// before the stream is handed out.
raw_fd_ostream &llvm::errs() {
  // Set standard error to be unbuffered and tied to outs() by default.
#ifdef __MVS__
  // This stream writes to STDERR_FILENO, so that is the descriptor whose
  // conversion must be enabled (enabling it on stdout here was a copy-paste
  // slip from outs()).
  std::error_code EC = enableAutoConversion(STDERR_FILENO);
  assert(!EC);
  (void)EC; // Only read by the assert; avoid an unused warning with NDEBUG.
#endif
  static raw_fd_ostream S(STDERR_FILENO, false, true);
  return S;
}

View File

@ -1,3 +1,7 @@
# Link the utility against LLVM's Support component.
# NOTE(review): presumably required because count.c calls
# enableAutoConversion() from llvm/Support/AutoConvert.h; confirm.
set(LLVM_LINK_COMPONENTS
support
)
# Build the `count` utility from its single C source file.
add_llvm_utility(count
count.c
)

View File

@ -6,10 +6,18 @@
*
\*===----------------------------------------------------------------------===*/
#include <stdlib.h>
#include "llvm/Support/AutoConvert.h"
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char **argv) {
#ifdef __MVS__
if (enableAutoConversion(fileno(stdin)) == -1)
fprintf(stderr, "Setting conversion on stdin failed\n");
if (enableAutoConversion(fileno(stderr)) == -1)
fprintf(stdout, "Setting conversion on stderr failed\n");
#endif
size_t Count, NumLines, NumRead;
char Buffer[4096], *End;