Add a getenv() wrapper that works with multibyte environment variables.

On Windows, the character encoding of a multibyte environment variable varies
depending on system settings. I believe the only reliable way to handle this is
to use GetEnvironmentVariableW().

GetEnvironmentVariableW() operates on wchar_t strings, which on Windows are
UTF-16 strings. That's not ideal because we use UTF-8 as the internal encoding
in LLVM. This patch defines a wrapper function around GetEnvironmentVariableW()
that takes and returns UTF-8 strings.

On Unix, the wrapper function does not do any conversion and just forwards its
argument to getenv().

Differential Revision: http://llvm-reviews.chandlerc.com/D1612

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190423 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Rui Ueyama 2013-09-10 19:45:51 +00:00
parent 8e12d95d15
commit f42d4247ae
6 changed files with 148 additions and 56 deletions

View File

@ -25,11 +25,14 @@
#ifndef LLVM_SUPPORT_PROCESS_H
#define LLVM_SUPPORT_PROCESS_H
#include "llvm/ADT/Optional.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/TimeValue.h"
namespace llvm {
class StringRef;
namespace sys {
class self_process;
@ -161,6 +164,10 @@ public:
/// @brief Prevent core file generation.
static void PreventCoreFiles();
/// Returns the value of the environment variable \arg name as a UTF-8
/// string, or None when no value could be obtained. \arg name is assumed to
/// be encoded in UTF-8 as well.
static Optional<std::string> GetEnv(StringRef name);
/// This function determines if the standard input is connected directly
/// to a user's input (keyboard probably), rather than coming from a file
/// or pipe.

View File

@ -13,6 +13,7 @@
#include "Unix.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/MutexGuard.h"
#include "llvm/Support/TimeValue.h"
@ -181,6 +182,14 @@ void Process::PreventCoreFiles() {
#endif
}
/// Looks up the environment variable \p Name with ::getenv() and returns its
/// value unchanged, or None when ::getenv() yields a null pointer.
Optional<std::string> Process::GetEnv(StringRef Name) {
  // ::getenv() takes a C string, so materialize Name as a std::string first.
  const std::string Key = Name.str();
  if (const char *Value = ::getenv(Key.c_str()))
    return std::string(Value);
  return None;
}
/// Checks standard input by forwarding STDIN_FILENO to
/// FileDescriptorIsDisplayed() and returning its answer.
bool Process::StandardInIsUserInput() {
return FileDescriptorIsDisplayed(STDIN_FILENO);
}

View File

@ -37,6 +37,9 @@ typedef int errno_t;
using namespace llvm;
using llvm::sys::windows::UTF8ToUTF16;
using llvm::sys::windows::UTF16ToUTF8;
namespace {
typedef BOOLEAN (WINAPI *PtrCreateSymbolicLinkW)(
/*__in*/ LPCWSTR lpSymlinkFileName,
@ -47,61 +50,6 @@ namespace {
::GetProcAddress(::GetModuleHandleA("kernel32.dll"),
"CreateSymbolicLinkW"));
/// Converts the UTF-8 string \p utf8 to UTF-16 and stores the result in
/// \p utf16.
///
/// On success \p utf16 holds exactly the converted code units, and a 0 is
/// written to the storage just past its end so that utf16.begin() can be used
/// as a null-terminated wide string. Input that is not valid UTF-8 is rejected
/// (MB_ERR_INVALID_CHARS) and the Windows error is returned.
error_code UTF8ToUTF16(StringRef utf8, SmallVectorImpl<wchar_t> &utf16) {
  if (utf8.empty()) {
    // MultiByteToWideChar() fails when asked to convert zero bytes, but an
    // empty string is valid input whose conversion is simply empty.
    utf16.clear();
  } else {
    // First pass: query the number of UTF-16 code units required.
    int len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
                                    utf8.begin(), utf8.size(),
                                    utf16.begin(), 0);
    if (len == 0)
      return windows_error(::GetLastError());

    // One extra slot is reserved for the terminator written below.
    utf16.reserve(len + 1);
    utf16.set_size(len);

    // Second pass: convert into the buffer that is now large enough.
    len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
                                utf8.begin(), utf8.size(),
                                utf16.begin(), utf16.size());
    if (len == 0)
      return windows_error(::GetLastError());
  }

  // Make utf16 null terminated without counting the terminator in size().
  utf16.push_back(0);
  utf16.pop_back();
  return error_code::success();
}
/// Converts the \p utf16_len UTF-16 code units starting at \p utf16 to UTF-8
/// and stores the result in \p utf8.
///
/// On success \p utf8 holds exactly the converted bytes, and a 0 is written to
/// the storage just past its end so that utf8.data() can be used as a C
/// string. On failure the Windows error is returned.
error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
                       SmallVectorImpl<char> &utf8) {
  if (utf16_len == 0) {
    // WideCharToMultiByte() fails when asked to convert zero code units, but
    // an empty string is valid input whose conversion is simply empty.
    utf8.clear();
  } else {
    // Get length.
    int len = ::WideCharToMultiByte(CP_UTF8, 0,
                                    utf16, utf16_len,
                                    utf8.begin(), 0,
                                    NULL, NULL);
    if (len == 0)
      return windows_error(::GetLastError());

    utf8.reserve(len);
    utf8.set_size(len);

    // Now do the actual conversion.
    len = ::WideCharToMultiByte(CP_UTF8, 0,
                                utf16, utf16_len,
                                utf8.data(), utf8.size(),
                                NULL, NULL);
    if (len == 0)
      return windows_error(::GetLastError());
  }

  // Make utf8 null terminated without counting the terminator in size().
  utf8.push_back(0);
  utf8.pop_back();
  return error_code::success();
}
error_code TempDir(SmallVectorImpl<wchar_t> &result) {
retry_temp_dir:
DWORD len = ::GetTempPathW(result.capacity(), result.begin());
@ -1092,7 +1040,64 @@ error_code openFileForWrite(const Twine &Name, int &ResultFD,
ResultFD = FD;
return error_code::success();
}
} // end namespace fs
namespace windows {
/// Converts the UTF-8 string \p utf8 to UTF-16 and stores the result in
/// \p utf16.
///
/// On success \p utf16 holds exactly the converted code units, and a 0 is
/// written to the storage just past its end so that utf16.begin() can be used
/// as a null-terminated wide string. Input that is not valid UTF-8 is rejected
/// (MB_ERR_INVALID_CHARS) and the Windows error is returned.
llvm::error_code UTF8ToUTF16(llvm::StringRef utf8,
                             llvm::SmallVectorImpl<wchar_t> &utf16) {
  if (utf8.empty()) {
    // MultiByteToWideChar() fails when asked to convert zero bytes, but an
    // empty string is valid input whose conversion is simply empty.
    utf16.clear();
  } else {
    // First pass: query the number of UTF-16 code units required.
    int len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
                                    utf8.begin(), utf8.size(),
                                    utf16.begin(), 0);
    if (len == 0)
      return llvm::windows_error(::GetLastError());

    // One extra slot is reserved for the terminator written below.
    utf16.reserve(len + 1);
    utf16.set_size(len);

    // Second pass: convert into the buffer that is now large enough.
    len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
                                utf8.begin(), utf8.size(),
                                utf16.begin(), utf16.size());
    if (len == 0)
      return llvm::windows_error(::GetLastError());
  }

  // Make utf16 null terminated without counting the terminator in size().
  utf16.push_back(0);
  utf16.pop_back();
  return llvm::error_code::success();
}
/// Converts the \p utf16_len UTF-16 code units starting at \p utf16 to UTF-8
/// and stores the result in \p utf8.
///
/// On success \p utf8 holds exactly the converted bytes, and a 0 is written to
/// the storage just past its end so that utf8.data() can be used as a C
/// string. On failure the Windows error is returned.
llvm::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
                             llvm::SmallVectorImpl<char> &utf8) {
  if (utf16_len == 0) {
    // WideCharToMultiByte() fails when asked to convert zero code units, but
    // an empty string is valid input whose conversion is simply empty.
    utf8.clear();
  } else {
    // Get length.
    int len = ::WideCharToMultiByte(CP_UTF8, 0,
                                    utf16, utf16_len,
                                    utf8.begin(), 0,
                                    NULL, NULL);
    if (len == 0)
      return llvm::windows_error(::GetLastError());

    utf8.reserve(len);
    utf8.set_size(len);

    // Now do the actual conversion.
    len = ::WideCharToMultiByte(CP_UTF8, 0,
                                utf16, utf16_len,
                                utf8.data(), utf8.size(),
                                NULL, NULL);
    if (len == 0)
      return llvm::windows_error(::GetLastError());
  }

  // Make utf8 null terminated without counting the terminator in size().
  utf8.push_back(0);
  utf8.pop_back();
  return llvm::error_code::success();
}
} // end namespace windows
} // end namespace sys
} // end namespace llvm

View File

@ -140,6 +140,36 @@ void Process::PreventCoreFiles() {
SEM_NOOPENFILEERRORBOX);
}
/// Returns the environment variable \arg Name's value as a string encoded in
/// UTF-8. \arg Name is assumed to be in UTF-8 encoding.
///
/// Returns None when \arg Name cannot be converted to UTF-16, when the lookup
/// reports an error (for example because the variable does not exist), or when
/// the value cannot be converted to UTF-8. A variable that exists with an
/// empty value yields an empty string.
Optional<std::string> Process::GetEnv(StringRef Name) {
  // Convert the argument to UTF-16 to pass it to GetEnvironmentVariableW().
  SmallVector<wchar_t, 128> NameUTF16;
  if (error_code ec = windows::UTF8ToUTF16(Name, NameUTF16))
    return None;

  // Environment variable can be encoded in non-UTF8 encoding, and there's no
  // way to know what the encoding is. The only reliable way to look up
  // multibyte environment variable is to use GetEnvironmentVariableW().
  std::vector<wchar_t> Buf(16);
  size_t Size = 0;
  for (;;) {
    // A return value of 0 is ambiguous (error vs. empty value), so reset the
    // last-error code before each call to be able to tell the two apart.
    ::SetLastError(ERROR_SUCCESS);
    Size = ::GetEnvironmentVariableW(NameUTF16.data(), &Buf[0], Buf.size());
    if (Size < Buf.size())
      break;
    // Try again with larger buffer.
    Buf.resize(Size + 1);
  }

  if (Size == 0) {
    // Nothing was copied: either the lookup failed or the value is empty.
    if (::GetLastError() != ERROR_SUCCESS)
      return None;
    return std::string();
  }

  // Convert the result from UTF-16 to UTF-8.
  SmallVector<char, 128> Res;
  if (error_code ec = windows::UTF16ToUTF8(&Buf[0], Size, Res))
    return None;
  // Build the string from the explicit length rather than relying on a
  // terminator stored past the end of Res.
  return std::string(Res.data(), Res.size());
}
/// Checks standard input by forwarding file descriptor 0 to
/// FileDescriptorIsDisplayed() and returning its answer.
bool Process::StandardInIsUserInput() {
return FileDescriptorIsDisplayed(0);
}

View File

@ -24,13 +24,17 @@
#define _WIN32_IE 0x0600 // MinGW at it again.
#define WIN32_LEAN_AND_MEAN
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h" // Get build system configuration settings
#include "llvm/Support/Compiler.h"
#include "llvm/Support/system_error.h"
#include <windows.h>
#include <wincrypt.h>
#include <shlobj.h>
#include <cassert>
#include <string>
#include <vector>
inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) {
if (!ErrMsg)
@ -148,4 +152,13 @@ c_str(SmallVectorImpl<T> &str) {
str.pop_back();
return str.data();
}
namespace sys {
namespace windows {
/// Converts the UTF-8 string \p utf8 to UTF-16 and stores the result in
/// \p utf16. Returns a Windows error code when the conversion fails.
error_code UTF8ToUTF16(StringRef utf8,
SmallVectorImpl<wchar_t> &utf16);
/// Converts the \p utf16_len UTF-16 code units starting at \p utf16 to UTF-8
/// and stores the result in \p utf8. Returns a Windows error code when the
/// conversion fails.
error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
SmallVectorImpl<char> &utf8);
} // end namespace windows
} // end namespace sys
} // end namespace llvm.

View File

@ -39,4 +39,32 @@ TEST(ProcessTest, SelfProcess) {
EXPECT_GT(TimeValue::MaxTime, process::get_self()->get_wall_time());
}
// On LLVM_ON_WIN32 builds, spell setenv() as _putenv_s() so the tests below
// can use a single call; the macro discards the third argument.
#ifdef LLVM_ON_WIN32
#define setenv(name, var, ignore) _putenv_s(name, var)
#endif
#if HAVE_SETENV || defined(LLVM_ON_WIN32)
// A variable set through setenv() must be returned by Process::GetEnv().
TEST(ProcessTest, Basic) {
  setenv("__LLVM_TEST_ENVIRON_VAR__", "abc", true);
  Optional<std::string> val(Process::GetEnv("__LLVM_TEST_ENVIRON_VAR__"));
  // Abort the test when no value came back: dereferencing an empty Optional
  // in the next check would be invalid.
  ASSERT_TRUE(val.hasValue());
  EXPECT_STREQ("abc", val->c_str());
}
// Looking up a variable that this test never sets must yield no value.
TEST(ProcessTest, None) {
  Optional<std::string> val =
      Process::GetEnv("__LLVM_TEST_ENVIRON_NO_SUCH_VAR__");
  EXPECT_FALSE(val.hasValue());
}
#endif
#ifdef LLVM_ON_WIN32
// A variable set through the wide-character API must be returned by
// Process::GetEnv(). The 19-character value is longer than the 16-element
// buffer GetEnv starts with, so the retry-with-larger-buffer path is taken.
TEST(ProcessTest, Wchar) {
  SetEnvironmentVariableW(L"__LLVM_TEST_ENVIRON_VAR__", L"abcdefghijklmnopqrs");
  Optional<std::string> val(Process::GetEnv("__LLVM_TEST_ENVIRON_VAR__"));
  // Abort the test when no value came back: dereferencing an empty Optional
  // in the next check would be invalid.
  ASSERT_TRUE(val.hasValue());
  EXPECT_STREQ("abcdefghijklmnopqrs", val->c_str());
}
#endif
} // end anonymous namespace