xemu/util/oslib-win32.c

595 lines
13 KiB
C
Raw Normal View History

/*
* os-win32.c
*
* Copyright (c) 2003-2008 Fabrice Bellard
* Copyright (c) 2010-2016 Red Hat, Inc.
*
* QEMU library functions for win32 which are shared between QEMU and
* the QEMU tools.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
* The implementation of g_poll (functions poll_rest, g_poll) at the end of
* this file are based on code from GNOME glib-2 and use a different license,
* see the license comment there.
*/
#include "qemu/osdep.h"
#include <windows.h>
2016-03-14 08:01:28 +00:00
#include "qapi/error.h"
#include "qemu/main-loop.h"
#include "trace.h"
#include "qemu/sockets.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include <malloc.h>
/* this must come after including "trace.h" */
#include <shlobj.h>
util/oslib-win32: indicate alignment for qemu_anon_ram_alloc() Let's set the alignment just like for the posix variant. This will implicitly set the alignment of the underlying memory region and therefore make memory_region_get_alignment(mr) return something > 0 for all memory backends applicable to PCDIMM/NVDIMM. The allocation granularity is ususally 64k, while the page size is 4k. The documentation of VirtualAlloc is not really comprehensible in case only MEM_COMMIT is specified without an address. We'll detect the actual values and then go for the bigger one. The expection is, that it will always be 64k aligned. (The assumption is that MEM_COMMIT does an implicit MEM_RESERVE, so the address will always be aligned to the allocation granularity. And the allocation granularity is always bigger than the page size). This will allow us to drop special handling in pc.c for memory_region_get_alignment(mr) == 0, as we can then assume that it is always set (and AFAICS >= getpagesize()). For pc in pc_memory_plug(), under Windows TARGET_PAGE_SIZE == getpagesize(), therefore alignment of DIMMs will not change, and therefore also not the guest physical memory layout. For spapr in spapr_memory_plug(), an alignment of 0 would have been used until now. As QEMU_ALIGN_UP will crash with the alignment being 0, this never worked, so we don't have to care about compatibility handling. Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: David Hildenbrand <david@redhat.com> Message-Id: <20180801133444.11269-3-david@redhat.com> Reviewed-by: Igor Mammedov <imammedo@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2018-08-01 13:34:42 +00:00
static int get_allocation_granularity(void)
{
SYSTEM_INFO system_info;
GetSystemInfo(&system_info);
return system_info.dwAllocationGranularity;
}
void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared,
bool noreserve)
{
void *ptr;
if (noreserve) {
/*
* We need a MEM_COMMIT before accessing any memory in a MEM_RESERVE
* area; we cannot easily mimic POSIX MAP_NORESERVE semantics.
*/
error_report("Skipping reservation of swap space is not supported.");
return NULL;
}
ptr = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
trace_qemu_anon_ram_alloc(size, ptr);
util/oslib-win32: indicate alignment for qemu_anon_ram_alloc() Let's set the alignment just like for the posix variant. This will implicitly set the alignment of the underlying memory region and therefore make memory_region_get_alignment(mr) return something > 0 for all memory backends applicable to PCDIMM/NVDIMM. The allocation granularity is ususally 64k, while the page size is 4k. The documentation of VirtualAlloc is not really comprehensible in case only MEM_COMMIT is specified without an address. We'll detect the actual values and then go for the bigger one. The expection is, that it will always be 64k aligned. (The assumption is that MEM_COMMIT does an implicit MEM_RESERVE, so the address will always be aligned to the allocation granularity. And the allocation granularity is always bigger than the page size). This will allow us to drop special handling in pc.c for memory_region_get_alignment(mr) == 0, as we can then assume that it is always set (and AFAICS >= getpagesize()). For pc in pc_memory_plug(), under Windows TARGET_PAGE_SIZE == getpagesize(), therefore alignment of DIMMs will not change, and therefore also not the guest physical memory layout. For spapr in spapr_memory_plug(), an alignment of 0 would have been used until now. As QEMU_ALIGN_UP will crash with the alignment being 0, this never worked, so we don't have to care about compatibility handling. Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: David Hildenbrand <david@redhat.com> Message-Id: <20180801133444.11269-3-david@redhat.com> Reviewed-by: Igor Mammedov <imammedo@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2018-08-01 13:34:42 +00:00
if (ptr && align) {
*align = MAX(get_allocation_granularity(), getpagesize());
}
return ptr;
}
void qemu_anon_ram_free(void *ptr, size_t size)
{
trace_qemu_anon_ram_free(ptr, size);
if (ptr) {
VirtualFree(ptr, 0, MEM_RELEASE);
}
}
win32: Simplify gmtime_r detection not depends on if _POSIX_C_SOURCE are defined on msys2/mingw We remove the CONFIG_LOCALTIME_R detection option in configure, and move the check existence of gmtime_r from configure into C header and source directly by using macro `_POSIX_THREAD_SAFE_FUNCTIONS`. Before this patch, the configure script are always assume the compiler doesn't define _POSIX_C_SOURCE macro at all, but that's not true, because thirdparty library such as ncursesw may define -D_POSIX_C_SOURCE in it's pkg-config file. And that C Flags will added -D_POSIX_C_SOURCE into each QEMU_CFLAGS. And that's causing the following compiling error: n file included from C:/work/xemu/qemu/include/qemu/osdep.h:119, from ../softmmu/main.c:25: C:/work/xemu/qemu/include/sysemu/os-win32.h:53:12: error: redundant redeclaration of 'gmtime_r' [-Werror=redundant-decls] 53 | struct tm *gmtime_r(const time_t *timep, struct tm *result); | ^~~~~~~~ In file included from C:/work/xemu/qemu/include/qemu/osdep.h:94, from ../softmmu/main.c:25: C:/CI-Tools/msys64/mingw64/x86_64-w64-mingw32/include/time.h:284:36: note: previous definition of 'gmtime_r' was here 284 | __forceinline struct tm *__CRTDECL gmtime_r(const time_t *_Time, struct tm *_Tm) { | ^~~~~~~~ In file included from C:/work/xemu/qemu/include/qemu/osdep.h:119, from ../softmmu/main.c:25: C:/work/xemu/qemu/include/sysemu/os-win32.h:55:12: error: redundant redeclaration of 'localtime_r' [-Werror=redundant-decls] 55 | struct tm *localtime_r(const time_t *timep, struct tm *result); | ^~~~~~~~~~~ In file included from C:/work/xemu/qemu/include/qemu/osdep.h:94, from ../softmmu/main.c:25: C:/CI-Tools/msys64/mingw64/x86_64-w64-mingw32/include/time.h:281:36: note: previous definition of 'localtime_r' was here 281 | __forceinline struct tm *__CRTDECL localtime_r(const time_t *_Time, struct tm *_Tm) { | ^~~~~~~~~~~ Compiling C object libcommon.fa.p/hw_gpio_zaurus.c.obj In file included from C:/work/xemu/qemu/include/qemu/osdep.h:119, from ../hw/i2c/smbus_slave.c:16: C:/work/xemu/qemu/include/sysemu/os-win32.h:53:12: error: redundant redeclaration of 'gmtime_r' [-Werror=redundant-decls] 53 | struct tm *gmtime_r(const time_t *timep, struct tm *result); | ^~~~~~~~ In file included from C:/work/xemu/qemu/include/qemu/osdep.h:94, from ../hw/i2c/smbus_slave.c:16: C:/CI-Tools/msys64/mingw64/x86_64-w64-mingw32/include/time.h:284:36: note: previous definition of 'gmtime_r' was here 284 | __forceinline struct tm *__CRTDECL gmtime_r(const time_t *_Time, struct tm *_Tm) { | ^~~~~~~~ In file included from C:/work/xemu/qemu/include/qemu/osdep.h:119, from ../hw/i2c/smbus_slave.c:16: C:/work/xemu/qemu/include/sysemu/os-win32.h:55:12: error: redundant redeclaration of 'localtime_r' [-Werror=redundant-decls] 55 | struct tm *localtime_r(const time_t *timep, struct tm *result); | ^~~~~~~~~~~ In file included from C:/work/xemu/qemu/include/qemu/osdep.h:94, from ../hw/i2c/smbus_slave.c:16: C:/CI-Tools/msys64/mingw64/x86_64-w64-mingw32/include/time.h:281:36: note: previous definition of 'localtime_r' was here 281 | __forceinline struct tm *__CRTDECL localtime_r(const time_t *_Time, struct tm *_Tm) { | ^~~~~~~~~~~ Compiling C object libcommon.fa.p/hw_dma_xilinx_axidma.c.obj After this patch, whenever ncursesw or other thirdparty libraries tried to define or not define _POSIX_C_SOURCE, the source will building properly. Because now, we don't make any assumption if _POSIX_C_SOURCE are defined. We solely relied on if the macro `_POSIX_THREAD_SAFE_FUNCTIONS` are defined in msys2/mingw header. The _POSIX_THREAD_SAFE_FUNCTIONS are defined in mingw header like this: ``` #if defined(_POSIX_C_SOURCE) && !defined(_POSIX_THREAD_SAFE_FUNCTIONS) #define _POSIX_THREAD_SAFE_FUNCTIONS 200112L #endif #ifdef _POSIX_THREAD_SAFE_FUNCTIONS __forceinline struct tm *__CRTDECL localtime_r(const time_t *_Time, struct tm *_Tm) { return localtime_s(_Tm, _Time) ? NULL : _Tm; } __forceinline struct tm *__CRTDECL gmtime_r(const time_t *_Time, struct tm *_Tm) { return gmtime_s(_Tm, _Time) ? NULL : _Tm; } __forceinline char *__CRTDECL ctime_r(const time_t *_Time, char *_Str) { return ctime_s(_Str, 0x7fffffff, _Time) ? NULL : _Str; } __forceinline char *__CRTDECL asctime_r(const struct tm *_Tm, char * _Str) { return asctime_s(_Str, 0x7fffffff, _Tm) ? NULL : _Str; } #endif ``` Signed-off-by: Yonggang Luo <luoyonggang@gmail.com> Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> Message-id: 20201012234348.1427-5-luoyonggang@gmail.com Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2020-10-12 23:43:47 +00:00
#ifndef _POSIX_THREAD_SAFE_FUNCTIONS
/* FIXME: add proper locking */
struct tm *gmtime_r(const time_t *timep, struct tm *result)
{
struct tm *p = gmtime(timep);
memset(result, 0, sizeof(*result));
if (p) {
*result = *p;
p = result;
}
return p;
}
/* FIXME: add proper locking */
struct tm *localtime_r(const time_t *timep, struct tm *result)
{
struct tm *p = localtime(timep);
memset(result, 0, sizeof(*result));
if (p) {
*result = *p;
p = result;
}
return p;
}
win32: Simplify gmtime_r detection not depends on if _POSIX_C_SOURCE are defined on msys2/mingw We remove the CONFIG_LOCALTIME_R detection option in configure, and move the check existence of gmtime_r from configure into C header and source directly by using macro `_POSIX_THREAD_SAFE_FUNCTIONS`. Before this patch, the configure script are always assume the compiler doesn't define _POSIX_C_SOURCE macro at all, but that's not true, because thirdparty library such as ncursesw may define -D_POSIX_C_SOURCE in it's pkg-config file. And that C Flags will added -D_POSIX_C_SOURCE into each QEMU_CFLAGS. And that's causing the following compiling error: n file included from C:/work/xemu/qemu/include/qemu/osdep.h:119, from ../softmmu/main.c:25: C:/work/xemu/qemu/include/sysemu/os-win32.h:53:12: error: redundant redeclaration of 'gmtime_r' [-Werror=redundant-decls] 53 | struct tm *gmtime_r(const time_t *timep, struct tm *result); | ^~~~~~~~ In file included from C:/work/xemu/qemu/include/qemu/osdep.h:94, from ../softmmu/main.c:25: C:/CI-Tools/msys64/mingw64/x86_64-w64-mingw32/include/time.h:284:36: note: previous definition of 'gmtime_r' was here 284 | __forceinline struct tm *__CRTDECL gmtime_r(const time_t *_Time, struct tm *_Tm) { | ^~~~~~~~ In file included from C:/work/xemu/qemu/include/qemu/osdep.h:119, from ../softmmu/main.c:25: C:/work/xemu/qemu/include/sysemu/os-win32.h:55:12: error: redundant redeclaration of 'localtime_r' [-Werror=redundant-decls] 55 | struct tm *localtime_r(const time_t *timep, struct tm *result); | ^~~~~~~~~~~ In file included from C:/work/xemu/qemu/include/qemu/osdep.h:94, from ../softmmu/main.c:25: C:/CI-Tools/msys64/mingw64/x86_64-w64-mingw32/include/time.h:281:36: note: previous definition of 'localtime_r' was here 281 | __forceinline struct tm *__CRTDECL localtime_r(const time_t *_Time, struct tm *_Tm) { | ^~~~~~~~~~~ Compiling C object libcommon.fa.p/hw_gpio_zaurus.c.obj In file included from C:/work/xemu/qemu/include/qemu/osdep.h:119, from ../hw/i2c/smbus_slave.c:16: C:/work/xemu/qemu/include/sysemu/os-win32.h:53:12: error: redundant redeclaration of 'gmtime_r' [-Werror=redundant-decls] 53 | struct tm *gmtime_r(const time_t *timep, struct tm *result); | ^~~~~~~~ In file included from C:/work/xemu/qemu/include/qemu/osdep.h:94, from ../hw/i2c/smbus_slave.c:16: C:/CI-Tools/msys64/mingw64/x86_64-w64-mingw32/include/time.h:284:36: note: previous definition of 'gmtime_r' was here 284 | __forceinline struct tm *__CRTDECL gmtime_r(const time_t *_Time, struct tm *_Tm) { | ^~~~~~~~ In file included from C:/work/xemu/qemu/include/qemu/osdep.h:119, from ../hw/i2c/smbus_slave.c:16: C:/work/xemu/qemu/include/sysemu/os-win32.h:55:12: error: redundant redeclaration of 'localtime_r' [-Werror=redundant-decls] 55 | struct tm *localtime_r(const time_t *timep, struct tm *result); | ^~~~~~~~~~~ In file included from C:/work/xemu/qemu/include/qemu/osdep.h:94, from ../hw/i2c/smbus_slave.c:16: C:/CI-Tools/msys64/mingw64/x86_64-w64-mingw32/include/time.h:281:36: note: previous definition of 'localtime_r' was here 281 | __forceinline struct tm *__CRTDECL localtime_r(const time_t *_Time, struct tm *_Tm) { | ^~~~~~~~~~~ Compiling C object libcommon.fa.p/hw_dma_xilinx_axidma.c.obj After this patch, whenever ncursesw or other thirdparty libraries tried to define or not define _POSIX_C_SOURCE, the source will building properly. Because now, we don't make any assumption if _POSIX_C_SOURCE are defined. We solely relied on if the macro `_POSIX_THREAD_SAFE_FUNCTIONS` are defined in msys2/mingw header. The _POSIX_THREAD_SAFE_FUNCTIONS are defined in mingw header like this: ``` #if defined(_POSIX_C_SOURCE) && !defined(_POSIX_THREAD_SAFE_FUNCTIONS) #define _POSIX_THREAD_SAFE_FUNCTIONS 200112L #endif #ifdef _POSIX_THREAD_SAFE_FUNCTIONS __forceinline struct tm *__CRTDECL localtime_r(const time_t *_Time, struct tm *_Tm) { return localtime_s(_Tm, _Time) ? NULL : _Tm; } __forceinline struct tm *__CRTDECL gmtime_r(const time_t *_Time, struct tm *_Tm) { return gmtime_s(_Tm, _Time) ? NULL : _Tm; } __forceinline char *__CRTDECL ctime_r(const time_t *_Time, char *_Str) { return ctime_s(_Str, 0x7fffffff, _Time) ? NULL : _Str; } __forceinline char *__CRTDECL asctime_r(const struct tm *_Tm, char * _Str) { return asctime_s(_Str, 0x7fffffff, _Tm) ? NULL : _Str; } #endif ``` Signed-off-by: Yonggang Luo <luoyonggang@gmail.com> Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> Message-id: 20201012234348.1427-5-luoyonggang@gmail.com Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2020-10-12 23:43:47 +00:00
#endif /* _POSIX_THREAD_SAFE_FUNCTIONS */
static int socket_error(void)
{
switch (WSAGetLastError()) {
case 0:
return 0;
case WSAEINTR:
return EINTR;
case WSAEINVAL:
return EINVAL;
case WSA_INVALID_HANDLE:
return EBADF;
case WSA_NOT_ENOUGH_MEMORY:
return ENOMEM;
case WSA_INVALID_PARAMETER:
return EINVAL;
case WSAENAMETOOLONG:
return ENAMETOOLONG;
case WSAENOTEMPTY:
return ENOTEMPTY;
case WSAEWOULDBLOCK:
/* not using EWOULDBLOCK as we don't want code to have
* to check both EWOULDBLOCK and EAGAIN */
return EAGAIN;
case WSAEINPROGRESS:
return EINPROGRESS;
case WSAEALREADY:
return EALREADY;
case WSAENOTSOCK:
return ENOTSOCK;
case WSAEDESTADDRREQ:
return EDESTADDRREQ;
case WSAEMSGSIZE:
return EMSGSIZE;
case WSAEPROTOTYPE:
return EPROTOTYPE;
case WSAENOPROTOOPT:
return ENOPROTOOPT;
case WSAEPROTONOSUPPORT:
return EPROTONOSUPPORT;
case WSAEOPNOTSUPP:
return EOPNOTSUPP;
case WSAEAFNOSUPPORT:
return EAFNOSUPPORT;
case WSAEADDRINUSE:
return EADDRINUSE;
case WSAEADDRNOTAVAIL:
return EADDRNOTAVAIL;
case WSAENETDOWN:
return ENETDOWN;
case WSAENETUNREACH:
return ENETUNREACH;
case WSAENETRESET:
return ENETRESET;
case WSAECONNABORTED:
return ECONNABORTED;
case WSAECONNRESET:
return ECONNRESET;
case WSAENOBUFS:
return ENOBUFS;
case WSAEISCONN:
return EISCONN;
case WSAENOTCONN:
return ENOTCONN;
case WSAETIMEDOUT:
return ETIMEDOUT;
case WSAECONNREFUSED:
return ECONNREFUSED;
case WSAELOOP:
return ELOOP;
case WSAEHOSTUNREACH:
return EHOSTUNREACH;
default:
return EIO;
}
}
void qemu_socket_set_block(int fd)
net: check if the file descriptor is valid before using it qemu_set_nonblock() checks that the file descriptor can be used and, if not, crashes QEMU. An assert() is used for that. The use of assert() is used to detect programming error and the coredump will allow to debug the problem. But in the case of the tap device, this assert() can be triggered by a misconfiguration by the user. At startup, it's not a real problem, but it can also happen during the hot-plug of a new device, and here it's a problem because we can crash a perfectly healthy system. For instance: # ip link add link virbr0 name macvtap0 type macvtap mode bridge # ip link set macvtap0 up # TAP=/dev/tap$(ip -o link show macvtap0 | cut -d: -f1) # qemu-system-x86_64 -machine q35 -device pcie-root-port,id=pcie-root-port-0 -monitor stdio 9<> $TAP (qemu) netdev_add type=tap,id=hostnet0,vhost=on,fd=9 (qemu) device_add driver=virtio-net-pci,netdev=hostnet0,id=net0,bus=pcie-root-port-0 (qemu) device_del net0 (qemu) netdev_del hostnet0 (qemu) netdev_add type=tap,id=hostnet1,vhost=on,fd=9 qemu-system-x86_64: .../util/oslib-posix.c:247: qemu_set_nonblock: Assertion `f != -1' failed. Aborted (core dumped) To avoid that, add a function, qemu_try_set_nonblock(), that allows to report the problem without crashing. In the same way, we also update the function for vhostfd in net_init_tap_one() and for fd in net_init_socket() (both descriptors are provided by the user and can be wrong). Signed-off-by: Laurent Vivier <lvivier@redhat.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> Signed-off-by: Jason Wang <jasowang@redhat.com>
2020-07-07 18:45:14 +00:00
{
unsigned long opt = 0;
WSAEventSelect(fd, NULL, 0);
ioctlsocket(fd, FIONBIO, &opt);
}
int qemu_socket_try_set_nonblock(int fd)
net: check if the file descriptor is valid before using it qemu_set_nonblock() checks that the file descriptor can be used and, if not, crashes QEMU. An assert() is used for that. The use of assert() is used to detect programming error and the coredump will allow to debug the problem. But in the case of the tap device, this assert() can be triggered by a misconfiguration by the user. At startup, it's not a real problem, but it can also happen during the hot-plug of a new device, and here it's a problem because we can crash a perfectly healthy system. For instance: # ip link add link virbr0 name macvtap0 type macvtap mode bridge # ip link set macvtap0 up # TAP=/dev/tap$(ip -o link show macvtap0 | cut -d: -f1) # qemu-system-x86_64 -machine q35 -device pcie-root-port,id=pcie-root-port-0 -monitor stdio 9<> $TAP (qemu) netdev_add type=tap,id=hostnet0,vhost=on,fd=9 (qemu) device_add driver=virtio-net-pci,netdev=hostnet0,id=net0,bus=pcie-root-port-0 (qemu) device_del net0 (qemu) netdev_del hostnet0 (qemu) netdev_add type=tap,id=hostnet1,vhost=on,fd=9 qemu-system-x86_64: .../util/oslib-posix.c:247: qemu_set_nonblock: Assertion `f != -1' failed. Aborted (core dumped) To avoid that, add a function, qemu_try_set_nonblock(), that allows to report the problem without crashing. In the same way, we also update the function for vhostfd in net_init_tap_one() and for fd in net_init_socket() (both descriptors are provided by the user and can be wrong). Signed-off-by: Laurent Vivier <lvivier@redhat.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> Signed-off-by: Jason Wang <jasowang@redhat.com>
2020-07-07 18:45:14 +00:00
{
unsigned long opt = 1;
if (ioctlsocket(fd, FIONBIO, &opt) != NO_ERROR) {
return -socket_error();
}
return 0;
}
void qemu_socket_set_nonblock(int fd)
net: check if the file descriptor is valid before using it qemu_set_nonblock() checks that the file descriptor can be used and, if not, crashes QEMU. An assert() is used for that. The use of assert() is used to detect programming error and the coredump will allow to debug the problem. But in the case of the tap device, this assert() can be triggered by a misconfiguration by the user. At startup, it's not a real problem, but it can also happen during the hot-plug of a new device, and here it's a problem because we can crash a perfectly healthy system. For instance: # ip link add link virbr0 name macvtap0 type macvtap mode bridge # ip link set macvtap0 up # TAP=/dev/tap$(ip -o link show macvtap0 | cut -d: -f1) # qemu-system-x86_64 -machine q35 -device pcie-root-port,id=pcie-root-port-0 -monitor stdio 9<> $TAP (qemu) netdev_add type=tap,id=hostnet0,vhost=on,fd=9 (qemu) device_add driver=virtio-net-pci,netdev=hostnet0,id=net0,bus=pcie-root-port-0 (qemu) device_del net0 (qemu) netdev_del hostnet0 (qemu) netdev_add type=tap,id=hostnet1,vhost=on,fd=9 qemu-system-x86_64: .../util/oslib-posix.c:247: qemu_set_nonblock: Assertion `f != -1' failed. Aborted (core dumped) To avoid that, add a function, qemu_try_set_nonblock(), that allows to report the problem without crashing. In the same way, we also update the function for vhostfd in net_init_tap_one() and for fd in net_init_socket() (both descriptors are provided by the user and can be wrong). Signed-off-by: Laurent Vivier <lvivier@redhat.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> Signed-off-by: Jason Wang <jasowang@redhat.com>
2020-07-07 18:45:14 +00:00
{
(void)qemu_socket_try_set_nonblock(fd);
net: check if the file descriptor is valid before using it qemu_set_nonblock() checks that the file descriptor can be used and, if not, crashes QEMU. An assert() is used for that. The use of assert() is used to detect programming error and the coredump will allow to debug the problem. But in the case of the tap device, this assert() can be triggered by a misconfiguration by the user. At startup, it's not a real problem, but it can also happen during the hot-plug of a new device, and here it's a problem because we can crash a perfectly healthy system. For instance: # ip link add link virbr0 name macvtap0 type macvtap mode bridge # ip link set macvtap0 up # TAP=/dev/tap$(ip -o link show macvtap0 | cut -d: -f1) # qemu-system-x86_64 -machine q35 -device pcie-root-port,id=pcie-root-port-0 -monitor stdio 9<> $TAP (qemu) netdev_add type=tap,id=hostnet0,vhost=on,fd=9 (qemu) device_add driver=virtio-net-pci,netdev=hostnet0,id=net0,bus=pcie-root-port-0 (qemu) device_del net0 (qemu) netdev_del hostnet0 (qemu) netdev_add type=tap,id=hostnet1,vhost=on,fd=9 qemu-system-x86_64: .../util/oslib-posix.c:247: qemu_set_nonblock: Assertion `f != -1' failed. Aborted (core dumped) To avoid that, add a function, qemu_try_set_nonblock(), that allows to report the problem without crashing. In the same way, we also update the function for vhostfd in net_init_tap_one() and for fd in net_init_socket() (both descriptors are provided by the user and can be wrong). Signed-off-by: Laurent Vivier <lvivier@redhat.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> Signed-off-by: Jason Wang <jasowang@redhat.com>
2020-07-07 18:45:14 +00:00
}
int socket_set_fast_reuse(int fd)
{
/* Enabling the reuse of an endpoint that was used by a socket still in
* TIME_WAIT state is usually performed by setting SO_REUSEADDR. On Windows
* fast reuse is the default and SO_REUSEADDR does strange things. So we
* don't have to do anything here. More info can be found at:
* http://msdn.microsoft.com/en-us/library/windows/desktop/ms740621.aspx */
return 0;
}
int inet_aton(const char *cp, struct in_addr *ia)
{
uint32_t addr = inet_addr(cp);
if (addr == 0xffffffff) {
return 0;
}
ia->s_addr = addr;
return 1;
}
void qemu_set_cloexec(int fd)
{
}
int qemu_get_thread_id(void)
{
return GetCurrentThreadId();
}
char *
qemu_get_local_state_dir(void)
{
HRESULT result;
char base_path[MAX_PATH+1] = "";
result = SHGetFolderPath(NULL, CSIDL_COMMON_APPDATA, NULL,
/* SHGFP_TYPE_CURRENT */ 0, base_path);
if (result != S_OK) {
/* misconfigured environment */
g_critical("CSIDL_COMMON_APPDATA unavailable: %ld", (long)result);
abort();
}
return g_strdup(base_path);
}
void qemu_set_tty_echo(int fd, bool echo)
{
HANDLE handle = (HANDLE)_get_osfhandle(fd);
DWORD dwMode = 0;
if (handle == INVALID_HANDLE_VALUE) {
return;
}
GetConsoleMode(handle, &dwMode);
if (echo) {
SetConsoleMode(handle, dwMode | ENABLE_ECHO_INPUT | ENABLE_LINE_INPUT);
} else {
SetConsoleMode(handle,
dwMode & ~(ENABLE_ECHO_INPUT | ENABLE_LINE_INPUT));
}
}
static const char *exec_dir;
void qemu_init_exec_dir(const char *argv0)
{
char *p;
char buf[MAX_PATH];
DWORD len;
if (exec_dir) {
return;
}
len = GetModuleFileName(NULL, buf, sizeof(buf) - 1);
if (len == 0) {
return;
}
buf[len] = 0;
p = buf + len - 1;
while (p != buf && *p != '\\') {
p--;
}
*p = 0;
if (access(buf, R_OK) == 0) {
exec_dir = g_strdup(buf);
} else {
exec_dir = CONFIG_BINDIR;
}
}
const char *qemu_get_exec_dir(void)
{
return exec_dir;
}
int getpagesize(void)
{
SYSTEM_INFO system_info;
GetSystemInfo(&system_info);
return system_info.dwPageSize;
}
mem-prealloc: reduce large guest start-up and migration time. Using "-mem-prealloc" option for a large guest leads to higher guest start-up and migration time. This is because with "-mem-prealloc" option qemu tries to map every guest page (create address translations), and make sure the pages are available during runtime. virsh/libvirt by default, seems to use "-mem-prealloc" option in case the guest is configured to use huge pages. The patch tries to map all guest pages simultaneously by spawning multiple threads. Currently limiting the change to QEMU library functions on POSIX compliant host only, as we are not sure if the problem exists on win32. Below are some stats with "-mem-prealloc" option for guest configured to use huge pages. ------------------------------------------------------------------------ Idle Guest | Start-up time | Migration time ------------------------------------------------------------------------ Guest stats with 2M HugePage usage - single threaded (existing code) ------------------------------------------------------------------------ 64 Core - 4TB | 54m11.796s | 75m43.843s 64 Core - 1TB | 8m56.576s | 14m29.049s 64 Core - 256GB | 2m11.245s | 3m26.598s ------------------------------------------------------------------------ Guest stats with 2M HugePage usage - map guest pages using 8 threads ------------------------------------------------------------------------ 64 Core - 4TB | 5m1.027s | 34m10.565s 64 Core - 1TB | 1m10.366s | 8m28.188s 64 Core - 256GB | 0m19.040s | 2m10.148s ----------------------------------------------------------------------- Guest stats with 2M HugePage usage - map guest pages using 16 threads ----------------------------------------------------------------------- 64 Core - 4TB | 1m58.970s | 31m43.400s 64 Core - 1TB | 0m39.885s | 7m55.289s 64 Core - 256GB | 0m11.960s | 2m0.135s ----------------------------------------------------------------------- Changed in v2: - modify number of memset threads spawned to min(smp_cpus, 16). - removed 64GB memory restriction for spawning memset threads. Changed in v3: - limit number of threads spawned based on min(sysconf(_SC_NPROCESSORS_ONLN), 16, smp_cpus) - implement memset thread specific siglongjmp in SIGBUS signal_handler. Changed in v4 - remove sigsetjmp/siglongjmp and SIGBUS unblock/block for main thread as main thread no longer touches any pages. - simplify code my returning memset_thread_failed status from touch_all_pages. Signed-off-by: Jitendra Kolhe <jitendra.kolhe@hpe.com> Message-Id: <1487907103-32350-1-git-send-email-jitendra.kolhe@hpe.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-02-24 03:31:43 +00:00
void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus,
Error **errp)
{
int i;
size_t pagesize = qemu_real_host_page_size();
memory = (memory + pagesize - 1) & -pagesize;
for (i = 0; i < memory / pagesize; i++) {
memset(area + pagesize * i, 0, 1);
}
}
char *qemu_get_pid_name(pid_t pid)
{
/* XXX Implement me */
abort();
}
pid_t qemu_fork(Error **errp)
{
errno = ENOSYS;
error_setg_errno(errp, errno,
"cannot fork child process");
return -1;
}
osdep: add wrappers for socket functions The windows socket functions look identical to the normal POSIX sockets functions, but instead of setting errno, the caller needs to call WSAGetLastError(). QEMU has tried to deal with this incompatibility by defining a socket_error() method that callers must use that abstracts the difference between WSAGetLastError() and errno. This approach is somewhat error prone though - many callers of the sockets functions are just using errno directly because it is easy to forget the need use a QEMU specific wrapper. It is not always immediately obvious that a particular function will in fact call into Windows sockets functions, so the dev may not even realize they need to use socket_error(). This introduces an alternative approach to portability inspired by the way GNULIB fixes portability problems. We use a macro to redefine the original socket function names to refer to a QEMU wrapper function. The wrapper function calls the original Win32 sockets method and then sets errno from the WSAGetLastError() value. Thus all code can simply call the normal POSIX sockets APIs are have standard errno reporting on error, even on Windows. This makes the socket_error() method obsolete. We also bring closesocket & ioctlsocket into this approach. Even though they are non-standard Win32 names, we can't wrap the normal close/ioctl methods since there's no reliable way to distinguish between a file descriptor and HANDLE in Win32. Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-03-07 20:25:19 +00:00
#undef connect
int qemu_connect_wrap(int sockfd, const struct sockaddr *addr,
socklen_t addrlen)
{
int ret;
ret = connect(sockfd, addr, addrlen);
if (ret < 0) {
if (WSAGetLastError() == WSAEWOULDBLOCK) {
errno = EINPROGRESS;
} else {
errno = socket_error();
}
osdep: add wrappers for socket functions The windows socket functions look identical to the normal POSIX sockets functions, but instead of setting errno, the caller needs to call WSAGetLastError(). QEMU has tried to deal with this incompatibility by defining a socket_error() method that callers must use that abstracts the difference between WSAGetLastError() and errno. This approach is somewhat error prone though - many callers of the sockets functions are just using errno directly because it is easy to forget the need use a QEMU specific wrapper. It is not always immediately obvious that a particular function will in fact call into Windows sockets functions, so the dev may not even realize they need to use socket_error(). This introduces an alternative approach to portability inspired by the way GNULIB fixes portability problems. We use a macro to redefine the original socket function names to refer to a QEMU wrapper function. The wrapper function calls the original Win32 sockets method and then sets errno from the WSAGetLastError() value. Thus all code can simply call the normal POSIX sockets APIs are have standard errno reporting on error, even on Windows. This makes the socket_error() method obsolete. We also bring closesocket & ioctlsocket into this approach. Even though they are non-standard Win32 names, we can't wrap the normal close/ioctl methods since there's no reliable way to distinguish between a file descriptor and HANDLE in Win32. Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
2016-03-07 20:25:19 +00:00
}
return ret;
}
#undef listen
int qemu_listen_wrap(int sockfd, int backlog)
{
int ret;
ret = listen(sockfd, backlog);
if (ret < 0) {
errno = socket_error();
}
return ret;
}
#undef bind
int qemu_bind_wrap(int sockfd, const struct sockaddr *addr,
socklen_t addrlen)
{
int ret;
ret = bind(sockfd, addr, addrlen);
if (ret < 0) {
errno = socket_error();
}
return ret;
}
#undef socket
int qemu_socket_wrap(int domain, int type, int protocol)
{
int ret;
ret = socket(domain, type, protocol);
if (ret < 0) {
errno = socket_error();
}
return ret;
}
#undef accept
int qemu_accept_wrap(int sockfd, struct sockaddr *addr,
socklen_t *addrlen)
{
int ret;
ret = accept(sockfd, addr, addrlen);
if (ret < 0) {
errno = socket_error();
}
return ret;
}
#undef shutdown
int qemu_shutdown_wrap(int sockfd, int how)
{
int ret;
ret = shutdown(sockfd, how);
if (ret < 0) {
errno = socket_error();
}
return ret;
}
#undef ioctlsocket
int qemu_ioctlsocket_wrap(int fd, int req, void *val)
{
int ret;
ret = ioctlsocket(fd, req, val);
if (ret < 0) {
errno = socket_error();
}
return ret;
}
#undef closesocket
int qemu_closesocket_wrap(int fd)
{
int ret;
ret = closesocket(fd);
if (ret < 0) {
errno = socket_error();
}
return ret;
}
#undef getsockopt
int qemu_getsockopt_wrap(int sockfd, int level, int optname,
void *optval, socklen_t *optlen)
{
int ret;
ret = getsockopt(sockfd, level, optname, optval, optlen);
if (ret < 0) {
errno = socket_error();
}
return ret;
}
#undef setsockopt
int qemu_setsockopt_wrap(int sockfd, int level, int optname,
const void *optval, socklen_t optlen)
{
int ret;
ret = setsockopt(sockfd, level, optname, optval, optlen);
if (ret < 0) {
errno = socket_error();
}
return ret;
}
#undef getpeername
int qemu_getpeername_wrap(int sockfd, struct sockaddr *addr,
socklen_t *addrlen)
{
int ret;
ret = getpeername(sockfd, addr, addrlen);
if (ret < 0) {
errno = socket_error();
}
return ret;
}
#undef getsockname
int qemu_getsockname_wrap(int sockfd, struct sockaddr *addr,
socklen_t *addrlen)
{
int ret;
ret = getsockname(sockfd, addr, addrlen);
if (ret < 0) {
errno = socket_error();
}
return ret;
}
#undef send
ssize_t qemu_send_wrap(int sockfd, const void *buf, size_t len, int flags)
{
int ret;
ret = send(sockfd, buf, len, flags);
if (ret < 0) {
errno = socket_error();
}
return ret;
}
#undef sendto
ssize_t qemu_sendto_wrap(int sockfd, const void *buf, size_t len, int flags,
const struct sockaddr *addr, socklen_t addrlen)
{
int ret;
ret = sendto(sockfd, buf, len, flags, addr, addrlen);
if (ret < 0) {
errno = socket_error();
}
return ret;
}
#undef recv
ssize_t qemu_recv_wrap(int sockfd, void *buf, size_t len, int flags)
{
int ret;
ret = recv(sockfd, buf, len, flags);
if (ret < 0) {
errno = socket_error();
}
return ret;
}
#undef recvfrom
ssize_t qemu_recvfrom_wrap(int sockfd, void *buf, size_t len, int flags,
struct sockaddr *addr, socklen_t *addrlen)
{
int ret;
ret = recvfrom(sockfd, buf, len, flags, addr, addrlen);
if (ret < 0) {
errno = socket_error();
}
return ret;
}
util: add qemu_write_pidfile() There are variants of qemu_create_pidfile() in qemu-pr-helper and qemu-ga. Let's have a common implementation in libqemuutil. The code is initially based from pr-helper write_pidfile(), with various improvements and suggestions from Daniel Berrangé: QEMU will leave the pidfile existing on disk when it exits which initially made me think it avoids the deletion race. The app managing QEMU, however, may well delete the pidfile after it has seen QEMU exit, and even if the app locks the pidfile before deleting it, there is still a race. eg consider the following sequence QEMU 1 libvirtd QEMU 2 1. lock(pidfile) 2. exit() 3. open(pidfile) 4. lock(pidfile) 5. open(pidfile) 6. unlink(pidfile) 7. close(pidfile) 8. lock(pidfile) IOW, at step 8 the new QEMU has successfully acquired the lock, but the pidfile no longer exists on disk because it was deleted after the original QEMU exited. While we could just say no external app should ever delete the pidfile, I don't think that is satisfactory as people don't read docs, and admins don't like stale pidfiles being left around on disk. To make this robust, I think we might want to copy libvirt's approach to pidfile acquisition which runs in a loop and checks that the file on disk /after/ acquiring the lock matches the file that was locked. Then we could in fact safely let QEMU delete its own pidfiles on clean exit.. Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com> Message-Id: <20180831145314.14736-2-marcandre.lureau@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2018-08-31 14:53:12 +00:00
bool qemu_write_pidfile(const char *filename, Error **errp)
{
char buffer[128];
int len;
HANDLE file;
OVERLAPPED overlap;
BOOL ret;
memset(&overlap, 0, sizeof(overlap));
file = CreateFile(filename, GENERIC_WRITE, FILE_SHARE_READ, NULL,
OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
if (file == INVALID_HANDLE_VALUE) {
error_setg(errp, "Failed to create PID file");
return false;
}
len = snprintf(buffer, sizeof(buffer), FMT_pid "\n", (pid_t)getpid());
ret = WriteFile(file, (LPCVOID)buffer, (DWORD)len,
NULL, &overlap);
CloseHandle(file);
if (ret == 0) {
error_setg(errp, "Failed to write PID file");
return false;
}
return true;
}
size_t qemu_get_host_physmem(void)
{
MEMORYSTATUSEX statex;
statex.dwLength = sizeof(statex);
if (GlobalMemoryStatusEx(&statex)) {
return statex.ullTotalPhys;
}
return 0;
}
int qemu_msync(void *addr, size_t length, int fd)
{
/**
* Perform the sync based on the file descriptor
* The sync range will most probably be wider than the one
* requested - but it will still get the job done
*/
return qemu_fdatasync(fd);
}