mirror of
https://github.com/xemu-project/xemu.git
synced 2024-12-18 09:19:53 +00:00
d2623129a7
The only way object_property_add() can fail is when a property with the same name already exists. Since our property names are all hardcoded, failure is a programming error, and the appropriate way to handle it is passing &error_abort. Same for its variants, except for object_property_add_child(), which additionally fails when the child already has a parent. Parentage is also under program control, so this is a programming error, too. We have a bit over 500 callers. Almost half of them pass &error_abort, slightly fewer ignore errors, one test case handles errors, and the remaining few callers pass them to their own callers. The previous few commits demonstrated once again that ignoring programming errors is a bad idea. Of the few ones that pass on errors, several violate the Error API. The Error ** argument must be NULL, &error_abort, &error_fatal, or a pointer to a variable containing NULL. Passing an argument of the latter kind twice without clearing it in between is wrong: if the first call sets an error, it no longer points to NULL for the second call. ich9_pm_add_properties(), sparc32_ledma_realize(), sparc32_dma_realize(), xilinx_axidma_realize(), xilinx_enet_realize() are wrong that way. When the one appropriate choice of argument is &error_abort, letting users pick the argument is a bad idea. Drop parameter @errp and assert the preconditions instead. There's one exception to "duplicate property name is a programming error": the way object_property_add() implements the magic (and undocumented) "automatic arrayification". Don't drop @errp there. Instead, rename object_property_add() to object_property_try_add(), and add the obvious wrapper object_property_add(). Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> Message-Id: <20200505152926.18877-15-armbru@redhat.com> [Two semantic rebase conflicts resolved]
443 lines
13 KiB
C
443 lines
13 KiB
C
/*
|
|
* Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
|
|
* Copyright (c) 2016 FUJITSU LIMITED
|
|
* Copyright (c) 2016 Intel Corporation
|
|
*
|
|
* Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
|
|
*
|
|
* This work is licensed under the terms of the GNU GPL, version 2 or
|
|
* later. See the COPYING file in the top-level directory.
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
|
#include "trace.h"
|
|
#include "colo.h"
|
|
#include "net/filter.h"
|
|
#include "net/net.h"
|
|
#include "qemu/error-report.h"
|
|
#include "qom/object.h"
|
|
#include "qemu/main-loop.h"
|
|
#include "qemu/iov.h"
|
|
#include "net/checksum.h"
|
|
#include "net/colo.h"
|
|
#include "migration/colo.h"
|
|
#include "util.h"
|
|
|
|
/* QOM type name under which this filter is registered. */
#define TYPE_FILTER_REWRITER "filter-rewriter"

/* Cast a QOM object pointer to RewriterState through OBJECT_CHECK(). */
#define FILTER_COLO_REWRITER(obj) \
    OBJECT_CHECK(RewriterState, (obj), TYPE_FILTER_REWRITER)

/* Values stored in RewriterState.failover_mode. */
#define FAILOVER_MODE_ON  true
#define FAILOVER_MODE_OFF false
|
|
|
|
typedef struct RewriterState {
|
|
NetFilterState parent_obj;
|
|
NetQueue *incoming_queue;
|
|
/* hashtable to save connection */
|
|
GHashTable *connection_track_table;
|
|
bool vnet_hdr;
|
|
bool failover_mode;
|
|
} RewriterState;
|
|
|
|
/*
 * Switch the rewriter into failover mode.
 *
 * In this mode colo_rewriter_receive_iov() leaves packets of connections
 * that are not already in the tracking table untouched.
 */
static void filter_rewriter_failover_mode(RewriterState *s)
{
    s->failover_mode = FAILOVER_MODE_ON;
}
|
|
|
|
/*
 * Flush the rewriter's incoming queue; if the flush reports failure,
 * purge the packets that belong to the filter's netdev instead.
 */
static void filter_rewriter_flush(NetFilterState *nf)
{
    RewriterState *rs = FILTER_COLO_REWRITER(nf);

    if (qemu_net_queue_flush(rs->incoming_queue)) {
        return;
    }

    /* The queue could not be emptied: drop what is left. */
    qemu_net_queue_purge(rs->incoming_queue, nf->netdev);
}
|
|
|
|
/*
|
|
* Return 1 on success, if return 0 means the pkt
|
|
* is not TCP packet
|
|
*/
|
|
static int is_tcp_packet(Packet *pkt)
|
|
{
|
|
if (!parse_packet_early(pkt) &&
|
|
pkt->ip->ip_p == IPPROTO_TCP) {
|
|
return 1;
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
/* handle tcp packet from primary guest */
|
|
static int handle_primary_tcp_pkt(RewriterState *rf,
|
|
Connection *conn,
|
|
Packet *pkt, ConnectionKey *key)
|
|
{
|
|
struct tcp_hdr *tcp_pkt;
|
|
|
|
tcp_pkt = (struct tcp_hdr *)pkt->transport_header;
|
|
if (trace_event_get_state_backends(TRACE_COLO_FILTER_REWRITER_DEBUG)) {
|
|
trace_colo_filter_rewriter_pkt_info(__func__,
|
|
inet_ntoa(pkt->ip->ip_src), inet_ntoa(pkt->ip->ip_dst),
|
|
ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack),
|
|
tcp_pkt->th_flags);
|
|
trace_colo_filter_rewriter_conn_offset(conn->offset);
|
|
}
|
|
|
|
if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN)) &&
|
|
conn->tcp_state == TCPS_SYN_SENT) {
|
|
conn->tcp_state = TCPS_ESTABLISHED;
|
|
}
|
|
|
|
if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) {
|
|
/*
|
|
* we use this flag update offset func
|
|
* run once in independent tcp connection
|
|
*/
|
|
conn->tcp_state = TCPS_SYN_RECEIVED;
|
|
}
|
|
|
|
if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK)) {
|
|
if (conn->tcp_state == TCPS_SYN_RECEIVED) {
|
|
/*
|
|
* offset = secondary_seq - primary seq
|
|
* ack packet sent by guest from primary node,
|
|
* so we use th_ack - 1 get primary_seq
|
|
*/
|
|
conn->offset -= (ntohl(tcp_pkt->th_ack) - 1);
|
|
conn->tcp_state = TCPS_ESTABLISHED;
|
|
}
|
|
if (conn->offset) {
|
|
/* handle packets to the secondary from the primary */
|
|
tcp_pkt->th_ack = htonl(ntohl(tcp_pkt->th_ack) + conn->offset);
|
|
|
|
net_checksum_calculate((uint8_t *)pkt->data + pkt->vnet_hdr_len,
|
|
pkt->size - pkt->vnet_hdr_len);
|
|
}
|
|
|
|
/*
|
|
* Passive close step 3
|
|
*/
|
|
if ((conn->tcp_state == TCPS_LAST_ACK) &&
|
|
(ntohl(tcp_pkt->th_ack) == (conn->fin_ack_seq + 1))) {
|
|
conn->tcp_state = TCPS_CLOSED;
|
|
g_hash_table_remove(rf->connection_track_table, key);
|
|
}
|
|
}
|
|
|
|
if ((tcp_pkt->th_flags & TH_FIN) == TH_FIN) {
|
|
/*
|
|
* Passive close.
|
|
* Step 1:
|
|
* The *server* side of this connect is VM, *client* tries to close
|
|
* the connection. We will into CLOSE_WAIT status.
|
|
*
|
|
* Step 2:
|
|
* In this step we will into LAST_ACK status.
|
|
*
|
|
* We got 'fin=1, ack=1' packet from server side, we need to
|
|
* record the seq of 'fin=1, ack=1' packet.
|
|
*
|
|
* Step 3:
|
|
* We got 'ack=1' packets from client side, it acks 'fin=1, ack=1'
|
|
* packet from server side. From this point, we can ensure that there
|
|
* will be no packets in the connection, except that, some errors
|
|
* happen between the path of 'filter object' and vNIC, if this rare
|
|
* case really happen, we can still create a new connection,
|
|
* So it is safe to remove the connection from connection_track_table.
|
|
*
|
|
*/
|
|
if (conn->tcp_state == TCPS_ESTABLISHED) {
|
|
conn->tcp_state = TCPS_CLOSE_WAIT;
|
|
}
|
|
|
|
/*
|
|
* Active close step 2.
|
|
*/
|
|
if (conn->tcp_state == TCPS_FIN_WAIT_1) {
|
|
/*
|
|
* For simplify implementation, we needn't wait 2MSL time
|
|
* in filter rewriter. Because guest kernel will track the
|
|
* TCP status and wait 2MSL time, if client resend the FIN
|
|
* packet, guest will apply the last ACK too.
|
|
* So, we skip the TCPS_TIME_WAIT state here and go straight
|
|
* to TCPS_CLOSED state.
|
|
*/
|
|
conn->tcp_state = TCPS_CLOSED;
|
|
g_hash_table_remove(rf->connection_track_table, key);
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* handle tcp packet from secondary guest */
|
|
static int handle_secondary_tcp_pkt(RewriterState *rf,
|
|
Connection *conn,
|
|
Packet *pkt, ConnectionKey *key)
|
|
{
|
|
struct tcp_hdr *tcp_pkt;
|
|
|
|
tcp_pkt = (struct tcp_hdr *)pkt->transport_header;
|
|
|
|
if (trace_event_get_state_backends(TRACE_COLO_FILTER_REWRITER_DEBUG)) {
|
|
trace_colo_filter_rewriter_pkt_info(__func__,
|
|
inet_ntoa(pkt->ip->ip_src), inet_ntoa(pkt->ip->ip_dst),
|
|
ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack),
|
|
tcp_pkt->th_flags);
|
|
trace_colo_filter_rewriter_conn_offset(conn->offset);
|
|
}
|
|
|
|
if (conn->tcp_state == TCPS_SYN_RECEIVED &&
|
|
((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) {
|
|
/*
|
|
* save offset = secondary_seq and then
|
|
* in handle_primary_tcp_pkt make offset
|
|
* = secondary_seq - primary_seq
|
|
*/
|
|
conn->offset = ntohl(tcp_pkt->th_seq);
|
|
}
|
|
|
|
/* VM active connect */
|
|
if (conn->tcp_state == TCPS_CLOSED &&
|
|
((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) {
|
|
conn->tcp_state = TCPS_SYN_SENT;
|
|
}
|
|
|
|
if ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK) {
|
|
/* Only need to adjust seq while offset is Non-zero */
|
|
if (conn->offset) {
|
|
/* handle packets to the primary from the secondary*/
|
|
tcp_pkt->th_seq = htonl(ntohl(tcp_pkt->th_seq) - conn->offset);
|
|
|
|
net_checksum_calculate((uint8_t *)pkt->data + pkt->vnet_hdr_len,
|
|
pkt->size - pkt->vnet_hdr_len);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Passive close step 2:
|
|
*/
|
|
if (conn->tcp_state == TCPS_CLOSE_WAIT &&
|
|
(tcp_pkt->th_flags & (TH_ACK | TH_FIN)) == (TH_ACK | TH_FIN)) {
|
|
conn->fin_ack_seq = ntohl(tcp_pkt->th_seq);
|
|
conn->tcp_state = TCPS_LAST_ACK;
|
|
}
|
|
|
|
/*
|
|
* Active close
|
|
*
|
|
* Step 1:
|
|
* The *server* side of this connect is VM, *server* tries to close
|
|
* the connection.
|
|
*
|
|
* Step 2:
|
|
* We will into CLOSE_WAIT status.
|
|
* We simplify the TCPS_FIN_WAIT_2, TCPS_TIME_WAIT and
|
|
* CLOSING status.
|
|
*/
|
|
if (conn->tcp_state == TCPS_ESTABLISHED &&
|
|
(tcp_pkt->th_flags & (TH_ACK | TH_FIN)) == TH_FIN) {
|
|
conn->tcp_state = TCPS_FIN_WAIT_1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
|
|
NetClientState *sender,
|
|
unsigned flags,
|
|
const struct iovec *iov,
|
|
int iovcnt,
|
|
NetPacketSent *sent_cb)
|
|
{
|
|
RewriterState *s = FILTER_COLO_REWRITER(nf);
|
|
Connection *conn;
|
|
ConnectionKey key;
|
|
Packet *pkt;
|
|
ssize_t size = iov_size(iov, iovcnt);
|
|
ssize_t vnet_hdr_len = 0;
|
|
char *buf = g_malloc0(size);
|
|
|
|
iov_to_buf(iov, iovcnt, 0, buf, size);
|
|
|
|
if (s->vnet_hdr) {
|
|
vnet_hdr_len = nf->netdev->vnet_hdr_len;
|
|
}
|
|
|
|
pkt = packet_new(buf, size, vnet_hdr_len);
|
|
g_free(buf);
|
|
|
|
/*
|
|
* if we get tcp packet
|
|
* we will rewrite it to make secondary guest's
|
|
* connection established successfully
|
|
*/
|
|
if (pkt && is_tcp_packet(pkt)) {
|
|
|
|
fill_connection_key(pkt, &key);
|
|
|
|
if (sender == nf->netdev) {
|
|
/*
|
|
* We need make tcp TX and RX packet
|
|
* into one connection.
|
|
*/
|
|
reverse_connection_key(&key);
|
|
}
|
|
|
|
/* After failover we needn't change new TCP packet */
|
|
if (s->failover_mode &&
|
|
!connection_has_tracked(s->connection_track_table, &key)) {
|
|
goto out;
|
|
}
|
|
|
|
conn = connection_get(s->connection_track_table,
|
|
&key,
|
|
NULL);
|
|
|
|
if (sender == nf->netdev) {
|
|
/* NET_FILTER_DIRECTION_TX */
|
|
if (!handle_primary_tcp_pkt(s, conn, pkt, &key)) {
|
|
qemu_net_queue_send(s->incoming_queue, sender, 0,
|
|
(const uint8_t *)pkt->data, pkt->size, NULL);
|
|
packet_destroy(pkt, NULL);
|
|
pkt = NULL;
|
|
/*
|
|
* We block the packet here,after rewrite pkt
|
|
* and will send it
|
|
*/
|
|
return 1;
|
|
}
|
|
} else {
|
|
/* NET_FILTER_DIRECTION_RX */
|
|
if (!handle_secondary_tcp_pkt(s, conn, pkt, &key)) {
|
|
qemu_net_queue_send(s->incoming_queue, sender, 0,
|
|
(const uint8_t *)pkt->data, pkt->size, NULL);
|
|
packet_destroy(pkt, NULL);
|
|
pkt = NULL;
|
|
/*
|
|
* We block the packet here,after rewrite pkt
|
|
* and will send it
|
|
*/
|
|
return 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
out:
|
|
packet_destroy(pkt, NULL);
|
|
pkt = NULL;
|
|
return 0;
|
|
}
|
|
|
|
/*
 * g_hash_table_foreach() callback: clear the sequence offset of the
 * Connection stored as @value.  @key and @user_data are unused.
 */
static void reset_seq_offset(gpointer key, gpointer value, gpointer user_data)
{
    ((Connection *)value)->offset = 0;
}
|
|
|
|
/*
 * g_hash_table_find() predicate: true when the Connection stored as
 * @value has a non-zero sequence offset.  @key and @user_data are unused.
 */
static gboolean offset_is_nonzero(gpointer key,
                                  gpointer value,
                                  gpointer user_data)
{
    const Connection *tracked = value;

    return tracked->offset != 0;
}
|
|
|
|
static void colo_rewriter_handle_event(NetFilterState *nf, int event,
|
|
Error **errp)
|
|
{
|
|
RewriterState *rs = FILTER_COLO_REWRITER(nf);
|
|
|
|
switch (event) {
|
|
case COLO_EVENT_CHECKPOINT:
|
|
g_hash_table_foreach(rs->connection_track_table,
|
|
reset_seq_offset, NULL);
|
|
break;
|
|
case COLO_EVENT_FAILOVER:
|
|
if (!g_hash_table_find(rs->connection_track_table,
|
|
offset_is_nonzero, NULL)) {
|
|
filter_rewriter_failover_mode(rs);
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
 * cleanup hook of the rewriter filter: release what
 * colo_rewriter_setup() created.
 *
 * Both resources are checked for NULL first, so the hook is harmless
 * when one of them was never created, and the pointers are cleared so
 * nothing dangling is left behind in the object.
 */
static void colo_rewriter_cleanup(NetFilterState *nf)
{
    RewriterState *s = FILTER_COLO_REWRITER(nf);

    /* flush packets */
    if (s->incoming_queue) {
        filter_rewriter_flush(nf);
        g_free(s->incoming_queue);
        s->incoming_queue = NULL;
    }

    /*
     * Destroying the table runs the key and value destructors it was
     * created with, releasing every tracked connection.
     */
    if (s->connection_track_table) {
        g_hash_table_destroy(s->connection_track_table);
        s->connection_track_table = NULL;
    }
}
|
|
|
|
/*
 * setup hook of the rewriter filter: create the queue that rewritten
 * packets are sent to and the connection tracking table.
 *
 * The table frees its keys with g_free() and its values with
 * connection_destroy().  @errp is not used.
 */
static void colo_rewriter_setup(NetFilterState *nf, Error **errp)
{
    RewriterState *rs = FILTER_COLO_REWRITER(nf);

    rs->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf);
    rs->connection_track_table =
        g_hash_table_new_full(connection_key_hash, connection_key_equal,
                              g_free, connection_destroy);
}
|
|
|
|
/* Getter of the "vnet_hdr_support" property.  @errp is not used. */
static bool filter_rewriter_get_vnet_hdr(Object *obj, Error **errp)
{
    return FILTER_COLO_REWRITER(obj)->vnet_hdr;
}
|
|
|
|
static void filter_rewriter_set_vnet_hdr(Object *obj,
|
|
bool value,
|
|
Error **errp)
|
|
{
|
|
RewriterState *s = FILTER_COLO_REWRITER(obj);
|
|
|
|
s->vnet_hdr = value;
|
|
}
|
|
|
|
/*
 * Instance initializer: start with vnet header support and failover
 * mode both off, and expose vnet_hdr as the boolean property
 * "vnet_hdr_support".
 */
static void filter_rewriter_init(Object *obj)
{
    RewriterState *rs = FILTER_COLO_REWRITER(obj);

    rs->failover_mode = FAILOVER_MODE_OFF;
    rs->vnet_hdr = false;

    object_property_add_bool(obj, "vnet_hdr_support",
                             filter_rewriter_get_vnet_hdr,
                             filter_rewriter_set_vnet_hdr);
}
|
|
|
|
/* Class initializer: install the rewriter's NetFilterClass hooks. */
static void colo_rewriter_class_init(ObjectClass *oc, void *data)
{
    NetFilterClass *klass = NETFILTER_CLASS(oc);

    klass->receive_iov = colo_rewriter_receive_iov;
    klass->handle_event = colo_rewriter_handle_event;
    klass->setup = colo_rewriter_setup;
    klass->cleanup = colo_rewriter_cleanup;
}
|
|
|
|
static const TypeInfo colo_rewriter_info = {
|
|
.name = TYPE_FILTER_REWRITER,
|
|
.parent = TYPE_NETFILTER,
|
|
.class_init = colo_rewriter_class_init,
|
|
.instance_init = filter_rewriter_init,
|
|
.instance_size = sizeof(RewriterState),
|
|
};
|
|
|
|
static void register_types(void)
|
|
{
|
|
type_register_static(&colo_rewriter_info);
|
|
}
|
|
|
|
type_init(register_types);
|