From 631e63a9f346cb657761ae22138f294718696501 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Sat, 11 Feb 2017 23:02:20 -0700
Subject: [PATCH] vmbus: change to per channel tasklet

Make the event handling tasklet per channel rather than per-cpu.
This allows for better fairness when getting lots of data on the same
cpu.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel.c      |  2 +-
 drivers/hv/channel_mgmt.c | 16 ++++----
 drivers/hv/connection.c   | 78 ++-------------------------------------
 drivers/hv/hv.c           |  2 -
 drivers/hv/hyperv_vmbus.h |  1 -
 drivers/hv/vmbus_drv.c    | 58 +++++++++++++++++++++++++----
 include/linux/hyperv.h    |  3 +-
 7 files changed, 64 insertions(+), 96 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 789c75f6df26..18cc1c78260d 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -530,7 +530,7 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
 	int ret;
 
 	/*
-	 * process_chn_event(), running in the tasklet, can race
+	 * vmbus_on_event(), running in the tasklet, can race
 	 * with vmbus_close_internal() in the case of SMP guest, e.g., when
 	 * the former is accessing channel->inbound.ring_buffer, the latter
 	 * could be freeing the ring_buffer pages.
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 579ad2560a39..2f6270d76b79 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -339,6 +339,9 @@ static struct vmbus_channel *alloc_channel(void)
 	INIT_LIST_HEAD(&channel->sc_list);
 	INIT_LIST_HEAD(&channel->percpu_list);
 
+	tasklet_init(&channel->callback_event,
+		     vmbus_on_event, (unsigned long)channel);
+
 	return channel;
 }
 
@@ -347,6 +350,7 @@ static struct vmbus_channel *alloc_channel(void)
  */
 static void free_channel(struct vmbus_channel *channel)
 {
+	tasklet_kill(&channel->callback_event);
 	kfree(channel);
 }
 
@@ -380,21 +384,15 @@ static void vmbus_release_relid(u32 relid)
 
 void hv_event_tasklet_disable(struct vmbus_channel *channel)
 {
-	struct hv_per_cpu_context *hv_cpu;
-
-	hv_cpu = per_cpu_ptr(hv_context.cpu_context, channel->target_cpu);
-	tasklet_disable(&hv_cpu->event_dpc);
+	tasklet_disable(&channel->callback_event);
 }
 
 void hv_event_tasklet_enable(struct vmbus_channel *channel)
 {
-	struct hv_per_cpu_context *hv_cpu;
-
-	hv_cpu = per_cpu_ptr(hv_context.cpu_context, channel->target_cpu);
-	tasklet_enable(&hv_cpu->event_dpc);
+	tasklet_enable(&channel->callback_event);
 
 	/* In case there is any pending event */
-	tasklet_schedule(&hv_cpu->event_dpc);
+	tasklet_schedule(&channel->callback_event);
 }
 
 void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 158f12823baf..27e72dc07e12 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -259,29 +259,6 @@ void vmbus_disconnect(void)
 	vmbus_connection.monitor_pages[1] = NULL;
 }
 
-/*
- * Map the given relid to the corresponding channel based on the
- * per-cpu list of channels that have been affinitized to this CPU.
- * This will be used in the channel callback path as we can do this
- * mapping in a lock-free fashion.
- */
-static struct vmbus_channel *pcpu_relid2channel(u32 relid)
-{
-	struct hv_per_cpu_context *hv_cpu
-		= this_cpu_ptr(hv_context.cpu_context);
-	struct vmbus_channel *found_channel = NULL;
-	struct vmbus_channel *channel;
-
-	list_for_each_entry(channel, &hv_cpu->chan_list, percpu_list) {
-		if (channel->offermsg.child_relid == relid) {
-			found_channel = channel;
-			break;
-		}
-	}
-
-	return found_channel;
-}
-
 /*
  * relid2channel - Get the channel object given its
  * child relative id (ie channel id)
@@ -318,24 +295,15 @@ struct vmbus_channel *relid2channel(u32 relid)
 }
 
 /*
- * process_chn_event - Process a channel event notification
+ * vmbus_on_event - Process a channel event notification
  */
-static void process_chn_event(u32 relid)
+void vmbus_on_event(unsigned long data)
 {
-	struct vmbus_channel *channel;
+	struct vmbus_channel *channel = (void *) data;
 	void *arg;
 	bool read_state;
 	u32 bytes_to_read;
 
-	/*
-	 * Find the channel based on this relid and invokes the
-	 * channel callback to process the event
-	 */
-	channel = pcpu_relid2channel(relid);
-
-	if (!channel)
-		return;
-
 	/*
 	 * A channel once created is persistent even when there
 	 * is no driver handling the device. An unloading driver
@@ -344,7 +312,6 @@ static void process_chn_event(u32 relid)
 	 * Thus, checking and invoking the driver specific callback takes
 	 * care of orderly unloading of the driver.
 	 */
-
 	if (channel->onchannel_callback != NULL) {
 		arg = channel->channel_callback_context;
 		read_state = channel->batched_reading;
@@ -372,45 +339,6 @@ static void process_chn_event(u32 relid)
 	}
 }
 
-/*
- * vmbus_on_event - Handler for events
- */
-void vmbus_on_event(unsigned long data)
-{
-	struct hv_per_cpu_context *hv_cpu = (void *)data;
-	unsigned long *recv_int_page;
-	u32 maxbits, relid;
-
-	if (vmbus_proto_version < VERSION_WIN8) {
-		maxbits = MAX_NUM_CHANNELS_SUPPORTED;
-		recv_int_page = vmbus_connection.recv_int_page;
-	} else {
-		/*
-		 * When the host is win8 and beyond, the event page
-		 * can be directly checked to get the id of the channel
-		 * that has the interrupt pending.
-		 */
-		void *page_addr = hv_cpu->synic_event_page;
-		union hv_synic_event_flags *event
-			= (union hv_synic_event_flags *)page_addr +
-						 VMBUS_MESSAGE_SINT;
-
-		maxbits = HV_EVENT_FLAGS_COUNT;
-		recv_int_page = event->flags;
-	}
-
-	if (unlikely(!recv_int_page))
-		return;
-
-	for_each_set_bit(relid, recv_int_page, maxbits) {
-		if (sync_test_and_clear_bit(relid, recv_int_page)) {
-			/* Special case - vmbus channel protocol msg */
-			if (relid != 0)
-				process_chn_event(relid);
-		}
-	}
-}
-
 /*
  * vmbus_post_msg - Send a msg on the vmbus's message connection
  */
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 9fc2355a60ea..665a64f1611e 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -156,8 +156,6 @@ int hv_synic_alloc(void)
 			= per_cpu_ptr(hv_context.cpu_context, cpu);
 
 		memset(hv_cpu, 0, sizeof(*hv_cpu));
-		tasklet_init(&hv_cpu->event_dpc,
-			     vmbus_on_event, (unsigned long) hv_cpu);
 		tasklet_init(&hv_cpu->msg_dpc,
 			     vmbus_on_msg_dpc, (unsigned long) hv_cpu);
 
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index c8ce9ab2e16a..558a798c407c 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -206,7 +206,6 @@ struct hv_per_cpu_context {
 	 * we will manage the tasklet that handles events messages on a per CPU
 	 * basis.
 	 */
-	struct tasklet_struct event_dpc;
 	struct tasklet_struct msg_dpc;
 
 	/*
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index cf8540c1df4a..eaf1a10b0245 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -885,6 +885,56 @@ msg_handled:
 	vmbus_signal_eom(msg, message_type);
 }
 
+
+/*
+ * Schedule all channels with events pending
+ */
+static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
+{
+	unsigned long *recv_int_page;
+	u32 maxbits, relid;
+
+	if (vmbus_proto_version < VERSION_WIN8) {
+		maxbits = MAX_NUM_CHANNELS_SUPPORTED;
+		recv_int_page = vmbus_connection.recv_int_page;
+	} else {
+		/*
+		 * When the host is win8 and beyond, the event page
+		 * can be directly checked to get the id of the channel
+		 * that has the interrupt pending.
+		 */
+		void *page_addr = hv_cpu->synic_event_page;
+		union hv_synic_event_flags *event
+			= (union hv_synic_event_flags *)page_addr +
+						 VMBUS_MESSAGE_SINT;
+
+		maxbits = HV_EVENT_FLAGS_COUNT;
+		recv_int_page = event->flags;
+	}
+
+	if (unlikely(!recv_int_page))
+		return;
+
+	for_each_set_bit(relid, recv_int_page, maxbits) {
+		struct vmbus_channel *channel;
+
+		if (!sync_test_and_clear_bit(relid, recv_int_page))
+			continue;
+
+		/* Special case - vmbus channel protocol msg */
+		if (relid == 0)
+			continue;
+
+		/* Find channel based on relid */
+		list_for_each_entry(channel, &hv_cpu->chan_list, percpu_list) {
+			if (channel->offermsg.child_relid == relid) {
+				tasklet_schedule(&channel->callback_event);
+				break;
+			}
+		}
+	}
+}
+
 static void vmbus_isr(void)
 {
 	struct hv_per_cpu_context *hv_cpu
@@ -922,8 +972,7 @@ static void vmbus_isr(void)
 	}
 
 	if (handled)
-		tasklet_schedule(&hv_cpu->event_dpc);
-
+		vmbus_chan_sched(hv_cpu);
 
 	page_addr = hv_cpu->synic_message_page;
 	msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
@@ -1536,12 +1585,7 @@ static void __exit vmbus_exit(void)
 						 &hyperv_panic_block);
 	}
 	bus_unregister(&hv_bus);
-	for_each_online_cpu(cpu) {
-		struct hv_per_cpu_context *hv_cpu
-			= per_cpu_ptr(hv_context.cpu_context, cpu);
 
-		tasklet_kill(&hv_cpu->event_dpc);
-	}
 	cpuhp_remove_state(hyperv_cpuhp_online);
 	hv_synic_free();
 	acpi_bus_unregister_driver(&vmbus_acpi_driver);
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index c9b6d533958f..69afc9337c0d 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -35,7 +35,7 @@
 #include <linux/completion.h>
 #include <linux/device.h>
 #include <linux/mod_devicetable.h>
-
+#include <linux/interrupt.h>
 
 #define MAX_PAGE_BUFFER_COUNT				32
 #define MAX_MULTIPAGE_BUFFER_COUNT			32 /* 128K */
@@ -743,6 +743,7 @@ struct vmbus_channel {
 	struct vmbus_close_msg close_msg;
 
 	/* Channel callback's invoked in softirq context */
+	struct tasklet_struct callback_event;
 	void (*onchannel_callback)(void *context);
 	void *channel_callback_context;