diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index eb530b4128ba..2ed88122be93 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -565,6 +565,7 @@ bool "s390 guest support (EXPERIMENTAL)" depends on 64BIT && EXPERIMENTAL select VIRTIO select VIRTIO_RING + select VIRTIO_CONSOLE help Select this option if you want to run the kernel under s390 linux endmenu diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index b358e18273b0..62122bad1e33 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -54,6 +54,7 @@ #include #include #include +#include long psw_kernel_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY | PSW_MASK_MCHECK | PSW_DEFAULT_KEY); @@ -766,7 +767,8 @@ setup_arch(char **cmdline_p) printk("We are running under VM (64 bit mode)\n"); else if (MACHINE_IS_KVM) { printk("We are running under KVM (64 bit mode)\n"); - add_preferred_console("ttyS", 1, NULL); + add_preferred_console("hvc", 0, NULL); + s390_virtio_console_init(); } else printk("We are running native (64 bit mode)\n"); #endif /* CONFIG_64BIT */ diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index dd7ea203f940..42251095134f 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -196,6 +196,7 @@ static int virtblk_probe(struct virtio_device *vdev) int err; u64 cap; u32 v; + u32 blk_size; if (index_to_minor(index) >= 1 << MINORBITS) return -ENOSPC; @@ -290,6 +291,13 @@ static int virtblk_probe(struct virtio_device *vdev) if (!err) blk_queue_max_hw_segments(vblk->disk->queue, v); + /* Host can optionally specify the block size of the device */ + err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE, + offsetof(struct virtio_blk_config, blk_size), + &blk_size); + if (!err) + blk_queue_hardsect_size(vblk->disk->queue, blk_size); + add_disk(vblk->disk); return 0; @@ -330,7 +338,7 @@ static struct virtio_device_id id_table[] = { static unsigned int features[] = { VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, - VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, + VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, }; static struct virtio_driver virtio_blk = { diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 67b07576f8bf..6c070dc5f2d4 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -578,11 +578,14 @@ config HVC_DRIVER It will automatically be selected if one of the back-end console drivers is selected. +config HVC_IRQ + bool config HVC_CONSOLE bool "pSeries Hypervisor Virtual Console support" depends on PPC_PSERIES select HVC_DRIVER + select HVC_IRQ help pSeries machines when partitioned support a hypervisor virtual console. This driver allows each pSeries partition to have a console @@ -593,6 +596,7 @@ config HVC_ISERIES depends on PPC_ISERIES default y select HVC_DRIVER + select HVC_IRQ help iSeries machines support a hypervisor virtual console. @@ -614,13 +618,18 @@ config HVC_XEN bool "Xen Hypervisor Console support" depends on XEN select HVC_DRIVER + select HVC_IRQ default y help Xen virtual console device driver config VIRTIO_CONSOLE - bool + tristate "Virtio console" + depends on VIRTIO select HVC_DRIVER + help + Virtio console for use with lguest and other hypervisors. + config HVCS tristate "IBM Hypervisor Virtual Console Server support" diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 4b6e736cfa02..eb02c3506800 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -48,6 +48,7 @@ obj-$(CONFIG_HVC_ISERIES) += hvc_iseries.o obj-$(CONFIG_HVC_RTAS) += hvc_rtas.o obj-$(CONFIG_HVC_BEAT) += hvc_beat.o obj-$(CONFIG_HVC_DRIVER) += hvc_console.o +obj-$(CONFIG_HVC_IRQ) += hvc_irq.o obj-$(CONFIG_HVC_XEN) += hvc_xen.o obj-$(CONFIG_VIRTIO_CONSOLE) += virtio_console.o obj-$(CONFIG_RAW_DRIVER) += raw.o diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index 2f9759d625cc..02aac104842d 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -75,23 +74,6 @@ static int hvc_init(void); static int sysrq_pressed; #endif -struct hvc_struct { - spinlock_t lock; - int index; - struct tty_struct *tty; - unsigned int count; - int do_wakeup; - char *outbuf; - int outbuf_size; - int n_outbuf; - uint32_t vtermno; - struct hv_ops *ops; - int irq_requested; - int irq; - struct list_head next; - struct kref kref; /* ref count & hvc_struct lifetime */ -}; - /* dynamic list of hvc_struct instances */ static LIST_HEAD(hvc_structs); @@ -298,27 +280,15 @@ int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops) return 0; } +EXPORT_SYMBOL_GPL(hvc_instantiate); /* Wake the sleeping khvcd */ -static void hvc_kick(void) +void hvc_kick(void) { hvc_kicked = 1; wake_up_process(hvc_task); } - -static int hvc_poll(struct hvc_struct *hp); - -/* - * NOTE: This API isn't used if the console adapter doesn't support interrupts. - * In this case the console is poll driven. - */ -static irqreturn_t hvc_handle_interrupt(int irq, void *dev_instance) -{ - /* if hvc_poll request a repoll, then kick the hvcd thread */ - if (hvc_poll(dev_instance)) - hvc_kick(); - return IRQ_HANDLED; -} +EXPORT_SYMBOL_GPL(hvc_kick); static void hvc_unthrottle(struct tty_struct *tty) { @@ -333,7 +303,6 @@ static int hvc_open(struct tty_struct *tty, struct file * filp) { struct hvc_struct *hp; unsigned long flags; - int irq = 0; int rc = 0; /* Auto increments kref reference if found. */ @@ -352,18 +321,15 @@ static int hvc_open(struct tty_struct *tty, struct file * filp) tty->low_latency = 1; /* Makes flushes to ldisc synchronous. */ hp->tty = tty; - /* Save for request_irq outside of spin_lock. */ - irq = hp->irq; - if (irq) - hp->irq_requested = 1; + + if (hp->ops->notifier_add) + rc = hp->ops->notifier_add(hp, hp->data); spin_unlock_irqrestore(&hp->lock, flags); - /* check error, fallback to non-irq */ - if (irq) - rc = request_irq(irq, hvc_handle_interrupt, IRQF_DISABLED, "hvc_console", hp); + /* - * If the request_irq() fails and we return an error. The tty layer + * If the notifier fails we return an error. The tty layer * will call hvc_close() after a failed open but we don't want to clean * up there so we'll clean up here and clear out the previously set * tty fields and return the kref reference. @@ -371,7 +337,6 @@ static int hvc_open(struct tty_struct *tty, struct file * filp) if (rc) { spin_lock_irqsave(&hp->lock, flags); hp->tty = NULL; - hp->irq_requested = 0; spin_unlock_irqrestore(&hp->lock, flags); tty->driver_data = NULL; kref_put(&hp->kref, destroy_hvc_struct); @@ -386,7 +351,6 @@ static int hvc_open(struct tty_struct *tty, struct file * filp) static void hvc_close(struct tty_struct *tty, struct file * filp) { struct hvc_struct *hp; - int irq = 0; unsigned long flags; if (tty_hung_up_p(filp)) @@ -404,9 +368,8 @@ static void hvc_close(struct tty_struct *tty, struct file * filp) spin_lock_irqsave(&hp->lock, flags); if (--hp->count == 0) { - if (hp->irq_requested) - irq = hp->irq; - hp->irq_requested = 0; + if (hp->ops->notifier_del) + hp->ops->notifier_del(hp, hp->data); /* We are done with the tty pointer now. */ hp->tty = NULL; @@ -418,10 +381,6 @@ static void hvc_close(struct tty_struct *tty, struct file * filp) * waking periodically to check chars_in_buffer(). */ tty_wait_until_sent(tty, HVC_CLOSE_WAIT); - - if (irq) - free_irq(irq, hp); - } else { if (hp->count < 0) printk(KERN_ERR "hvc_close %X: oops, count is %d\n", @@ -436,7 +395,6 @@ static void hvc_hangup(struct tty_struct *tty) { struct hvc_struct *hp = tty->driver_data; unsigned long flags; - int irq = 0; int temp_open_count; if (!hp) @@ -458,13 +416,12 @@ static void hvc_hangup(struct tty_struct *tty) hp->count = 0; hp->n_outbuf = 0; hp->tty = NULL; - if (hp->irq_requested) - /* Saved for use outside of spin_lock. */ - irq = hp->irq; - hp->irq_requested = 0; + + if (hp->ops->notifier_del) + hp->ops->notifier_del(hp, hp->data); + spin_unlock_irqrestore(&hp->lock, flags); - if (irq) - free_irq(irq, hp); + while(temp_open_count) { --temp_open_count; kref_put(&hp->kref, destroy_hvc_struct); @@ -575,7 +532,7 @@ static u32 timeout = MIN_TIMEOUT; #define HVC_POLL_READ 0x00000001 #define HVC_POLL_WRITE 0x00000002 -static int hvc_poll(struct hvc_struct *hp) +int hvc_poll(struct hvc_struct *hp) { struct tty_struct *tty; int i, n, poll_mask = 0; @@ -602,10 +559,10 @@ static int hvc_poll(struct hvc_struct *hp) if (test_bit(TTY_THROTTLED, &tty->flags)) goto throttled; - /* If we aren't interrupt driven and aren't throttled, we always + /* If we aren't notifier driven and aren't throttled, we always * request a reschedule */ - if (hp->irq == 0) + if (!hp->irq_requested) poll_mask |= HVC_POLL_READ; /* Read data if any */ @@ -674,6 +631,7 @@ static int hvc_poll(struct hvc_struct *hp) return poll_mask; } +EXPORT_SYMBOL_GPL(hvc_poll); /* * This kthread is either polling or interrupt driven. This is determined by @@ -733,7 +691,7 @@ static const struct tty_operations hvc_ops = { .chars_in_buffer = hvc_chars_in_buffer, }; -struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int irq, +struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int data, struct hv_ops *ops, int outbuf_size) { struct hvc_struct *hp; @@ -754,7 +712,7 @@ struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int irq, memset(hp, 0x00, sizeof(*hp)); hp->vtermno = vtermno; - hp->irq = irq; + hp->data = data; hp->ops = ops; hp->outbuf_size = outbuf_size; hp->outbuf = &((char *)hp)[ALIGN(sizeof(*hp), sizeof(long))]; @@ -784,6 +742,7 @@ struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int irq, return hp; } +EXPORT_SYMBOL_GPL(hvc_alloc); int __devexit hvc_remove(struct hvc_struct *hp) { diff --git a/drivers/char/hvc_console.h b/drivers/char/hvc_console.h index 42ffb17e15df..d9ce10915625 100644 --- a/drivers/char/hvc_console.h +++ b/drivers/char/hvc_console.h @@ -26,6 +26,7 @@ #ifndef HVC_CONSOLE_H #define HVC_CONSOLE_H +#include /* * This is the max number of console adapters that can/will be found as @@ -42,24 +43,50 @@ */ #define HVC_ALLOC_TTY_ADAPTERS 8 +struct hvc_struct { + spinlock_t lock; + int index; + struct tty_struct *tty; + unsigned int count; + int do_wakeup; + char *outbuf; + int outbuf_size; + int n_outbuf; + uint32_t vtermno; + struct hv_ops *ops; + int irq_requested; + int data; + struct list_head next; + struct kref kref; /* ref count & hvc_struct lifetime */ +}; /* implemented by a low level driver */ struct hv_ops { int (*get_chars)(uint32_t vtermno, char *buf, int count); int (*put_chars)(uint32_t vtermno, const char *buf, int count); -}; -struct hvc_struct; + /* Callbacks for notification. Called in open and close */ + int (*notifier_add)(struct hvc_struct *hp, int irq); + void (*notifier_del)(struct hvc_struct *hp, int irq); +}; /* Register a vterm and a slot index for use as a console (console_init) */ extern int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops); /* register a vterm for hvc tty operation (module_init or hotplug add) */ -extern struct hvc_struct * __devinit hvc_alloc(uint32_t vtermno, int irq, +extern struct hvc_struct * __devinit hvc_alloc(uint32_t vtermno, int data, struct hv_ops *ops, int outbuf_size); -/* remove a vterm from hvc tty operation (modele_exit or hotplug remove) */ +/* remove a vterm from hvc tty operation (module_exit or hotplug remove) */ extern int __devexit hvc_remove(struct hvc_struct *hp); +/* data available */ +int hvc_poll(struct hvc_struct *hp); +void hvc_kick(void); + +/* default notifier for irq based notification */ +extern int notifier_add_irq(struct hvc_struct *hp, int data); +extern void notifier_del_irq(struct hvc_struct *hp, int data); + #if defined(CONFIG_XMON) && defined(CONFIG_SMP) #include diff --git a/drivers/char/hvc_irq.c b/drivers/char/hvc_irq.c new file mode 100644 index 000000000000..73a59cdb8947 --- /dev/null +++ b/drivers/char/hvc_irq.c @@ -0,0 +1,44 @@ +/* + * Copyright IBM Corp. 2001,2008 + * + * This file contains the IRQ specific code for hvc_console + * + */ + +#include + +#include "hvc_console.h" + +static irqreturn_t hvc_handle_interrupt(int irq, void *dev_instance) +{ + /* if hvc_poll request a repoll, then kick the hvcd thread */ + if (hvc_poll(dev_instance)) + hvc_kick(); + return IRQ_HANDLED; +} + +/* + * For IRQ based systems these callbacks can be used + */ +int notifier_add_irq(struct hvc_struct *hp, int irq) +{ + int rc; + + if (!irq) { + hp->irq_requested = 0; + return 0; + } + rc = request_irq(irq, hvc_handle_interrupt, IRQF_DISABLED, + "hvc_console", hp); + if (!rc) + hp->irq_requested = 1; + return rc; +} + +void notifier_del_irq(struct hvc_struct *hp, int irq) +{ + if (!irq) + return; + free_irq(irq, hp); + hp->irq_requested = 0; +} diff --git a/drivers/char/hvc_iseries.c b/drivers/char/hvc_iseries.c index a08f8f981c11..b71c610fe5ae 100644 --- a/drivers/char/hvc_iseries.c +++ b/drivers/char/hvc_iseries.c @@ -200,6 +200,8 @@ done: static struct hv_ops hvc_get_put_ops = { .get_chars = get_chars, .put_chars = put_chars, + .notifier_add = notifier_add_irq, + .notifier_del = notifier_del_irq, }; static int __devinit hvc_vio_probe(struct vio_dev *vdev, diff --git a/drivers/char/hvc_vio.c b/drivers/char/hvc_vio.c index 79711aa4b41d..93f3840c1682 100644 --- a/drivers/char/hvc_vio.c +++ b/drivers/char/hvc_vio.c @@ -80,6 +80,8 @@ static int filtered_get_chars(uint32_t vtermno, char *buf, int count) static struct hv_ops hvc_get_put_ops = { .get_chars = filtered_get_chars, .put_chars = hvc_put_chars, + .notifier_add = notifier_add_irq, + .notifier_del = notifier_del_irq, }; static int __devinit hvc_vio_probe(struct vio_dev *vdev, diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c index db2ae4216279..6b70aa66a587 100644 --- a/drivers/char/hvc_xen.c +++ b/drivers/char/hvc_xen.c @@ -100,6 +100,8 @@ static int read_console(uint32_t vtermno, char *buf, int len) static struct hv_ops hvc_ops = { .get_chars = read_console, .put_chars = write_console, + .notifier_add = notifier_add_irq, + .notifier_del = notifier_del_irq, }; static int __init xen_init(void) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index dc17fe3a88bc..d0f4eb6fdb7f 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -46,6 +46,9 @@ static char *in, *inbuf; /* The operations for our console. */ static struct hv_ops virtio_cons; +/* The hvc device */ +static struct hvc_struct *hvc; + /*D:310 The put_chars() callback is pretty straightforward. * * We turn the characters into a scatter-gather list, add it to the output @@ -134,6 +137,27 @@ int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int)) return hvc_instantiate(0, 0, &virtio_cons); } +/* + * we support only one console, the hvc struct is a global var + * There is no need to do anything + */ +static int notifier_add_vio(struct hvc_struct *hp, int data) +{ + hp->irq_requested = 1; + return 0; +} + +static void notifier_del_vio(struct hvc_struct *hp, int data) +{ + hp->irq_requested = 0; +} + +static void hvc_handle_input(struct virtqueue *vq) +{ + if (hvc_poll(hvc)) + hvc_kick(); +} + /*D:370 Once we're further in boot, we get probed like any other virtio device. * At this stage we set up the output virtqueue. * @@ -144,7 +168,6 @@ int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int)) static int __devinit virtcons_probe(struct virtio_device *dev) { int err; - struct hvc_struct *hvc; vdev = dev; @@ -158,7 +181,7 @@ static int __devinit virtcons_probe(struct virtio_device *dev) /* Find the input queue. */ /* FIXME: This is why we want to wean off hvc: we do nothing * when input comes in. */ - in_vq = vdev->config->find_vq(vdev, 0, NULL); + in_vq = vdev->config->find_vq(vdev, 0, hvc_handle_input); if (IS_ERR(in_vq)) { err = PTR_ERR(in_vq); goto free; @@ -173,15 +196,18 @@ static int __devinit virtcons_probe(struct virtio_device *dev) /* Start using the new console output. */ virtio_cons.get_chars = get_chars; virtio_cons.put_chars = put_chars; + virtio_cons.notifier_add = notifier_add_vio; + virtio_cons.notifier_del = notifier_del_vio; /* The first argument of hvc_alloc() is the virtual console number, so - * we use zero. The second argument is the interrupt number; we - * currently leave this as zero: it would be better not to use the - * hvc mechanism and fix this (FIXME!). + * we use zero. The second argument is the parameter for the + * notification mechanism (like irq number). We currently leave this + * as zero, virtqueues have implicit notifications. * * The third argument is a "struct hv_ops" containing the put_chars() - * and get_chars() pointers. The final argument is the output buffer - * size: we can do any size, so we put PAGE_SIZE here. */ + * get_chars(), notifier_add() and notifier_del() pointers. + * The final argument is the output buffer size: we can do any size, + * so we put PAGE_SIZE here. */ hvc = hvc_alloc(0, 0, &virtio_cons, PAGE_SIZE); if (IS_ERR(hvc)) { err = PTR_ERR(hvc); diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index 1a8de57289eb..37344aaee22f 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -98,16 +98,20 @@ static u32 lg_get_features(struct virtio_device *vdev) return features; } -static void lg_set_features(struct virtio_device *vdev, u32 features) +static void lg_finalize_features(struct virtio_device *vdev) { - unsigned int i; + unsigned int i, bits; struct lguest_device_desc *desc = to_lgdev(vdev)->desc; /* Second half of bitmap is features we accept. */ u8 *out_features = lg_features(desc) + desc->feature_len; + /* Give virtio_ring a chance to accept features. */ + vring_transport_features(vdev); + memset(out_features, 0, desc->feature_len); - for (i = 0; i < min(desc->feature_len * 8, 32); i++) { - if (features & (1 << i)) + bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8; + for (i = 0; i < bits; i++) { + if (test_bit(i, vdev->features)) out_features[i / 8] |= (1 << (i % 8)); } } @@ -297,7 +301,7 @@ static void lg_del_vq(struct virtqueue *vq) /* The ops structure which hooks everything together. */ static struct virtio_config_ops lguest_config_ops = { .get_features = lg_get_features, - .set_features = lg_set_features, + .finalize_features = lg_finalize_features, .get = lg_get, .set = lg_set, .get_status = lg_get_status, diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index c28d7cb2035b..0196a0df9021 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -19,6 +19,7 @@ //#define DEBUG #include #include +#include #include #include #include @@ -54,9 +55,15 @@ struct virtnet_info struct tasklet_struct tasklet; bool free_in_tasklet; + /* I like... big packets and I cannot lie! */ + bool big_packets; + /* Receive & send queues. */ struct sk_buff_head recv; struct sk_buff_head send; + + /* Chain pages by the private ptr. */ + struct page *pages; }; static inline struct virtio_net_hdr *skb_vnet_hdr(struct sk_buff *skb) @@ -69,6 +76,23 @@ static inline void vnet_hdr_to_sg(struct scatterlist *sg, struct sk_buff *skb) sg_init_one(sg, skb_vnet_hdr(skb), sizeof(struct virtio_net_hdr)); } +static void give_a_page(struct virtnet_info *vi, struct page *page) +{ + page->private = (unsigned long)vi->pages; + vi->pages = page; +} + +static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask) +{ + struct page *p = vi->pages; + + if (p) + vi->pages = (struct page *)p->private; + else + p = alloc_page(gfp_mask); + return p; +} + static void skb_xmit_done(struct virtqueue *svq) { struct virtnet_info *vi = svq->vdev->priv; @@ -88,6 +112,7 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, unsigned len) { struct virtio_net_hdr *hdr = skb_vnet_hdr(skb); + int err; if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) { pr_debug("%s: short packet %i\n", dev->name, len); @@ -95,10 +120,23 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, goto drop; } len -= sizeof(struct virtio_net_hdr); - BUG_ON(len > MAX_PACKET_LEN); - skb_trim(skb, len); + if (len <= MAX_PACKET_LEN) { + unsigned int i; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) + give_a_page(dev->priv, skb_shinfo(skb)->frags[i].page); + skb->data_len = 0; + skb_shinfo(skb)->nr_frags = 0; + } + + err = pskb_trim(skb, len); + if (err) { + pr_debug("%s: pskb_trim failed %i %d\n", dev->name, len, err); + dev->stats.rx_dropped++; + goto drop; + } + skb->truesize += skb->data_len; dev->stats.rx_bytes += skb->len; dev->stats.rx_packets++; @@ -160,7 +198,7 @@ static void try_fill_recv(struct virtnet_info *vi) { struct sk_buff *skb; struct scatterlist sg[2+MAX_SKB_FRAGS]; - int num, err; + int num, err, i; sg_init_table(sg, 2+MAX_SKB_FRAGS); for (;;) { @@ -170,6 +208,24 @@ static void try_fill_recv(struct virtnet_info *vi) skb_put(skb, MAX_PACKET_LEN); vnet_hdr_to_sg(sg, skb); + + if (vi->big_packets) { + for (i = 0; i < MAX_SKB_FRAGS; i++) { + skb_frag_t *f = &skb_shinfo(skb)->frags[i]; + f->page = get_a_page(vi, GFP_ATOMIC); + if (!f->page) + break; + + f->page_offset = 0; + f->size = PAGE_SIZE; + + skb->data_len += PAGE_SIZE; + skb->len += PAGE_SIZE; + + skb_shinfo(skb)->nr_frags++; + } + } + num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; skb_queue_head(&vi->recv, skb); @@ -335,16 +391,11 @@ again: free_old_xmit_skbs(vi); /* If we has a buffer left over from last time, send it now. */ - if (unlikely(vi->last_xmit_skb)) { - if (xmit_skb(vi, vi->last_xmit_skb) != 0) { - /* Drop this skb: we only queue one. */ - vi->dev->stats.tx_dropped++; - kfree_skb(skb); - skb = NULL; - goto stop_queue; - } - vi->last_xmit_skb = NULL; - } + if (unlikely(vi->last_xmit_skb) && + xmit_skb(vi, vi->last_xmit_skb) != 0) + goto stop_queue; + + vi->last_xmit_skb = NULL; /* Put new one in send queue and do transmit */ if (likely(skb)) { @@ -370,6 +421,11 @@ stop_queue: netif_start_queue(dev); goto again; } + if (skb) { + /* Drop this skb: we only queue one. */ + vi->dev->stats.tx_dropped++; + kfree_skb(skb); + } goto done; } @@ -408,6 +464,22 @@ static int virtnet_close(struct net_device *dev) return 0; } +static int virtnet_set_tx_csum(struct net_device *dev, u32 data) +{ + struct virtnet_info *vi = netdev_priv(dev); + struct virtio_device *vdev = vi->vdev; + + if (data && !virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) + return -ENOSYS; + + return ethtool_op_set_tx_hw_csum(dev, data); +} + +static struct ethtool_ops virtnet_ethtool_ops = { + .set_tx_csum = virtnet_set_tx_csum, + .set_sg = ethtool_op_set_sg, +}; + static int virtnet_probe(struct virtio_device *vdev) { int err; @@ -427,6 +499,7 @@ static int virtnet_probe(struct virtio_device *vdev) #ifdef CONFIG_NET_POLL_CONTROLLER dev->poll_controller = virtnet_netpoll; #endif + SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops); SET_NETDEV_DEV(dev, &vdev->dev); /* Do we support "hardware" checksums? */ @@ -462,11 +535,18 @@ static int virtnet_probe(struct virtio_device *vdev) vi->dev = dev; vi->vdev = vdev; vdev->priv = vi; + vi->pages = NULL; /* If they give us a callback when all buffers are done, we don't need * the timer. */ vi->free_in_tasklet = virtio_has_feature(vdev,VIRTIO_F_NOTIFY_ON_EMPTY); + /* If we can receive ANY GSO packets, we must allocate large ones. */ + if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) + || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) + || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN)) + vi->big_packets = true; + /* We expect two virtqueues, receive then send. */ vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done); if (IS_ERR(vi->rvq)) { @@ -541,6 +621,10 @@ static void virtnet_remove(struct virtio_device *vdev) vdev->config->del_vq(vi->svq); vdev->config->del_vq(vi->rvq); unregister_netdev(vi->dev); + + while (vi->pages) + __free_pages(get_a_page(vi, GFP_KERNEL), 0); + free_netdev(vi->dev); } @@ -553,7 +637,9 @@ static unsigned int features[] = { VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC, VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, - VIRTIO_NET_F_HOST_ECN, VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, + VIRTIO_NET_F_GUEST_ECN, /* We don't yet handle UFO input. */ + VIRTIO_F_NOTIFY_ON_EMPTY, }; static struct virtio_driver virtio_net = { diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c index 5ab34340919b..79954bd6bfa5 100644 --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -87,16 +88,20 @@ static u32 kvm_get_features(struct virtio_device *vdev) return features; } -static void kvm_set_features(struct virtio_device *vdev, u32 features) +static void kvm_finalize_features(struct virtio_device *vdev) { - unsigned int i; + unsigned int i, bits; struct kvm_device_desc *desc = to_kvmdev(vdev)->desc; /* Second half of bitmap is features we accept. */ u8 *out_features = kvm_vq_features(desc) + desc->feature_len; + /* Give virtio_ring a chance to accept features. */ + vring_transport_features(vdev); + memset(out_features, 0, desc->feature_len); - for (i = 0; i < min(desc->feature_len * 8, 32); i++) { - if (features & (1 << i)) + bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8; + for (i = 0; i < bits; i++) { + if (test_bit(i, vdev->features)) out_features[i / 8] |= (1 << (i % 8)); } } @@ -222,7 +227,7 @@ static void kvm_del_vq(struct virtqueue *vq) */ static struct virtio_config_ops kvm_vq_configspace_ops = { .get_features = kvm_get_features, - .set_features = kvm_set_features, + .finalize_features = kvm_finalize_features, .get = kvm_get, .set = kvm_set, .get_status = kvm_get_status, @@ -333,6 +338,25 @@ static int __init kvm_devices_init(void) return 0; } +/* code for early console output with virtio_console */ +static __init int early_put_chars(u32 vtermno, const char *buf, int count) +{ + char scratch[17]; + unsigned int len = count; + + if (len > sizeof(scratch) - 1) + len = sizeof(scratch) - 1; + scratch[len] = '\0'; + memcpy(scratch, buf, len); + kvm_hypercall1(KVM_S390_VIRTIO_NOTIFY, __pa(scratch)); + return len; +} + +void s390_virtio_console_init(void) +{ + virtio_cons_early_init(early_put_chars); +} + /* * We do this after core stuff, but before the drivers. */ diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 7084e7e146c0..5b78fd0aff0a 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -71,13 +71,6 @@ static int virtio_uevent(struct device *_dv, struct kobj_uevent_env *env) dev->id.device, dev->id.vendor); } -static struct bus_type virtio_bus = { - .name = "virtio", - .match = virtio_dev_match, - .dev_attrs = virtio_dev_attrs, - .uevent = virtio_uevent, -}; - static void add_status(struct virtio_device *dev, unsigned status) { dev->config->set_status(dev, dev->config->get_status(dev) | status); @@ -120,12 +113,16 @@ static int virtio_dev_probe(struct device *_d) set_bit(f, dev->features); } + /* Transport features always preserved to pass to finalize_features. */ + for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) + if (device_features & (1 << i)) + set_bit(i, dev->features); + err = drv->probe(dev); if (err) add_status(dev, VIRTIO_CONFIG_S_FAILED); else { - /* They should never have set feature bits beyond 32 */ - dev->config->set_features(dev, dev->features[0]); + dev->config->finalize_features(dev); add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); } return err; @@ -147,13 +144,20 @@ static int virtio_dev_remove(struct device *_d) return 0; } +static struct bus_type virtio_bus = { + .name = "virtio", + .match = virtio_dev_match, + .dev_attrs = virtio_dev_attrs, + .uevent = virtio_uevent, + .probe = virtio_dev_probe, + .remove = virtio_dev_remove, +}; + int register_virtio_driver(struct virtio_driver *driver) { /* Catch this early. */ BUG_ON(driver->feature_table_size && !driver->feature_table); driver->driver.bus = &virtio_bus; - driver->driver.probe = virtio_dev_probe; - driver->driver.remove = virtio_dev_remove; return driver_register(&driver->driver); } EXPORT_SYMBOL_GPL(register_virtio_driver); diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index eae7236310e4..c7dc37c7cce9 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -94,12 +94,17 @@ static u32 vp_get_features(struct virtio_device *vdev) return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES); } -/* virtio config->set_features() implementation */ -static void vp_set_features(struct virtio_device *vdev, u32 features) +/* virtio config->finalize_features() implementation */ +static void vp_finalize_features(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - iowrite32(features, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES); + /* Give virtio_ring a chance to accept features. */ + vring_transport_features(vdev); + + /* We only support 32 feature bits. */ + BUILD_BUG_ON(ARRAY_SIZE(vdev->features) != 1); + iowrite32(vdev->features[0], vp_dev->ioaddr+VIRTIO_PCI_GUEST_FEATURES); } /* virtio config->get() implementation */ @@ -297,7 +302,7 @@ static struct virtio_config_ops virtio_pci_config_ops = { .find_vq = vp_find_vq, .del_vq = vp_del_vq, .get_features = vp_get_features, - .set_features = vp_set_features, + .finalize_features = vp_finalize_features, }; /* the PCI probing function */ diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 72bf8bc09014..6eb5303fed11 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -18,6 +18,7 @@ */ #include #include +#include #include #ifdef DEBUG @@ -87,8 +88,11 @@ static int vring_add_buf(struct virtqueue *_vq, if (vq->num_free < out + in) { pr_debug("Can't add buf len %i - avail = %i\n", out + in, vq->num_free); - /* We notify *even if* VRING_USED_F_NO_NOTIFY is set here. */ - vq->notify(&vq->vq); + /* FIXME: for historical reasons, we force a notify here if + * there are outgoing parts to the buffer. Presumably the + * host should service the ring ASAP. */ + if (out) + vq->notify(&vq->vq); END_USE(vq); return -ENOSPC; } @@ -320,4 +324,19 @@ void vring_del_virtqueue(struct virtqueue *vq) } EXPORT_SYMBOL_GPL(vring_del_virtqueue); +/* Manipulates transport-specific feature bits. */ +void vring_transport_features(struct virtio_device *vdev) +{ + unsigned int i; + + for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) { + switch (i) { + default: + /* We don't understand this bit. */ + clear_bit(i, vdev->features); + } + } +} +EXPORT_SYMBOL_GPL(vring_transport_features); + MODULE_LICENSE("GPL"); diff --git a/include/asm-s390/kvm_virtio.h b/include/asm-s390/kvm_virtio.h index 5c871a990c29..146100224def 100644 --- a/include/asm-s390/kvm_virtio.h +++ b/include/asm-s390/kvm_virtio.h @@ -50,4 +50,14 @@ struct kvm_vqconfig { #define KVM_S390_VIRTIO_RESET 1 #define KVM_S390_VIRTIO_SET_STATUS 2 +#ifdef __KERNEL__ +/* early virtio console setup */ +#ifdef CONFIG_VIRTIO_CONSOLE +extern void s390_virtio_console_init(void); +#else +static inline void s390_virtio_console_init(void) +{ +} +#endif /* CONFIG_VIRTIO_CONSOLE */ +#endif /* __KERNEL__ */ #endif diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h index 8eff0b53910b..b3c4a60ceeb3 100644 --- a/include/linux/virtio_9p.h +++ b/include/linux/virtio_9p.h @@ -1,5 +1,7 @@ #ifndef _LINUX_VIRTIO_9P_H #define _LINUX_VIRTIO_9P_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. */ #include /* The ID for virtio console */ diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h index 979524ee75b7..c30c7bfbf39b 100644 --- a/include/linux/virtio_balloon.h +++ b/include/linux/virtio_balloon.h @@ -1,5 +1,7 @@ #ifndef _LINUX_VIRTIO_BALLOON_H #define _LINUX_VIRTIO_BALLOON_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. */ #include /* The ID for virtio_balloon */ diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index 5f79a5f9de79..c1aef85243bf 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -1,5 +1,7 @@ #ifndef _LINUX_VIRTIO_BLK_H #define _LINUX_VIRTIO_BLK_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. */ #include /* The ID for virtio_block */ @@ -11,6 +13,7 @@ #define VIRTIO_BLK_F_SEG_MAX 2 /* Indicates maximum # of segments */ #define VIRTIO_BLK_F_GEOMETRY 4 /* Legacy geometry available */ #define VIRTIO_BLK_F_RO 5 /* Disk is read-only */ +#define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/ struct virtio_blk_config { @@ -26,6 +29,8 @@ struct virtio_blk_config __u8 heads; __u8 sectors; } geometry; + /* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */ + __u32 blk_size; } __attribute__((packed)); /* These two define direction. */ diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index f364bbf63c34..bf8ec283b232 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -1,5 +1,8 @@ #ifndef _LINUX_VIRTIO_CONFIG_H #define _LINUX_VIRTIO_CONFIG_H +/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so + * anyone can use the definitions to implement compatible drivers/servers. */ + /* Virtio devices use a standardized configuration space to define their * features and pass configuration information, but each implementation can * store and access that space differently. */ @@ -15,6 +18,12 @@ /* We've given up on this device. */ #define VIRTIO_CONFIG_S_FAILED 0x80 +/* Some virtio feature bits (currently bits 28 through 31) are reserved for the + * transport being used (eg. virtio_ring), the rest are per-device feature + * bits. */ +#define VIRTIO_TRANSPORT_F_START 28 +#define VIRTIO_TRANSPORT_F_END 32 + /* Do we get callbacks when the ring is completely used, even if we've * suppressed them? */ #define VIRTIO_F_NOTIFY_ON_EMPTY 24 @@ -52,9 +61,10 @@ * @get_features: get the array of feature bits for this device. * vdev: the virtio_device * Returns the first 32 feature bits (all we currently need). - * @set_features: confirm what device features we'll be using. + * @finalize_features: confirm what device features we'll be using. * vdev: the virtio_device - * feature: the first 32 feature bits + * This gives the final feature bits for the device: it can change + * the dev->feature bits if it wants. */ struct virtio_config_ops { @@ -70,7 +80,7 @@ struct virtio_config_ops void (*callback)(struct virtqueue *)); void (*del_vq)(struct virtqueue *vq); u32 (*get_features)(struct virtio_device *vdev); - void (*set_features)(struct virtio_device *vdev, u32 features); + void (*finalize_features)(struct virtio_device *vdev); }; /* If driver didn't advertise the feature, it will never appear. */ diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h index ed2d4ead7eb7..19a0da0dba41 100644 --- a/include/linux/virtio_console.h +++ b/include/linux/virtio_console.h @@ -1,6 +1,8 @@ #ifndef _LINUX_VIRTIO_CONSOLE_H #define _LINUX_VIRTIO_CONSOLE_H #include +/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so + * anyone can use the definitions to implement compatible drivers/servers. */ /* The ID for virtio console */ #define VIRTIO_ID_CONSOLE 3 diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 38c0571820fb..5e33761b9b8a 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -1,5 +1,7 @@ #ifndef _LINUX_VIRTIO_NET_H #define _LINUX_VIRTIO_NET_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. */ #include /* The ID for virtio_net */ diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h index b3151659cf49..cdef35742932 100644 --- a/include/linux/virtio_pci.h +++ b/include/linux/virtio_pci.h @@ -9,9 +9,8 @@ * Authors: * Anthony Liguori * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. */ #ifndef _LINUX_VIRTIO_PCI_H diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index abe481ed990e..c4a598fb3826 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -120,6 +120,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, void (*notify)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq)); void vring_del_virtqueue(struct virtqueue *vq); +/* Filter out transport-specific feature bits. */ +void vring_transport_features(struct virtio_device *vdev); irqreturn_t vring_interrupt(int irq, void *_vq); #endif /* __KERNEL__ */ diff --git a/include/linux/virtio_rng.h b/include/linux/virtio_rng.h index 331afb6c9f62..1a85dab8a940 100644 --- a/include/linux/virtio_rng.h +++ b/include/linux/virtio_rng.h @@ -1,5 +1,7 @@ #ifndef _LINUX_VIRTIO_RNG_H #define _LINUX_VIRTIO_RNG_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. */ #include /* The ID for virtio_rng */