rpms/kernel/F-9 linux-2.6-interface-to-query-tun-tap-features.patch, NONE, 1.1.2.1 linux-2.6-partial-checksum-and-gso-support-for-tun-tap.patch, NONE, 1.1.2.1 linux-2.6-tun-fix-gso-mapping.patch, NONE, 1.1.2.1 linux-2.6-virtio-fix-scatterlist-sizing-in-net-driver.patch, NONE, 1.1.2.1 linux-2.6-virtio-net-add-ethtool-ops-for-sg-gso.patch, NONE, 1.1.2.1 linux-2.6-virtio-net-allow-receiving-sg-packets.patch, NONE, 1.1.2.1 kernel.spec, 1.648, 1.648.2.1

Mark McLoughlin (markmc) fedora-extras-commits at redhat.com
Tue May 20 13:50:21 UTC 2008


Author: markmc

Update of /cvs/pkgs/rpms/kernel/F-9
In directory cvs-int.fedora.redhat.com:/tmp/cvs-serv26112

Modified Files:
      Tag: private-markmc-virtio-gso-branch
	kernel.spec 
Added Files:
      Tag: private-markmc-virtio-gso-branch
	linux-2.6-interface-to-query-tun-tap-features.patch 
	linux-2.6-partial-checksum-and-gso-support-for-tun-tap.patch 
	linux-2.6-tun-fix-gso-mapping.patch 
	linux-2.6-virtio-fix-scatterlist-sizing-in-net-driver.patch 
	linux-2.6-virtio-net-add-ethtool-ops-for-sg-gso.patch 
	linux-2.6-virtio-net-allow-receiving-sg-packets.patch 
Log Message:
* Tue May 20 2008 Mark McLoughlin <markmc at redhat.com> - 2.6.25.4-29.1.virtio_gso
- virtio_net/tun: add GSO patches


linux-2.6-interface-to-query-tun-tap-features.patch:

--- NEW FILE linux-2.6-interface-to-query-tun-tap-features.patch ---
>From 83b8d1cb58e6ab169b46d2d0891d24f364902b75 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty at rustcorp.com.au>
Date: Thu, 24 Jan 2008 01:14:18 +1100
Subject: [PATCH 5/6] Interface to query tun/tap features.

(No real change, just updated with new bits)

The problem with introducing IFF_RECV_CSUM and IFF_RECV_GSO is that
they need to set dev->features to enable GSO and/or checksumming,
which is supposed to be done before register_netdevice(), ie. as part
of TUNSETIFF.

Unfortunately, TUNSETIFF has always just ignored flags it doesn't understand,
so there's no good way of detecting whether the kernel supports IFF_VIRTIO_HDR.

This patch implements a TUNGETFEATURES ioctl which returns all the valid IFF
flags.  It could be extended later to include other features.

Here's an example program which uses it:

#include <linux/if_tun.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <err.h>
#include <stdio.h>

/* Lookup table pairing each TUNSETIFF flag bit this program recognises
 * with the label printed for it. */
static struct {
	unsigned int flag;
	const char *name;
} known_flags[] = {
	{ .flag = IFF_TUN,        .name = "TUN" },
	{ .flag = IFF_TAP,        .name = "TAP" },
	{ .flag = IFF_NO_PI,      .name = "NO_PI" },
	{ .flag = IFF_ONE_QUEUE,  .name = "ONE_QUEUE" },
	{ .flag = IFF_VIRTIO_HDR, .name = "VIRTIO_HDR" },
	{ .flag = IFF_RECV_CSUM,  .name = "RECV_CSUM" },
	{ .flag = IFF_RECV_GSO,   .name = "RECV_GSO" },
};

/* Query the tun/tap driver for the IFF_* flags it supports and print them
 * by name; any bits not listed in known_flags are printed as a hex mask. */
int main()
{
	const size_t nflags = sizeof(known_flags) / sizeof(known_flags[0]);
	unsigned int features;
	size_t idx;
	int fd;

	fd = open("/dev/net/tun", O_RDWR);
	if (fd < 0)
		err(1, "Opening /dev/net/tun");

	/* A failing ioctl is treated as "ioctl not implemented": fall back
	 * to the four basic flags. */
	if (ioctl(fd, TUNGETFEATURES, &features)) {
		printf("Kernel does not support TUNGETFEATURES, guessing\n");
		features = IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE;
	}

	printf("Available features are: ");
	for (idx = 0; idx < nflags; idx++) {
		unsigned int bit = known_flags[idx].flag;

		if (!(features & bit))
			continue;

		/* Clear each reported bit so only unrecognised ones remain. */
		features &= ~bit;
		printf("%s ", known_flags[idx].name);
	}

	/* Whatever is still set has no entry in known_flags. */
	if (features)
		printf("(UNKNOWN %#x)", features);
	printf("\n");
	return 0;
}

Signed-off-by: Rusty Russell <rusty at rustcorp.com.au>
---
 drivers/net/tun.c      |    9 +++++++++
 include/linux/if_tun.h |    3 +++
 2 files changed, 12 insertions(+), 0 deletions(-)

Index: linux-2.6.25.noarch/drivers/net/tun.c
===================================================================
--- linux-2.6.25.noarch.orig/drivers/net/tun.c	2008-05-20 14:31:40.000000000 +0100
+++ linux-2.6.25.noarch.orig/drivers/net/tun.c	2008-05-20 14:31:40.000000000 +0100
@@ -836,6 +836,15 @@ static int tun_chr_ioctl(struct inode *i
 		return 0;
 	}
 
+	if (cmd == TUNGETFEATURES) {
+		/* Currently this just means: "what IFF flags are valid?".
+		 * This is needed because we never checked for invalid flags on
+		 * TUNSETIFF.  This was introduced with IFF_GSO_HDR, so if a
+		 * kernel doesn't have this ioctl, it doesn't have GSO header
+		 * support. */
+		return put_user(IFF_ALL_FLAGS, (unsigned int __user*)argp);
+	}
+
 	if (!tun)
 		return -EBADFD;
 
Index: linux-2.6.25.noarch/include/linux/if_tun.h
===================================================================
--- linux-2.6.25.noarch.orig/include/linux/if_tun.h	2008-05-20 14:31:40.000000000 +0100
+++ linux-2.6.25.noarch.orig/include/linux/if_tun.h	2008-05-20 14:31:40.000000000 +0100
@@ -45,6 +45,7 @@
 #define TUNSETOWNER   _IOW('T', 204, int)
 #define TUNSETLINK    _IOW('T', 205, int)
 #define TUNSETGROUP   _IOW('T', 206, int)
+#define TUNGETFEATURES _IOR('T', 207, unsigned int)
 
 /* TUNSETIFF ifr flags */
 #define IFF_TUN		0x0001
@@ -54,6 +55,8 @@
 #define IFF_VIRTIO_HDR	0x4000
 #define IFF_RECV_CSUM	0x8000
 #define IFF_RECV_GSO	0x0800
+#define IFF_ALL_FLAGS (IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE | \
+		       IFF_VIRTIO_HDR | IFF_RECV_CSUM | IFF_RECV_GSO)
 
 struct tun_pi {
 	unsigned short flags;

linux-2.6-partial-checksum-and-gso-support-for-tun-tap.patch:

--- NEW FILE linux-2.6-partial-checksum-and-gso-support-for-tun-tap.patch ---
>From e9f1629218e7f434c8e8738faea4558506d67faa Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty at rustcorp.com.au>
Date: Thu, 24 Jan 2008 01:10:44 +1100
Subject: [PATCH 4/6] partial checksum and GSO support for tun/tap.

(Changes since last time: we now have explicit IFF_RECV_CSUM and
IFF_RECV_GSO bits, and some renaming of virtio_net hdr)

We use the virtio_net_hdr: it is an ABI already and designed to
encapsulate such metadata as GSO and partial checksums.

IFF_VIRTIO_HDR means you will write and read a 'struct virtio_net_hdr'
at the start of each packet.  You can always write packets with
partial checksum and gso to the tap device using this header.

IFF_RECV_CSUM means you can handle reading packets with partial
checksums.  If IFF_RECV_GSO is also set, it means you can handle
reading (all types of) GSO packets.

Note that there is no easy way to detect if these flags are supported:
see next patch.

Signed-off-by: Rusty Russell <rusty at rustcorp.com.au>
---
 drivers/net/tun.c      |  252 +++++++++++++++++++++++++++++++++++++++++++++---
 include/linux/if_tun.h |    6 +
 2 files changed, 243 insertions(+), 15 deletions(-)

Index: linux-2.6.25.noarch/drivers/net/tun.c
===================================================================
--- linux-2.6.25.noarch.orig/drivers/net/tun.c	2008-04-17 03:49:44.000000000 +0100
+++ linux-2.6.25.noarch.orig/drivers/net/tun.c	2008-04-17 03:49:44.000000000 +0100
@@ -62,6 +62,7 @@
 #include <linux/if_ether.h>
 #include <linux/if_tun.h>
 #include <linux/crc32.h>
+#include <linux/virtio_net.h>
 #include <net/net_namespace.h>
 
 #include <asm/system.h>
@@ -271,20 +272,176 @@ static unsigned int tun_chr_poll(struct 
 	return mask;
 }
 
+static struct sk_buff *copy_user_skb(size_t align, struct iovec *iv, size_t len)
+{
+	struct sk_buff *skb;
+
+	if (!(skb = alloc_skb(len + align, GFP_KERNEL)))
+		return ERR_PTR(-ENOMEM);
+
+	if (align)
+		skb_reserve(skb, align);
+
+	if (memcpy_fromiovec(skb_put(skb, len), iv, len)) {
+		kfree_skb(skb);
+		return ERR_PTR(-EFAULT);
+	}
+	return skb;
+}
+
+/* This will fail if they give us a crazy iovec, but that's their own fault. */
+static int get_user_skb_frags(const struct iovec *iv, size_t count,
+			      struct skb_frag_struct *f)
+{
+	unsigned int i, j, num_pg = 0;
+	int err;
+	struct page *pages[MAX_SKB_FRAGS];
+
+	down_read(&current->mm->mmap_sem);
+	for (i = 0; i < count; i++) {
+		int n, npages;
+		unsigned long base, len;
+		base = (unsigned long)iv[i].iov_base;
+		len = (unsigned long)iv[i].iov_len;
+
+		if (len == 0)
+			continue;
+
+		/* How many pages will this take? */
+		npages = 1 + (base + len - 1)/PAGE_SIZE - base/PAGE_SIZE;
+		if (unlikely(num_pg + npages > MAX_SKB_FRAGS)) {
+			err = -ENOSPC;
+			goto fail;
+		}
+		n = get_user_pages(current, current->mm, base, npages,
+				   0, 0, pages, NULL);
+		if (unlikely(n < 0)) {
+			err = n;
+			goto fail;
+		}
+
+		/* Transfer pages to the frag array */
+		for (j = 0; j < n; j++) {
+			f[num_pg].page = pages[j];
+			if (j == 0) {
+				f[num_pg].page_offset = offset_in_page(base);
+				f[num_pg].size = min(len, PAGE_SIZE -
+						     f[num_pg].page_offset);
+			} else {
+				f[num_pg].page_offset = 0;
+				f[num_pg].size = min(len, PAGE_SIZE);
+			}
+			len -= f[num_pg].size;
+			base += f[num_pg].size;
+			num_pg++;
+		}
+
+		if (unlikely(n != npages)) {
+			err = -EFAULT;
+			goto fail;
+		}
+	}
+	up_read(&current->mm->mmap_sem);
+	return num_pg;
+
+fail:
+	for (i = 0; i < num_pg; i++)
+		put_page(f[i].page);
+	up_read(&current->mm->mmap_sem);
+	return err;
+}
+
+
+static struct sk_buff *map_user_skb(const struct virtio_net_hdr *gso,
+				    size_t align, struct iovec *iv,
+				    size_t count, size_t len)
+{
+	struct sk_buff *skb;
+	struct skb_shared_info *sinfo;
+	int err;
+
+	if (!(skb = alloc_skb(gso->hdr_len + align, GFP_KERNEL)))
+		return ERR_PTR(-ENOMEM);
+
+	if (align)
+		skb_reserve(skb, align);
+
+	sinfo = skb_shinfo(skb);
+	sinfo->gso_size = gso->gso_size;
+	sinfo->gso_type = SKB_GSO_DODGY;
+	switch (gso->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
+	case VIRTIO_NET_HDR_GSO_TCPV4:
+		sinfo->gso_type |= SKB_GSO_TCPV4;
+		break;
+	case VIRTIO_NET_HDR_GSO_TCPV6:
+		sinfo->gso_type |= SKB_GSO_TCPV6;
+		break;
+	case VIRTIO_NET_HDR_GSO_UDP:
+		sinfo->gso_type |= SKB_GSO_UDP;
+		break;
+	default:
+		err = -EINVAL;
+		goto fail;
+	}
+
+	if (gso->gso_type & VIRTIO_NET_HDR_GSO_ECN)
+		skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
+
+	/* Copy in the header. */
+	if (memcpy_fromiovec(skb_put(skb, gso->hdr_len), iv, gso->hdr_len)) {
+		err = -EFAULT;
+		goto fail;
+	}
+
+	err = get_user_skb_frags(iv, count, sinfo->frags);
+	if (err < 0)
+		goto fail;
+
+	sinfo->nr_frags = err;
+	skb->len += len;
+	skb->data_len += len;
+	
+	return skb;
+
+fail:
+	kfree_skb(skb);
+	return ERR_PTR(err);
+}
+
+static inline size_t iov_total(const struct iovec *iv, unsigned long count)
+{
+	unsigned long i;
+	size_t len;
+
+	for (i = 0, len = 0; i < count; i++)
+		len += iv[i].iov_len;
+
+	return len;
+}
+
 /* Get packet from user space buffer */
-static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv, size_t count)
+static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv, size_t num)
 {
 	struct tun_pi pi = { 0, __constant_htons(ETH_P_IP) };
+	struct virtio_net_hdr gso = { 0, VIRTIO_NET_HDR_GSO_NONE };
 	struct sk_buff *skb;
-	size_t len = count, align = 0;
+	size_t tot_len = iov_total(iv, num);
+	size_t len = tot_len, align = 0;
 
 	if (!(tun->flags & TUN_NO_PI)) {
-		if ((len -= sizeof(pi)) > count)
+		if ((len -= sizeof(pi)) > tot_len)
 			return -EINVAL;
 
 		if(memcpy_fromiovec((void *)&pi, iv, sizeof(pi)))
 			return -EFAULT;
 	}
+	if (tun->flags & TUN_VIRTIO_HDR) {
+		if ((len -= sizeof(gso)) > tot_len)
+			return -EINVAL;
+
+		if (memcpy_fromiovec((void *)&gso, iv, sizeof(gso)))
+			return -EFAULT;
+	}
 
 	if ((tun->flags & TUN_TYPE_MASK) == TUN_TAP_DEV) {
 		align = NET_IP_ALIGN;
@@ -292,17 +449,14 @@ static __inline__ ssize_t tun_get_user(s
 			return -EINVAL;
 	}
 
-	if (!(skb = alloc_skb(len + align, GFP_KERNEL))) {
-		tun->dev->stats.rx_dropped++;
-		return -ENOMEM;
-	}
+	if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE)
+		skb = map_user_skb(&gso, align, iv, num, len);
+	else
+		skb = copy_user_skb(align, iv, len);
 
-	if (align)
-		skb_reserve(skb, align);
-	if (memcpy_fromiovec(skb_put(skb, len), iv, len)) {
+	if (IS_ERR(skb)) {
 		tun->dev->stats.rx_dropped++;
-		kfree_skb(skb);
-		return -EFAULT;
+		return PTR_ERR(skb);
 	}
 
 	switch (tun->flags & TUN_TYPE_MASK) {
@@ -316,7 +470,13 @@ static __inline__ ssize_t tun_get_user(s
 		break;
 	};
 
-	if (tun->flags & TUN_NOCHECKSUM)
+	if (gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {	/* header flag, not a feature-bit index */
+		if (!skb_partial_csum_set(skb,gso.csum_start,gso.csum_offset)) {
+			tun->dev->stats.rx_dropped++;
+			kfree_skb(skb);
+			return -EINVAL;
+		}
+	} else if (tun->flags & TUN_NOCHECKSUM)
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
 	netif_rx_ni(skb);
@@ -325,7 +485,7 @@ static __inline__ ssize_t tun_get_user(s
 	tun->dev->stats.rx_packets++;
 	tun->dev->stats.rx_bytes += len;
 
-	return count;
+	return tot_len;
 }
 
 static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
@@ -338,7 +498,7 @@ static ssize_t tun_chr_aio_write(struct 
 
 	DBG(KERN_INFO "%s: tun_chr_write %ld\n", tun->dev->name, count);
 
-	return tun_get_user(tun, (struct iovec *) iv, iov_length(iv, count));
+	return tun_get_user(tun, (struct iovec *) iv, count);
 }
 
 /* Put packet to the user space buffer */
@@ -362,6 +522,42 @@ static __inline__ ssize_t tun_put_user(s
 			return -EFAULT;
 		total += sizeof(pi);
 	}
+	if (tun->flags & TUN_VIRTIO_HDR) {
+		struct virtio_net_hdr gso = { 0 };	/* zeroed: copied whole to userspace, no stack leak */
+		struct skb_shared_info *sinfo = skb_shinfo(skb);
+
+		if (skb_is_gso(skb)) {
+			gso.hdr_len = skb_transport_header(skb) - skb->data;
+			gso.gso_size = sinfo->gso_size;
+			if (sinfo->gso_type & SKB_GSO_TCPV4)
+				gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
+			else if (sinfo->gso_type & SKB_GSO_TCPV6)
+				gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
+			else if (sinfo->gso_type & SKB_GSO_UDP)
+				gso.gso_type = VIRTIO_NET_HDR_GSO_UDP;
+			else
+				BUG();
+			if (sinfo->gso_type & SKB_GSO_TCP_ECN)
+				gso.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
+		} else
+			gso.gso_type = VIRTIO_NET_HDR_GSO_NONE;
+		
+		if (skb->ip_summed == CHECKSUM_PARTIAL) {
+			gso.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+			gso.csum_start = skb->csum_start - skb_headroom(skb);
+			gso.csum_offset = skb->csum_offset;
+		} else {
+			gso.flags = 0;
+			gso.csum_offset = gso.csum_start = 0;
+		}
+
+		if ((len -= sizeof(gso)) < 0)
+			return -EINVAL;
+
+		if (memcpy_toiovec(iv, (void *)&gso, sizeof(gso)))
+			return -EFAULT;
+		total += sizeof(gso);
+	}
 
 	len = min_t(int, skb->len, len);
 
@@ -548,6 +744,17 @@ static int tun_set_iff(struct file *file
 
 		tun_net_init(dev);
 
+		/* Virtio header means we can handle csum & gso. */
+		if ((ifr->ifr_flags & (IFF_VIRTIO_HDR|IFF_RECV_CSUM)) ==
+		    (IFF_VIRTIO_HDR|IFF_RECV_CSUM)) {
+			dev->features = NETIF_F_SG | NETIF_F_HW_CSUM |
+					NETIF_F_HIGHDMA | NETIF_F_FRAGLIST;
+
+			if (ifr->ifr_flags & IFF_RECV_GSO)
+				dev->features |= NETIF_F_TSO | NETIF_F_UFO |
+						 NETIF_F_TSO_ECN | NETIF_F_TSO6;
+		}
+
 		if (strchr(dev->name, '%')) {
 			err = dev_alloc_name(dev, dev->name);
 			if (err < 0)
@@ -573,6 +780,21 @@ static int tun_set_iff(struct file *file
 	else
 		tun->flags &= ~TUN_ONE_QUEUE;
 
+	if (ifr->ifr_flags & IFF_VIRTIO_HDR)
+		tun->flags |= TUN_VIRTIO_HDR;
+	else
+		tun->flags &= ~TUN_VIRTIO_HDR;
+
+	if (ifr->ifr_flags & IFF_RECV_CSUM)
+		tun->flags |= TUN_RECV_CSUM;
+	else
+		tun->flags &= ~TUN_RECV_CSUM;
+
+	if (ifr->ifr_flags & IFF_RECV_GSO)
+		tun->flags |= TUN_RECV_GSO;
+	else
+		tun->flags &= ~TUN_RECV_GSO;
+
 	file->private_data = tun;
 	tun->attached = 1;
 
Index: linux-2.6.25.noarch/include/linux/if_tun.h
===================================================================
--- linux-2.6.25.noarch.orig/include/linux/if_tun.h	2008-04-17 03:49:44.000000000 +0100
+++ linux-2.6.25.noarch.orig/include/linux/if_tun.h	2008-04-17 03:49:44.000000000 +0100
@@ -33,6 +33,9 @@
 #define TUN_NO_PI	0x0040
 #define TUN_ONE_QUEUE	0x0080
 #define TUN_PERSIST 	0x0100	
+#define TUN_VIRTIO_HDR	0x0200
+#define TUN_RECV_CSUM	0x0400
+#define TUN_RECV_GSO	0x0800	/* own bit: set/cleared independently of TUN_RECV_CSUM */
 
 /* Ioctl defines */
 #define TUNSETNOCSUM  _IOW('T', 200, int) 
@@ -48,6 +51,9 @@
 #define IFF_TAP		0x0002
 #define IFF_NO_PI	0x1000
 #define IFF_ONE_QUEUE	0x2000
+#define IFF_VIRTIO_HDR	0x4000
+#define IFF_RECV_CSUM	0x8000
+#define IFF_RECV_GSO	0x0800
 
 struct tun_pi {
 	unsigned short flags;

linux-2.6-tun-fix-gso-mapping.patch:

--- NEW FILE linux-2.6-tun-fix-gso-mapping.patch ---
>From 818d0b0282fb4f49bdcf33b70f422e1209c33bab Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert at gondor.apana.org.au>
Date: Fri, 18 Apr 2008 11:17:22 +0800
Subject: [PATCH 6/6] tun: Fix GSO mapping

This patch avoids the correctness issue on the user-space mapping
by just copying the memory.
---
 drivers/net/tun.c |   93 ++++++++++++++++++++++++-----------------------------
 1 files changed, 42 insertions(+), 51 deletions(-)

Index: linux-2.6.25.noarch/drivers/net/tun.c
===================================================================
--- linux-2.6.25.noarch.orig/drivers/net/tun.c	2008-05-20 14:31:50.000000000 +0100
+++ linux-2.6.25.noarch.orig/drivers/net/tun.c	2008-05-20 14:31:50.000000000 +0100
@@ -62,6 +62,7 @@
 #include <linux/if_ether.h>
 #include <linux/if_tun.h>
 #include <linux/crc32.h>
+#include <linux/highmem.h>
 #include <linux/virtio_net.h>
 #include <net/net_namespace.h>
 
@@ -290,65 +291,55 @@ static struct sk_buff *copy_user_skb(siz
 }
 
 /* This will fail if they give us a crazy iovec, but that's their own fault. */
-static int get_user_skb_frags(const struct iovec *iv, size_t count,
-			      struct skb_frag_struct *f)
+static int get_user_skb_frags(struct iovec *iv, size_t count,
+			      struct skb_shared_info *sinfo)
 {
-	unsigned int i, j, num_pg = 0;
+	struct skb_frag_struct *f = sinfo->frags;
+	unsigned int i;
 	int err;
-	struct page *pages[MAX_SKB_FRAGS];
 
-	down_read(&current->mm->mmap_sem);
-	for (i = 0; i < count; i++) {
-		int n, npages;
-		unsigned long base, len;
-		base = (unsigned long)iv[i].iov_base;
-		len = (unsigned long)iv[i].iov_len;
-
-		if (len == 0)
-			continue;
+	f->page = NULL;
 
-		/* How many pages will this take? */
-		npages = 1 + (base + len - 1)/PAGE_SIZE - base/PAGE_SIZE;
-		if (unlikely(num_pg + npages > MAX_SKB_FRAGS)) {
-			err = -ENOSPC;
-			goto fail;
-		}
-		n = get_user_pages(current, current->mm, base, npages,
-				   0, 0, pages, NULL);
-		if (unlikely(n < 0)) {
-			err = n;
-			goto fail;
-		}
+	for (i = 0; i < count; i++) {
+		unsigned int len = iv[i].iov_len;
 
-		/* Transfer pages to the frag array */
-		for (j = 0; j < n; j++) {
-			f[num_pg].page = pages[j];
-			if (j == 0) {
-				f[num_pg].page_offset = offset_in_page(base);
-				f[num_pg].size = min(len, PAGE_SIZE -
-						     f[num_pg].page_offset);
-			} else {
-				f[num_pg].page_offset = 0;
-				f[num_pg].size = min(len, PAGE_SIZE);
+		while (len) {
+			void *virt;
+			unsigned int copy;
+
+			if (!f->page) {
+				f->page = alloc_page(GFP_KERNEL |
+						     __GFP_HIGHMEM);
+				if (!f->page)
+					return -ENOMEM;
+
+				f->page_offset = 0;
+				f->size = 0;
+				sinfo->nr_frags++;
 			}
-			len -= f[num_pg].size;
-			base += f[num_pg].size;
-			num_pg++;
-		}
 
-		if (unlikely(n != npages)) {
-			err = -EFAULT;
-			goto fail;
+			copy = PAGE_SIZE - f->size;
+			if (copy > len)
+				copy = len;
+
+			virt = kmap_atomic(f->page, KM_USER0);
+			err = memcpy_fromiovec(virt + f->size, iv, copy);
+			kunmap_atomic(virt, KM_USER0);
+
+			if (err)
+				return err;
+
+			f->size += copy;
+			if (f->size == PAGE_SIZE) {
+				if (sinfo->nr_frags >= MAX_SKB_FRAGS)
+					return -EMSGSIZE;
+				(++f)->page = NULL;
+			}
+			len -= copy;
 		}
 	}
-	up_read(&current->mm->mmap_sem);
-	return num_pg;
 
-fail:
-	for (i = 0; i < num_pg; i++)
-		put_page(f[i].page);
-	up_read(&current->mm->mmap_sem);
-	return err;
+	return 0;
 }
 
 
@@ -393,13 +384,13 @@ static struct sk_buff *map_user_skb(cons
 		goto fail;
 	}
 
-	err = get_user_skb_frags(iv, count, sinfo->frags);
+	err = get_user_skb_frags(iv, count, sinfo);
 	if (err < 0)
 		goto fail;
 
-	sinfo->nr_frags = err;
 	skb->len += len;
 	skb->data_len += len;
+	skb->truesize += len;
 	
 	return skb;
 

linux-2.6-virtio-fix-scatterlist-sizing-in-net-driver.patch:

--- NEW FILE linux-2.6-virtio-fix-scatterlist-sizing-in-net-driver.patch ---
>From f63084fc590d8b5563e10e8f9972519a1ee6dc5c Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty at rustcorp.com.au>
Date: Fri, 2 May 2008 21:50:45 -0500
Subject: [PATCH 2/6] virtio: fix scatterlist sizing in net driver.

Herbert Xu points out (within another patch) that my scatterlists are
too short: one entry for the gso header, one for the skb->data, and
MAX_SKB_FRAGS for all the fragments.

Fix both xmit and recv sides (recv currently unused, coming in later
patch).

Signed-off-by: Rusty Russell <rusty at rustcorp.com.au>
Signed-off-by: Mark McLoughlin <markmc at redhat.com>
---
 drivers/net/virtio_net.c |    8 ++++----
 1 files changed, 4 insertions(+), 4 deletions(-)

Index: linux-2.6.25.noarch/drivers/net/virtio_net.c
===================================================================
--- linux-2.6.25.noarch.orig/drivers/net/virtio_net.c	2008-05-20 14:31:24.000000000 +0100
+++ linux-2.6.25.noarch.orig/drivers/net/virtio_net.c	2008-05-20 14:31:24.000000000 +0100
@@ -145,10 +145,10 @@ drop:
 static void try_fill_recv(struct virtnet_info *vi)
 {
 	struct sk_buff *skb;
-	struct scatterlist sg[1+MAX_SKB_FRAGS];
+	struct scatterlist sg[2+MAX_SKB_FRAGS];
 	int num, err;
 
-	sg_init_table(sg, 1+MAX_SKB_FRAGS);
+	sg_init_table(sg, 2+MAX_SKB_FRAGS);
 	for (;;) {
 		skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN);
 		if (unlikely(!skb))
@@ -243,11 +243,11 @@ static int start_xmit(struct sk_buff *sk
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 	int num, err;
-	struct scatterlist sg[1+MAX_SKB_FRAGS];
+	struct scatterlist sg[2+MAX_SKB_FRAGS];
 	struct virtio_net_hdr *hdr;
 	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
 
-	sg_init_table(sg, 1+MAX_SKB_FRAGS);
+	sg_init_table(sg, 2+MAX_SKB_FRAGS);
 
 	pr_debug("%s: xmit %p " MAC_FMT "\n", dev->name, skb,
 		 dest[0], dest[1], dest[2],

linux-2.6-virtio-net-add-ethtool-ops-for-sg-gso.patch:

--- NEW FILE linux-2.6-virtio-net-add-ethtool-ops-for-sg-gso.patch ---
>From 4708964290760097f0c91fdf80c95c2355b14be7 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert at gondor.apana.org.au>
Date: Fri, 18 Apr 2008 11:21:42 +0800
Subject: [PATCH 1/6] virtio_net: Add ethtool ops for SG/GSO

This patch adds some basic ethtool operations to virtio_net so
I could test SG without GSO (which was really useful because TSO
turned out to be buggy :)

Signed-off-by: Rusty Russell <rusty at rustcorp.com.au> (remove MTU setting)
Signed-off-by: Mark McLoughlin <markmc at redhat.com>
---
 drivers/net/virtio_net.c |   19 +++++++++++++++++++
 1 files changed, 19 insertions(+), 0 deletions(-)

Index: linux-2.6.25.noarch/drivers/net/virtio_net.c
===================================================================
--- linux-2.6.25.noarch.orig/drivers/net/virtio_net.c	2008-05-20 14:24:01.000000000 +0100
+++ linux-2.6.25.noarch.orig/drivers/net/virtio_net.c	2008-05-20 14:24:01.000000000 +0100
@@ -19,6 +19,7 @@
 //#define DEBUG
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
+#include <linux/ethtool.h>
 #include <linux/module.h>
 #include <linux/virtio.h>
 #include <linux/virtio_net.h>
@@ -346,6 +347,23 @@ static int virtnet_close(struct net_devi
 	return 0;
 }
 
+static int virtnet_set_tx_csum(struct net_device *dev, u32 data)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+	struct virtio_device *vdev = vi->vdev;
+
+	if (data && !vdev->config->feature(vdev, VIRTIO_NET_F_CSUM))
+		return -ENOSYS;
+
+	return ethtool_op_set_tx_hw_csum(dev, data);
+}
+
+static struct ethtool_ops virtnet_ethtool_ops =
+{
+	.set_tx_csum = virtnet_set_tx_csum,
+	.set_sg = ethtool_op_set_sg,
+};
+
 static int virtnet_probe(struct virtio_device *vdev)
 {
 	int err;
@@ -365,6 +383,7 @@ static int virtnet_probe(struct virtio_d
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	dev->poll_controller = virtnet_netpoll;
 #endif
+	SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops);
 	SET_NETDEV_DEV(dev, &vdev->dev);
 
 	/* Do we support "hardware" checksums? */

linux-2.6-virtio-net-allow-receiving-sg-packets.patch:

--- NEW FILE linux-2.6-virtio-net-allow-receiving-sg-packets.patch ---
>From a35558d6b6f35047821a7c86ff31961a70a12431 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert at gondor.apana.org.au>
Date: Fri, 18 Apr 2008 11:24:27 +0800
Subject: [PATCH 3/6] virtio_net: Allow receiving SG packets

Finally this patch lets virtio_net receive GSO packets in addition
to sending them.  This can definitely be optimised for the non-GSO
case.  For comparison the Xen approach stores one page in each skb
and uses subsequent skb's pages to construct an SG skb instead of
preallocating the maximum amount of pages per skb.

Signed-off-by: Rusty Russell <rusty at rustcorp.com.au>
Signed-off-by: Mark McLoughlin <markmc at redhat.com>
---
 drivers/net/virtio_net.c |   27 +++++++++++++++++++++++++--
 1 files changed, 25 insertions(+), 2 deletions(-)

Index: linux-2.6.25.noarch/drivers/net/virtio_net.c
===================================================================
--- linux-2.6.25.noarch.orig/drivers/net/virtio_net.c	2008-05-20 14:31:32.000000000 +0100
+++ linux-2.6.25.noarch.orig/drivers/net/virtio_net.c	2008-05-20 14:31:32.000000000 +0100
@@ -76,6 +76,7 @@ static void receive_skb(struct net_devic
 			unsigned len)
 {
 	struct virtio_net_hdr *hdr = skb_vnet_hdr(skb);
+	int err;
 
 	if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
 		pr_debug("%s: short packet %i\n", dev->name, len);
@@ -83,9 +84,15 @@ static void receive_skb(struct net_devic
 		goto drop;
 	}
 	len -= sizeof(struct virtio_net_hdr);
-	BUG_ON(len > MAX_PACKET_LEN);
 
-	skb_trim(skb, len);
+	err = pskb_trim(skb, len);
+	if (err) {
+		pr_debug("%s: pskb_trim failed %i %d\n", dev->name, len, err);
+		dev->stats.rx_dropped++;
+		goto drop;
+	}
+	skb->truesize += skb->data_len;
+
 	skb->protocol = eth_type_trans(skb, dev);
 	pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
 		 ntohs(skb->protocol), skb->len, skb->pkt_type);
@@ -147,6 +154,7 @@ static void try_fill_recv(struct virtnet
 	struct sk_buff *skb;
 	struct scatterlist sg[2+MAX_SKB_FRAGS];
 	int num, err;
+	int i;
 
 	sg_init_table(sg, 2+MAX_SKB_FRAGS);
 	for (;;) {
@@ -156,6 +164,21 @@ static void try_fill_recv(struct virtnet
 
 		skb_put(skb, MAX_PACKET_LEN);
 		vnet_hdr_to_sg(sg, skb);
+
+		for (i = 0; i < MAX_SKB_FRAGS; i++) {
+			skb_shinfo(skb)->frags[i].page = alloc_page(GFP_ATOMIC);
+			if (!skb_shinfo(skb)->frags[i].page)
+				break;
+
+			skb_shinfo(skb)->frags[i].page_offset = 0;
+			skb_shinfo(skb)->frags[i].size = PAGE_SIZE;
+
+			skb->data_len += PAGE_SIZE;
+			skb->len += PAGE_SIZE;
+
+			skb_shinfo(skb)->nr_frags++;
+		}
+
 		num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
 		skb_queue_head(&vi->recv, skb);
 


Index: kernel.spec
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/F-9/kernel.spec,v
retrieving revision 1.648
retrieving revision 1.648.2.1
diff -u -r1.648 -r1.648.2.1
--- kernel.spec	20 May 2008 08:34:58 -0000	1.648
+++ kernel.spec	20 May 2008 13:49:16 -0000	1.648.2.1
@@ -12,7 +12,7 @@
 # that the kernel isn't the stock distribution kernel, for example,
 # by setting the define to ".local" or ".bz123456"
 #
-#% define buildid .local
+%define buildid .virtio_gso
 
 # fedora_build defines which build revision of this kernel version we're
 # building. Rather than incrementing forever, as with the prior versioning
@@ -20,8 +20,9 @@
 # kernel spec when the kernel is rebased, so fedora_build automatically
 # works out to the offset from the rebase, so it doesn't get too ginormous.
 #
-%define fedora_cvs_origin 619
-%define fedora_build %(R="$Revision$"; R="${R%% \$}"; R="${R##: 1.}"; expr $R - %{fedora_cvs_origin})
+%define fedora_cvs_origin 29
+%define private_markmc_virtio_gso_branch 1.648.2.
+%define fedora_build %(R="$Revision$"; R="${R%% \$}"; echo "%{fedora_cvs_origin}.${R##: %{private_markmc_virtio_gso_branch}}")
 
 # base_sublevel is the kernel version we're starting with and patching
 # on top of -- for example, 2.6.22-rc7-git1 starts with a 2.6.21 base,
@@ -635,6 +636,13 @@
 
 Patch1600: linux-2.6-virtio_net-free-transmit-skbs-in-a-timer.patch
 
+Patch1601: linux-2.6-virtio-net-add-ethtool-ops-for-sg-gso.patch
+Patch1602: linux-2.6-virtio-fix-scatterlist-sizing-in-net-driver.patch
+Patch1603: linux-2.6-virtio-net-allow-receiving-sg-packets.patch
+Patch1604: linux-2.6-partial-checksum-and-gso-support-for-tun-tap.patch
+Patch1605: linux-2.6-interface-to-query-tun-tap-features.patch
+Patch1606: linux-2.6-tun-fix-gso-mapping.patch
+
 # nouveau + drm fixes
 Patch1801: linux-2.6-drm-git-mm.patch
 Patch1803: nouveau-drm.patch
@@ -1194,6 +1202,16 @@
 # virtio: dont hang on shutdown
 ApplyPatch linux-2.6-virtio_net-free-transmit-skbs-in-a-timer.patch
 
+# virtio: support GSO
+ApplyPatch linux-2.6-virtio-net-add-ethtool-ops-for-sg-gso.patch
+ApplyPatch linux-2.6-virtio-fix-scatterlist-sizing-in-net-driver.patch
+ApplyPatch linux-2.6-virtio-net-allow-receiving-sg-packets.patch
+
+# tun: support GSO
+ApplyPatch linux-2.6-partial-checksum-and-gso-support-for-tun-tap.patch
+ApplyPatch linux-2.6-interface-to-query-tun-tap-features.patch
+ApplyPatch linux-2.6-tun-fix-gso-mapping.patch
+
 ApplyPatch linux-2.6-e1000-ich9.patch
 
 ApplyPatch linux-2.6-sata-eeepc-faster.patch
@@ -1825,6 +1843,9 @@
 %kernel_variant_files -a /%{image_install_path}/xen*-%{KVERREL}.xen -e /etc/ld.so.conf.d/kernelcap-%{KVERREL}.xen.conf %{with_xen} xen
 
 %changelog
+* Tue May 20 2008 Mark McLoughlin <markmc at redhat.com> - 2.6.25.4-29.1.virtio_gso
+- virtio_net/tun: add GSO patches
+
 * Tue May 20 2008 Chuck Ebbert <cebbert at redhat.com> 2.6.25.4-29
 - virtio_net: free transmit skbs in a timer (#444765)
 




More information about the fedora-extras-commits mailing list