Doug / smarc-fsl-linux-kernel | Embedian Git Server

Commit c9d6369978411f690513994e6e53e2e6410874a4

Authored by David Vrabel 2011-09-29 23:53:31 +0800

Committed by Konrad Rzeszutek Wilk 2011-10-26 22:02:56 +0800

Exists in master and in 6 other branches

net: xen-netback: use API provided by xenbus module to map rings

The xenbus module provides xenbus_map_ring_valloc() and
xenbus_map_ring_vfree().  Use these to map the Tx and Rx ring pages
granted by the frontend.

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

Showing 2 changed files with 22 additions and 69 deletions Inline Diff

drivers/net/xen-netback/common.h
drivers/net/xen-netback/netback.c

drivers/net/xen-netback/common.h

Diff comments View file @ c9d6369

 /*
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License version 2
  * as published by the Free Software Foundation; or, when distributed
  * separately from the Linux kernel or incorporated into other
  * software packages, subject to the following license:
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this source file (the "Software"), to deal in the Software without
  * restriction, including without limitation the rights to use, copy, modify,
  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
  * and to permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  * The above copyright notice and this permission notice shall be included in
  * all copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  * IN THE SOFTWARE.
  */
 #ifndef __XEN_NETBACK__COMMON_H__
 #define __XEN_NETBACK__COMMON_H__
 #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/ip.h>
 #include <linux/in.h>
 #include <linux/io.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/wait.h>
 #include <linux/sched.h>
 #include <xen/interface/io/netif.h>
 #include <xen/interface/grant_table.h>
 #include <xen/grant_table.h>
 #include <xen/xenbus.h>
 struct xen_netbk;
 struct xenvif {
 	/* Unique identifier for this interface. */
 	domid_t          domid;
 	unsigned int     handle;
 	/* Reference to netback processing backend. */
 	struct xen_netbk *netbk;
 	u8               fe_dev_addr[6];
 	/* Physical parameters of the comms window. */
-	grant_handle_t   tx_shmem_handle;
-	grant_ref_t      tx_shmem_ref;
-	grant_handle_t   rx_shmem_handle;
-	grant_ref_t      rx_shmem_ref;
 	unsigned int     irq;
 	/* List of frontends to notify after a batch of frames sent. */
 	struct list_head notify_list;
 	/* The shared rings and indexes. */
 	struct xen_netif_tx_back_ring tx;
 	struct xen_netif_rx_back_ring rx;
-	struct vm_struct *tx_comms_area;
-	struct vm_struct *rx_comms_area;
 	/* Frontend feature information. */
 	u8 can_sg:1;
 	u8 gso:1;
 	u8 gso_prefix:1;
 	u8 csum:1;
 	/* Internal feature information. */
 	u8 can_queue:1;	    /* can queue packets for receiver? */
 	/*
 	 * Allow xenvif_start_xmit() to peek ahead in the rx request
 	 * ring.  This is a prediction of what rx_req_cons will be
 	 * once all queued skbs are put on the ring.
 	 */
 	RING_IDX rx_req_cons_peek;
 	/* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
 	unsigned long   credit_bytes;
 	unsigned long   credit_usec;
 	unsigned long   remaining_credit;
 	struct timer_list credit_timeout;
 	/* Statistics */
 	unsigned long rx_gso_checksum_fixup;
 	/* Miscellaneous private stuff. */
 	struct list_head schedule_list;
 	atomic_t         refcnt;
 	struct net_device *dev;
 	wait_queue_head_t waiting_to_free;
 };
+static inline struct xenbus_device *xenvif_to_xenbus_device(struct xenvif *vif)
+{
+	return to_xenbus_device(vif->dev->dev.parent);
+}
 #define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
 #define XEN_NETIF_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
 struct xenvif *xenvif_alloc(struct device *parent,
 			    domid_t domid,
 			    unsigned int handle);
 int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 		   unsigned long rx_ring_ref, unsigned int evtchn);
 void xenvif_disconnect(struct xenvif *vif);
 void xenvif_get(struct xenvif *vif);
 void xenvif_put(struct xenvif *vif);
 int xenvif_xenbus_init(void);
 int xenvif_schedulable(struct xenvif *vif);
 int xen_netbk_rx_ring_full(struct xenvif *vif);
 int xen_netbk_must_stop_queue(struct xenvif *vif);
 /* (Un)Map communication rings. */
 void xen_netbk_unmap_frontend_rings(struct xenvif *vif);
 int xen_netbk_map_frontend_rings(struct xenvif *vif,
 				 grant_ref_t tx_ring_ref,
 				 grant_ref_t rx_ring_ref);
 /* (De)Register a xenvif with the netback backend. */
 void xen_netbk_add_xenvif(struct xenvif *vif);
 void xen_netbk_remove_xenvif(struct xenvif *vif);
 /* (De)Schedule backend processing for a xenvif */
 void xen_netbk_schedule_xenvif(struct xenvif *vif);
 void xen_netbk_deschedule_xenvif(struct xenvif *vif);
 /* Check for SKBs from frontend and schedule backend processing */
 void xen_netbk_check_rx_xenvif(struct xenvif *vif);
 /* Receive an SKB from the frontend */
 void xenvif_receive_skb(struct xenvif *vif, struct sk_buff *skb);
 /* Queue an SKB for transmission to the frontend */
 void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb);
 /* Notify xenvif that ring now has space to send an skb to the frontend */
 void xenvif_notify_tx_completion(struct xenvif *vif);

drivers/net/xen-netback/netback.c

Diff comments View file @ c9d6369

1	/*	1	/*
2	* Back-end of the driver for virtual network devices. This portion of the	2	* Back-end of the driver for virtual network devices. This portion of the
3	* driver exports a 'unified' network-device interface that can be accessed	3	* driver exports a 'unified' network-device interface that can be accessed
4	* by any operating system that implements a compatible front end. A	4	* by any operating system that implements a compatible front end. A
5	* reference front-end implementation can be found in:	5	* reference front-end implementation can be found in:
6	* drivers/net/xen-netfront.c	6	* drivers/net/xen-netfront.c
7	*	7	*
8	* Copyright (c) 2002-2005, K A Fraser	8	* Copyright (c) 2002-2005, K A Fraser
9	*	9	*
10	* This program is free software; you can redistribute it and/or	10	* This program is free software; you can redistribute it and/or
11	* modify it under the terms of the GNU General Public License version 2	11	* modify it under the terms of the GNU General Public License version 2
12	* as published by the Free Software Foundation; or, when distributed	12	* as published by the Free Software Foundation; or, when distributed
13	* separately from the Linux kernel or incorporated into other	13	* separately from the Linux kernel or incorporated into other
14	* software packages, subject to the following license:	14	* software packages, subject to the following license:
15	*	15	*
16	* Permission is hereby granted, free of charge, to any person obtaining a copy	16	* Permission is hereby granted, free of charge, to any person obtaining a copy
17	* of this source file (the "Software"), to deal in the Software without	17	* of this source file (the "Software"), to deal in the Software without
18	* restriction, including without limitation the rights to use, copy, modify,	18	* restriction, including without limitation the rights to use, copy, modify,
19	* merge, publish, distribute, sublicense, and/or sell copies of the Software,	19	* merge, publish, distribute, sublicense, and/or sell copies of the Software,
20	* and to permit persons to whom the Software is furnished to do so, subject to	20	* and to permit persons to whom the Software is furnished to do so, subject to
21	* the following conditions:	21	* the following conditions:
22	*	22	*
23	* The above copyright notice and this permission notice shall be included in	23	* The above copyright notice and this permission notice shall be included in
24	* all copies or substantial portions of the Software.	24	* all copies or substantial portions of the Software.
25	*	25	*
26	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR	26	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,	27	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE	28	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER	29	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING	30	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
31	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS	31	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
32	* IN THE SOFTWARE.	32	* IN THE SOFTWARE.
33	*/	33	*/
34		34
35	#include "common.h"	35	#include "common.h"
36		36
37	#include <linux/kthread.h>	37	#include <linux/kthread.h>
38	#include <linux/if_vlan.h>	38	#include <linux/if_vlan.h>
39	#include <linux/udp.h>	39	#include <linux/udp.h>
40		40
41	#include <net/tcp.h>	41	#include <net/tcp.h>
42		42
43	#include <xen/events.h>	43	#include <xen/events.h>
44	#include <xen/interface/memory.h>	44	#include <xen/interface/memory.h>
45		45
46	#include <asm/xen/hypercall.h>	46	#include <asm/xen/hypercall.h>
47	#include <asm/xen/page.h>	47	#include <asm/xen/page.h>
48		48
49	struct pending_tx_info {	49	struct pending_tx_info {
50	struct xen_netif_tx_request req;	50	struct xen_netif_tx_request req;
51	struct xenvif *vif;	51	struct xenvif *vif;
52	};	52	};
53	typedef unsigned int pending_ring_idx_t;	53	typedef unsigned int pending_ring_idx_t;
54		54
55	struct netbk_rx_meta {	55	struct netbk_rx_meta {
56	int id;	56	int id;
57	int size;	57	int size;
58	int gso_size;	58	int gso_size;
59	};	59	};
60		60
61	#define MAX_PENDING_REQS 256	61	#define MAX_PENDING_REQS 256
62		62
63	#define MAX_BUFFER_OFFSET PAGE_SIZE	63	#define MAX_BUFFER_OFFSET PAGE_SIZE
64		64
65	/* extra field used in struct page */	65	/* extra field used in struct page */
66	union page_ext {	66	union page_ext {
67	struct {	67	struct {
68	#if BITS_PER_LONG < 64	68	#if BITS_PER_LONG < 64
69	#define IDX_WIDTH 8	69	#define IDX_WIDTH 8
70	#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)	70	#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
71	unsigned int group:GROUP_WIDTH;	71	unsigned int group:GROUP_WIDTH;
72	unsigned int idx:IDX_WIDTH;	72	unsigned int idx:IDX_WIDTH;
73	#else	73	#else
74	unsigned int group, idx;	74	unsigned int group, idx;
75	#endif	75	#endif
76	} e;	76	} e;
77	void *mapping;	77	void *mapping;
78	};	78	};
79		79
80	struct xen_netbk {	80	struct xen_netbk {
81	wait_queue_head_t wq;	81	wait_queue_head_t wq;
82	struct task_struct *task;	82	struct task_struct *task;
83		83
84	struct sk_buff_head rx_queue;	84	struct sk_buff_head rx_queue;
85	struct sk_buff_head tx_queue;	85	struct sk_buff_head tx_queue;
86		86
87	struct timer_list net_timer;	87	struct timer_list net_timer;
88		88
89	struct page *mmap_pages[MAX_PENDING_REQS];	89	struct page *mmap_pages[MAX_PENDING_REQS];
90		90
91	pending_ring_idx_t pending_prod;	91	pending_ring_idx_t pending_prod;
92	pending_ring_idx_t pending_cons;	92	pending_ring_idx_t pending_cons;
93	struct list_head net_schedule_list;	93	struct list_head net_schedule_list;
94		94
95	/* Protect the net_schedule_list in netif. */	95	/* Protect the net_schedule_list in netif. */
96	spinlock_t net_schedule_list_lock;	96	spinlock_t net_schedule_list_lock;
97		97
98	atomic_t netfront_count;	98	atomic_t netfront_count;
99		99
100	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];	100	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
101	struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];	101	struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
102		102
103	u16 pending_ring[MAX_PENDING_REQS];	103	u16 pending_ring[MAX_PENDING_REQS];
104		104
105	/*	105	/*
106	* Given MAX_BUFFER_OFFSET of 4096 the worst case is that each	106	* Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
107	* head/fragment page uses 2 copy operations because it	107	* head/fragment page uses 2 copy operations because it
108	* straddles two buffers in the frontend.	108	* straddles two buffers in the frontend.
109	*/	109	*/
110	struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE];	110	struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE];
111	struct netbk_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE];	111	struct netbk_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE];
112	};	112	};
113		113
114	static struct xen_netbk *xen_netbk;	114	static struct xen_netbk *xen_netbk;
115	static int xen_netbk_group_nr;	115	static int xen_netbk_group_nr;
116		116
117	void xen_netbk_add_xenvif(struct xenvif *vif)	117	void xen_netbk_add_xenvif(struct xenvif *vif)
118	{	118	{
119	int i;	119	int i;
120	int min_netfront_count;	120	int min_netfront_count;
121	int min_group = 0;	121	int min_group = 0;
122	struct xen_netbk *netbk;	122	struct xen_netbk *netbk;
123		123
124	min_netfront_count = atomic_read(&xen_netbk[0].netfront_count);	124	min_netfront_count = atomic_read(&xen_netbk[0].netfront_count);
125	for (i = 0; i < xen_netbk_group_nr; i++) {	125	for (i = 0; i < xen_netbk_group_nr; i++) {
126	int netfront_count = atomic_read(&xen_netbk[i].netfront_count);	126	int netfront_count = atomic_read(&xen_netbk[i].netfront_count);
127	if (netfront_count < min_netfront_count) {	127	if (netfront_count < min_netfront_count) {
128	min_group = i;	128	min_group = i;
129	min_netfront_count = netfront_count;	129	min_netfront_count = netfront_count;
130	}	130	}
131	}	131	}
132		132
133	netbk = &xen_netbk[min_group];	133	netbk = &xen_netbk[min_group];
134		134
135	vif->netbk = netbk;	135	vif->netbk = netbk;
136	atomic_inc(&netbk->netfront_count);	136	atomic_inc(&netbk->netfront_count);
137	}	137	}
138		138
139	void xen_netbk_remove_xenvif(struct xenvif *vif)	139	void xen_netbk_remove_xenvif(struct xenvif *vif)
140	{	140	{
141	struct xen_netbk *netbk = vif->netbk;	141	struct xen_netbk *netbk = vif->netbk;
142	vif->netbk = NULL;	142	vif->netbk = NULL;
143	atomic_dec(&netbk->netfront_count);	143	atomic_dec(&netbk->netfront_count);
144	}	144	}
145		145
146	static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx);	146	static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx);
147	static void make_tx_response(struct xenvif *vif,	147	static void make_tx_response(struct xenvif *vif,
148	struct xen_netif_tx_request *txp,	148	struct xen_netif_tx_request *txp,
149	s8 st);	149	s8 st);
150	static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,	150	static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
151	u16 id,	151	u16 id,
152	s8 st,	152	s8 st,
153	u16 offset,	153	u16 offset,
154	u16 size,	154	u16 size,
155	u16 flags);	155	u16 flags);
156		156
157	static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,	157	static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
158	unsigned int idx)	158	unsigned int idx)
159	{	159	{
160	return page_to_pfn(netbk->mmap_pages[idx]);	160	return page_to_pfn(netbk->mmap_pages[idx]);
161	}	161	}
162		162
163	static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,	163	static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
164	unsigned int idx)	164	unsigned int idx)
165	{	165	{
166	return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));	166	return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
167	}	167	}
168		168
169	/* extra field used in struct page */	169	/* extra field used in struct page */
170	static inline void set_page_ext(struct page *pg, struct xen_netbk *netbk,	170	static inline void set_page_ext(struct page *pg, struct xen_netbk *netbk,
171	unsigned int idx)	171	unsigned int idx)
172	{	172	{
173	unsigned int group = netbk - xen_netbk;	173	unsigned int group = netbk - xen_netbk;
174	union page_ext ext = { .e = { .group = group + 1, .idx = idx } };	174	union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
175		175
176	BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));	176	BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
177	pg->mapping = ext.mapping;	177	pg->mapping = ext.mapping;
178	}	178	}
179		179
180	static int get_page_ext(struct page *pg,	180	static int get_page_ext(struct page *pg,
181	unsigned int *pgroup, unsigned int *pidx)	181	unsigned int *pgroup, unsigned int *pidx)
182	{	182	{
183	union page_ext ext = { .mapping = pg->mapping };	183	union page_ext ext = { .mapping = pg->mapping };
184	struct xen_netbk *netbk;	184	struct xen_netbk *netbk;
185	unsigned int group, idx;	185	unsigned int group, idx;
186		186
187	group = ext.e.group - 1;	187	group = ext.e.group - 1;
188		188
189	if (group < 0 || group >= xen_netbk_group_nr)	189	if (group < 0 || group >= xen_netbk_group_nr)
190	return 0;	190	return 0;
191		191
192	netbk = &xen_netbk[group];	192	netbk = &xen_netbk[group];
193		193
194	idx = ext.e.idx;	194	idx = ext.e.idx;
195		195
196	if ((idx < 0) || (idx >= MAX_PENDING_REQS))	196	if ((idx < 0) || (idx >= MAX_PENDING_REQS))
197	return 0;	197	return 0;
198		198
199	if (netbk->mmap_pages[idx] != pg)	199	if (netbk->mmap_pages[idx] != pg)
200	return 0;	200	return 0;
201		201
202	*pgroup = group;	202	*pgroup = group;
203	*pidx = idx;	203	*pidx = idx;
204		204
205	return 1;	205	return 1;
206	}	206	}
207		207
208	/*	208	/*
209	* This is the amount of packet we copy rather than map, so that the	209	* This is the amount of packet we copy rather than map, so that the
210	* guest can't fiddle with the contents of the headers while we do	210	* guest can't fiddle with the contents of the headers while we do
211	* packet processing on them (netfilter, routing, etc).	211	* packet processing on them (netfilter, routing, etc).
212	*/	212	*/
213	#define PKT_PROT_LEN (ETH_HLEN + \	213	#define PKT_PROT_LEN (ETH_HLEN + \
214	VLAN_HLEN + \	214	VLAN_HLEN + \
215	sizeof(struct iphdr) + MAX_IPOPTLEN + \	215	sizeof(struct iphdr) + MAX_IPOPTLEN + \
216	sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)	216	sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
217		217
218	static inline pending_ring_idx_t pending_index(unsigned i)	218	static inline pending_ring_idx_t pending_index(unsigned i)
219	{	219	{
220	return i & (MAX_PENDING_REQS-1);	220	return i & (MAX_PENDING_REQS-1);
221	}	221	}
222		222
223	static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)	223	static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
224	{	224	{
225	return MAX_PENDING_REQS -	225	return MAX_PENDING_REQS -
226	netbk->pending_prod + netbk->pending_cons;	226	netbk->pending_prod + netbk->pending_cons;
227	}	227	}
228		228
229	static void xen_netbk_kick_thread(struct xen_netbk *netbk)	229	static void xen_netbk_kick_thread(struct xen_netbk *netbk)
230	{	230	{
231	wake_up(&netbk->wq);	231	wake_up(&netbk->wq);
232	}	232	}
233		233
234	static int max_required_rx_slots(struct xenvif *vif)	234	static int max_required_rx_slots(struct xenvif *vif)
235	{	235	{
236	int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);	236	int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);
237		237
238	if (vif->can_sg || vif->gso || vif->gso_prefix)	238	if (vif->can_sg || vif->gso || vif->gso_prefix)
239	max += MAX_SKB_FRAGS + 1; /* extra_info + frags */	239	max += MAX_SKB_FRAGS + 1; /* extra_info + frags */
240		240
241	return max;	241	return max;
242	}	242	}
243		243
244	int xen_netbk_rx_ring_full(struct xenvif *vif)	244	int xen_netbk_rx_ring_full(struct xenvif *vif)
245	{	245	{
246	RING_IDX peek = vif->rx_req_cons_peek;	246	RING_IDX peek = vif->rx_req_cons_peek;
247	RING_IDX needed = max_required_rx_slots(vif);	247	RING_IDX needed = max_required_rx_slots(vif);
248		248
249	return ((vif->rx.sring->req_prod - peek) < needed) ||	249	return ((vif->rx.sring->req_prod - peek) < needed) ||
250	((vif->rx.rsp_prod_pvt + XEN_NETIF_RX_RING_SIZE - peek) < needed);	250	((vif->rx.rsp_prod_pvt + XEN_NETIF_RX_RING_SIZE - peek) < needed);
251	}	251	}
252		252
253	int xen_netbk_must_stop_queue(struct xenvif *vif)	253	int xen_netbk_must_stop_queue(struct xenvif *vif)
254	{	254	{
255	if (!xen_netbk_rx_ring_full(vif))	255	if (!xen_netbk_rx_ring_full(vif))
256	return 0;	256	return 0;
257		257
258	vif->rx.sring->req_event = vif->rx_req_cons_peek +	258	vif->rx.sring->req_event = vif->rx_req_cons_peek +
259	max_required_rx_slots(vif);	259	max_required_rx_slots(vif);
260	mb(); /* request notification /then/ check the queue */	260	mb(); /* request notification /then/ check the queue */
261		261
262	return xen_netbk_rx_ring_full(vif);	262	return xen_netbk_rx_ring_full(vif);
263	}	263	}
264		264
265	/*	265	/*
266	* Returns true if we should start a new receive buffer instead of	266	* Returns true if we should start a new receive buffer instead of
267	* adding 'size' bytes to a buffer which currently contains 'offset'	267	* adding 'size' bytes to a buffer which currently contains 'offset'
268	* bytes.	268	* bytes.
269	*/	269	*/
270	static bool start_new_rx_buffer(int offset, unsigned long size, int head)	270	static bool start_new_rx_buffer(int offset, unsigned long size, int head)
271	{	271	{
272	/* simple case: we have completely filled the current buffer. */	272	/* simple case: we have completely filled the current buffer. */
273	if (offset == MAX_BUFFER_OFFSET)	273	if (offset == MAX_BUFFER_OFFSET)
274	return true;	274	return true;
275		275
276	/*	276	/*
277	* complex case: start a fresh buffer if the current frag	277	* complex case: start a fresh buffer if the current frag
278	* would overflow the current buffer but only if:	278	* would overflow the current buffer but only if:
279	* (i) this frag would fit completely in the next buffer	279	* (i) this frag would fit completely in the next buffer
280	* and (ii) there is already some data in the current buffer	280	* and (ii) there is already some data in the current buffer
281	* and (iii) this is not the head buffer.	281	* and (iii) this is not the head buffer.
282	*	282	*
283	* Where:	283	* Where:
284	* - (i) stops us splitting a frag into two copies	284	* - (i) stops us splitting a frag into two copies
285	* unless the frag is too large for a single buffer.	285	* unless the frag is too large for a single buffer.
286	* - (ii) stops us from leaving a buffer pointlessly empty.	286	* - (ii) stops us from leaving a buffer pointlessly empty.
287	* - (iii) stops us leaving the first buffer	287	* - (iii) stops us leaving the first buffer
288	* empty. Strictly speaking this is already covered	288	* empty. Strictly speaking this is already covered
289	* by (ii) but is explicitly checked because	289	* by (ii) but is explicitly checked because
290	* netfront relies on the first buffer being	290	* netfront relies on the first buffer being
291	* non-empty and can crash otherwise.	291	* non-empty and can crash otherwise.
292	*	292	*
293	* This means we will effectively linearise small	293	* This means we will effectively linearise small
294	* frags but do not needlessly split large buffers	294	* frags but do not needlessly split large buffers
295	* into multiple copies tend to give large frags their	295	* into multiple copies tend to give large frags their
296	* own buffers as before.	296	* own buffers as before.
297	*/	297	*/
298	if ((offset + size > MAX_BUFFER_OFFSET) &&	298	if ((offset + size > MAX_BUFFER_OFFSET) &&
299	(size <= MAX_BUFFER_OFFSET) && offset && !head)	299	(size <= MAX_BUFFER_OFFSET) && offset && !head)
300	return true;	300	return true;
301		301
302	return false;	302	return false;
303	}	303	}
304		304
305	/*	305	/*
306	* Figure out how many ring slots we're going to need to send @skb to	306	* Figure out how many ring slots we're going to need to send @skb to
307	* the guest. This function is essentially a dry run of	307	* the guest. This function is essentially a dry run of
308	* netbk_gop_frag_copy.	308	* netbk_gop_frag_copy.
309	*/	309	*/
310	unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb)	310	unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb)
311	{	311	{
312	unsigned int count;	312	unsigned int count;
313	int i, copy_off;	313	int i, copy_off;
314		314
315	count = DIV_ROUND_UP(	315	count = DIV_ROUND_UP(
316	offset_in_page(skb->data)+skb_headlen(skb), PAGE_SIZE);	316	offset_in_page(skb->data)+skb_headlen(skb), PAGE_SIZE);
317		317
318	copy_off = skb_headlen(skb) % PAGE_SIZE;	318	copy_off = skb_headlen(skb) % PAGE_SIZE;
319		319
320	if (skb_shinfo(skb)->gso_size)	320	if (skb_shinfo(skb)->gso_size)
321	count++;	321	count++;
322		322
323	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {	323	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
324	unsigned long size = skb_shinfo(skb)->frags[i].size;	324	unsigned long size = skb_shinfo(skb)->frags[i].size;
325	unsigned long bytes;	325	unsigned long bytes;
326	while (size > 0) {	326	while (size > 0) {
327	BUG_ON(copy_off > MAX_BUFFER_OFFSET);	327	BUG_ON(copy_off > MAX_BUFFER_OFFSET);
328		328
329	if (start_new_rx_buffer(copy_off, size, 0)) {	329	if (start_new_rx_buffer(copy_off, size, 0)) {
330	count++;	330	count++;
331	copy_off = 0;	331	copy_off = 0;
332	}	332	}
333		333
334	bytes = size;	334	bytes = size;
335	if (copy_off + bytes > MAX_BUFFER_OFFSET)	335	if (copy_off + bytes > MAX_BUFFER_OFFSET)
336	bytes = MAX_BUFFER_OFFSET - copy_off;	336	bytes = MAX_BUFFER_OFFSET - copy_off;
337		337
338	copy_off += bytes;	338	copy_off += bytes;
339	size -= bytes;	339	size -= bytes;
340	}	340	}
341	}	341	}
342	return count;	342	return count;
343	}	343	}
344		344
345	struct netrx_pending_operations {	345	struct netrx_pending_operations {
346	unsigned copy_prod, copy_cons;	346	unsigned copy_prod, copy_cons;
347	unsigned meta_prod, meta_cons;	347	unsigned meta_prod, meta_cons;
348	struct gnttab_copy *copy;	348	struct gnttab_copy *copy;
349	struct netbk_rx_meta *meta;	349	struct netbk_rx_meta *meta;
350	int copy_off;	350	int copy_off;
351	grant_ref_t copy_gref;	351	grant_ref_t copy_gref;
352	};	352	};
353		353
354	static struct netbk_rx_meta *get_next_rx_buffer(struct xenvif *vif,	354	static struct netbk_rx_meta *get_next_rx_buffer(struct xenvif *vif,
355	struct netrx_pending_operations *npo)	355	struct netrx_pending_operations *npo)
356	{	356	{
357	struct netbk_rx_meta *meta;	357	struct netbk_rx_meta *meta;
358	struct xen_netif_rx_request *req;	358	struct xen_netif_rx_request *req;
359		359
360	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);	360	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
361		361
362	meta = npo->meta + npo->meta_prod++;	362	meta = npo->meta + npo->meta_prod++;
363	meta->gso_size = 0;	363	meta->gso_size = 0;
364	meta->size = 0;	364	meta->size = 0;
365	meta->id = req->id;	365	meta->id = req->id;
366		366
367	npo->copy_off = 0;	367	npo->copy_off = 0;
368	npo->copy_gref = req->gref;	368	npo->copy_gref = req->gref;
369		369
370	return meta;	370	return meta;
371	}	371	}
372		372
373	/*	373	/*
374	* Set up the grant operations for this fragment. If it's a flipping	374	* Set up the grant operations for this fragment. If it's a flipping
375	* interface, we also set up the unmap request from here.	375	* interface, we also set up the unmap request from here.
376	*/	376	*/
377	static void netbk_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,	377	static void netbk_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
378	struct netrx_pending_operations *npo,	378	struct netrx_pending_operations *npo,
379	struct page *page, unsigned long size,	379	struct page *page, unsigned long size,
380	unsigned long offset, int *head)	380	unsigned long offset, int *head)
381	{	381	{
382	struct gnttab_copy *copy_gop;	382	struct gnttab_copy *copy_gop;
383	struct netbk_rx_meta *meta;	383	struct netbk_rx_meta *meta;
384	/*	384	/*
385	* These variables a used iff get_page_ext returns true,	385	* These variables a used iff get_page_ext returns true,
386	* in which case they are guaranteed to be initialized.	386	* in which case they are guaranteed to be initialized.
387	*/	387	*/
388	unsigned int uninitialized_var(group), uninitialized_var(idx);	388	unsigned int uninitialized_var(group), uninitialized_var(idx);
389	int foreign = get_page_ext(page, &group, &idx);	389	int foreign = get_page_ext(page, &group, &idx);
390	unsigned long bytes;	390	unsigned long bytes;
391		391
392	/* Data must not cross a page boundary. */	392	/* Data must not cross a page boundary. */
393	BUG_ON(size + offset > PAGE_SIZE);	393	BUG_ON(size + offset > PAGE_SIZE);
394		394
395	meta = npo->meta + npo->meta_prod - 1;	395	meta = npo->meta + npo->meta_prod - 1;
396		396
397	while (size > 0) {	397	while (size > 0) {
398	BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);	398	BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
399		399
400	if (start_new_rx_buffer(npo->copy_off, size, *head)) {	400	if (start_new_rx_buffer(npo->copy_off, size, *head)) {
401	/*	401	/*
402	* Netfront requires there to be some data in the head	402	* Netfront requires there to be some data in the head
403	* buffer.	403	* buffer.
404	*/	404	*/
405	BUG_ON(*head);	405	BUG_ON(*head);
406		406
407	meta = get_next_rx_buffer(vif, npo);	407	meta = get_next_rx_buffer(vif, npo);
408	}	408	}
409		409
410	bytes = size;	410	bytes = size;
411	if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)	411	if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
412	bytes = MAX_BUFFER_OFFSET - npo->copy_off;	412	bytes = MAX_BUFFER_OFFSET - npo->copy_off;
413		413
414	copy_gop = npo->copy + npo->copy_prod++;	414	copy_gop = npo->copy + npo->copy_prod++;
415	copy_gop->flags = GNTCOPY_dest_gref;	415	copy_gop->flags = GNTCOPY_dest_gref;
416	if (foreign) {	416	if (foreign) {
417	struct xen_netbk *netbk = &xen_netbk[group];	417	struct xen_netbk *netbk = &xen_netbk[group];
418	struct pending_tx_info *src_pend;	418	struct pending_tx_info *src_pend;
419		419
420	src_pend = &netbk->pending_tx_info[idx];	420	src_pend = &netbk->pending_tx_info[idx];
421		421
422	copy_gop->source.domid = src_pend->vif->domid;	422	copy_gop->source.domid = src_pend->vif->domid;
423	copy_gop->source.u.ref = src_pend->req.gref;	423	copy_gop->source.u.ref = src_pend->req.gref;
424	copy_gop->flags |= GNTCOPY_source_gref;	424	copy_gop->flags |= GNTCOPY_source_gref;
425	} else {	425	} else {
426	void *vaddr = page_address(page);	426	void *vaddr = page_address(page);
427	copy_gop->source.domid = DOMID_SELF;	427	copy_gop->source.domid = DOMID_SELF;
428	copy_gop->source.u.gmfn = virt_to_mfn(vaddr);	428	copy_gop->source.u.gmfn = virt_to_mfn(vaddr);
429	}	429	}
430	copy_gop->source.offset = offset;	430	copy_gop->source.offset = offset;
431	copy_gop->dest.domid = vif->domid;	431	copy_gop->dest.domid = vif->domid;
432		432
433	copy_gop->dest.offset = npo->copy_off;	433	copy_gop->dest.offset = npo->copy_off;
434	copy_gop->dest.u.ref = npo->copy_gref;	434	copy_gop->dest.u.ref = npo->copy_gref;
435	copy_gop->len = bytes;	435	copy_gop->len = bytes;
436		436
437	npo->copy_off += bytes;	437	npo->copy_off += bytes;
438	meta->size += bytes;	438	meta->size += bytes;
439		439
440	offset += bytes;	440	offset += bytes;
441	size -= bytes;	441	size -= bytes;
442		442
443	/* Leave a gap for the GSO descriptor. */	443	/* Leave a gap for the GSO descriptor. */
444	if (*head && skb_shinfo(skb)->gso_size && !vif->gso_prefix)	444	if (*head && skb_shinfo(skb)->gso_size && !vif->gso_prefix)
445	vif->rx.req_cons++;	445	vif->rx.req_cons++;
446		446
447	head = 0; / There must be something in this buffer now. */	447	head = 0; / There must be something in this buffer now. */
448		448
449	}	449	}
450	}	450	}
451		451
452	/*	452	/*
453	* Prepare an SKB to be transmitted to the frontend.	453	* Prepare an SKB to be transmitted to the frontend.
454	*	454	*
455	* This function is responsible for allocating grant operations, meta	455	* This function is responsible for allocating grant operations, meta
456	* structures, etc.	456	* structures, etc.
457	*	457	*
458	* It returns the number of meta structures consumed. The number of	458	* It returns the number of meta structures consumed. The number of
459	* ring slots used is always equal to the number of meta slots used	459	* ring slots used is always equal to the number of meta slots used
460	* plus the number of GSO descriptors used. Currently, we use either	460	* plus the number of GSO descriptors used. Currently, we use either
461	* zero GSO descriptors (for non-GSO packets) or one descriptor (for	461	* zero GSO descriptors (for non-GSO packets) or one descriptor (for
462	* frontend-side LRO).	462	* frontend-side LRO).
463	*/	463	*/
464	static int netbk_gop_skb(struct sk_buff *skb,	464	static int netbk_gop_skb(struct sk_buff *skb,
465	struct netrx_pending_operations *npo)	465	struct netrx_pending_operations *npo)
466	{	466	{
467	struct xenvif *vif = netdev_priv(skb->dev);	467	struct xenvif *vif = netdev_priv(skb->dev);
468	int nr_frags = skb_shinfo(skb)->nr_frags;	468	int nr_frags = skb_shinfo(skb)->nr_frags;
469	int i;	469	int i;
470	struct xen_netif_rx_request *req;	470	struct xen_netif_rx_request *req;
471	struct netbk_rx_meta *meta;	471	struct netbk_rx_meta *meta;
472	unsigned char *data;	472	unsigned char *data;
473	int head = 1;	473	int head = 1;
474	int old_meta_prod;	474	int old_meta_prod;
475		475
476	old_meta_prod = npo->meta_prod;	476	old_meta_prod = npo->meta_prod;
477		477
478	/* Set up a GSO prefix descriptor, if necessary */	478	/* Set up a GSO prefix descriptor, if necessary */
479	if (skb_shinfo(skb)->gso_size && vif->gso_prefix) {	479	if (skb_shinfo(skb)->gso_size && vif->gso_prefix) {
480	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);	480	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
481	meta = npo->meta + npo->meta_prod++;	481	meta = npo->meta + npo->meta_prod++;
482	meta->gso_size = skb_shinfo(skb)->gso_size;	482	meta->gso_size = skb_shinfo(skb)->gso_size;
483	meta->size = 0;	483	meta->size = 0;
484	meta->id = req->id;	484	meta->id = req->id;
485	}	485	}
486		486
487	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);	487	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
488	meta = npo->meta + npo->meta_prod++;	488	meta = npo->meta + npo->meta_prod++;
489		489
490	if (!vif->gso_prefix)	490	if (!vif->gso_prefix)
491	meta->gso_size = skb_shinfo(skb)->gso_size;	491	meta->gso_size = skb_shinfo(skb)->gso_size;
492	else	492	else
493	meta->gso_size = 0;	493	meta->gso_size = 0;
494		494
495	meta->size = 0;	495	meta->size = 0;
496	meta->id = req->id;	496	meta->id = req->id;
497	npo->copy_off = 0;	497	npo->copy_off = 0;
498	npo->copy_gref = req->gref;	498	npo->copy_gref = req->gref;
499		499
500	data = skb->data;	500	data = skb->data;
501	while (data < skb_tail_pointer(skb)) {	501	while (data < skb_tail_pointer(skb)) {
502	unsigned int offset = offset_in_page(data);	502	unsigned int offset = offset_in_page(data);
503	unsigned int len = PAGE_SIZE - offset;	503	unsigned int len = PAGE_SIZE - offset;
504		504
505	if (data + len > skb_tail_pointer(skb))	505	if (data + len > skb_tail_pointer(skb))
506	len = skb_tail_pointer(skb) - data;	506	len = skb_tail_pointer(skb) - data;
507		507
508	netbk_gop_frag_copy(vif, skb, npo,	508	netbk_gop_frag_copy(vif, skb, npo,
509	virt_to_page(data), len, offset, &head);	509	virt_to_page(data), len, offset, &head);
510	data += len;	510	data += len;
511	}	511	}
512		512
513	for (i = 0; i < nr_frags; i++) {	513	for (i = 0; i < nr_frags; i++) {
514	netbk_gop_frag_copy(vif, skb, npo,	514	netbk_gop_frag_copy(vif, skb, npo,
515	skb_shinfo(skb)->frags[i].page,	515	skb_shinfo(skb)->frags[i].page,
516	skb_shinfo(skb)->frags[i].size,	516	skb_shinfo(skb)->frags[i].size,
517	skb_shinfo(skb)->frags[i].page_offset,	517	skb_shinfo(skb)->frags[i].page_offset,
518	&head);	518	&head);
519	}	519	}
520		520
521	return npo->meta_prod - old_meta_prod;	521	return npo->meta_prod - old_meta_prod;
522	}	522	}
523		523
524	/*	524	/*
525	* This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was	525	* This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
526	* used to set up the operations on the top of	526	* used to set up the operations on the top of
527	* netrx_pending_operations, which have since been done. Check that	527	* netrx_pending_operations, which have since been done. Check that
528	* they didn't give any errors and advance over them.	528	* they didn't give any errors and advance over them.
529	*/	529	*/
530	static int netbk_check_gop(struct xenvif *vif, int nr_meta_slots,	530	static int netbk_check_gop(struct xenvif *vif, int nr_meta_slots,
531	struct netrx_pending_operations *npo)	531	struct netrx_pending_operations *npo)
532	{	532	{
533	struct gnttab_copy *copy_op;	533	struct gnttab_copy *copy_op;
534	int status = XEN_NETIF_RSP_OKAY;	534	int status = XEN_NETIF_RSP_OKAY;
535	int i;	535	int i;
536		536
537	for (i = 0; i < nr_meta_slots; i++) {	537	for (i = 0; i < nr_meta_slots; i++) {
538	copy_op = npo->copy + npo->copy_cons++;	538	copy_op = npo->copy + npo->copy_cons++;
539	if (copy_op->status != GNTST_okay) {	539	if (copy_op->status != GNTST_okay) {
540	netdev_dbg(vif->dev,	540	netdev_dbg(vif->dev,
541	"Bad status %d from copy to DOM%d.\n",	541	"Bad status %d from copy to DOM%d.\n",
542	copy_op->status, vif->domid);	542	copy_op->status, vif->domid);
543	status = XEN_NETIF_RSP_ERROR;	543	status = XEN_NETIF_RSP_ERROR;
544	}	544	}
545	}	545	}
546		546
547	return status;	547	return status;
548	}	548	}
549		549
550	static void netbk_add_frag_responses(struct xenvif *vif, int status,	550	static void netbk_add_frag_responses(struct xenvif *vif, int status,
551	struct netbk_rx_meta *meta,	551	struct netbk_rx_meta *meta,
552	int nr_meta_slots)	552	int nr_meta_slots)
553	{	553	{
554	int i;	554	int i;
555	unsigned long offset;	555	unsigned long offset;
556		556
557	/* No fragments used */	557	/* No fragments used */
558	if (nr_meta_slots <= 1)	558	if (nr_meta_slots <= 1)
559	return;	559	return;
560		560
561	nr_meta_slots--;	561	nr_meta_slots--;
562		562
563	for (i = 0; i < nr_meta_slots; i++) {	563	for (i = 0; i < nr_meta_slots; i++) {
564	int flags;	564	int flags;
565	if (i == nr_meta_slots - 1)	565	if (i == nr_meta_slots - 1)
566	flags = 0;	566	flags = 0;
567	else	567	else
568	flags = XEN_NETRXF_more_data;	568	flags = XEN_NETRXF_more_data;
569		569
570	offset = 0;	570	offset = 0;
571	make_rx_response(vif, meta[i].id, status, offset,	571	make_rx_response(vif, meta[i].id, status, offset,
572	meta[i].size, flags);	572	meta[i].size, flags);
573	}	573	}
574	}	574	}
575		575
/*
 * Layout imposed on skb->cb while an skb sits in the rx batch: remembers
 * how many meta slots netbk_gop_skb() consumed for it.
 */
struct skb_cb_overlay {
	int meta_slots_used;
};
579		579
580	static void xen_netbk_rx_action(struct xen_netbk *netbk)	580	static void xen_netbk_rx_action(struct xen_netbk *netbk)
581	{	581	{
582	struct xenvif vif = NULL, tmp;	582	struct xenvif vif = NULL, tmp;
583	s8 status;	583	s8 status;
584	u16 irq, flags;	584	u16 irq, flags;
585	struct xen_netif_rx_response *resp;	585	struct xen_netif_rx_response *resp;
586	struct sk_buff_head rxq;	586	struct sk_buff_head rxq;
587	struct sk_buff *skb;	587	struct sk_buff *skb;
588	LIST_HEAD(notify);	588	LIST_HEAD(notify);
589	int ret;	589	int ret;
590	int nr_frags;	590	int nr_frags;
591	int count;	591	int count;
592	unsigned long offset;	592	unsigned long offset;
593	struct skb_cb_overlay *sco;	593	struct skb_cb_overlay *sco;
594		594
595	struct netrx_pending_operations npo = {	595	struct netrx_pending_operations npo = {
596	.copy = netbk->grant_copy_op,	596	.copy = netbk->grant_copy_op,
597	.meta = netbk->meta,	597	.meta = netbk->meta,
598	};	598	};
599		599
600	skb_queue_head_init(&rxq);	600	skb_queue_head_init(&rxq);
601		601
602	count = 0;	602	count = 0;
603		603
604	while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {	604	while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
605	vif = netdev_priv(skb->dev);	605	vif = netdev_priv(skb->dev);
606	nr_frags = skb_shinfo(skb)->nr_frags;	606	nr_frags = skb_shinfo(skb)->nr_frags;
607		607
608	sco = (struct skb_cb_overlay *)skb->cb;	608	sco = (struct skb_cb_overlay *)skb->cb;
609	sco->meta_slots_used = netbk_gop_skb(skb, &npo);	609	sco->meta_slots_used = netbk_gop_skb(skb, &npo);
610		610
611	count += nr_frags + 1;	611	count += nr_frags + 1;
612		612
613	__skb_queue_tail(&rxq, skb);	613	__skb_queue_tail(&rxq, skb);
614		614
615	/* Filled the batch queue? */	615	/* Filled the batch queue? */
616	if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE)	616	if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE)
617	break;	617	break;
618	}	618	}
619		619
620	BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));	620	BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
621		621
622	if (!npo.copy_prod)	622	if (!npo.copy_prod)
623	return;	623	return;
624		624
625	BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));	625	BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
626	ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op,	626	ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op,
627	npo.copy_prod);	627	npo.copy_prod);
628	BUG_ON(ret != 0);	628	BUG_ON(ret != 0);
629		629
630	while ((skb = __skb_dequeue(&rxq)) != NULL) {	630	while ((skb = __skb_dequeue(&rxq)) != NULL) {
631	sco = (struct skb_cb_overlay *)skb->cb;	631	sco = (struct skb_cb_overlay *)skb->cb;
632		632
633	vif = netdev_priv(skb->dev);	633	vif = netdev_priv(skb->dev);
634		634
635	if (netbk->meta[npo.meta_cons].gso_size && vif->gso_prefix) {	635	if (netbk->meta[npo.meta_cons].gso_size && vif->gso_prefix) {
636	resp = RING_GET_RESPONSE(&vif->rx,	636	resp = RING_GET_RESPONSE(&vif->rx,
637	vif->rx.rsp_prod_pvt++);	637	vif->rx.rsp_prod_pvt++);
638		638
639	resp->flags = XEN_NETRXF_gso_prefix \| XEN_NETRXF_more_data;	639	resp->flags = XEN_NETRXF_gso_prefix \| XEN_NETRXF_more_data;
640		640
641	resp->offset = netbk->meta[npo.meta_cons].gso_size;	641	resp->offset = netbk->meta[npo.meta_cons].gso_size;
642	resp->id = netbk->meta[npo.meta_cons].id;	642	resp->id = netbk->meta[npo.meta_cons].id;
643	resp->status = sco->meta_slots_used;	643	resp->status = sco->meta_slots_used;
644		644
645	npo.meta_cons++;	645	npo.meta_cons++;
646	sco->meta_slots_used--;	646	sco->meta_slots_used--;
647	}	647	}
648		648
649		649
650	vif->dev->stats.tx_bytes += skb->len;	650	vif->dev->stats.tx_bytes += skb->len;
651	vif->dev->stats.tx_packets++;	651	vif->dev->stats.tx_packets++;
652		652
653	status = netbk_check_gop(vif, sco->meta_slots_used, &npo);	653	status = netbk_check_gop(vif, sco->meta_slots_used, &npo);
654		654
655	if (sco->meta_slots_used == 1)	655	if (sco->meta_slots_used == 1)
656	flags = 0;	656	flags = 0;
657	else	657	else
658	flags = XEN_NETRXF_more_data;	658	flags = XEN_NETRXF_more_data;
659		659
660	if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */	660	if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
661	flags \|= XEN_NETRXF_csum_blank \| XEN_NETRXF_data_validated;	661	flags \|= XEN_NETRXF_csum_blank \| XEN_NETRXF_data_validated;
662	else if (skb->ip_summed == CHECKSUM_UNNECESSARY)	662	else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
663	/* remote but checksummed. */	663	/* remote but checksummed. */
664	flags \|= XEN_NETRXF_data_validated;	664	flags \|= XEN_NETRXF_data_validated;
665		665
666	offset = 0;	666	offset = 0;
667	resp = make_rx_response(vif, netbk->meta[npo.meta_cons].id,	667	resp = make_rx_response(vif, netbk->meta[npo.meta_cons].id,
668	status, offset,	668	status, offset,
669	netbk->meta[npo.meta_cons].size,	669	netbk->meta[npo.meta_cons].size,
670	flags);	670	flags);
671		671
672	if (netbk->meta[npo.meta_cons].gso_size && !vif->gso_prefix) {	672	if (netbk->meta[npo.meta_cons].gso_size && !vif->gso_prefix) {
673	struct xen_netif_extra_info *gso =	673	struct xen_netif_extra_info *gso =
674	(struct xen_netif_extra_info *)	674	(struct xen_netif_extra_info *)
675	RING_GET_RESPONSE(&vif->rx,	675	RING_GET_RESPONSE(&vif->rx,
676	vif->rx.rsp_prod_pvt++);	676	vif->rx.rsp_prod_pvt++);
677		677
678	resp->flags \|= XEN_NETRXF_extra_info;	678	resp->flags \|= XEN_NETRXF_extra_info;
679		679
680	gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;	680	gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
681	gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;	681	gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
682	gso->u.gso.pad = 0;	682	gso->u.gso.pad = 0;
683	gso->u.gso.features = 0;	683	gso->u.gso.features = 0;
684		684
685	gso->type = XEN_NETIF_EXTRA_TYPE_GSO;	685	gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
686	gso->flags = 0;	686	gso->flags = 0;
687	}	687	}
688		688
689	netbk_add_frag_responses(vif, status,	689	netbk_add_frag_responses(vif, status,
690	netbk->meta + npo.meta_cons + 1,	690	netbk->meta + npo.meta_cons + 1,
691	sco->meta_slots_used);	691	sco->meta_slots_used);
692		692
693	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);	693	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);
694	irq = vif->irq;	694	irq = vif->irq;
695	if (ret && list_empty(&vif->notify_list))	695	if (ret && list_empty(&vif->notify_list))
696	list_add_tail(&vif->notify_list, &notify);	696	list_add_tail(&vif->notify_list, &notify);
697		697
698	xenvif_notify_tx_completion(vif);	698	xenvif_notify_tx_completion(vif);
699		699
700	xenvif_put(vif);	700	xenvif_put(vif);
701	npo.meta_cons += sco->meta_slots_used;	701	npo.meta_cons += sco->meta_slots_used;
702	dev_kfree_skb(skb);	702	dev_kfree_skb(skb);
703	}	703	}
704		704
705	list_for_each_entry_safe(vif, tmp, &notify, notify_list) {	705	list_for_each_entry_safe(vif, tmp, &notify, notify_list) {
706	notify_remote_via_irq(vif->irq);	706	notify_remote_via_irq(vif->irq);
707	list_del_init(&vif->notify_list);	707	list_del_init(&vif->notify_list);
708	}	708	}
709		709
710	/* More work to do? */	710	/* More work to do? */
711	if (!skb_queue_empty(&netbk->rx_queue) &&	711	if (!skb_queue_empty(&netbk->rx_queue) &&
712	!timer_pending(&netbk->net_timer))	712	!timer_pending(&netbk->net_timer))
713	xen_netbk_kick_thread(netbk);	713	xen_netbk_kick_thread(netbk);
714	}	714	}
715		715
716	void xen_netbk_queue_tx_skb(struct xenvif vif, struct sk_buff skb)	716	void xen_netbk_queue_tx_skb(struct xenvif vif, struct sk_buff skb)
717	{	717	{
718	struct xen_netbk *netbk = vif->netbk;	718	struct xen_netbk *netbk = vif->netbk;
719		719
720	skb_queue_tail(&netbk->rx_queue, skb);	720	skb_queue_tail(&netbk->rx_queue, skb);
721		721
722	xen_netbk_kick_thread(netbk);	722	xen_netbk_kick_thread(netbk);
723	}	723	}
724		724
/*
 * Timer-style callback: @data carries the struct xen_netbk pointer cast
 * to unsigned long; the instance's thread is kicked.
 */
static void xen_netbk_alarm(unsigned long data)
{
	struct xen_netbk *netbk = (struct xen_netbk *)data;

	xen_netbk_kick_thread(netbk);
}
730		730
731	static int __on_net_schedule_list(struct xenvif *vif)	731	static int __on_net_schedule_list(struct xenvif *vif)
732	{	732	{
733	return !list_empty(&vif->schedule_list);	733	return !list_empty(&vif->schedule_list);
734	}	734	}
735		735
736	/* Must be called with net_schedule_list_lock held */	736	/* Must be called with net_schedule_list_lock held */
737	static void remove_from_net_schedule_list(struct xenvif *vif)	737	static void remove_from_net_schedule_list(struct xenvif *vif)
738	{	738	{
739	if (likely(__on_net_schedule_list(vif))) {	739	if (likely(__on_net_schedule_list(vif))) {
740	list_del_init(&vif->schedule_list);	740	list_del_init(&vif->schedule_list);
741	xenvif_put(vif);	741	xenvif_put(vif);
742	}	742	}
743	}	743	}
744		744
745	static struct xenvif poll_net_schedule_list(struct xen_netbk netbk)	745	static struct xenvif poll_net_schedule_list(struct xen_netbk netbk)
746	{	746	{
747	struct xenvif *vif = NULL;	747	struct xenvif *vif = NULL;
748		748
749	spin_lock_irq(&netbk->net_schedule_list_lock);	749	spin_lock_irq(&netbk->net_schedule_list_lock);
750	if (list_empty(&netbk->net_schedule_list))	750	if (list_empty(&netbk->net_schedule_list))
751	goto out;	751	goto out;
752		752
753	vif = list_first_entry(&netbk->net_schedule_list,	753	vif = list_first_entry(&netbk->net_schedule_list,
754	struct xenvif, schedule_list);	754	struct xenvif, schedule_list);
755	if (!vif)	755	if (!vif)
756	goto out;	756	goto out;
757		757
758	xenvif_get(vif);	758	xenvif_get(vif);
759		759
760	remove_from_net_schedule_list(vif);	760	remove_from_net_schedule_list(vif);
761	out:	761	out:
762	spin_unlock_irq(&netbk->net_schedule_list_lock);	762	spin_unlock_irq(&netbk->net_schedule_list_lock);
763	return vif;	763	return vif;
764	}	764	}
765		765
766	void xen_netbk_schedule_xenvif(struct xenvif *vif)	766	void xen_netbk_schedule_xenvif(struct xenvif *vif)
767	{	767	{
768	unsigned long flags;	768	unsigned long flags;
769	struct xen_netbk *netbk = vif->netbk;	769	struct xen_netbk *netbk = vif->netbk;
770		770
771	if (__on_net_schedule_list(vif))	771	if (__on_net_schedule_list(vif))
772	goto kick;	772	goto kick;
773		773
774	spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);	774	spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
775	if (!__on_net_schedule_list(vif) &&	775	if (!__on_net_schedule_list(vif) &&
776	likely(xenvif_schedulable(vif))) {	776	likely(xenvif_schedulable(vif))) {
777	list_add_tail(&vif->schedule_list, &netbk->net_schedule_list);	777	list_add_tail(&vif->schedule_list, &netbk->net_schedule_list);
778	xenvif_get(vif);	778	xenvif_get(vif);
779	}	779	}
780	spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);	780	spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);
781		781
782	kick:	782	kick:
783	smp_mb();	783	smp_mb();
784	if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&	784	if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
785	!list_empty(&netbk->net_schedule_list))	785	!list_empty(&netbk->net_schedule_list))
786	xen_netbk_kick_thread(netbk);	786	xen_netbk_kick_thread(netbk);
787	}	787	}
788		788
789	void xen_netbk_deschedule_xenvif(struct xenvif *vif)	789	void xen_netbk_deschedule_xenvif(struct xenvif *vif)
790	{	790	{
791	struct xen_netbk *netbk = vif->netbk;	791	struct xen_netbk *netbk = vif->netbk;
792	spin_lock_irq(&netbk->net_schedule_list_lock);	792	spin_lock_irq(&netbk->net_schedule_list_lock);
793	remove_from_net_schedule_list(vif);	793	remove_from_net_schedule_list(vif);
794	spin_unlock_irq(&netbk->net_schedule_list_lock);	794	spin_unlock_irq(&netbk->net_schedule_list_lock);
795	}	795	}
796		796
797	void xen_netbk_check_rx_xenvif(struct xenvif *vif)	797	void xen_netbk_check_rx_xenvif(struct xenvif *vif)
798	{	798	{
799	int more_to_do;	799	int more_to_do;
800		800
801	RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);	801	RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
802		802
803	if (more_to_do)	803	if (more_to_do)
804	xen_netbk_schedule_xenvif(vif);	804	xen_netbk_schedule_xenvif(vif);
805	}	805	}
806		806
807	static void tx_add_credit(struct xenvif *vif)	807	static void tx_add_credit(struct xenvif *vif)
808	{	808	{
809	unsigned long max_burst, max_credit;	809	unsigned long max_burst, max_credit;
810		810
811	/*	811	/*
812	* Allow a burst big enough to transmit a jumbo packet of up to 128kB.	812	* Allow a burst big enough to transmit a jumbo packet of up to 128kB.
813	* Otherwise the interface can seize up due to insufficient credit.	813	* Otherwise the interface can seize up due to insufficient credit.
814	*/	814	*/
815	max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size;	815	max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size;
816	max_burst = min(max_burst, 131072UL);	816	max_burst = min(max_burst, 131072UL);
817	max_burst = max(max_burst, vif->credit_bytes);	817	max_burst = max(max_burst, vif->credit_bytes);
818		818
819	/* Take care that adding a new chunk of credit doesn't wrap to zero. */	819	/* Take care that adding a new chunk of credit doesn't wrap to zero. */
820	max_credit = vif->remaining_credit + vif->credit_bytes;	820	max_credit = vif->remaining_credit + vif->credit_bytes;
821	if (max_credit < vif->remaining_credit)	821	if (max_credit < vif->remaining_credit)
822	max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */	822	max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
823		823
824	vif->remaining_credit = min(max_credit, max_burst);	824	vif->remaining_credit = min(max_credit, max_burst);
825	}	825	}
826		826
/*
 * Timer-style callback: @data carries the struct xenvif pointer cast to
 * unsigned long.  Tops up the vif's credit and re-checks its tx ring for
 * work.
 */
static void tx_credit_callback(unsigned long data)
{
	struct xenvif *vif = (struct xenvif *)data;

	tx_add_credit(vif);
	xen_netbk_check_rx_xenvif(vif);
}
833		833
834	static void netbk_tx_err(struct xenvif *vif,	834	static void netbk_tx_err(struct xenvif *vif,
835	struct xen_netif_tx_request *txp, RING_IDX end)	835	struct xen_netif_tx_request *txp, RING_IDX end)
836	{	836	{
837	RING_IDX cons = vif->tx.req_cons;	837	RING_IDX cons = vif->tx.req_cons;
838		838
839	do {	839	do {
840	make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);	840	make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
841	if (cons >= end)	841	if (cons >= end)
842	break;	842	break;
843	txp = RING_GET_REQUEST(&vif->tx, cons++);	843	txp = RING_GET_REQUEST(&vif->tx, cons++);
844	} while (1);	844	} while (1);
845	vif->tx.req_cons = cons;	845	vif->tx.req_cons = cons;
846	xen_netbk_check_rx_xenvif(vif);	846	xen_netbk_check_rx_xenvif(vif);
847	xenvif_put(vif);	847	xenvif_put(vif);
848	}	848	}
849		849
850	static int netbk_count_requests(struct xenvif *vif,	850	static int netbk_count_requests(struct xenvif *vif,
851	struct xen_netif_tx_request *first,	851	struct xen_netif_tx_request *first,
852	struct xen_netif_tx_request *txp,	852	struct xen_netif_tx_request *txp,
853	int work_to_do)	853	int work_to_do)
854	{	854	{
855	RING_IDX cons = vif->tx.req_cons;	855	RING_IDX cons = vif->tx.req_cons;
856	int frags = 0;	856	int frags = 0;
857		857
858	if (!(first->flags & XEN_NETTXF_more_data))	858	if (!(first->flags & XEN_NETTXF_more_data))
859	return 0;	859	return 0;
860		860
861	do {	861	do {
862	if (frags >= work_to_do) {	862	if (frags >= work_to_do) {
863	netdev_dbg(vif->dev, "Need more frags\n");	863	netdev_dbg(vif->dev, "Need more frags\n");
864	return -frags;	864	return -frags;
865	}	865	}
866		866
867	if (unlikely(frags >= MAX_SKB_FRAGS)) {	867	if (unlikely(frags >= MAX_SKB_FRAGS)) {
868	netdev_dbg(vif->dev, "Too many frags\n");	868	netdev_dbg(vif->dev, "Too many frags\n");
869	return -frags;	869	return -frags;
870	}	870	}
871		871
872	memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + frags),	872	memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + frags),
873	sizeof(*txp));	873	sizeof(*txp));
874	if (txp->size > first->size) {	874	if (txp->size > first->size) {
875	netdev_dbg(vif->dev, "Frags galore\n");	875	netdev_dbg(vif->dev, "Frags galore\n");
876	return -frags;	876	return -frags;
877	}	877	}
878		878
879	first->size -= txp->size;	879	first->size -= txp->size;
880	frags++;	880	frags++;
881		881
882	if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {	882	if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
883	netdev_dbg(vif->dev, "txp->offset: %x, size: %u\n",	883	netdev_dbg(vif->dev, "txp->offset: %x, size: %u\n",
884	txp->offset, txp->size);	884	txp->offset, txp->size);
885	return -frags;	885	return -frags;
886	}	886	}
887	} while ((txp++)->flags & XEN_NETTXF_more_data);	887	} while ((txp++)->flags & XEN_NETTXF_more_data);
888	return frags;	888	return frags;
889	}	889	}
890		890
891	static struct page xen_netbk_alloc_page(struct xen_netbk netbk,	891	static struct page xen_netbk_alloc_page(struct xen_netbk netbk,
892	struct sk_buff *skb,	892	struct sk_buff *skb,
893	unsigned long pending_idx)	893	unsigned long pending_idx)
894	{	894	{
895	struct page *page;	895	struct page *page;
896	page = alloc_page(GFP_KERNEL\|__GFP_COLD);	896	page = alloc_page(GFP_KERNEL\|__GFP_COLD);
897	if (!page)	897	if (!page)
898	return NULL;	898	return NULL;
899	set_page_ext(page, netbk, pending_idx);	899	set_page_ext(page, netbk, pending_idx);
900	netbk->mmap_pages[pending_idx] = page;	900	netbk->mmap_pages[pending_idx] = page;
901	return page;	901	return page;
902	}	902	}
903		903
904	static struct gnttab_copy xen_netbk_get_requests(struct xen_netbk netbk,	904	static struct gnttab_copy xen_netbk_get_requests(struct xen_netbk netbk,
905	struct xenvif *vif,	905	struct xenvif *vif,
906	struct sk_buff *skb,	906	struct sk_buff *skb,
907	struct xen_netif_tx_request *txp,	907	struct xen_netif_tx_request *txp,
908	struct gnttab_copy *gop)	908	struct gnttab_copy *gop)
909	{	909	{
910	struct skb_shared_info *shinfo = skb_shinfo(skb);	910	struct skb_shared_info *shinfo = skb_shinfo(skb);
911	skb_frag_t *frags = shinfo->frags;	911	skb_frag_t *frags = shinfo->frags;
912	unsigned long pending_idx = ((u16 )skb->data);	912	unsigned long pending_idx = ((u16 )skb->data);
913	int i, start;	913	int i, start;
914		914
915	/* Skip first skb fragment if it is on same page as header fragment. */	915	/* Skip first skb fragment if it is on same page as header fragment. */
916	start = ((unsigned long)shinfo->frags[0].page == pending_idx);	916	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
917		917
918	for (i = start; i < shinfo->nr_frags; i++, txp++) {	918	for (i = start; i < shinfo->nr_frags; i++, txp++) {
919	struct page *page;	919	struct page *page;
920	pending_ring_idx_t index;	920	pending_ring_idx_t index;
921	struct pending_tx_info *pending_tx_info =	921	struct pending_tx_info *pending_tx_info =
922	netbk->pending_tx_info;	922	netbk->pending_tx_info;
923		923
924	index = pending_index(netbk->pending_cons++);	924	index = pending_index(netbk->pending_cons++);
925	pending_idx = netbk->pending_ring[index];	925	pending_idx = netbk->pending_ring[index];
926	page = xen_netbk_alloc_page(netbk, skb, pending_idx);	926	page = xen_netbk_alloc_page(netbk, skb, pending_idx);
927	if (!page)	927	if (!page)
928	return NULL;	928	return NULL;
929		929
930	netbk->mmap_pages[pending_idx] = page;	930	netbk->mmap_pages[pending_idx] = page;
931		931
932	gop->source.u.ref = txp->gref;	932	gop->source.u.ref = txp->gref;
933	gop->source.domid = vif->domid;	933	gop->source.domid = vif->domid;
934	gop->source.offset = txp->offset;	934	gop->source.offset = txp->offset;
935		935
936	gop->dest.u.gmfn = virt_to_mfn(page_address(page));	936	gop->dest.u.gmfn = virt_to_mfn(page_address(page));
937	gop->dest.domid = DOMID_SELF;	937	gop->dest.domid = DOMID_SELF;
938	gop->dest.offset = txp->offset;	938	gop->dest.offset = txp->offset;
939		939
940	gop->len = txp->size;	940	gop->len = txp->size;
941	gop->flags = GNTCOPY_source_gref;	941	gop->flags = GNTCOPY_source_gref;
942		942
943	gop++;	943	gop++;
944		944
945	memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));	945	memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
946	xenvif_get(vif);	946	xenvif_get(vif);
947	pending_tx_info[pending_idx].vif = vif;	947	pending_tx_info[pending_idx].vif = vif;
948	frags[i].page = (void *)pending_idx;	948	frags[i].page = (void *)pending_idx;
949	}	949	}
950		950
951	return gop;	951	return gop;
952	}	952	}
953		953
954	static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,	954	static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
955	struct sk_buff *skb,	955	struct sk_buff *skb,
956	struct gnttab_copy **gopp)	956	struct gnttab_copy **gopp)
957	{	957	{
958	struct gnttab_copy gop = gopp;	958	struct gnttab_copy gop = gopp;
959	int pending_idx = ((u16 )skb->data);	959	int pending_idx = ((u16 )skb->data);
960	struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;	960	struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
961	struct xenvif *vif = pending_tx_info[pending_idx].vif;	961	struct xenvif *vif = pending_tx_info[pending_idx].vif;
962	struct xen_netif_tx_request *txp;	962	struct xen_netif_tx_request *txp;
963	struct skb_shared_info *shinfo = skb_shinfo(skb);	963	struct skb_shared_info *shinfo = skb_shinfo(skb);
964	int nr_frags = shinfo->nr_frags;	964	int nr_frags = shinfo->nr_frags;
965	int i, err, start;	965	int i, err, start;
966		966
967	/* Check status of header. */	967	/* Check status of header. */
968	err = gop->status;	968	err = gop->status;
969	if (unlikely(err)) {	969	if (unlikely(err)) {
970	pending_ring_idx_t index;	970	pending_ring_idx_t index;
971	index = pending_index(netbk->pending_prod++);	971	index = pending_index(netbk->pending_prod++);
972	txp = &pending_tx_info[pending_idx].req;	972	txp = &pending_tx_info[pending_idx].req;
973	make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);	973	make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
974	netbk->pending_ring[index] = pending_idx;	974	netbk->pending_ring[index] = pending_idx;
975	xenvif_put(vif);	975	xenvif_put(vif);
976	}	976	}
977		977
978	/* Skip first skb fragment if it is on same page as header fragment. */	978	/* Skip first skb fragment if it is on same page as header fragment. */
979	start = ((unsigned long)shinfo->frags[0].page == pending_idx);	979	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
980		980
981	for (i = start; i < nr_frags; i++) {	981	for (i = start; i < nr_frags; i++) {
982	int j, newerr;	982	int j, newerr;
983	pending_ring_idx_t index;	983	pending_ring_idx_t index;
984		984
985	pending_idx = (unsigned long)shinfo->frags[i].page;	985	pending_idx = (unsigned long)shinfo->frags[i].page;
986		986
987	/* Check error status: if okay then remember grant handle. */	987	/* Check error status: if okay then remember grant handle. */
988	newerr = (++gop)->status;	988	newerr = (++gop)->status;
989	if (likely(!newerr)) {	989	if (likely(!newerr)) {
990	/* Had a previous error? Invalidate this fragment. */	990	/* Had a previous error? Invalidate this fragment. */
991	if (unlikely(err))	991	if (unlikely(err))
992	xen_netbk_idx_release(netbk, pending_idx);	992	xen_netbk_idx_release(netbk, pending_idx);
993	continue;	993	continue;
994	}	994	}
995		995
996	/* Error on this fragment: respond to client with an error. */	996	/* Error on this fragment: respond to client with an error. */
997	txp = &netbk->pending_tx_info[pending_idx].req;	997	txp = &netbk->pending_tx_info[pending_idx].req;
998	make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);	998	make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
999	index = pending_index(netbk->pending_prod++);	999	index = pending_index(netbk->pending_prod++);
1000	netbk->pending_ring[index] = pending_idx;	1000	netbk->pending_ring[index] = pending_idx;
1001	xenvif_put(vif);	1001	xenvif_put(vif);
1002		1002
1003	/* Not the first error? Preceding frags already invalidated. */	1003	/* Not the first error? Preceding frags already invalidated. */
1004	if (err)	1004	if (err)
1005	continue;	1005	continue;
1006		1006
1007	/* First error: invalidate header and preceding fragments. */	1007	/* First error: invalidate header and preceding fragments. */
1008	pending_idx = ((u16 )skb->data);	1008	pending_idx = ((u16 )skb->data);
1009	xen_netbk_idx_release(netbk, pending_idx);	1009	xen_netbk_idx_release(netbk, pending_idx);
1010	for (j = start; j < i; j++) {	1010	for (j = start; j < i; j++) {
1011	pending_idx = (unsigned long)shinfo->frags[i].page;	1011	pending_idx = (unsigned long)shinfo->frags[i].page;
1012	xen_netbk_idx_release(netbk, pending_idx);	1012	xen_netbk_idx_release(netbk, pending_idx);
1013	}	1013	}
1014		1014
1015	/* Remember the error: invalidate all subsequent fragments. */	1015	/* Remember the error: invalidate all subsequent fragments. */
1016	err = newerr;	1016	err = newerr;
1017	}	1017	}
1018		1018
1019	*gopp = gop + 1;	1019	*gopp = gop + 1;
1020	return err;	1020	return err;
1021	}	1021	}
1022		1022
1023	static void xen_netbk_fill_frags(struct xen_netbk netbk, struct sk_buff skb)	1023	static void xen_netbk_fill_frags(struct xen_netbk netbk, struct sk_buff skb)
1024	{	1024	{
1025	struct skb_shared_info *shinfo = skb_shinfo(skb);	1025	struct skb_shared_info *shinfo = skb_shinfo(skb);
1026	int nr_frags = shinfo->nr_frags;	1026	int nr_frags = shinfo->nr_frags;
1027	int i;	1027	int i;
1028		1028
1029	for (i = 0; i < nr_frags; i++) {	1029	for (i = 0; i < nr_frags; i++) {
1030	skb_frag_t *frag = shinfo->frags + i;	1030	skb_frag_t *frag = shinfo->frags + i;
1031	struct xen_netif_tx_request *txp;	1031	struct xen_netif_tx_request *txp;
1032	unsigned long pending_idx;	1032	unsigned long pending_idx;
1033		1033
1034	pending_idx = (unsigned long)frag->page;	1034	pending_idx = (unsigned long)frag->page;
1035		1035
1036	txp = &netbk->pending_tx_info[pending_idx].req;	1036	txp = &netbk->pending_tx_info[pending_idx].req;
1037	frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));	1037	frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
1038	frag->size = txp->size;	1038	frag->size = txp->size;
1039	frag->page_offset = txp->offset;	1039	frag->page_offset = txp->offset;
1040		1040
1041	skb->len += txp->size;	1041	skb->len += txp->size;
1042	skb->data_len += txp->size;	1042	skb->data_len += txp->size;
1043	skb->truesize += txp->size;	1043	skb->truesize += txp->size;
1044		1044
1045	/* Take an extra reference to offset xen_netbk_idx_release */	1045	/* Take an extra reference to offset xen_netbk_idx_release */
1046	get_page(netbk->mmap_pages[pending_idx]);	1046	get_page(netbk->mmap_pages[pending_idx]);
1047	xen_netbk_idx_release(netbk, pending_idx);	1047	xen_netbk_idx_release(netbk, pending_idx);
1048	}	1048	}
1049	}	1049	}
1050		1050
1051	static int xen_netbk_get_extras(struct xenvif *vif,	1051	static int xen_netbk_get_extras(struct xenvif *vif,
1052	struct xen_netif_extra_info *extras,	1052	struct xen_netif_extra_info *extras,
1053	int work_to_do)	1053	int work_to_do)
1054	{	1054	{
1055	struct xen_netif_extra_info extra;	1055	struct xen_netif_extra_info extra;
1056	RING_IDX cons = vif->tx.req_cons;	1056	RING_IDX cons = vif->tx.req_cons;
1057		1057
1058	do {	1058	do {
1059	if (unlikely(work_to_do-- <= 0)) {	1059	if (unlikely(work_to_do-- <= 0)) {
1060	netdev_dbg(vif->dev, "Missing extra info\n");	1060	netdev_dbg(vif->dev, "Missing extra info\n");
1061	return -EBADR;	1061	return -EBADR;
1062	}	1062	}
1063		1063
1064	memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons),	1064	memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons),
1065	sizeof(extra));	1065	sizeof(extra));
1066	if (unlikely(!extra.type \|\|	1066	if (unlikely(!extra.type \|\|
1067	extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {	1067	extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
1068	vif->tx.req_cons = ++cons;	1068	vif->tx.req_cons = ++cons;
1069	netdev_dbg(vif->dev,	1069	netdev_dbg(vif->dev,
1070	"Invalid extra type: %d\n", extra.type);	1070	"Invalid extra type: %d\n", extra.type);
1071	return -EINVAL;	1071	return -EINVAL;
1072	}	1072	}
1073		1073
1074	memcpy(&extras[extra.type - 1], &extra, sizeof(extra));	1074	memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
1075	vif->tx.req_cons = ++cons;	1075	vif->tx.req_cons = ++cons;
1076	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);	1076	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
1077		1077
1078	return work_to_do;	1078	return work_to_do;
1079	}	1079	}
1080		1080
1081	static int netbk_set_skb_gso(struct xenvif *vif,	1081	static int netbk_set_skb_gso(struct xenvif *vif,
1082	struct sk_buff *skb,	1082	struct sk_buff *skb,
1083	struct xen_netif_extra_info *gso)	1083	struct xen_netif_extra_info *gso)
1084	{	1084	{
1085	if (!gso->u.gso.size) {	1085	if (!gso->u.gso.size) {
1086	netdev_dbg(vif->dev, "GSO size must not be zero.\n");	1086	netdev_dbg(vif->dev, "GSO size must not be zero.\n");
1087	return -EINVAL;	1087	return -EINVAL;
1088	}	1088	}
1089		1089
1090	/* Currently only TCPv4 S.O. is supported. */	1090	/* Currently only TCPv4 S.O. is supported. */
1091	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {	1091	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
1092	netdev_dbg(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);	1092	netdev_dbg(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
1093	return -EINVAL;	1093	return -EINVAL;
1094	}	1094	}
1095		1095
1096	skb_shinfo(skb)->gso_size = gso->u.gso.size;	1096	skb_shinfo(skb)->gso_size = gso->u.gso.size;
1097	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;	1097	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
1098		1098
1099	/* Header must be checked, and gso_segs computed. */	1099	/* Header must be checked, and gso_segs computed. */
1100	skb_shinfo(skb)->gso_type \|= SKB_GSO_DODGY;	1100	skb_shinfo(skb)->gso_type \|= SKB_GSO_DODGY;
1101	skb_shinfo(skb)->gso_segs = 0;	1101	skb_shinfo(skb)->gso_segs = 0;
1102		1102
1103	return 0;	1103	return 0;
1104	}	1104	}
1105		1105
1106	static int checksum_setup(struct xenvif vif, struct sk_buff skb)	1106	static int checksum_setup(struct xenvif vif, struct sk_buff skb)
1107	{	1107	{
1108	struct iphdr *iph;	1108	struct iphdr *iph;
1109	unsigned char *th;	1109	unsigned char *th;
1110	int err = -EPROTO;	1110	int err = -EPROTO;
1111	int recalculate_partial_csum = 0;	1111	int recalculate_partial_csum = 0;
1112		1112
1113	/*	1113	/*
1114	* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy	1114	* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
1115	* peers can fail to set NETRXF_csum_blank when sending a GSO	1115	* peers can fail to set NETRXF_csum_blank when sending a GSO
1116	* frame. In this case force the SKB to CHECKSUM_PARTIAL and	1116	* frame. In this case force the SKB to CHECKSUM_PARTIAL and
1117	* recalculate the partial checksum.	1117	* recalculate the partial checksum.
1118	*/	1118	*/
1119	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {	1119	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
1120	vif->rx_gso_checksum_fixup++;	1120	vif->rx_gso_checksum_fixup++;
1121	skb->ip_summed = CHECKSUM_PARTIAL;	1121	skb->ip_summed = CHECKSUM_PARTIAL;
1122	recalculate_partial_csum = 1;	1122	recalculate_partial_csum = 1;
1123	}	1123	}
1124		1124
1125	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */	1125	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
1126	if (skb->ip_summed != CHECKSUM_PARTIAL)	1126	if (skb->ip_summed != CHECKSUM_PARTIAL)
1127	return 0;	1127	return 0;
1128		1128
1129	if (skb->protocol != htons(ETH_P_IP))	1129	if (skb->protocol != htons(ETH_P_IP))
1130	goto out;	1130	goto out;
1131		1131
1132	iph = (void *)skb->data;	1132	iph = (void *)skb->data;
1133	th = skb->data + 4 * iph->ihl;	1133	th = skb->data + 4 * iph->ihl;
1134	if (th >= skb_tail_pointer(skb))	1134	if (th >= skb_tail_pointer(skb))
1135	goto out;	1135	goto out;
1136		1136
1137	skb->csum_start = th - skb->head;	1137	skb->csum_start = th - skb->head;
1138	switch (iph->protocol) {	1138	switch (iph->protocol) {
1139	case IPPROTO_TCP:	1139	case IPPROTO_TCP:
1140	skb->csum_offset = offsetof(struct tcphdr, check);	1140	skb->csum_offset = offsetof(struct tcphdr, check);
1141		1141
1142	if (recalculate_partial_csum) {	1142	if (recalculate_partial_csum) {
1143	struct tcphdr tcph = (struct tcphdr )th;	1143	struct tcphdr tcph = (struct tcphdr )th;
1144	tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,	1144	tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
1145	skb->len - iph->ihl*4,	1145	skb->len - iph->ihl*4,
1146	IPPROTO_TCP, 0);	1146	IPPROTO_TCP, 0);
1147	}	1147	}
1148	break;	1148	break;
1149	case IPPROTO_UDP:	1149	case IPPROTO_UDP:
1150	skb->csum_offset = offsetof(struct udphdr, check);	1150	skb->csum_offset = offsetof(struct udphdr, check);
1151		1151
1152	if (recalculate_partial_csum) {	1152	if (recalculate_partial_csum) {
1153	struct udphdr udph = (struct udphdr )th;	1153	struct udphdr udph = (struct udphdr )th;
1154	udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,	1154	udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
1155	skb->len - iph->ihl*4,	1155	skb->len - iph->ihl*4,
1156	IPPROTO_UDP, 0);	1156	IPPROTO_UDP, 0);
1157	}	1157	}
1158	break;	1158	break;
1159	default:	1159	default:
1160	if (net_ratelimit())	1160	if (net_ratelimit())
1161	netdev_err(vif->dev,	1161	netdev_err(vif->dev,
1162	"Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n",	1162	"Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n",
1163	iph->protocol);	1163	iph->protocol);
1164	goto out;	1164	goto out;
1165	}	1165	}
1166		1166
1167	if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))	1167	if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
1168	goto out;	1168	goto out;
1169		1169
1170	err = 0;	1170	err = 0;
1171		1171
1172	out:	1172	out:
1173	return err;	1173	return err;
1174	}	1174	}
1175		1175
1176	static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)	1176	static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
1177	{	1177	{
1178	unsigned long now = jiffies;	1178	unsigned long now = jiffies;
1179	unsigned long next_credit =	1179	unsigned long next_credit =
1180	vif->credit_timeout.expires +	1180	vif->credit_timeout.expires +
1181	msecs_to_jiffies(vif->credit_usec / 1000);	1181	msecs_to_jiffies(vif->credit_usec / 1000);
1182		1182
1183	/* Timer could already be pending in rare cases. */	1183	/* Timer could already be pending in rare cases. */
1184	if (timer_pending(&vif->credit_timeout))	1184	if (timer_pending(&vif->credit_timeout))
1185	return true;	1185	return true;
1186		1186
1187	/* Passed the point where we can replenish credit? */	1187	/* Passed the point where we can replenish credit? */
1188	if (time_after_eq(now, next_credit)) {	1188	if (time_after_eq(now, next_credit)) {
1189	vif->credit_timeout.expires = now;	1189	vif->credit_timeout.expires = now;
1190	tx_add_credit(vif);	1190	tx_add_credit(vif);
1191	}	1191	}
1192		1192
1193	/* Still too big to send right now? Set a callback. */	1193	/* Still too big to send right now? Set a callback. */
1194	if (size > vif->remaining_credit) {	1194	if (size > vif->remaining_credit) {
1195	vif->credit_timeout.data =	1195	vif->credit_timeout.data =
1196	(unsigned long)vif;	1196	(unsigned long)vif;
1197	vif->credit_timeout.function =	1197	vif->credit_timeout.function =
1198	tx_credit_callback;	1198	tx_credit_callback;
1199	mod_timer(&vif->credit_timeout,	1199	mod_timer(&vif->credit_timeout,
1200	next_credit);	1200	next_credit);
1201		1201
1202	return true;	1202	return true;
1203	}	1203	}
1204		1204
1205	return false;	1205	return false;
1206	}	1206	}
1207		1207
1208	static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)	1208	static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
1209	{	1209	{
1210	struct gnttab_copy gop = netbk->tx_copy_ops, request_gop;	1210	struct gnttab_copy gop = netbk->tx_copy_ops, request_gop;
1211	struct sk_buff *skb;	1211	struct sk_buff *skb;
1212	int ret;	1212	int ret;
1213		1213
1214	while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&	1214	while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
1215	!list_empty(&netbk->net_schedule_list)) {	1215	!list_empty(&netbk->net_schedule_list)) {
1216	struct xenvif *vif;	1216	struct xenvif *vif;
1217	struct xen_netif_tx_request txreq;	1217	struct xen_netif_tx_request txreq;
1218	struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];	1218	struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
1219	struct page *page;	1219	struct page *page;
1220	struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];	1220	struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
1221	u16 pending_idx;	1221	u16 pending_idx;
1222	RING_IDX idx;	1222	RING_IDX idx;
1223	int work_to_do;	1223	int work_to_do;
1224	unsigned int data_len;	1224	unsigned int data_len;
1225	pending_ring_idx_t index;	1225	pending_ring_idx_t index;
1226		1226
1227	/* Get a netif from the list with work to do. */	1227	/* Get a netif from the list with work to do. */
1228	vif = poll_net_schedule_list(netbk);	1228	vif = poll_net_schedule_list(netbk);
1229	if (!vif)	1229	if (!vif)
1230	continue;	1230	continue;
1231		1231
1232	RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, work_to_do);	1232	RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, work_to_do);
1233	if (!work_to_do) {	1233	if (!work_to_do) {
1234	xenvif_put(vif);	1234	xenvif_put(vif);
1235	continue;	1235	continue;
1236	}	1236	}
1237		1237
1238	idx = vif->tx.req_cons;	1238	idx = vif->tx.req_cons;
1239	rmb(); /* Ensure that we see the request before we copy it. */	1239	rmb(); /* Ensure that we see the request before we copy it. */
1240	memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq));	1240	memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq));
1241		1241
1242	/* Credit-based scheduling. */	1242	/* Credit-based scheduling. */
1243	if (txreq.size > vif->remaining_credit &&	1243	if (txreq.size > vif->remaining_credit &&
1244	tx_credit_exceeded(vif, txreq.size)) {	1244	tx_credit_exceeded(vif, txreq.size)) {
1245	xenvif_put(vif);	1245	xenvif_put(vif);
1246	continue;	1246	continue;
1247	}	1247	}
1248		1248
1249	vif->remaining_credit -= txreq.size;	1249	vif->remaining_credit -= txreq.size;
1250		1250
1251	work_to_do--;	1251	work_to_do--;
1252	vif->tx.req_cons = ++idx;	1252	vif->tx.req_cons = ++idx;
1253		1253
1254	memset(extras, 0, sizeof(extras));	1254	memset(extras, 0, sizeof(extras));
1255	if (txreq.flags & XEN_NETTXF_extra_info) {	1255	if (txreq.flags & XEN_NETTXF_extra_info) {
1256	work_to_do = xen_netbk_get_extras(vif, extras,	1256	work_to_do = xen_netbk_get_extras(vif, extras,
1257	work_to_do);	1257	work_to_do);
1258	idx = vif->tx.req_cons;	1258	idx = vif->tx.req_cons;
1259	if (unlikely(work_to_do < 0)) {	1259	if (unlikely(work_to_do < 0)) {
1260	netbk_tx_err(vif, &txreq, idx);	1260	netbk_tx_err(vif, &txreq, idx);
1261	continue;	1261	continue;
1262	}	1262	}
1263	}	1263	}
1264		1264
1265	ret = netbk_count_requests(vif, &txreq, txfrags, work_to_do);	1265	ret = netbk_count_requests(vif, &txreq, txfrags, work_to_do);
1266	if (unlikely(ret < 0)) {	1266	if (unlikely(ret < 0)) {
1267	netbk_tx_err(vif, &txreq, idx - ret);	1267	netbk_tx_err(vif, &txreq, idx - ret);
1268	continue;	1268	continue;
1269	}	1269	}
1270	idx += ret;	1270	idx += ret;
1271		1271
1272	if (unlikely(txreq.size < ETH_HLEN)) {	1272	if (unlikely(txreq.size < ETH_HLEN)) {
1273	netdev_dbg(vif->dev,	1273	netdev_dbg(vif->dev,
1274	"Bad packet size: %d\n", txreq.size);	1274	"Bad packet size: %d\n", txreq.size);
1275	netbk_tx_err(vif, &txreq, idx);	1275	netbk_tx_err(vif, &txreq, idx);
1276	continue;	1276	continue;
1277	}	1277	}
1278		1278
1279	/* No crossing a page as the payload mustn't fragment. */	1279	/* No crossing a page as the payload mustn't fragment. */
1280	if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {	1280	if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
1281	netdev_dbg(vif->dev,	1281	netdev_dbg(vif->dev,
1282	"txreq.offset: %x, size: %u, end: %lu\n",	1282	"txreq.offset: %x, size: %u, end: %lu\n",
1283	txreq.offset, txreq.size,	1283	txreq.offset, txreq.size,
1284	(txreq.offset&~PAGE_MASK) + txreq.size);	1284	(txreq.offset&~PAGE_MASK) + txreq.size);
1285	netbk_tx_err(vif, &txreq, idx);	1285	netbk_tx_err(vif, &txreq, idx);
1286	continue;	1286	continue;
1287	}	1287	}
1288		1288
1289	index = pending_index(netbk->pending_cons);	1289	index = pending_index(netbk->pending_cons);
1290	pending_idx = netbk->pending_ring[index];	1290	pending_idx = netbk->pending_ring[index];
1291		1291
1292	data_len = (txreq.size > PKT_PROT_LEN &&	1292	data_len = (txreq.size > PKT_PROT_LEN &&
1293	ret < MAX_SKB_FRAGS) ?	1293	ret < MAX_SKB_FRAGS) ?
1294	PKT_PROT_LEN : txreq.size;	1294	PKT_PROT_LEN : txreq.size;
1295		1295
1296	skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,	1296	skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
1297	GFP_ATOMIC \| __GFP_NOWARN);	1297	GFP_ATOMIC \| __GFP_NOWARN);
1298	if (unlikely(skb == NULL)) {	1298	if (unlikely(skb == NULL)) {
1299	netdev_dbg(vif->dev,	1299	netdev_dbg(vif->dev,
1300	"Can't allocate a skb in start_xmit.\n");	1300	"Can't allocate a skb in start_xmit.\n");
1301	netbk_tx_err(vif, &txreq, idx);	1301	netbk_tx_err(vif, &txreq, idx);
1302	break;	1302	break;
1303	}	1303	}
1304		1304
1305	/* Packets passed to netif_rx() must have some headroom. */	1305	/* Packets passed to netif_rx() must have some headroom. */
1306	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);	1306	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
1307		1307
1308	if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {	1308	if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
1309	struct xen_netif_extra_info *gso;	1309	struct xen_netif_extra_info *gso;
1310	gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];	1310	gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
1311		1311
1312	if (netbk_set_skb_gso(vif, skb, gso)) {	1312	if (netbk_set_skb_gso(vif, skb, gso)) {
1313	kfree_skb(skb);	1313	kfree_skb(skb);
1314	netbk_tx_err(vif, &txreq, idx);	1314	netbk_tx_err(vif, &txreq, idx);
1315	continue;	1315	continue;
1316	}	1316	}
1317	}	1317	}
1318		1318
1319	/* XXX could copy straight to head */	1319	/* XXX could copy straight to head */
1320	page = xen_netbk_alloc_page(netbk, skb, pending_idx);	1320	page = xen_netbk_alloc_page(netbk, skb, pending_idx);
1321	if (!page) {	1321	if (!page) {
1322	kfree_skb(skb);	1322	kfree_skb(skb);
1323	netbk_tx_err(vif, &txreq, idx);	1323	netbk_tx_err(vif, &txreq, idx);
1324	continue;	1324	continue;
1325	}	1325	}
1326		1326
1327	netbk->mmap_pages[pending_idx] = page;	1327	netbk->mmap_pages[pending_idx] = page;
1328		1328
1329	gop->source.u.ref = txreq.gref;	1329	gop->source.u.ref = txreq.gref;
1330	gop->source.domid = vif->domid;	1330	gop->source.domid = vif->domid;
1331	gop->source.offset = txreq.offset;	1331	gop->source.offset = txreq.offset;
1332		1332
1333	gop->dest.u.gmfn = virt_to_mfn(page_address(page));	1333	gop->dest.u.gmfn = virt_to_mfn(page_address(page));
1334	gop->dest.domid = DOMID_SELF;	1334	gop->dest.domid = DOMID_SELF;
1335	gop->dest.offset = txreq.offset;	1335	gop->dest.offset = txreq.offset;
1336		1336
1337	gop->len = txreq.size;	1337	gop->len = txreq.size;
1338	gop->flags = GNTCOPY_source_gref;	1338	gop->flags = GNTCOPY_source_gref;
1339		1339
1340	gop++;	1340	gop++;
1341		1341
1342	memcpy(&netbk->pending_tx_info[pending_idx].req,	1342	memcpy(&netbk->pending_tx_info[pending_idx].req,
1343	&txreq, sizeof(txreq));	1343	&txreq, sizeof(txreq));
1344	netbk->pending_tx_info[pending_idx].vif = vif;	1344	netbk->pending_tx_info[pending_idx].vif = vif;
1345	((u16 )skb->data) = pending_idx;	1345	((u16 )skb->data) = pending_idx;
1346		1346
1347	__skb_put(skb, data_len);	1347	__skb_put(skb, data_len);
1348		1348
1349	skb_shinfo(skb)->nr_frags = ret;	1349	skb_shinfo(skb)->nr_frags = ret;
1350	if (data_len < txreq.size) {	1350	if (data_len < txreq.size) {
1351	skb_shinfo(skb)->nr_frags++;	1351	skb_shinfo(skb)->nr_frags++;
1352	skb_shinfo(skb)->frags[0].page =	1352	skb_shinfo(skb)->frags[0].page =
1353	(void *)(unsigned long)pending_idx;	1353	(void *)(unsigned long)pending_idx;
1354	} else {	1354	} else {
1355	/* Discriminate from any valid pending_idx value. */	1355	/* Discriminate from any valid pending_idx value. */
1356	skb_shinfo(skb)->frags[0].page = (void *)~0UL;	1356	skb_shinfo(skb)->frags[0].page = (void *)~0UL;
1357	}	1357	}
1358		1358
1359	__skb_queue_tail(&netbk->tx_queue, skb);	1359	__skb_queue_tail(&netbk->tx_queue, skb);
1360		1360
1361	netbk->pending_cons++;	1361	netbk->pending_cons++;
1362		1362
1363	request_gop = xen_netbk_get_requests(netbk, vif,	1363	request_gop = xen_netbk_get_requests(netbk, vif,
1364	skb, txfrags, gop);	1364	skb, txfrags, gop);
1365	if (request_gop == NULL) {	1365	if (request_gop == NULL) {
1366	kfree_skb(skb);	1366	kfree_skb(skb);
1367	netbk_tx_err(vif, &txreq, idx);	1367	netbk_tx_err(vif, &txreq, idx);
1368	continue;	1368	continue;
1369	}	1369	}
1370	gop = request_gop;	1370	gop = request_gop;
1371		1371
1372	vif->tx.req_cons = idx;	1372	vif->tx.req_cons = idx;
1373	xen_netbk_check_rx_xenvif(vif);	1373	xen_netbk_check_rx_xenvif(vif);
1374		1374
1375	if ((gop-netbk->tx_copy_ops) >= ARRAY_SIZE(netbk->tx_copy_ops))	1375	if ((gop-netbk->tx_copy_ops) >= ARRAY_SIZE(netbk->tx_copy_ops))
1376	break;	1376	break;
1377	}	1377	}
1378		1378
1379	return gop - netbk->tx_copy_ops;	1379	return gop - netbk->tx_copy_ops;
1380	}	1380	}
1381		1381
1382	static void xen_netbk_tx_submit(struct xen_netbk *netbk)	1382	static void xen_netbk_tx_submit(struct xen_netbk *netbk)
1383	{	1383	{
1384	struct gnttab_copy *gop = netbk->tx_copy_ops;	1384	struct gnttab_copy *gop = netbk->tx_copy_ops;
1385	struct sk_buff *skb;	1385	struct sk_buff *skb;
1386		1386
1387	while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {	1387	while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
1388	struct xen_netif_tx_request *txp;	1388	struct xen_netif_tx_request *txp;
1389	struct xenvif *vif;	1389	struct xenvif *vif;
1390	u16 pending_idx;	1390	u16 pending_idx;
1391	unsigned data_len;	1391	unsigned data_len;
1392		1392
1393	pending_idx = ((u16 )skb->data);	1393	pending_idx = ((u16 )skb->data);
1394	vif = netbk->pending_tx_info[pending_idx].vif;	1394	vif = netbk->pending_tx_info[pending_idx].vif;
1395	txp = &netbk->pending_tx_info[pending_idx].req;	1395	txp = &netbk->pending_tx_info[pending_idx].req;
1396		1396
1397	/* Check the remap error code. */	1397	/* Check the remap error code. */
1398	if (unlikely(xen_netbk_tx_check_gop(netbk, skb, &gop))) {	1398	if (unlikely(xen_netbk_tx_check_gop(netbk, skb, &gop))) {
1399	netdev_dbg(vif->dev, "netback grant failed.\n");	1399	netdev_dbg(vif->dev, "netback grant failed.\n");
1400	skb_shinfo(skb)->nr_frags = 0;	1400	skb_shinfo(skb)->nr_frags = 0;
1401	kfree_skb(skb);	1401	kfree_skb(skb);
1402	continue;	1402	continue;
1403	}	1403	}
1404		1404
1405	data_len = skb->len;	1405	data_len = skb->len;
1406	memcpy(skb->data,	1406	memcpy(skb->data,
1407	(void *)(idx_to_kaddr(netbk, pending_idx)\|txp->offset),	1407	(void *)(idx_to_kaddr(netbk, pending_idx)\|txp->offset),
1408	data_len);	1408	data_len);
1409	if (data_len < txp->size) {	1409	if (data_len < txp->size) {
1410	/* Append the packet payload as a fragment. */	1410	/* Append the packet payload as a fragment. */
1411	txp->offset += data_len;	1411	txp->offset += data_len;
1412	txp->size -= data_len;	1412	txp->size -= data_len;
1413	} else {	1413	} else {
1414	/* Schedule a response immediately. */	1414	/* Schedule a response immediately. */
1415	xen_netbk_idx_release(netbk, pending_idx);	1415	xen_netbk_idx_release(netbk, pending_idx);
1416	}	1416	}
1417		1417
1418	if (txp->flags & XEN_NETTXF_csum_blank)	1418	if (txp->flags & XEN_NETTXF_csum_blank)
1419	skb->ip_summed = CHECKSUM_PARTIAL;	1419	skb->ip_summed = CHECKSUM_PARTIAL;
1420	else if (txp->flags & XEN_NETTXF_data_validated)	1420	else if (txp->flags & XEN_NETTXF_data_validated)
1421	skb->ip_summed = CHECKSUM_UNNECESSARY;	1421	skb->ip_summed = CHECKSUM_UNNECESSARY;
1422		1422
1423	xen_netbk_fill_frags(netbk, skb);	1423	xen_netbk_fill_frags(netbk, skb);
1424		1424
1425	/*	1425	/*
1426	* If the initial fragment was < PKT_PROT_LEN then	1426	* If the initial fragment was < PKT_PROT_LEN then
1427	* pull through some bytes from the other fragments to	1427	* pull through some bytes from the other fragments to
1428	* increase the linear region to PKT_PROT_LEN bytes.	1428	* increase the linear region to PKT_PROT_LEN bytes.
1429	*/	1429	*/
1430	if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {	1430	if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
1431	int target = min_t(int, skb->len, PKT_PROT_LEN);	1431	int target = min_t(int, skb->len, PKT_PROT_LEN);
1432	__pskb_pull_tail(skb, target - skb_headlen(skb));	1432	__pskb_pull_tail(skb, target - skb_headlen(skb));
1433	}	1433	}
1434		1434
1435	skb->dev = vif->dev;	1435	skb->dev = vif->dev;
1436	skb->protocol = eth_type_trans(skb, skb->dev);	1436	skb->protocol = eth_type_trans(skb, skb->dev);
1437		1437
1438	if (checksum_setup(vif, skb)) {	1438	if (checksum_setup(vif, skb)) {
1439	netdev_dbg(vif->dev,	1439	netdev_dbg(vif->dev,
1440	"Can't setup checksum in net_tx_action\n");	1440	"Can't setup checksum in net_tx_action\n");
1441	kfree_skb(skb);	1441	kfree_skb(skb);
1442	continue;	1442	continue;
1443	}	1443	}
1444		1444
1445	vif->dev->stats.rx_bytes += skb->len;	1445	vif->dev->stats.rx_bytes += skb->len;
1446	vif->dev->stats.rx_packets++;	1446	vif->dev->stats.rx_packets++;
1447		1447
1448	xenvif_receive_skb(vif, skb);	1448	xenvif_receive_skb(vif, skb);
1449	}	1449	}
1450	}	1450	}
1451		1451
1452	/* Called after netfront has transmitted */	1452	/* Called after netfront has transmitted */
1453	static void xen_netbk_tx_action(struct xen_netbk *netbk)	1453	static void xen_netbk_tx_action(struct xen_netbk *netbk)
1454	{	1454	{
1455	unsigned nr_gops;	1455	unsigned nr_gops;
1456	int ret;	1456	int ret;
1457		1457
1458	nr_gops = xen_netbk_tx_build_gops(netbk);	1458	nr_gops = xen_netbk_tx_build_gops(netbk);
1459		1459
1460	if (nr_gops == 0)	1460	if (nr_gops == 0)
1461	return;	1461	return;
1462	ret = HYPERVISOR_grant_table_op(GNTTABOP_copy,	1462	ret = HYPERVISOR_grant_table_op(GNTTABOP_copy,
1463	netbk->tx_copy_ops, nr_gops);	1463	netbk->tx_copy_ops, nr_gops);
1464	BUG_ON(ret);	1464	BUG_ON(ret);
1465		1465
1466	xen_netbk_tx_submit(netbk);	1466	xen_netbk_tx_submit(netbk);
1467		1467
1468	}	1468	}
1469		1469
1470	static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx)	1470	static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx)
1471	{	1471	{
1472	struct xenvif *vif;	1472	struct xenvif *vif;
1473	struct pending_tx_info *pending_tx_info;	1473	struct pending_tx_info *pending_tx_info;
1474	pending_ring_idx_t index;	1474	pending_ring_idx_t index;
1475		1475
1476	/* Already complete? */	1476	/* Already complete? */
1477	if (netbk->mmap_pages[pending_idx] == NULL)	1477	if (netbk->mmap_pages[pending_idx] == NULL)
1478	return;	1478	return;
1479		1479
1480	pending_tx_info = &netbk->pending_tx_info[pending_idx];	1480	pending_tx_info = &netbk->pending_tx_info[pending_idx];
1481		1481
1482	vif = pending_tx_info->vif;	1482	vif = pending_tx_info->vif;
1483		1483
1484	make_tx_response(vif, &pending_tx_info->req, XEN_NETIF_RSP_OKAY);	1484	make_tx_response(vif, &pending_tx_info->req, XEN_NETIF_RSP_OKAY);
1485		1485
1486	index = pending_index(netbk->pending_prod++);	1486	index = pending_index(netbk->pending_prod++);
1487	netbk->pending_ring[index] = pending_idx;	1487	netbk->pending_ring[index] = pending_idx;
1488		1488
1489	xenvif_put(vif);	1489	xenvif_put(vif);
1490		1490
1491	netbk->mmap_pages[pending_idx]->mapping = 0;	1491	netbk->mmap_pages[pending_idx]->mapping = 0;
1492	put_page(netbk->mmap_pages[pending_idx]);	1492	put_page(netbk->mmap_pages[pending_idx]);
1493	netbk->mmap_pages[pending_idx] = NULL;	1493	netbk->mmap_pages[pending_idx] = NULL;
1494	}	1494	}
1495		1495
1496	static void make_tx_response(struct xenvif *vif,	1496	static void make_tx_response(struct xenvif *vif,
1497	struct xen_netif_tx_request *txp,	1497	struct xen_netif_tx_request *txp,
1498	s8 st)	1498	s8 st)
1499	{	1499	{
1500	RING_IDX i = vif->tx.rsp_prod_pvt;	1500	RING_IDX i = vif->tx.rsp_prod_pvt;
1501	struct xen_netif_tx_response *resp;	1501	struct xen_netif_tx_response *resp;
1502	int notify;	1502	int notify;
1503		1503
1504	resp = RING_GET_RESPONSE(&vif->tx, i);	1504	resp = RING_GET_RESPONSE(&vif->tx, i);
1505	resp->id = txp->id;	1505	resp->id = txp->id;
1506	resp->status = st;	1506	resp->status = st;
1507		1507
1508	if (txp->flags & XEN_NETTXF_extra_info)	1508	if (txp->flags & XEN_NETTXF_extra_info)
1509	RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL;	1509	RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL;
1510		1510
1511	vif->tx.rsp_prod_pvt = ++i;	1511	vif->tx.rsp_prod_pvt = ++i;
1512	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify);	1512	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify);
1513	if (notify)	1513	if (notify)
1514	notify_remote_via_irq(vif->irq);	1514	notify_remote_via_irq(vif->irq);
1515	}	1515	}
1516		1516
1517	static struct xen_netif_rx_response make_rx_response(struct xenvif vif,	1517	static struct xen_netif_rx_response make_rx_response(struct xenvif vif,
1518	u16 id,	1518	u16 id,
1519	s8 st,	1519	s8 st,
1520	u16 offset,	1520	u16 offset,
1521	u16 size,	1521	u16 size,
1522	u16 flags)	1522	u16 flags)
1523	{	1523	{
1524	RING_IDX i = vif->rx.rsp_prod_pvt;	1524	RING_IDX i = vif->rx.rsp_prod_pvt;
1525	struct xen_netif_rx_response *resp;	1525	struct xen_netif_rx_response *resp;
1526		1526
1527	resp = RING_GET_RESPONSE(&vif->rx, i);	1527	resp = RING_GET_RESPONSE(&vif->rx, i);
1528	resp->offset = offset;	1528	resp->offset = offset;
1529	resp->flags = flags;	1529	resp->flags = flags;
1530	resp->id = id;	1530	resp->id = id;
1531	resp->status = (s16)size;	1531	resp->status = (s16)size;
1532	if (st < 0)	1532	if (st < 0)
1533	resp->status = (s16)st;	1533	resp->status = (s16)st;
1534		1534
1535	vif->rx.rsp_prod_pvt = ++i;	1535	vif->rx.rsp_prod_pvt = ++i;
1536		1536
1537	return resp;	1537	return resp;
1538	}	1538	}
1539		1539
1540	static inline int rx_work_todo(struct xen_netbk *netbk)	1540	static inline int rx_work_todo(struct xen_netbk *netbk)
1541	{	1541	{
1542	return !skb_queue_empty(&netbk->rx_queue);	1542	return !skb_queue_empty(&netbk->rx_queue);
1543	}	1543	}
1544		1544
1545	static inline int tx_work_todo(struct xen_netbk *netbk)	1545	static inline int tx_work_todo(struct xen_netbk *netbk)
1546	{	1546	{
1547		1547
1548	if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&	1548	if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
1549	!list_empty(&netbk->net_schedule_list))	1549	!list_empty(&netbk->net_schedule_list))
1550	return 1;	1550	return 1;
1551		1551
1552	return 0;	1552	return 0;
1553	}	1553	}
1554		1554
1555	static int xen_netbk_kthread(void *data)	1555	static int xen_netbk_kthread(void *data)
1556	{	1556	{
1557	struct xen_netbk *netbk = data;	1557	struct xen_netbk *netbk = data;
1558	while (!kthread_should_stop()) {	1558	while (!kthread_should_stop()) {
1559	wait_event_interruptible(netbk->wq,	1559	wait_event_interruptible(netbk->wq,
1560	rx_work_todo(netbk) \|\|	1560	rx_work_todo(netbk) \|\|
1561	tx_work_todo(netbk) \|\|	1561	tx_work_todo(netbk) \|\|
1562	kthread_should_stop());	1562	kthread_should_stop());
1563	cond_resched();	1563	cond_resched();
1564		1564
1565	if (kthread_should_stop())	1565	if (kthread_should_stop())
1566	break;	1566	break;
1567		1567
1568	if (rx_work_todo(netbk))	1568	if (rx_work_todo(netbk))
1569	xen_netbk_rx_action(netbk);	1569	xen_netbk_rx_action(netbk);
1570		1570
1571	if (tx_work_todo(netbk))	1571	if (tx_work_todo(netbk))
1572	xen_netbk_tx_action(netbk);	1572	xen_netbk_tx_action(netbk);
1573	}	1573	}
1574		1574
1575	return 0;	1575	return 0;
1576	}	1576	}
1577		1577
1578	void xen_netbk_unmap_frontend_rings(struct xenvif *vif)	1578	void xen_netbk_unmap_frontend_rings(struct xenvif *vif)
1579	{	1579	{
1580	struct gnttab_unmap_grant_ref op;	1580	if (vif->tx.sring)
1581		1581	xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
1582	if (vif->tx.sring) {	1582	vif->tx.sring);
1583	gnttab_set_unmap_op(&op, (unsigned long)vif->tx_comms_area->addr,	1583	if (vif->rx.sring)
1584	GNTMAP_host_map, vif->tx_shmem_handle);	1584	xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
1585		1585	vif->rx.sring);
1586	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
1587	BUG();
1588	}
1589
1590	if (vif->rx.sring) {
1591	gnttab_set_unmap_op(&op, (unsigned long)vif->rx_comms_area->addr,
1592	GNTMAP_host_map, vif->rx_shmem_handle);
1593
1594	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
1595	BUG();
1596	}
1597	if (vif->rx_comms_area)
1598	free_vm_area(vif->rx_comms_area);
1599	if (vif->tx_comms_area)
1600	free_vm_area(vif->tx_comms_area);
1601	}	1586	}
1602		1587
1603	int xen_netbk_map_frontend_rings(struct xenvif *vif,	1588	int xen_netbk_map_frontend_rings(struct xenvif *vif,
1604	grant_ref_t tx_ring_ref,	1589	grant_ref_t tx_ring_ref,
1605	grant_ref_t rx_ring_ref)	1590	grant_ref_t rx_ring_ref)
1606	{	1591	{
1607	struct gnttab_map_grant_ref op;	1592	void *addr;
1608	struct xen_netif_tx_sring *txs;	1593	struct xen_netif_tx_sring *txs;
1609	struct xen_netif_rx_sring *rxs;	1594	struct xen_netif_rx_sring *rxs;
1610		1595
1611	int err = -ENOMEM;	1596	int err = -ENOMEM;
1612		1597
1613	vif->tx_comms_area = alloc_vm_area(PAGE_SIZE);	1598	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
1614	if (vif->tx_comms_area == NULL)	1599	tx_ring_ref, &addr);
		1600	if (err)
1615	goto err;	1601	goto err;
1616		1602
1617	vif->rx_comms_area = alloc_vm_area(PAGE_SIZE);	1603	txs = (struct xen_netif_tx_sring *)addr;
1618	if (vif->rx_comms_area == NULL)
1619	goto err;
1620
1621	gnttab_set_map_op(&op, (unsigned long)vif->tx_comms_area->addr,
1622	GNTMAP_host_map, tx_ring_ref, vif->domid);
1623
1624	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
1625	BUG();
1626
1627	if (op.status) {
1628	netdev_warn(vif->dev,
1629	"failed to map tx ring. err=%d status=%d\n",
1630	err, op.status);
1631	err = op.status;
1632	goto err;
1633	}
1634
1635	vif->tx_shmem_ref = tx_ring_ref;
1636	vif->tx_shmem_handle = op.handle;
1637
1638	txs = (struct xen_netif_tx_sring *)vif->tx_comms_area->addr;
1639	BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);	1604	BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);
1640		1605
1641	gnttab_set_map_op(&op, (unsigned long)vif->rx_comms_area->addr,	1606	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
1642	GNTMAP_host_map, rx_ring_ref, vif->domid);	1607	rx_ring_ref, &addr);
1643		1608	if (err)
1644	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
1645	BUG();
1646
1647	if (op.status) {
1648	netdev_warn(vif->dev,
1649	"failed to map rx ring. err=%d status=%d\n",
1650	err, op.status);
1651	err = op.status;
1652	goto err;	1609	goto err;
1653	}
1654		1610
1655	vif->rx_shmem_ref = rx_ring_ref;	1611	rxs = (struct xen_netif_rx_sring *)addr;
1656	vif->rx_shmem_handle = op.handle;
1657	vif->rx_req_cons_peek = 0;
1658
1659	rxs = (struct xen_netif_rx_sring *)vif->rx_comms_area->addr;
1660	BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE);	1612	BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE);
		1613
		1614	vif->rx_req_cons_peek = 0;
1661		1615
1662	return 0;	1616	return 0;
1663		1617
1664	err:	1618	err:
1665	xen_netbk_unmap_frontend_rings(vif);	1619	xen_netbk_unmap_frontend_rings(vif);
1666	return err;	1620	return err;
1667	}	1621	}
1668		1622
1669	static int __init netback_init(void)	1623	static int __init netback_init(void)
1670	{	1624	{
1671	int i;	1625	int i;
1672	int rc = 0;	1626	int rc = 0;
1673	int group;	1627	int group;
1674		1628
1675	if (!xen_pv_domain())	1629	if (!xen_pv_domain())
1676	return -ENODEV;	1630	return -ENODEV;
1677		1631
1678	xen_netbk_group_nr = num_online_cpus();	1632	xen_netbk_group_nr = num_online_cpus();
1679	xen_netbk = vzalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);	1633	xen_netbk = vzalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
1680	if (!xen_netbk) {	1634	if (!xen_netbk) {
1681	printk(KERN_ALERT "%s: out of memory\n", __func__);	1635	printk(KERN_ALERT "%s: out of memory\n", __func__);
1682	return -ENOMEM;	1636	return -ENOMEM;
1683	}	1637	}
1684		1638
1685	for (group = 0; group < xen_netbk_group_nr; group++) {	1639	for (group = 0; group < xen_netbk_group_nr; group++) {
1686	struct xen_netbk *netbk = &xen_netbk[group];	1640	struct xen_netbk *netbk = &xen_netbk[group];
1687	skb_queue_head_init(&netbk->rx_queue);	1641	skb_queue_head_init(&netbk->rx_queue);
1688	skb_queue_head_init(&netbk->tx_queue);	1642	skb_queue_head_init(&netbk->tx_queue);
1689		1643
1690	init_timer(&netbk->net_timer);	1644	init_timer(&netbk->net_timer);
1691	netbk->net_timer.data = (unsigned long)netbk;	1645	netbk->net_timer.data = (unsigned long)netbk;
1692	netbk->net_timer.function = xen_netbk_alarm;	1646	netbk->net_timer.function = xen_netbk_alarm;
1693		1647
1694	netbk->pending_cons = 0;	1648	netbk->pending_cons = 0;
1695	netbk->pending_prod = MAX_PENDING_REQS;	1649	netbk->pending_prod = MAX_PENDING_REQS;
1696	for (i = 0; i < MAX_PENDING_REQS; i++)	1650	for (i = 0; i < MAX_PENDING_REQS; i++)
1697	netbk->pending_ring[i] = i;	1651	netbk->pending_ring[i] = i;
1698		1652
1699	init_waitqueue_head(&netbk->wq);	1653	init_waitqueue_head(&netbk->wq);
1700	netbk->task = kthread_create(xen_netbk_kthread,	1654	netbk->task = kthread_create(xen_netbk_kthread,
1701	(void *)netbk,	1655	(void *)netbk,
1702	"netback/%u", group);	1656	"netback/%u", group);
1703		1657
1704	if (IS_ERR(netbk->task)) {	1658	if (IS_ERR(netbk->task)) {
1705	printk(KERN_ALERT "kthread_run() fails at netback\n");	1659	printk(KERN_ALERT "kthread_run() fails at netback\n");
1706	del_timer(&netbk->net_timer);	1660	del_timer(&netbk->net_timer);
1707	rc = PTR_ERR(netbk->task);	1661	rc = PTR_ERR(netbk->task);
1708	goto failed_init;	1662	goto failed_init;
1709	}	1663	}
1710		1664
1711	kthread_bind(netbk->task, group);	1665	kthread_bind(netbk->task, group);
1712		1666
1713	INIT_LIST_HEAD(&netbk->net_schedule_list);	1667	INIT_LIST_HEAD(&netbk->net_schedule_list);
1714		1668
1715	spin_lock_init(&netbk->net_schedule_list_lock);	1669	spin_lock_init(&netbk->net_schedule_list_lock);
1716		1670
1717	atomic_set(&netbk->netfront_count, 0);	1671	atomic_set(&netbk->netfront_count, 0);
1718		1672
1719	wake_up_process(netbk->task);	1673	wake_up_process(netbk->task);
1720	}	1674	}
1721		1675
1722	rc = xenvif_xenbus_init();	1676	rc = xenvif_xenbus_init();
1723	if (rc)	1677	if (rc)
1724	goto failed_init;	1678	goto failed_init;
1725		1679
1726	return 0;	1680	return 0;
1727		1681
1728	failed_init:	1682	failed_init:
1729	while (--group >= 0) {	1683	while (--group >= 0) {
1730	struct xen_netbk *netbk = &xen_netbk[group];	1684	struct xen_netbk *netbk = &xen_netbk[group];
1731	for (i = 0; i < MAX_PENDING_REQS; i++) {	1685	for (i = 0; i < MAX_PENDING_REQS; i++) {
1732	if (netbk->mmap_pages[i])	1686	if (netbk->mmap_pages[i])
1733	__free_page(netbk->mmap_pages[i]);	1687	__free_page(netbk->mmap_pages[i]);
1734	}	1688	}
1735	del_timer(&netbk->net_timer);	1689	del_timer(&netbk->net_timer);
1736	kthread_stop(netbk->task);	1690	kthread_stop(netbk->task);
1737	}	1691	}
1738	vfree(xen_netbk);	1692	vfree(xen_netbk);
1739	return rc;	1693	return rc;
1740		1694
1741	}	1695	}
1742		1696
1743	module_init(netback_init);	1697	module_init(netback_init);
1744		1698