Commit 4262e5ccbbb5171abd2921eed16ed339633d6478

Authored by Daniel Borkmann
Committed by David S. Miller
1 parent 34f9f43710

net: dev: move inline skb_needs_linearize helper to header

As we need it elsewhere, move the inline helper function
skb_needs_linearize() over to the skbuff.h include file. While
at it, also convert the return type to 'bool' instead of 'int'
and add a proper kernel-doc comment.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
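
For reference, the helper being moved looks roughly like the sketch below. This is a
reconstruction from the commit description, not the literal hunk: the authoritative version
is the one removed from net/core/dev.c and re-added at the end of include/linux/skbuff.h,
which falls outside the portion of the diff shown here. The helper reports whether an skb
has to be linearized before it is handed to a device whose features lack scatter/gather
(NETIF_F_SG) or frag-list (NETIF_F_FRAGLIST) support:

/* Reconstructed sketch of the moved helper; see the actual diff for the exact hunk. */
/**
 *	skb_needs_linearize - check if we need to linearize a given skb
 *			      depending on the given device features
 *	@skb: socket buffer to check
 *	@features: net device features
 *
 *	Returns true if either:
 *	1. skb has frag_list and the device doesn't support FRAGLIST, or
 *	2. skb is fragmented and the device does not support SG.
 */
static inline bool skb_needs_linearize(struct sk_buff *skb,
				       netdev_features_t features)
{
	return skb_is_nonlinear(skb) &&
	       ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) ||
		(skb_shinfo(skb)->nr_frags && !(features & NETIF_F_SG)));
}

A call site in the transmit path might then look like this (label name illustrative):

	if (skb_needs_linearize(skb, features) &&
	    __skb_linearize(skb))
		goto drop;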

Showing 2 changed files with 18 additions and 15 deletions

include/linux/skbuff.h
1 /* 1 /*
2 * Definitions for the 'struct sk_buff' memory handlers. 2 * Definitions for the 'struct sk_buff' memory handlers.
3 * 3 *
4 * Authors: 4 * Authors:
5 * Alan Cox, <gw4pts@gw4pts.ampr.org> 5 * Alan Cox, <gw4pts@gw4pts.ampr.org>
6 * Florian La Roche, <rzsfl@rz.uni-sb.de> 6 * Florian La Roche, <rzsfl@rz.uni-sb.de>
7 * 7 *
8 * This program is free software; you can redistribute it and/or 8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License 9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version 10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version. 11 * 2 of the License, or (at your option) any later version.
12 */ 12 */
13 13
14 #ifndef _LINUX_SKBUFF_H 14 #ifndef _LINUX_SKBUFF_H
15 #define _LINUX_SKBUFF_H 15 #define _LINUX_SKBUFF_H
16 16
17 #include <linux/kernel.h> 17 #include <linux/kernel.h>
18 #include <linux/kmemcheck.h> 18 #include <linux/kmemcheck.h>
19 #include <linux/compiler.h> 19 #include <linux/compiler.h>
20 #include <linux/time.h> 20 #include <linux/time.h>
21 #include <linux/bug.h> 21 #include <linux/bug.h>
22 #include <linux/cache.h> 22 #include <linux/cache.h>
23 23
24 #include <linux/atomic.h> 24 #include <linux/atomic.h>
25 #include <asm/types.h> 25 #include <asm/types.h>
26 #include <linux/spinlock.h> 26 #include <linux/spinlock.h>
27 #include <linux/net.h> 27 #include <linux/net.h>
28 #include <linux/textsearch.h> 28 #include <linux/textsearch.h>
29 #include <net/checksum.h> 29 #include <net/checksum.h>
30 #include <linux/rcupdate.h> 30 #include <linux/rcupdate.h>
31 #include <linux/dmaengine.h> 31 #include <linux/dmaengine.h>
32 #include <linux/hrtimer.h> 32 #include <linux/hrtimer.h>
33 #include <linux/dma-mapping.h> 33 #include <linux/dma-mapping.h>
34 #include <linux/netdev_features.h> 34 #include <linux/netdev_features.h>
35 #include <net/flow_keys.h> 35 #include <net/flow_keys.h>
36 36
37 /* Don't change this without changing skb_csum_unnecessary! */ 37 /* Don't change this without changing skb_csum_unnecessary! */
38 #define CHECKSUM_NONE 0 38 #define CHECKSUM_NONE 0
39 #define CHECKSUM_UNNECESSARY 1 39 #define CHECKSUM_UNNECESSARY 1
40 #define CHECKSUM_COMPLETE 2 40 #define CHECKSUM_COMPLETE 2
41 #define CHECKSUM_PARTIAL 3 41 #define CHECKSUM_PARTIAL 3
42 42
43 #define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \ 43 #define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \
44 ~(SMP_CACHE_BYTES - 1)) 44 ~(SMP_CACHE_BYTES - 1))
45 #define SKB_WITH_OVERHEAD(X) \ 45 #define SKB_WITH_OVERHEAD(X) \
46 ((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) 46 ((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
47 #define SKB_MAX_ORDER(X, ORDER) \ 47 #define SKB_MAX_ORDER(X, ORDER) \
48 SKB_WITH_OVERHEAD((PAGE_SIZE << (ORDER)) - (X)) 48 SKB_WITH_OVERHEAD((PAGE_SIZE << (ORDER)) - (X))
49 #define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0)) 49 #define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0))
50 #define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2)) 50 #define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2))
51 51
52 /* return minimum truesize of one skb containing X bytes of data */ 52 /* return minimum truesize of one skb containing X bytes of data */
53 #define SKB_TRUESIZE(X) ((X) + \ 53 #define SKB_TRUESIZE(X) ((X) + \
54 SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \ 54 SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \
55 SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) 55 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
56 56
57 /* A. Checksumming of received packets by device. 57 /* A. Checksumming of received packets by device.
58 * 58 *
59 * NONE: device failed to checksum this packet. 59 * NONE: device failed to checksum this packet.
60 * skb->csum is undefined. 60 * skb->csum is undefined.
61 * 61 *
62 * UNNECESSARY: device parsed packet and wouldbe verified checksum. 62 * UNNECESSARY: device parsed packet and wouldbe verified checksum.
63 * skb->csum is undefined. 63 * skb->csum is undefined.
64 * It is bad option, but, unfortunately, many of vendors do this. 64 * It is bad option, but, unfortunately, many of vendors do this.
65 * Apparently with secret goal to sell you new device, when you 65 * Apparently with secret goal to sell you new device, when you
66 * will add new protocol to your host. F.e. IPv6. 8) 66 * will add new protocol to your host. F.e. IPv6. 8)
67 * 67 *
68 * COMPLETE: the most generic way. Device supplied checksum of _all_ 68 * COMPLETE: the most generic way. Device supplied checksum of _all_
69 * the packet as seen by netif_rx in skb->csum. 69 * the packet as seen by netif_rx in skb->csum.
70 * NOTE: Even if device supports only some protocols, but 70 * NOTE: Even if device supports only some protocols, but
71 * is able to produce some skb->csum, it MUST use COMPLETE, 71 * is able to produce some skb->csum, it MUST use COMPLETE,
72 * not UNNECESSARY. 72 * not UNNECESSARY.
73 * 73 *
74 * PARTIAL: identical to the case for output below. This may occur 74 * PARTIAL: identical to the case for output below. This may occur
75 * on a packet received directly from another Linux OS, e.g., 75 * on a packet received directly from another Linux OS, e.g.,
76 * a virtualised Linux kernel on the same host. The packet can 76 * a virtualised Linux kernel on the same host. The packet can
77 * be treated in the same way as UNNECESSARY except that on 77 * be treated in the same way as UNNECESSARY except that on
78 * output (i.e., forwarding) the checksum must be filled in 78 * output (i.e., forwarding) the checksum must be filled in
79 * by the OS or the hardware. 79 * by the OS or the hardware.
80 * 80 *
81 * B. Checksumming on output. 81 * B. Checksumming on output.
82 * 82 *
83 * NONE: skb is checksummed by protocol or csum is not required. 83 * NONE: skb is checksummed by protocol or csum is not required.
84 * 84 *
85 * PARTIAL: device is required to csum packet as seen by hard_start_xmit 85 * PARTIAL: device is required to csum packet as seen by hard_start_xmit
86 * from skb->csum_start to the end and to record the checksum 86 * from skb->csum_start to the end and to record the checksum
87 * at skb->csum_start + skb->csum_offset. 87 * at skb->csum_start + skb->csum_offset.
88 * 88 *
89 * Device must show its capabilities in dev->features, set 89 * Device must show its capabilities in dev->features, set
90 * at device setup time. 90 * at device setup time.
91 * NETIF_F_HW_CSUM - it is clever device, it is able to checksum 91 * NETIF_F_HW_CSUM - it is clever device, it is able to checksum
92 * everything. 92 * everything.
93 * NETIF_F_IP_CSUM - device is dumb. It is able to csum only 93 * NETIF_F_IP_CSUM - device is dumb. It is able to csum only
94 * TCP/UDP over IPv4. Sigh. Vendors like this 94 * TCP/UDP over IPv4. Sigh. Vendors like this
95 * way by an unknown reason. Though, see comment above 95 * way by an unknown reason. Though, see comment above
96 * about CHECKSUM_UNNECESSARY. 8) 96 * about CHECKSUM_UNNECESSARY. 8)
97 * NETIF_F_IPV6_CSUM about as dumb as the last one but does IPv6 instead. 97 * NETIF_F_IPV6_CSUM about as dumb as the last one but does IPv6 instead.
98 * 98 *
99 * UNNECESSARY: device will do per protocol specific csum. Protocol drivers 99 * UNNECESSARY: device will do per protocol specific csum. Protocol drivers
100 * that do not want net to perform the checksum calculation should use 100 * that do not want net to perform the checksum calculation should use
101 * this flag in their outgoing skbs. 101 * this flag in their outgoing skbs.
102 * NETIF_F_FCOE_CRC this indicates the device can do FCoE FC CRC 102 * NETIF_F_FCOE_CRC this indicates the device can do FCoE FC CRC
103 * offload. Correspondingly, the FCoE protocol driver 103 * offload. Correspondingly, the FCoE protocol driver
104 * stack should use CHECKSUM_UNNECESSARY. 104 * stack should use CHECKSUM_UNNECESSARY.
105 * 105 *
106 * Any questions? No questions, good. --ANK 106 * Any questions? No questions, good. --ANK
107 */ 107 */
108 108
109 struct net_device; 109 struct net_device;
110 struct scatterlist; 110 struct scatterlist;
111 struct pipe_inode_info; 111 struct pipe_inode_info;
112 112
113 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 113 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
114 struct nf_conntrack { 114 struct nf_conntrack {
115 atomic_t use; 115 atomic_t use;
116 }; 116 };
117 #endif 117 #endif
118 118
119 #ifdef CONFIG_BRIDGE_NETFILTER 119 #ifdef CONFIG_BRIDGE_NETFILTER
120 struct nf_bridge_info { 120 struct nf_bridge_info {
121 atomic_t use; 121 atomic_t use;
122 unsigned int mask; 122 unsigned int mask;
123 struct net_device *physindev; 123 struct net_device *physindev;
124 struct net_device *physoutdev; 124 struct net_device *physoutdev;
125 unsigned long data[32 / sizeof(unsigned long)]; 125 unsigned long data[32 / sizeof(unsigned long)];
126 }; 126 };
127 #endif 127 #endif
128 128
129 struct sk_buff_head { 129 struct sk_buff_head {
130 /* These two members must be first. */ 130 /* These two members must be first. */
131 struct sk_buff *next; 131 struct sk_buff *next;
132 struct sk_buff *prev; 132 struct sk_buff *prev;
133 133
134 __u32 qlen; 134 __u32 qlen;
135 spinlock_t lock; 135 spinlock_t lock;
136 }; 136 };
137 137
138 struct sk_buff; 138 struct sk_buff;
139 139
140 /* To allow 64K frame to be packed as single skb without frag_list we 140 /* To allow 64K frame to be packed as single skb without frag_list we
141 * require 64K/PAGE_SIZE pages plus 1 additional page to allow for 141 * require 64K/PAGE_SIZE pages plus 1 additional page to allow for
142 * buffers which do not start on a page boundary. 142 * buffers which do not start on a page boundary.
143 * 143 *
144 * Since GRO uses frags we allocate at least 16 regardless of page 144 * Since GRO uses frags we allocate at least 16 regardless of page
145 * size. 145 * size.
146 */ 146 */
147 #if (65536/PAGE_SIZE + 1) < 16 147 #if (65536/PAGE_SIZE + 1) < 16
148 #define MAX_SKB_FRAGS 16UL 148 #define MAX_SKB_FRAGS 16UL
149 #else 149 #else
150 #define MAX_SKB_FRAGS (65536/PAGE_SIZE + 1) 150 #define MAX_SKB_FRAGS (65536/PAGE_SIZE + 1)
151 #endif 151 #endif
152 152
153 typedef struct skb_frag_struct skb_frag_t; 153 typedef struct skb_frag_struct skb_frag_t;
154 154
155 struct skb_frag_struct { 155 struct skb_frag_struct {
156 struct { 156 struct {
157 struct page *p; 157 struct page *p;
158 } page; 158 } page;
159 #if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) 159 #if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
160 __u32 page_offset; 160 __u32 page_offset;
161 __u32 size; 161 __u32 size;
162 #else 162 #else
163 __u16 page_offset; 163 __u16 page_offset;
164 __u16 size; 164 __u16 size;
165 #endif 165 #endif
166 }; 166 };
167 167
168 static inline unsigned int skb_frag_size(const skb_frag_t *frag) 168 static inline unsigned int skb_frag_size(const skb_frag_t *frag)
169 { 169 {
170 return frag->size; 170 return frag->size;
171 } 171 }
172 172
173 static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size) 173 static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
174 { 174 {
175 frag->size = size; 175 frag->size = size;
176 } 176 }
177 177
178 static inline void skb_frag_size_add(skb_frag_t *frag, int delta) 178 static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
179 { 179 {
180 frag->size += delta; 180 frag->size += delta;
181 } 181 }
182 182
183 static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) 183 static inline void skb_frag_size_sub(skb_frag_t *frag, int delta)
184 { 184 {
185 frag->size -= delta; 185 frag->size -= delta;
186 } 186 }
187 187
188 #define HAVE_HW_TIME_STAMP 188 #define HAVE_HW_TIME_STAMP
189 189
190 /** 190 /**
191 * struct skb_shared_hwtstamps - hardware time stamps 191 * struct skb_shared_hwtstamps - hardware time stamps
192 * @hwtstamp: hardware time stamp transformed into duration 192 * @hwtstamp: hardware time stamp transformed into duration
193 * since arbitrary point in time 193 * since arbitrary point in time
194 * @syststamp: hwtstamp transformed to system time base 194 * @syststamp: hwtstamp transformed to system time base
195 * 195 *
196 * Software time stamps generated by ktime_get_real() are stored in 196 * Software time stamps generated by ktime_get_real() are stored in
197 * skb->tstamp. The relation between the different kinds of time 197 * skb->tstamp. The relation between the different kinds of time
198 * stamps is as follows: 198 * stamps is as follows:
199 * 199 *
200 * syststamp and tstamp can be compared against each other in 200 * syststamp and tstamp can be compared against each other in
201 * arbitrary combinations. The accuracy of a 201 * arbitrary combinations. The accuracy of a
202 * syststamp/tstamp/"syststamp from other device" comparison is 202 * syststamp/tstamp/"syststamp from other device" comparison is
203 * limited by the accuracy of the transformation into system time 203 * limited by the accuracy of the transformation into system time
204 * base. This depends on the device driver and its underlying 204 * base. This depends on the device driver and its underlying
205 * hardware. 205 * hardware.
206 * 206 *
207 * hwtstamps can only be compared against other hwtstamps from 207 * hwtstamps can only be compared against other hwtstamps from
208 * the same device. 208 * the same device.
209 * 209 *
210 * This structure is attached to packets as part of the 210 * This structure is attached to packets as part of the
211 * &skb_shared_info. Use skb_hwtstamps() to get a pointer. 211 * &skb_shared_info. Use skb_hwtstamps() to get a pointer.
212 */ 212 */
213 struct skb_shared_hwtstamps { 213 struct skb_shared_hwtstamps {
214 ktime_t hwtstamp; 214 ktime_t hwtstamp;
215 ktime_t syststamp; 215 ktime_t syststamp;
216 }; 216 };
217 217
218 /* Definitions for tx_flags in struct skb_shared_info */ 218 /* Definitions for tx_flags in struct skb_shared_info */
219 enum { 219 enum {
220 /* generate hardware time stamp */ 220 /* generate hardware time stamp */
221 SKBTX_HW_TSTAMP = 1 << 0, 221 SKBTX_HW_TSTAMP = 1 << 0,
222 222
223 /* generate software time stamp */ 223 /* generate software time stamp */
224 SKBTX_SW_TSTAMP = 1 << 1, 224 SKBTX_SW_TSTAMP = 1 << 1,
225 225
226 /* device driver is going to provide hardware time stamp */ 226 /* device driver is going to provide hardware time stamp */
227 SKBTX_IN_PROGRESS = 1 << 2, 227 SKBTX_IN_PROGRESS = 1 << 2,
228 228
229 /* device driver supports TX zero-copy buffers */ 229 /* device driver supports TX zero-copy buffers */
230 SKBTX_DEV_ZEROCOPY = 1 << 3, 230 SKBTX_DEV_ZEROCOPY = 1 << 3,
231 231
232 /* generate wifi status information (where possible) */ 232 /* generate wifi status information (where possible) */
233 SKBTX_WIFI_STATUS = 1 << 4, 233 SKBTX_WIFI_STATUS = 1 << 4,
234 234
235 /* This indicates at least one fragment might be overwritten 235 /* This indicates at least one fragment might be overwritten
236 * (as in vmsplice(), sendfile() ...) 236 * (as in vmsplice(), sendfile() ...)
237 * If we need to compute a TX checksum, we'll need to copy 237 * If we need to compute a TX checksum, we'll need to copy
238 * all frags to avoid possible bad checksum 238 * all frags to avoid possible bad checksum
239 */ 239 */
240 SKBTX_SHARED_FRAG = 1 << 5, 240 SKBTX_SHARED_FRAG = 1 << 5,
241 }; 241 };
242 242
243 /* 243 /*
244 * The callback notifies userspace to release buffers when skb DMA is done in 244 * The callback notifies userspace to release buffers when skb DMA is done in
245 * lower device, the skb last reference should be 0 when calling this. 245 * lower device, the skb last reference should be 0 when calling this.
246 * The zerocopy_success argument is true if zero copy transmit occurred, 246 * The zerocopy_success argument is true if zero copy transmit occurred,
247 * false on data copy or out of memory error caused by data copy attempt. 247 * false on data copy or out of memory error caused by data copy attempt.
248 * The ctx field is used to track device context. 248 * The ctx field is used to track device context.
249 * The desc field is used to track userspace buffer index. 249 * The desc field is used to track userspace buffer index.
250 */ 250 */
251 struct ubuf_info { 251 struct ubuf_info {
252 void (*callback)(struct ubuf_info *, bool zerocopy_success); 252 void (*callback)(struct ubuf_info *, bool zerocopy_success);
253 void *ctx; 253 void *ctx;
254 unsigned long desc; 254 unsigned long desc;
255 }; 255 };
256 256
257 /* This data is invariant across clones and lives at 257 /* This data is invariant across clones and lives at
258 * the end of the header data, ie. at skb->end. 258 * the end of the header data, ie. at skb->end.
259 */ 259 */
260 struct skb_shared_info { 260 struct skb_shared_info {
261 unsigned char nr_frags; 261 unsigned char nr_frags;
262 __u8 tx_flags; 262 __u8 tx_flags;
263 unsigned short gso_size; 263 unsigned short gso_size;
264 /* Warning: this field is not always filled in (UFO)! */ 264 /* Warning: this field is not always filled in (UFO)! */
265 unsigned short gso_segs; 265 unsigned short gso_segs;
266 unsigned short gso_type; 266 unsigned short gso_type;
267 struct sk_buff *frag_list; 267 struct sk_buff *frag_list;
268 struct skb_shared_hwtstamps hwtstamps; 268 struct skb_shared_hwtstamps hwtstamps;
269 __be32 ip6_frag_id; 269 __be32 ip6_frag_id;
270 270
271 /* 271 /*
272 * Warning : all fields before dataref are cleared in __alloc_skb() 272 * Warning : all fields before dataref are cleared in __alloc_skb()
273 */ 273 */
274 atomic_t dataref; 274 atomic_t dataref;
275 275
276 /* Intermediate layers must ensure that destructor_arg 276 /* Intermediate layers must ensure that destructor_arg
277 * remains valid until skb destructor */ 277 * remains valid until skb destructor */
278 void * destructor_arg; 278 void * destructor_arg;
279 279
280 /* must be last field, see pskb_expand_head() */ 280 /* must be last field, see pskb_expand_head() */
281 skb_frag_t frags[MAX_SKB_FRAGS]; 281 skb_frag_t frags[MAX_SKB_FRAGS];
282 }; 282 };
283 283
284 /* We divide dataref into two halves. The higher 16 bits hold references 284 /* We divide dataref into two halves. The higher 16 bits hold references
285 * to the payload part of skb->data. The lower 16 bits hold references to 285 * to the payload part of skb->data. The lower 16 bits hold references to
286 * the entire skb->data. A clone of a headerless skb holds the length of 286 * the entire skb->data. A clone of a headerless skb holds the length of
287 * the header in skb->hdr_len. 287 * the header in skb->hdr_len.
288 * 288 *
289 * All users must obey the rule that the skb->data reference count must be 289 * All users must obey the rule that the skb->data reference count must be
290 * greater than or equal to the payload reference count. 290 * greater than or equal to the payload reference count.
291 * 291 *
292 * Holding a reference to the payload part means that the user does not 292 * Holding a reference to the payload part means that the user does not
293 * care about modifications to the header part of skb->data. 293 * care about modifications to the header part of skb->data.
294 */ 294 */
295 #define SKB_DATAREF_SHIFT 16 295 #define SKB_DATAREF_SHIFT 16
296 #define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1) 296 #define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1)
297 297
298 298
299 enum { 299 enum {
300 SKB_FCLONE_UNAVAILABLE, 300 SKB_FCLONE_UNAVAILABLE,
301 SKB_FCLONE_ORIG, 301 SKB_FCLONE_ORIG,
302 SKB_FCLONE_CLONE, 302 SKB_FCLONE_CLONE,
303 }; 303 };
304 304
305 enum { 305 enum {
306 SKB_GSO_TCPV4 = 1 << 0, 306 SKB_GSO_TCPV4 = 1 << 0,
307 SKB_GSO_UDP = 1 << 1, 307 SKB_GSO_UDP = 1 << 1,
308 308
309 /* This indicates the skb is from an untrusted source. */ 309 /* This indicates the skb is from an untrusted source. */
310 SKB_GSO_DODGY = 1 << 2, 310 SKB_GSO_DODGY = 1 << 2,
311 311
312 /* This indicates the tcp segment has CWR set. */ 312 /* This indicates the tcp segment has CWR set. */
313 SKB_GSO_TCP_ECN = 1 << 3, 313 SKB_GSO_TCP_ECN = 1 << 3,
314 314
315 SKB_GSO_TCPV6 = 1 << 4, 315 SKB_GSO_TCPV6 = 1 << 4,
316 316
317 SKB_GSO_FCOE = 1 << 5, 317 SKB_GSO_FCOE = 1 << 5,
318 318
319 SKB_GSO_GRE = 1 << 6, 319 SKB_GSO_GRE = 1 << 6,
320 320
321 SKB_GSO_IPIP = 1 << 7, 321 SKB_GSO_IPIP = 1 << 7,
322 322
323 SKB_GSO_SIT = 1 << 8, 323 SKB_GSO_SIT = 1 << 8,
324 324
325 SKB_GSO_UDP_TUNNEL = 1 << 9, 325 SKB_GSO_UDP_TUNNEL = 1 << 9,
326 326
327 SKB_GSO_MPLS = 1 << 10, 327 SKB_GSO_MPLS = 1 << 10,
328 }; 328 };
329 329
330 #if BITS_PER_LONG > 32 330 #if BITS_PER_LONG > 32
331 #define NET_SKBUFF_DATA_USES_OFFSET 1 331 #define NET_SKBUFF_DATA_USES_OFFSET 1
332 #endif 332 #endif
333 333
334 #ifdef NET_SKBUFF_DATA_USES_OFFSET 334 #ifdef NET_SKBUFF_DATA_USES_OFFSET
335 typedef unsigned int sk_buff_data_t; 335 typedef unsigned int sk_buff_data_t;
336 #else 336 #else
337 typedef unsigned char *sk_buff_data_t; 337 typedef unsigned char *sk_buff_data_t;
338 #endif 338 #endif
339 339
340 /** 340 /**
341 * struct sk_buff - socket buffer 341 * struct sk_buff - socket buffer
342 * @next: Next buffer in list 342 * @next: Next buffer in list
343 * @prev: Previous buffer in list 343 * @prev: Previous buffer in list
344 * @tstamp: Time we arrived 344 * @tstamp: Time we arrived
345 * @sk: Socket we are owned by 345 * @sk: Socket we are owned by
346 * @dev: Device we arrived on/are leaving by 346 * @dev: Device we arrived on/are leaving by
347 * @cb: Control buffer. Free for use by every layer. Put private vars here 347 * @cb: Control buffer. Free for use by every layer. Put private vars here
348 * @_skb_refdst: destination entry (with norefcount bit) 348 * @_skb_refdst: destination entry (with norefcount bit)
349 * @sp: the security path, used for xfrm 349 * @sp: the security path, used for xfrm
350 * @len: Length of actual data 350 * @len: Length of actual data
351 * @data_len: Data length 351 * @data_len: Data length
352 * @mac_len: Length of link layer header 352 * @mac_len: Length of link layer header
353 * @hdr_len: writable header length of cloned skb 353 * @hdr_len: writable header length of cloned skb
354 * @csum: Checksum (must include start/offset pair) 354 * @csum: Checksum (must include start/offset pair)
355 * @csum_start: Offset from skb->head where checksumming should start 355 * @csum_start: Offset from skb->head where checksumming should start
356 * @csum_offset: Offset from csum_start where checksum should be stored 356 * @csum_offset: Offset from csum_start where checksum should be stored
357 * @priority: Packet queueing priority 357 * @priority: Packet queueing priority
358 * @local_df: allow local fragmentation 358 * @local_df: allow local fragmentation
359 * @cloned: Head may be cloned (check refcnt to be sure) 359 * @cloned: Head may be cloned (check refcnt to be sure)
360 * @ip_summed: Driver fed us an IP checksum 360 * @ip_summed: Driver fed us an IP checksum
361 * @nohdr: Payload reference only, must not modify header 361 * @nohdr: Payload reference only, must not modify header
362 * @nfctinfo: Relationship of this skb to the connection 362 * @nfctinfo: Relationship of this skb to the connection
363 * @pkt_type: Packet class 363 * @pkt_type: Packet class
364 * @fclone: skbuff clone status 364 * @fclone: skbuff clone status
365 * @ipvs_property: skbuff is owned by ipvs 365 * @ipvs_property: skbuff is owned by ipvs
366 * @peeked: this packet has been seen already, so stats have been 366 * @peeked: this packet has been seen already, so stats have been
367 * done for it, don't do them again 367 * done for it, don't do them again
368 * @nf_trace: netfilter packet trace flag 368 * @nf_trace: netfilter packet trace flag
369 * @protocol: Packet protocol from driver 369 * @protocol: Packet protocol from driver
370 * @destructor: Destruct function 370 * @destructor: Destruct function
371 * @nfct: Associated connection, if any 371 * @nfct: Associated connection, if any
372 * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c 372 * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
373 * @skb_iif: ifindex of device we arrived on 373 * @skb_iif: ifindex of device we arrived on
374 * @tc_index: Traffic control index 374 * @tc_index: Traffic control index
375 * @tc_verd: traffic control verdict 375 * @tc_verd: traffic control verdict
376 * @rxhash: the packet hash computed on receive 376 * @rxhash: the packet hash computed on receive
377 * @queue_mapping: Queue mapping for multiqueue devices 377 * @queue_mapping: Queue mapping for multiqueue devices
378 * @ndisc_nodetype: router type (from link layer) 378 * @ndisc_nodetype: router type (from link layer)
379 * @ooo_okay: allow the mapping of a socket to a queue to be changed 379 * @ooo_okay: allow the mapping of a socket to a queue to be changed
380 * @l4_rxhash: indicate rxhash is a canonical 4-tuple hash over transport 380 * @l4_rxhash: indicate rxhash is a canonical 4-tuple hash over transport
381 * ports. 381 * ports.
382 * @wifi_acked_valid: wifi_acked was set 382 * @wifi_acked_valid: wifi_acked was set
383 * @wifi_acked: whether frame was acked on wifi or not 383 * @wifi_acked: whether frame was acked on wifi or not
384 * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS 384 * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS
385 * @dma_cookie: a cookie to one of several possible DMA operations 385 * @dma_cookie: a cookie to one of several possible DMA operations
386 * done by skb DMA functions 386 * done by skb DMA functions
387 * @napi_id: id of the NAPI struct this skb came from 387 * @napi_id: id of the NAPI struct this skb came from
388 * @secmark: security marking 388 * @secmark: security marking
389 * @mark: Generic packet mark 389 * @mark: Generic packet mark
390 * @dropcount: total number of sk_receive_queue overflows 390 * @dropcount: total number of sk_receive_queue overflows
391 * @vlan_proto: vlan encapsulation protocol 391 * @vlan_proto: vlan encapsulation protocol
392 * @vlan_tci: vlan tag control information 392 * @vlan_tci: vlan tag control information
393 * @inner_protocol: Protocol (encapsulation) 393 * @inner_protocol: Protocol (encapsulation)
394 * @inner_transport_header: Inner transport layer header (encapsulation) 394 * @inner_transport_header: Inner transport layer header (encapsulation)
395 * @inner_network_header: Network layer header (encapsulation) 395 * @inner_network_header: Network layer header (encapsulation)
396 * @inner_mac_header: Link layer header (encapsulation) 396 * @inner_mac_header: Link layer header (encapsulation)
397 * @transport_header: Transport layer header 397 * @transport_header: Transport layer header
398 * @network_header: Network layer header 398 * @network_header: Network layer header
399 * @mac_header: Link layer header 399 * @mac_header: Link layer header
400 * @tail: Tail pointer 400 * @tail: Tail pointer
401 * @end: End pointer 401 * @end: End pointer
402 * @head: Head of buffer 402 * @head: Head of buffer
403 * @data: Data head pointer 403 * @data: Data head pointer
404 * @truesize: Buffer size 404 * @truesize: Buffer size
405 * @users: User count - see {datagram,tcp}.c 405 * @users: User count - see {datagram,tcp}.c
406 */ 406 */
407 407
408 struct sk_buff { 408 struct sk_buff {
409 /* These two members must be first. */ 409 /* These two members must be first. */
410 struct sk_buff *next; 410 struct sk_buff *next;
411 struct sk_buff *prev; 411 struct sk_buff *prev;
412 412
413 ktime_t tstamp; 413 ktime_t tstamp;
414 414
415 struct sock *sk; 415 struct sock *sk;
416 struct net_device *dev; 416 struct net_device *dev;
417 417
418 /* 418 /*
419 * This is the control buffer. It is free to use for every 419 * This is the control buffer. It is free to use for every
420 * layer. Please put your private variables there. If you 420 * layer. Please put your private variables there. If you
421 * want to keep them across layers you have to do a skb_clone() 421 * want to keep them across layers you have to do a skb_clone()
422 * first. This is owned by whoever has the skb queued ATM. 422 * first. This is owned by whoever has the skb queued ATM.
423 */ 423 */
424 char cb[48] __aligned(8); 424 char cb[48] __aligned(8);
425 425
426 unsigned long _skb_refdst; 426 unsigned long _skb_refdst;
427 #ifdef CONFIG_XFRM 427 #ifdef CONFIG_XFRM
428 struct sec_path *sp; 428 struct sec_path *sp;
429 #endif 429 #endif
430 unsigned int len, 430 unsigned int len,
431 data_len; 431 data_len;
432 __u16 mac_len, 432 __u16 mac_len,
433 hdr_len; 433 hdr_len;
434 union { 434 union {
435 __wsum csum; 435 __wsum csum;
436 struct { 436 struct {
437 __u16 csum_start; 437 __u16 csum_start;
438 __u16 csum_offset; 438 __u16 csum_offset;
439 }; 439 };
440 }; 440 };
441 __u32 priority; 441 __u32 priority;
442 kmemcheck_bitfield_begin(flags1); 442 kmemcheck_bitfield_begin(flags1);
443 __u8 local_df:1, 443 __u8 local_df:1,
444 cloned:1, 444 cloned:1,
445 ip_summed:2, 445 ip_summed:2,
446 nohdr:1, 446 nohdr:1,
447 nfctinfo:3; 447 nfctinfo:3;
448 __u8 pkt_type:3, 448 __u8 pkt_type:3,
449 fclone:2, 449 fclone:2,
450 ipvs_property:1, 450 ipvs_property:1,
451 peeked:1, 451 peeked:1,
452 nf_trace:1; 452 nf_trace:1;
453 kmemcheck_bitfield_end(flags1); 453 kmemcheck_bitfield_end(flags1);
454 __be16 protocol; 454 __be16 protocol;
455 455
456 void (*destructor)(struct sk_buff *skb); 456 void (*destructor)(struct sk_buff *skb);
457 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 457 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
458 struct nf_conntrack *nfct; 458 struct nf_conntrack *nfct;
459 #endif 459 #endif
460 #ifdef CONFIG_BRIDGE_NETFILTER 460 #ifdef CONFIG_BRIDGE_NETFILTER
461 struct nf_bridge_info *nf_bridge; 461 struct nf_bridge_info *nf_bridge;
462 #endif 462 #endif
463 463
464 int skb_iif; 464 int skb_iif;
465 465
466 __u32 rxhash; 466 __u32 rxhash;
467 467
468 __be16 vlan_proto; 468 __be16 vlan_proto;
469 __u16 vlan_tci; 469 __u16 vlan_tci;
470 470
471 #ifdef CONFIG_NET_SCHED 471 #ifdef CONFIG_NET_SCHED
472 __u16 tc_index; /* traffic control index */ 472 __u16 tc_index; /* traffic control index */
473 #ifdef CONFIG_NET_CLS_ACT 473 #ifdef CONFIG_NET_CLS_ACT
474 __u16 tc_verd; /* traffic control verdict */ 474 __u16 tc_verd; /* traffic control verdict */
475 #endif 475 #endif
476 #endif 476 #endif
477 477
478 __u16 queue_mapping; 478 __u16 queue_mapping;
479 kmemcheck_bitfield_begin(flags2); 479 kmemcheck_bitfield_begin(flags2);
480 #ifdef CONFIG_IPV6_NDISC_NODETYPE 480 #ifdef CONFIG_IPV6_NDISC_NODETYPE
481 __u8 ndisc_nodetype:2; 481 __u8 ndisc_nodetype:2;
482 #endif 482 #endif
483 __u8 pfmemalloc:1; 483 __u8 pfmemalloc:1;
484 __u8 ooo_okay:1; 484 __u8 ooo_okay:1;
485 __u8 l4_rxhash:1; 485 __u8 l4_rxhash:1;
486 __u8 wifi_acked_valid:1; 486 __u8 wifi_acked_valid:1;
487 __u8 wifi_acked:1; 487 __u8 wifi_acked:1;
488 __u8 no_fcs:1; 488 __u8 no_fcs:1;
489 __u8 head_frag:1; 489 __u8 head_frag:1;
490 /* Encapsulation protocol and NIC drivers should use 490 /* Encapsulation protocol and NIC drivers should use
491 * this flag to indicate to each other if the skb contains 491 * this flag to indicate to each other if the skb contains
492 * encapsulated packet or not and maybe use the inner packet 492 * encapsulated packet or not and maybe use the inner packet
493 * headers if needed 493 * headers if needed
494 */ 494 */
495 __u8 encapsulation:1; 495 __u8 encapsulation:1;
496 /* 6/8 bit hole (depending on ndisc_nodetype presence) */ 496 /* 6/8 bit hole (depending on ndisc_nodetype presence) */
497 kmemcheck_bitfield_end(flags2); 497 kmemcheck_bitfield_end(flags2);
498 498
499 #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL 499 #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
500 union { 500 union {
501 unsigned int napi_id; 501 unsigned int napi_id;
502 dma_cookie_t dma_cookie; 502 dma_cookie_t dma_cookie;
503 }; 503 };
504 #endif 504 #endif
505 #ifdef CONFIG_NETWORK_SECMARK 505 #ifdef CONFIG_NETWORK_SECMARK
506 __u32 secmark; 506 __u32 secmark;
507 #endif 507 #endif
508 union { 508 union {
509 __u32 mark; 509 __u32 mark;
510 __u32 dropcount; 510 __u32 dropcount;
511 __u32 reserved_tailroom; 511 __u32 reserved_tailroom;
512 }; 512 };
513 513
514 __be16 inner_protocol; 514 __be16 inner_protocol;
515 __u16 inner_transport_header; 515 __u16 inner_transport_header;
516 __u16 inner_network_header; 516 __u16 inner_network_header;
517 __u16 inner_mac_header; 517 __u16 inner_mac_header;
518 __u16 transport_header; 518 __u16 transport_header;
519 __u16 network_header; 519 __u16 network_header;
520 __u16 mac_header; 520 __u16 mac_header;
521 /* These elements must be at the end, see alloc_skb() for details. */ 521 /* These elements must be at the end, see alloc_skb() for details. */
522 sk_buff_data_t tail; 522 sk_buff_data_t tail;
523 sk_buff_data_t end; 523 sk_buff_data_t end;
524 unsigned char *head, 524 unsigned char *head,
525 *data; 525 *data;
526 unsigned int truesize; 526 unsigned int truesize;
527 atomic_t users; 527 atomic_t users;
528 }; 528 };
529 529
530 #ifdef __KERNEL__ 530 #ifdef __KERNEL__
531 /* 531 /*
532 * Handling routines are only of interest to the kernel 532 * Handling routines are only of interest to the kernel
533 */ 533 */
534 #include <linux/slab.h> 534 #include <linux/slab.h>
535 535
536 536
537 #define SKB_ALLOC_FCLONE 0x01 537 #define SKB_ALLOC_FCLONE 0x01
538 #define SKB_ALLOC_RX 0x02 538 #define SKB_ALLOC_RX 0x02
539 539
540 /* Returns true if the skb was allocated from PFMEMALLOC reserves */ 540 /* Returns true if the skb was allocated from PFMEMALLOC reserves */
541 static inline bool skb_pfmemalloc(const struct sk_buff *skb) 541 static inline bool skb_pfmemalloc(const struct sk_buff *skb)
542 { 542 {
543 return unlikely(skb->pfmemalloc); 543 return unlikely(skb->pfmemalloc);
544 } 544 }
545 545
546 /* 546 /*
547 * skb might have a dst pointer attached, refcounted or not. 547 * skb might have a dst pointer attached, refcounted or not.
548 * _skb_refdst low order bit is set if refcount was _not_ taken 548 * _skb_refdst low order bit is set if refcount was _not_ taken
549 */ 549 */
550 #define SKB_DST_NOREF 1UL 550 #define SKB_DST_NOREF 1UL
551 #define SKB_DST_PTRMASK ~(SKB_DST_NOREF) 551 #define SKB_DST_PTRMASK ~(SKB_DST_NOREF)
552 552
553 /** 553 /**
554 * skb_dst - returns skb dst_entry 554 * skb_dst - returns skb dst_entry
555 * @skb: buffer 555 * @skb: buffer
556 * 556 *
557 * Returns skb dst_entry, regardless of reference taken or not. 557 * Returns skb dst_entry, regardless of reference taken or not.
558 */ 558 */
559 static inline struct dst_entry *skb_dst(const struct sk_buff *skb) 559 static inline struct dst_entry *skb_dst(const struct sk_buff *skb)
560 { 560 {
561 /* If refdst was not refcounted, check we still are in a 561 /* If refdst was not refcounted, check we still are in a
562 * rcu_read_lock section 562 * rcu_read_lock section
563 */ 563 */
564 WARN_ON((skb->_skb_refdst & SKB_DST_NOREF) && 564 WARN_ON((skb->_skb_refdst & SKB_DST_NOREF) &&
565 !rcu_read_lock_held() && 565 !rcu_read_lock_held() &&
566 !rcu_read_lock_bh_held()); 566 !rcu_read_lock_bh_held());
567 return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK); 567 return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK);
568 } 568 }
569 569
570 /** 570 /**
571 * skb_dst_set - sets skb dst 571 * skb_dst_set - sets skb dst
572 * @skb: buffer 572 * @skb: buffer
573 * @dst: dst entry 573 * @dst: dst entry
574 * 574 *
575 * Sets skb dst, assuming a reference was taken on dst and should 575 * Sets skb dst, assuming a reference was taken on dst and should
576 * be released by skb_dst_drop() 576 * be released by skb_dst_drop()
577 */ 577 */
578 static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) 578 static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
579 { 579 {
580 skb->_skb_refdst = (unsigned long)dst; 580 skb->_skb_refdst = (unsigned long)dst;
581 } 581 }
582 582
583 void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, 583 void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst,
584 bool force); 584 bool force);
585 585
586 /** 586 /**
587 * skb_dst_set_noref - sets skb dst, hopefully, without taking reference 587 * skb_dst_set_noref - sets skb dst, hopefully, without taking reference
588 * @skb: buffer 588 * @skb: buffer
589 * @dst: dst entry 589 * @dst: dst entry
590 * 590 *
591 * Sets skb dst, assuming a reference was not taken on dst. 591 * Sets skb dst, assuming a reference was not taken on dst.
592 * If dst entry is cached, we do not take reference and dst_release 592 * If dst entry is cached, we do not take reference and dst_release
593 * will be avoided by refdst_drop. If dst entry is not cached, we take 593 * will be avoided by refdst_drop. If dst entry is not cached, we take
594 * reference, so that last dst_release can destroy the dst immediately. 594 * reference, so that last dst_release can destroy the dst immediately.
595 */ 595 */
596 static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) 596 static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
597 { 597 {
598 __skb_dst_set_noref(skb, dst, false); 598 __skb_dst_set_noref(skb, dst, false);
599 } 599 }
600 600
601 /** 601 /**
602 * skb_dst_set_noref_force - sets skb dst, without taking reference 602 * skb_dst_set_noref_force - sets skb dst, without taking reference
603 * @skb: buffer 603 * @skb: buffer
604 * @dst: dst entry 604 * @dst: dst entry
605 * 605 *
606 * Sets skb dst, assuming a reference was not taken on dst. 606 * Sets skb dst, assuming a reference was not taken on dst.
607 * No reference is taken and no dst_release will be called. While for 607 * No reference is taken and no dst_release will be called. While for
608 * cached dsts deferred reclaim is a basic feature, for entries that are 608 * cached dsts deferred reclaim is a basic feature, for entries that are
609 * not cached it is caller's job to guarantee that last dst_release for 609 * not cached it is caller's job to guarantee that last dst_release for
610 * provided dst happens when nobody uses it, eg. after a RCU grace period. 610 * provided dst happens when nobody uses it, eg. after a RCU grace period.
611 */ 611 */
612 static inline void skb_dst_set_noref_force(struct sk_buff *skb, 612 static inline void skb_dst_set_noref_force(struct sk_buff *skb,
613 struct dst_entry *dst) 613 struct dst_entry *dst)
614 { 614 {
615 __skb_dst_set_noref(skb, dst, true); 615 __skb_dst_set_noref(skb, dst, true);
616 } 616 }
617 617
618 /** 618 /**
619 * skb_dst_is_noref - Test if skb dst isn't refcounted 619 * skb_dst_is_noref - Test if skb dst isn't refcounted
620 * @skb: buffer 620 * @skb: buffer
621 */ 621 */
622 static inline bool skb_dst_is_noref(const struct sk_buff *skb) 622 static inline bool skb_dst_is_noref(const struct sk_buff *skb)
623 { 623 {
624 return (skb->_skb_refdst & SKB_DST_NOREF) && skb_dst(skb); 624 return (skb->_skb_refdst & SKB_DST_NOREF) && skb_dst(skb);
625 } 625 }
626 626
627 static inline struct rtable *skb_rtable(const struct sk_buff *skb) 627 static inline struct rtable *skb_rtable(const struct sk_buff *skb)
628 { 628 {
629 return (struct rtable *)skb_dst(skb); 629 return (struct rtable *)skb_dst(skb);
630 } 630 }
631 631
632 void kfree_skb(struct sk_buff *skb); 632 void kfree_skb(struct sk_buff *skb);
633 void kfree_skb_list(struct sk_buff *segs); 633 void kfree_skb_list(struct sk_buff *segs);
634 void skb_tx_error(struct sk_buff *skb); 634 void skb_tx_error(struct sk_buff *skb);
635 void consume_skb(struct sk_buff *skb); 635 void consume_skb(struct sk_buff *skb);
636 void __kfree_skb(struct sk_buff *skb); 636 void __kfree_skb(struct sk_buff *skb);
637 extern struct kmem_cache *skbuff_head_cache; 637 extern struct kmem_cache *skbuff_head_cache;
638 638
639 void kfree_skb_partial(struct sk_buff *skb, bool head_stolen); 639 void kfree_skb_partial(struct sk_buff *skb, bool head_stolen);
640 bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, 640 bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
641 bool *fragstolen, int *delta_truesize); 641 bool *fragstolen, int *delta_truesize);
642 642
643 struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags, 643 struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags,
644 int node); 644 int node);
645 struct sk_buff *build_skb(void *data, unsigned int frag_size); 645 struct sk_buff *build_skb(void *data, unsigned int frag_size);
646 static inline struct sk_buff *alloc_skb(unsigned int size, 646 static inline struct sk_buff *alloc_skb(unsigned int size,
647 gfp_t priority) 647 gfp_t priority)
648 { 648 {
649 return __alloc_skb(size, priority, 0, NUMA_NO_NODE); 649 return __alloc_skb(size, priority, 0, NUMA_NO_NODE);
650 } 650 }
651 651
652 static inline struct sk_buff *alloc_skb_fclone(unsigned int size, 652 static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
653 gfp_t priority) 653 gfp_t priority)
654 { 654 {
655 return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE); 655 return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE);
656 } 656 }
657 657
658 struct sk_buff *__alloc_skb_head(gfp_t priority, int node); 658 struct sk_buff *__alloc_skb_head(gfp_t priority, int node);
659 static inline struct sk_buff *alloc_skb_head(gfp_t priority) 659 static inline struct sk_buff *alloc_skb_head(gfp_t priority)
660 { 660 {
661 return __alloc_skb_head(priority, -1); 661 return __alloc_skb_head(priority, -1);
662 } 662 }
663 663
664 struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); 664 struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
665 int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); 665 int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
666 struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); 666 struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority);
667 struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t priority); 667 struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t priority);
668 struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask); 668 struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask);
669 669
670 int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask); 670 int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask);
671 struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, 671 struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
672 unsigned int headroom); 672 unsigned int headroom);
673 struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, 673 struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom,
674 int newtailroom, gfp_t priority); 674 int newtailroom, gfp_t priority);
675 int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, 675 int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset,
676 int len); 676 int len);
677 int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer); 677 int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer);
678 int skb_pad(struct sk_buff *skb, int pad); 678 int skb_pad(struct sk_buff *skb, int pad);
679 #define dev_kfree_skb(a) consume_skb(a) 679 #define dev_kfree_skb(a) consume_skb(a)
680 680
681 int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, 681 int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
682 int getfrag(void *from, char *to, int offset, 682 int getfrag(void *from, char *to, int offset,
683 int len, int odd, struct sk_buff *skb), 683 int len, int odd, struct sk_buff *skb),
684 void *from, int length); 684 void *from, int length);
685 685
686 struct skb_seq_state { 686 struct skb_seq_state {
687 __u32 lower_offset; 687 __u32 lower_offset;
688 __u32 upper_offset; 688 __u32 upper_offset;
689 __u32 frag_idx; 689 __u32 frag_idx;
690 __u32 stepped_offset; 690 __u32 stepped_offset;
691 struct sk_buff *root_skb; 691 struct sk_buff *root_skb;
692 struct sk_buff *cur_skb; 692 struct sk_buff *cur_skb;
693 __u8 *frag_data; 693 __u8 *frag_data;
694 }; 694 };
695 695
696 void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, 696 void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
697 unsigned int to, struct skb_seq_state *st); 697 unsigned int to, struct skb_seq_state *st);
698 unsigned int skb_seq_read(unsigned int consumed, const u8 **data, 698 unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
699 struct skb_seq_state *st); 699 struct skb_seq_state *st);
700 void skb_abort_seq_read(struct skb_seq_state *st); 700 void skb_abort_seq_read(struct skb_seq_state *st);
701 701
702 unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, 702 unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
703 unsigned int to, struct ts_config *config, 703 unsigned int to, struct ts_config *config,
704 struct ts_state *state); 704 struct ts_state *state);
705 705
706 void __skb_get_rxhash(struct sk_buff *skb); 706 void __skb_get_rxhash(struct sk_buff *skb);
707 static inline __u32 skb_get_rxhash(struct sk_buff *skb) 707 static inline __u32 skb_get_rxhash(struct sk_buff *skb)
708 { 708 {
709 if (!skb->l4_rxhash) 709 if (!skb->l4_rxhash)
710 __skb_get_rxhash(skb); 710 __skb_get_rxhash(skb);
711 711
712 return skb->rxhash; 712 return skb->rxhash;
713 } 713 }
714 714
715 #ifdef NET_SKBUFF_DATA_USES_OFFSET 715 #ifdef NET_SKBUFF_DATA_USES_OFFSET
716 static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) 716 static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
717 { 717 {
718 return skb->head + skb->end; 718 return skb->head + skb->end;
719 } 719 }
720 720
721 static inline unsigned int skb_end_offset(const struct sk_buff *skb) 721 static inline unsigned int skb_end_offset(const struct sk_buff *skb)
722 { 722 {
723 return skb->end; 723 return skb->end;
724 } 724 }
725 #else 725 #else
726 static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) 726 static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
727 { 727 {
728 return skb->end; 728 return skb->end;
729 } 729 }
730 730
731 static inline unsigned int skb_end_offset(const struct sk_buff *skb) 731 static inline unsigned int skb_end_offset(const struct sk_buff *skb)
732 { 732 {
733 return skb->end - skb->head; 733 return skb->end - skb->head;
734 } 734 }
735 #endif 735 #endif
736 736
737 /* Internal */ 737 /* Internal */
738 #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB))) 738 #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB)))
739 739
740 static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb) 740 static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb)
741 { 741 {
742 return &skb_shinfo(skb)->hwtstamps; 742 return &skb_shinfo(skb)->hwtstamps;
743 } 743 }
744 744
745 /** 745 /**
746 * skb_queue_empty - check if a queue is empty 746 * skb_queue_empty - check if a queue is empty
747 * @list: queue head 747 * @list: queue head
748 * 748 *
749 * Returns true if the queue is empty, false otherwise. 749 * Returns true if the queue is empty, false otherwise.
750 */ 750 */
751 static inline int skb_queue_empty(const struct sk_buff_head *list) 751 static inline int skb_queue_empty(const struct sk_buff_head *list)
752 { 752 {
753 return list->next == (struct sk_buff *)list; 753 return list->next == (struct sk_buff *)list;
754 } 754 }
755 755
756 /** 756 /**
757 * skb_queue_is_last - check if skb is the last entry in the queue 757 * skb_queue_is_last - check if skb is the last entry in the queue
758 * @list: queue head 758 * @list: queue head
759 * @skb: buffer 759 * @skb: buffer
760 * 760 *
761 * Returns true if @skb is the last buffer on the list. 761 * Returns true if @skb is the last buffer on the list.
762 */ 762 */
763 static inline bool skb_queue_is_last(const struct sk_buff_head *list, 763 static inline bool skb_queue_is_last(const struct sk_buff_head *list,
764 const struct sk_buff *skb) 764 const struct sk_buff *skb)
765 { 765 {
766 return skb->next == (struct sk_buff *)list; 766 return skb->next == (struct sk_buff *)list;
767 } 767 }
768 768
769 /** 769 /**
770 * skb_queue_is_first - check if skb is the first entry in the queue 770 * skb_queue_is_first - check if skb is the first entry in the queue
771 * @list: queue head 771 * @list: queue head
772 * @skb: buffer 772 * @skb: buffer
773 * 773 *
774 * Returns true if @skb is the first buffer on the list. 774 * Returns true if @skb is the first buffer on the list.
775 */ 775 */
776 static inline bool skb_queue_is_first(const struct sk_buff_head *list, 776 static inline bool skb_queue_is_first(const struct sk_buff_head *list,
777 const struct sk_buff *skb) 777 const struct sk_buff *skb)
778 { 778 {
779 return skb->prev == (struct sk_buff *)list; 779 return skb->prev == (struct sk_buff *)list;
780 } 780 }
781 781
782 /** 782 /**
783 * skb_queue_next - return the next packet in the queue 783 * skb_queue_next - return the next packet in the queue
784 * @list: queue head 784 * @list: queue head
785 * @skb: current buffer 785 * @skb: current buffer
786 * 786 *
787 * Return the next packet in @list after @skb. It is only valid to 787 * Return the next packet in @list after @skb. It is only valid to
788 * call this if skb_queue_is_last() evaluates to false. 788 * call this if skb_queue_is_last() evaluates to false.
789 */ 789 */
790 static inline struct sk_buff *skb_queue_next(const struct sk_buff_head *list, 790 static inline struct sk_buff *skb_queue_next(const struct sk_buff_head *list,
791 const struct sk_buff *skb) 791 const struct sk_buff *skb)
792 { 792 {
793 /* This BUG_ON may seem severe, but if we just return then we 793 /* This BUG_ON may seem severe, but if we just return then we
794 * are going to dereference garbage. 794 * are going to dereference garbage.
795 */ 795 */
796 BUG_ON(skb_queue_is_last(list, skb)); 796 BUG_ON(skb_queue_is_last(list, skb));
797 return skb->next; 797 return skb->next;
798 } 798 }
799 799
800 /** 800 /**
801 * skb_queue_prev - return the prev packet in the queue 801 * skb_queue_prev - return the prev packet in the queue
802 * @list: queue head 802 * @list: queue head
803 * @skb: current buffer 803 * @skb: current buffer
804 * 804 *
805 * Return the prev packet in @list before @skb. It is only valid to 805 * Return the prev packet in @list before @skb. It is only valid to
806 * call this if skb_queue_is_first() evaluates to false. 806 * call this if skb_queue_is_first() evaluates to false.
807 */ 807 */
808 static inline struct sk_buff *skb_queue_prev(const struct sk_buff_head *list, 808 static inline struct sk_buff *skb_queue_prev(const struct sk_buff_head *list,
809 const struct sk_buff *skb) 809 const struct sk_buff *skb)
810 { 810 {
811 /* This BUG_ON may seem severe, but if we just return then we 811 /* This BUG_ON may seem severe, but if we just return then we
812 * are going to dereference garbage. 812 * are going to dereference garbage.
813 */ 813 */
814 BUG_ON(skb_queue_is_first(list, skb)); 814 BUG_ON(skb_queue_is_first(list, skb));
815 return skb->prev; 815 return skb->prev;
816 } 816 }
817 817
818 /** 818 /**
819 * skb_get - reference buffer 819 * skb_get - reference buffer
820 * @skb: buffer to reference 820 * @skb: buffer to reference
821 * 821 *
822 * Makes another reference to a socket buffer and returns a pointer 822 * Makes another reference to a socket buffer and returns a pointer
823 * to the buffer. 823 * to the buffer.
824 */ 824 */
825 static inline struct sk_buff *skb_get(struct sk_buff *skb) 825 static inline struct sk_buff *skb_get(struct sk_buff *skb)
826 { 826 {
827 atomic_inc(&skb->users); 827 atomic_inc(&skb->users);
828 return skb; 828 return skb;
829 } 829 }
830 830
831 /* 831 /*
832 * If users == 1, we are the only owner and are can avoid redundant 832 * If users == 1, we are the only owner and are can avoid redundant
833 * atomic change. 833 * atomic change.
834 */ 834 */
835 835
836 /** 836 /**
837 * skb_cloned - is the buffer a clone 837 * skb_cloned - is the buffer a clone
838 * @skb: buffer to check 838 * @skb: buffer to check
839 * 839 *
840 * Returns true if the buffer was generated with skb_clone() and is 840 * Returns true if the buffer was generated with skb_clone() and is
841 * one of multiple shared copies of the buffer. Cloned buffers are 841 * one of multiple shared copies of the buffer. Cloned buffers are
842 * shared data so must not be written to under normal circumstances. 842 * shared data so must not be written to under normal circumstances.
843 */ 843 */
844 static inline int skb_cloned(const struct sk_buff *skb) 844 static inline int skb_cloned(const struct sk_buff *skb)
845 { 845 {
846 return skb->cloned && 846 return skb->cloned &&
847 (atomic_read(&skb_shinfo(skb)->dataref) & SKB_DATAREF_MASK) != 1; 847 (atomic_read(&skb_shinfo(skb)->dataref) & SKB_DATAREF_MASK) != 1;
848 } 848 }
849 849
850 static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) 850 static inline int skb_unclone(struct sk_buff *skb, gfp_t pri)
851 { 851 {
852 might_sleep_if(pri & __GFP_WAIT); 852 might_sleep_if(pri & __GFP_WAIT);
853 853
854 if (skb_cloned(skb)) 854 if (skb_cloned(skb))
855 return pskb_expand_head(skb, 0, 0, pri); 855 return pskb_expand_head(skb, 0, 0, pri);
856 856
857 return 0; 857 return 0;
858 } 858 }
859 859
860 /** 860 /**
861 * skb_header_cloned - is the header a clone 861 * skb_header_cloned - is the header a clone
862 * @skb: buffer to check 862 * @skb: buffer to check
863 * 863 *
864 * Returns true if modifying the header part of the buffer requires 864 * Returns true if modifying the header part of the buffer requires
865 * the data to be copied. 865 * the data to be copied.
866 */ 866 */
867 static inline int skb_header_cloned(const struct sk_buff *skb) 867 static inline int skb_header_cloned(const struct sk_buff *skb)
868 { 868 {
869 int dataref; 869 int dataref;
870 870
871 if (!skb->cloned) 871 if (!skb->cloned)
872 return 0; 872 return 0;
873 873
874 dataref = atomic_read(&skb_shinfo(skb)->dataref); 874 dataref = atomic_read(&skb_shinfo(skb)->dataref);
875 dataref = (dataref & SKB_DATAREF_MASK) - (dataref >> SKB_DATAREF_SHIFT); 875 dataref = (dataref & SKB_DATAREF_MASK) - (dataref >> SKB_DATAREF_SHIFT);
876 return dataref != 1; 876 return dataref != 1;
877 } 877 }
878 878
879 /** 879 /**
880 * skb_header_release - release reference to header 880 * skb_header_release - release reference to header
881 * @skb: buffer to operate on 881 * @skb: buffer to operate on
882 * 882 *
883 * Drop a reference to the header part of the buffer. This is done 883 * Drop a reference to the header part of the buffer. This is done
884 * by acquiring a payload reference. You must not read from the header 884 * by acquiring a payload reference. You must not read from the header
885 * part of skb->data after this. 885 * part of skb->data after this.
886 */ 886 */
887 static inline void skb_header_release(struct sk_buff *skb) 887 static inline void skb_header_release(struct sk_buff *skb)
888 { 888 {
889 BUG_ON(skb->nohdr); 889 BUG_ON(skb->nohdr);
890 skb->nohdr = 1; 890 skb->nohdr = 1;
891 atomic_add(1 << SKB_DATAREF_SHIFT, &skb_shinfo(skb)->dataref); 891 atomic_add(1 << SKB_DATAREF_SHIFT, &skb_shinfo(skb)->dataref);
892 } 892 }
893 893
894 /** 894 /**
895 * skb_shared - is the buffer shared 895 * skb_shared - is the buffer shared
896 * @skb: buffer to check 896 * @skb: buffer to check
897 * 897 *
898 * Returns true if more than one person has a reference to this 898 * Returns true if more than one person has a reference to this
899 * buffer. 899 * buffer.
900 */ 900 */
901 static inline int skb_shared(const struct sk_buff *skb) 901 static inline int skb_shared(const struct sk_buff *skb)
902 { 902 {
903 return atomic_read(&skb->users) != 1; 903 return atomic_read(&skb->users) != 1;
904 } 904 }
905 905
906 /** 906 /**
907 * skb_share_check - check if buffer is shared and if so clone it 907 * skb_share_check - check if buffer is shared and if so clone it
908 * @skb: buffer to check 908 * @skb: buffer to check
909 * @pri: priority for memory allocation 909 * @pri: priority for memory allocation
910 * 910 *
911 * If the buffer is shared the buffer is cloned and the old copy 911 * If the buffer is shared the buffer is cloned and the old copy
912 * drops a reference. A new clone with a single reference is returned. 912 * drops a reference. A new clone with a single reference is returned.
913 * If the buffer is not shared the original buffer is returned. When 913 * If the buffer is not shared the original buffer is returned. When
914 * called from interrupt state or with spinlocks held, @pri must 914 * called from interrupt state or with spinlocks held, @pri must
915 * be %GFP_ATOMIC. 915 * be %GFP_ATOMIC.
916 * 916 *
917 * NULL is returned on a memory allocation failure. 917 * NULL is returned on a memory allocation failure.
918 */ 918 */
919 static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri) 919 static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri)
920 { 920 {
921 might_sleep_if(pri & __GFP_WAIT); 921 might_sleep_if(pri & __GFP_WAIT);
922 if (skb_shared(skb)) { 922 if (skb_shared(skb)) {
923 struct sk_buff *nskb = skb_clone(skb, pri); 923 struct sk_buff *nskb = skb_clone(skb, pri);
924 924
925 if (likely(nskb)) 925 if (likely(nskb))
926 consume_skb(skb); 926 consume_skb(skb);
927 else 927 else
928 kfree_skb(skb); 928 kfree_skb(skb);
929 skb = nskb; 929 skb = nskb;
930 } 930 }
931 return skb; 931 return skb;
932 } 932 }
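For illustration (handler and device names are hypothetical), a receive handler that wants to modify the skb usually starts with skb_share_check() exactly as described above:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

static int example_rcv(struct sk_buff *skb, struct net_device *dev)
{
	/* Get a private clone if somebody else also holds a reference. */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (!skb)
		return NET_RX_DROP;	/* clone failed, original already freed */

	/* From here on it is safe to modify skb metadata. */
	return NET_RX_SUCCESS;
}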
933 933
934 /* 934 /*
935 * Copy shared buffers into a new sk_buff. We effectively do COW on 935 * Copy shared buffers into a new sk_buff. We effectively do COW on
936 * packets to handle cases where we have a local reader and forward 936 * packets to handle cases where we have a local reader and forward
937 * and a couple of other messy ones. The normal one is tcpdumping 937 * and a couple of other messy ones. The normal one is tcpdumping
938 * a packet that's being forwarded. 938 * a packet that's being forwarded.
939 */ 939 */
940 940
941 /** 941 /**
942 * skb_unshare - make a copy of a shared buffer 942 * skb_unshare - make a copy of a shared buffer
943 * @skb: buffer to check 943 * @skb: buffer to check
944 * @pri: priority for memory allocation 944 * @pri: priority for memory allocation
945 * 945 *
946 * If the socket buffer is a clone then this function creates a new 946 * If the socket buffer is a clone then this function creates a new
947 * copy of the data, drops a reference count on the old copy and returns 947 * copy of the data, drops a reference count on the old copy and returns
948 * the new copy with the reference count at 1. If the buffer is not a clone 948 * the new copy with the reference count at 1. If the buffer is not a clone
949 * the original buffer is returned. When called with a spinlock held or 949 * the original buffer is returned. When called with a spinlock held or
950 * from interrupt state @pri must be %GFP_ATOMIC 950 * from interrupt state @pri must be %GFP_ATOMIC
951 * 951 *
952 * %NULL is returned on a memory allocation failure. 952 * %NULL is returned on a memory allocation failure.
953 */ 953 */
954 static inline struct sk_buff *skb_unshare(struct sk_buff *skb, 954 static inline struct sk_buff *skb_unshare(struct sk_buff *skb,
955 gfp_t pri) 955 gfp_t pri)
956 { 956 {
957 might_sleep_if(pri & __GFP_WAIT); 957 might_sleep_if(pri & __GFP_WAIT);
958 if (skb_cloned(skb)) { 958 if (skb_cloned(skb)) {
959 struct sk_buff *nskb = skb_copy(skb, pri); 959 struct sk_buff *nskb = skb_copy(skb, pri);
960 kfree_skb(skb); /* Free our shared copy */ 960 kfree_skb(skb); /* Free our shared copy */
961 skb = nskb; 961 skb = nskb;
962 } 962 }
963 return skb; 963 return skb;
964 } 964 }
965 965
966 /** 966 /**
967 * skb_peek - peek at the head of an &sk_buff_head 967 * skb_peek - peek at the head of an &sk_buff_head
968 * @list_: list to peek at 968 * @list_: list to peek at
969 * 969 *
970 * Peek an &sk_buff. Unlike most other operations you _MUST_ 970 * Peek an &sk_buff. Unlike most other operations you _MUST_
971 * be careful with this one. A peek leaves the buffer on the 971 * be careful with this one. A peek leaves the buffer on the
972 * list and someone else may run off with it. You must hold 972 * list and someone else may run off with it. You must hold
973 * the appropriate locks or have a private queue to do this. 973 * the appropriate locks or have a private queue to do this.
974 * 974 *
975 * Returns %NULL for an empty list or a pointer to the head element. 975 * Returns %NULL for an empty list or a pointer to the head element.
976 * The reference count is not incremented and the reference is therefore 976 * The reference count is not incremented and the reference is therefore
977 * volatile. Use with caution. 977 * volatile. Use with caution.
978 */ 978 */
979 static inline struct sk_buff *skb_peek(const struct sk_buff_head *list_) 979 static inline struct sk_buff *skb_peek(const struct sk_buff_head *list_)
980 { 980 {
981 struct sk_buff *skb = list_->next; 981 struct sk_buff *skb = list_->next;
982 982
983 if (skb == (struct sk_buff *)list_) 983 if (skb == (struct sk_buff *)list_)
984 skb = NULL; 984 skb = NULL;
985 return skb; 985 return skb;
986 } 986 }
987 987
988 /** 988 /**
989 * skb_peek_next - peek skb following the given one from a queue 989 * skb_peek_next - peek skb following the given one from a queue
990 * @skb: skb to start from 990 * @skb: skb to start from
991 * @list_: list to peek at 991 * @list_: list to peek at
992 * 992 *
993 * Returns %NULL when the end of the list is met or a pointer to the 993 * Returns %NULL when the end of the list is met or a pointer to the
994 * next element. The reference count is not incremented and the 994 * next element. The reference count is not incremented and the
995 * reference is therefore volatile. Use with caution. 995 * reference is therefore volatile. Use with caution.
996 */ 996 */
997 static inline struct sk_buff *skb_peek_next(struct sk_buff *skb, 997 static inline struct sk_buff *skb_peek_next(struct sk_buff *skb,
998 const struct sk_buff_head *list_) 998 const struct sk_buff_head *list_)
999 { 999 {
1000 struct sk_buff *next = skb->next; 1000 struct sk_buff *next = skb->next;
1001 1001
1002 if (next == (struct sk_buff *)list_) 1002 if (next == (struct sk_buff *)list_)
1003 next = NULL; 1003 next = NULL;
1004 return next; 1004 return next;
1005 } 1005 }
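A sketch of the locking the peek helpers require, assuming the queue is protected by its own sk_buff_head lock (the function is illustrative; skb_queue_len() below would of course give the count directly):

#include <linux/skbuff.h>
#include <linux/spinlock.h>

static unsigned int example_walk_queue(struct sk_buff_head *q)
{
	struct sk_buff *skb;
	unsigned long flags;
	unsigned int n = 0;

	spin_lock_irqsave(&q->lock, flags);
	for (skb = skb_peek(q); skb; skb = skb_peek_next(skb, q))
		n++;			/* buffers stay on the list */
	spin_unlock_irqrestore(&q->lock, flags);

	return n;
}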
1006 1006
1007 /** 1007 /**
1008 * skb_peek_tail - peek at the tail of an &sk_buff_head 1008 * skb_peek_tail - peek at the tail of an &sk_buff_head
1009 * @list_: list to peek at 1009 * @list_: list to peek at
1010 * 1010 *
1011 * Peek an &sk_buff. Unlike most other operations you _MUST_ 1011 * Peek an &sk_buff. Unlike most other operations you _MUST_
1012 * be careful with this one. A peek leaves the buffer on the 1012 * be careful with this one. A peek leaves the buffer on the
1013 * list and someone else may run off with it. You must hold 1013 * list and someone else may run off with it. You must hold
1014 * the appropriate locks or have a private queue to do this. 1014 * the appropriate locks or have a private queue to do this.
1015 * 1015 *
1016 * Returns %NULL for an empty list or a pointer to the tail element. 1016 * Returns %NULL for an empty list or a pointer to the tail element.
1017 * The reference count is not incremented and the reference is therefore 1017 * The reference count is not incremented and the reference is therefore
1018 * volatile. Use with caution. 1018 * volatile. Use with caution.
1019 */ 1019 */
1020 static inline struct sk_buff *skb_peek_tail(const struct sk_buff_head *list_) 1020 static inline struct sk_buff *skb_peek_tail(const struct sk_buff_head *list_)
1021 { 1021 {
1022 struct sk_buff *skb = list_->prev; 1022 struct sk_buff *skb = list_->prev;
1023 1023
1024 if (skb == (struct sk_buff *)list_) 1024 if (skb == (struct sk_buff *)list_)
1025 skb = NULL; 1025 skb = NULL;
1026 return skb; 1026 return skb;
1027 1027
1028 } 1028 }
1029 1029
1030 /** 1030 /**
1031 * skb_queue_len - get queue length 1031 * skb_queue_len - get queue length
1032 * @list_: list to measure 1032 * @list_: list to measure
1033 * 1033 *
1034 * Return the length of an &sk_buff queue. 1034 * Return the length of an &sk_buff queue.
1035 */ 1035 */
1036 static inline __u32 skb_queue_len(const struct sk_buff_head *list_) 1036 static inline __u32 skb_queue_len(const struct sk_buff_head *list_)
1037 { 1037 {
1038 return list_->qlen; 1038 return list_->qlen;
1039 } 1039 }
1040 1040
1041 /** 1041 /**
1042 * __skb_queue_head_init - initialize non-spinlock portions of sk_buff_head 1042 * __skb_queue_head_init - initialize non-spinlock portions of sk_buff_head
1043 * @list: queue to initialize 1043 * @list: queue to initialize
1044 * 1044 *
1045 * This initializes only the list and queue length aspects of 1045 * This initializes only the list and queue length aspects of
1046 * an sk_buff_head object. This allows initializing the list 1046 * an sk_buff_head object. This allows initializing the list
1047 * aspects of an sk_buff_head without reinitializing things like 1047 * aspects of an sk_buff_head without reinitializing things like
1048 * the spinlock. It can also be used for on-stack sk_buff_head 1048 * the spinlock. It can also be used for on-stack sk_buff_head
1049 * objects where the spinlock is known to not be used. 1049 * objects where the spinlock is known to not be used.
1050 */ 1050 */
1051 static inline void __skb_queue_head_init(struct sk_buff_head *list) 1051 static inline void __skb_queue_head_init(struct sk_buff_head *list)
1052 { 1052 {
1053 list->prev = list->next = (struct sk_buff *)list; 1053 list->prev = list->next = (struct sk_buff *)list;
1054 list->qlen = 0; 1054 list->qlen = 0;
1055 } 1055 }
1056 1056
1057 /* 1057 /*
1058 * This function creates a split out lock class for each invocation; 1058 * This function creates a split out lock class for each invocation;
1059 * this is needed for now since a whole lot of users of the skb-queue 1059 * this is needed for now since a whole lot of users of the skb-queue
1060 * infrastructure in drivers have different locking usage (in hardirq) 1060 * infrastructure in drivers have different locking usage (in hardirq)
1061 * than the networking core (in softirq only). In the long run either the 1061 * than the networking core (in softirq only). In the long run either the
1062 * network layer or drivers should need annotation to consolidate the 1062 * network layer or drivers should need annotation to consolidate the
1063 * main types of usage into 3 classes. 1063 * main types of usage into 3 classes.
1064 */ 1064 */
1065 static inline void skb_queue_head_init(struct sk_buff_head *list) 1065 static inline void skb_queue_head_init(struct sk_buff_head *list)
1066 { 1066 {
1067 spin_lock_init(&list->lock); 1067 spin_lock_init(&list->lock);
1068 __skb_queue_head_init(list); 1068 __skb_queue_head_init(list);
1069 } 1069 }
1070 1070
1071 static inline void skb_queue_head_init_class(struct sk_buff_head *list, 1071 static inline void skb_queue_head_init_class(struct sk_buff_head *list,
1072 struct lock_class_key *class) 1072 struct lock_class_key *class)
1073 { 1073 {
1074 skb_queue_head_init(list); 1074 skb_queue_head_init(list);
1075 lockdep_set_class(&list->lock, class); 1075 lockdep_set_class(&list->lock, class);
1076 } 1076 }
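As an aside, the usual split between the two initializers looks like this (structure and function names are made up): the locked variant for a queue embedded in a long-lived object, the double-underscore variant for a short-lived private list where the spinlock is never taken:

#include <linux/skbuff.h>

struct example_priv {
	struct sk_buff_head rxq;		/* hypothetical driver field */
};

static void example_init(struct example_priv *priv)
{
	skb_queue_head_init(&priv->rxq);	/* initializes lock and list */
}

static void example_local_batch(void)
{
	struct sk_buff_head batch;

	__skb_queue_head_init(&batch);		/* lock intentionally unused */
	/* ... fill and drain batch without taking any queue lock ... */
}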
1077 1077
1078 /* 1078 /*
1079 * Insert an sk_buff on a list. 1079 * Insert an sk_buff on a list.
1080 * 1080 *
1081 * The "__skb_xxxx()" functions are the non-atomic ones that 1081 * The "__skb_xxxx()" functions are the non-atomic ones that
1082 * can only be called with interrupts disabled. 1082 * can only be called with interrupts disabled.
1083 */ 1083 */
1084 void skb_insert(struct sk_buff *old, struct sk_buff *newsk, 1084 void skb_insert(struct sk_buff *old, struct sk_buff *newsk,
1085 struct sk_buff_head *list); 1085 struct sk_buff_head *list);
1086 static inline void __skb_insert(struct sk_buff *newsk, 1086 static inline void __skb_insert(struct sk_buff *newsk,
1087 struct sk_buff *prev, struct sk_buff *next, 1087 struct sk_buff *prev, struct sk_buff *next,
1088 struct sk_buff_head *list) 1088 struct sk_buff_head *list)
1089 { 1089 {
1090 newsk->next = next; 1090 newsk->next = next;
1091 newsk->prev = prev; 1091 newsk->prev = prev;
1092 next->prev = prev->next = newsk; 1092 next->prev = prev->next = newsk;
1093 list->qlen++; 1093 list->qlen++;
1094 } 1094 }
1095 1095
1096 static inline void __skb_queue_splice(const struct sk_buff_head *list, 1096 static inline void __skb_queue_splice(const struct sk_buff_head *list,
1097 struct sk_buff *prev, 1097 struct sk_buff *prev,
1098 struct sk_buff *next) 1098 struct sk_buff *next)
1099 { 1099 {
1100 struct sk_buff *first = list->next; 1100 struct sk_buff *first = list->next;
1101 struct sk_buff *last = list->prev; 1101 struct sk_buff *last = list->prev;
1102 1102
1103 first->prev = prev; 1103 first->prev = prev;
1104 prev->next = first; 1104 prev->next = first;
1105 1105
1106 last->next = next; 1106 last->next = next;
1107 next->prev = last; 1107 next->prev = last;
1108 } 1108 }
1109 1109
1110 /** 1110 /**
1111 * skb_queue_splice - join two skb lists; this is designed for stacks 1111 * skb_queue_splice - join two skb lists; this is designed for stacks
1112 * @list: the new list to add 1112 * @list: the new list to add
1113 * @head: the place to add it in the first list 1113 * @head: the place to add it in the first list
1114 */ 1114 */
1115 static inline void skb_queue_splice(const struct sk_buff_head *list, 1115 static inline void skb_queue_splice(const struct sk_buff_head *list,
1116 struct sk_buff_head *head) 1116 struct sk_buff_head *head)
1117 { 1117 {
1118 if (!skb_queue_empty(list)) { 1118 if (!skb_queue_empty(list)) {
1119 __skb_queue_splice(list, (struct sk_buff *) head, head->next); 1119 __skb_queue_splice(list, (struct sk_buff *) head, head->next);
1120 head->qlen += list->qlen; 1120 head->qlen += list->qlen;
1121 } 1121 }
1122 } 1122 }
1123 1123
1124 /** 1124 /**
1125 * skb_queue_splice_init - join two skb lists and reinitialise the emptied list 1125 * skb_queue_splice_init - join two skb lists and reinitialise the emptied list
1126 * @list: the new list to add 1126 * @list: the new list to add
1127 * @head: the place to add it in the first list 1127 * @head: the place to add it in the first list
1128 * 1128 *
1129 * The list at @list is reinitialised 1129 * The list at @list is reinitialised
1130 */ 1130 */
1131 static inline void skb_queue_splice_init(struct sk_buff_head *list, 1131 static inline void skb_queue_splice_init(struct sk_buff_head *list,
1132 struct sk_buff_head *head) 1132 struct sk_buff_head *head)
1133 { 1133 {
1134 if (!skb_queue_empty(list)) { 1134 if (!skb_queue_empty(list)) {
1135 __skb_queue_splice(list, (struct sk_buff *) head, head->next); 1135 __skb_queue_splice(list, (struct sk_buff *) head, head->next);
1136 head->qlen += list->qlen; 1136 head->qlen += list->qlen;
1137 __skb_queue_head_init(list); 1137 __skb_queue_head_init(list);
1138 } 1138 }
1139 } 1139 }
1140 1140
1141 /** 1141 /**
1142 * skb_queue_splice_tail - join two skb lists, each list being a queue 1142 * skb_queue_splice_tail - join two skb lists, each list being a queue
1143 * @list: the new list to add 1143 * @list: the new list to add
1144 * @head: the place to add it in the first list 1144 * @head: the place to add it in the first list
1145 */ 1145 */
1146 static inline void skb_queue_splice_tail(const struct sk_buff_head *list, 1146 static inline void skb_queue_splice_tail(const struct sk_buff_head *list,
1147 struct sk_buff_head *head) 1147 struct sk_buff_head *head)
1148 { 1148 {
1149 if (!skb_queue_empty(list)) { 1149 if (!skb_queue_empty(list)) {
1150 __skb_queue_splice(list, head->prev, (struct sk_buff *) head); 1150 __skb_queue_splice(list, head->prev, (struct sk_buff *) head);
1151 head->qlen += list->qlen; 1151 head->qlen += list->qlen;
1152 } 1152 }
1153 } 1153 }
1154 1154
1155 /** 1155 /**
1156 * skb_queue_splice_tail_init - join two skb lists and reinitialise the emptied list 1156 * skb_queue_splice_tail_init - join two skb lists and reinitialise the emptied list
1157 * @list: the new list to add 1157 * @list: the new list to add
1158 * @head: the place to add it in the first list 1158 * @head: the place to add it in the first list
1159 * 1159 *
1160 * Each of the lists is a queue. 1160 * Each of the lists is a queue.
1161 * The list at @list is reinitialised 1161 * The list at @list is reinitialised
1162 */ 1162 */
1163 static inline void skb_queue_splice_tail_init(struct sk_buff_head *list, 1163 static inline void skb_queue_splice_tail_init(struct sk_buff_head *list,
1164 struct sk_buff_head *head) 1164 struct sk_buff_head *head)
1165 { 1165 {
1166 if (!skb_queue_empty(list)) { 1166 if (!skb_queue_empty(list)) {
1167 __skb_queue_splice(list, head->prev, (struct sk_buff *) head); 1167 __skb_queue_splice(list, head->prev, (struct sk_buff *) head);
1168 head->qlen += list->qlen; 1168 head->qlen += list->qlen;
1169 __skb_queue_head_init(list); 1169 __skb_queue_head_init(list);
1170 } 1170 }
1171 } 1171 }
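A common pattern these splice helpers enable, shown purely as a sketch with hypothetical names: move the whole shared queue onto a private on-stack list in one lock hold, then work on the buffers with the lock dropped:

#include <linux/skbuff.h>
#include <linux/spinlock.h>

static void example_drain(struct sk_buff_head *shared_q)
{
	struct sk_buff_head local;
	struct sk_buff *skb;
	unsigned long flags;

	__skb_queue_head_init(&local);

	spin_lock_irqsave(&shared_q->lock, flags);
	skb_queue_splice_tail_init(shared_q, &local);	/* shared_q is now empty */
	spin_unlock_irqrestore(&shared_q->lock, flags);

	while ((skb = __skb_dequeue(&local)) != NULL)
		kfree_skb(skb);		/* or hand each skb on for processing */
}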
1172 1172
1173 /** 1173 /**
1174 * __skb_queue_after - queue a buffer after the given buffer 1174 * __skb_queue_after - queue a buffer after the given buffer
1175 * @list: list to use 1175 * @list: list to use
1176 * @prev: place after this buffer 1176 * @prev: place after this buffer
1177 * @newsk: buffer to queue 1177 * @newsk: buffer to queue
1178 * 1178 *
1179 * Queue a buffer in the middle of a list. This function takes no locks 1179 * Queue a buffer in the middle of a list. This function takes no locks
1180 * and you must therefore hold required locks before calling it. 1180 * and you must therefore hold required locks before calling it.
1181 * 1181 *
1182 * A buffer cannot be placed on two lists at the same time. 1182 * A buffer cannot be placed on two lists at the same time.
1183 */ 1183 */
1184 static inline void __skb_queue_after(struct sk_buff_head *list, 1184 static inline void __skb_queue_after(struct sk_buff_head *list,
1185 struct sk_buff *prev, 1185 struct sk_buff *prev,
1186 struct sk_buff *newsk) 1186 struct sk_buff *newsk)
1187 { 1187 {
1188 __skb_insert(newsk, prev, prev->next, list); 1188 __skb_insert(newsk, prev, prev->next, list);
1189 } 1189 }
1190 1190
1191 void skb_append(struct sk_buff *old, struct sk_buff *newsk, 1191 void skb_append(struct sk_buff *old, struct sk_buff *newsk,
1192 struct sk_buff_head *list); 1192 struct sk_buff_head *list);
1193 1193
1194 static inline void __skb_queue_before(struct sk_buff_head *list, 1194 static inline void __skb_queue_before(struct sk_buff_head *list,
1195 struct sk_buff *next, 1195 struct sk_buff *next,
1196 struct sk_buff *newsk) 1196 struct sk_buff *newsk)
1197 { 1197 {
1198 __skb_insert(newsk, next->prev, next, list); 1198 __skb_insert(newsk, next->prev, next, list);
1199 } 1199 }
1200 1200
1201 /** 1201 /**
1202 * __skb_queue_head - queue a buffer at the list head 1202 * __skb_queue_head - queue a buffer at the list head
1203 * @list: list to use 1203 * @list: list to use
1204 * @newsk: buffer to queue 1204 * @newsk: buffer to queue
1205 * 1205 *
1206 * Queue a buffer at the start of a list. This function takes no locks 1206 * Queue a buffer at the start of a list. This function takes no locks
1207 * and you must therefore hold required locks before calling it. 1207 * and you must therefore hold required locks before calling it.
1208 * 1208 *
1209 * A buffer cannot be placed on two lists at the same time. 1209 * A buffer cannot be placed on two lists at the same time.
1210 */ 1210 */
1211 void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk); 1211 void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);
1212 static inline void __skb_queue_head(struct sk_buff_head *list, 1212 static inline void __skb_queue_head(struct sk_buff_head *list,
1213 struct sk_buff *newsk) 1213 struct sk_buff *newsk)
1214 { 1214 {
1215 __skb_queue_after(list, (struct sk_buff *)list, newsk); 1215 __skb_queue_after(list, (struct sk_buff *)list, newsk);
1216 } 1216 }
1217 1217
1218 /** 1218 /**
1219 * __skb_queue_tail - queue a buffer at the list tail 1219 * __skb_queue_tail - queue a buffer at the list tail
1220 * @list: list to use 1220 * @list: list to use
1221 * @newsk: buffer to queue 1221 * @newsk: buffer to queue
1222 * 1222 *
1223 * Queue a buffer at the end of a list. This function takes no locks 1223 * Queue a buffer at the end of a list. This function takes no locks
1224 * and you must therefore hold required locks before calling it. 1224 * and you must therefore hold required locks before calling it.
1225 * 1225 *
1226 * A buffer cannot be placed on two lists at the same time. 1226 * A buffer cannot be placed on two lists at the same time.
1227 */ 1227 */
1228 void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk); 1228 void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk);
1229 static inline void __skb_queue_tail(struct sk_buff_head *list, 1229 static inline void __skb_queue_tail(struct sk_buff_head *list,
1230 struct sk_buff *newsk) 1230 struct sk_buff *newsk)
1231 { 1231 {
1232 __skb_queue_before(list, (struct sk_buff *)list, newsk); 1232 __skb_queue_before(list, (struct sk_buff *)list, newsk);
1233 } 1233 }
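Illustrative only: the unlocked __skb_queue_tail()/__skb_queue_head() variants are meant for callers that already hold the queue lock or own the queue outright, e.g.:

#include <linux/skbuff.h>
#include <linux/spinlock.h>

static void example_enqueue(struct sk_buff_head *q, struct sk_buff *skb)
{
	unsigned long flags;

	spin_lock_irqsave(&q->lock, flags);
	__skb_queue_tail(q, skb);	/* qlen is updated by __skb_insert() */
	spin_unlock_irqrestore(&q->lock, flags);
}

The locked skb_queue_tail() wrapper declared above does exactly this; open-coding it is only worthwhile when several queue operations must happen under a single lock hold.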
1234 1234
1235 /* 1235 /*
1236 * remove sk_buff from list. _Must_ be called atomically, and with 1236 * remove sk_buff from list. _Must_ be called atomically, and with
1237 * the list known. 1237 * the list known.
1238 */ 1238 */
1239 void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list); 1239 void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list);
1240 static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) 1240 static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
1241 { 1241 {
1242 struct sk_buff *next, *prev; 1242 struct sk_buff *next, *prev;
1243 1243
1244 list->qlen--; 1244 list->qlen--;
1245 next = skb->next; 1245 next = skb->next;
1246 prev = skb->prev; 1246 prev = skb->prev;
1247 skb->next = skb->prev = NULL; 1247 skb->next = skb->prev = NULL;
1248 next->prev = prev; 1248 next->prev = prev;
1249 prev->next = next; 1249 prev->next = next;
1250 } 1250 }
1251 1251
1252 /** 1252 /**
1253 * __skb_dequeue - remove from the head of the queue 1253 * __skb_dequeue - remove from the head of the queue
1254 * @list: list to dequeue from 1254 * @list: list to dequeue from
1255 * 1255 *
1256 * Remove the head of the list. This function does not take any locks 1256 * Remove the head of the list. This function does not take any locks
1257 * so must be used with appropriate locks held only. The head item is 1257 * so must be used with appropriate locks held only. The head item is
1258 * returned or %NULL if the list is empty. 1258 * returned or %NULL if the list is empty.
1259 */ 1259 */
1260 struct sk_buff *skb_dequeue(struct sk_buff_head *list); 1260 struct sk_buff *skb_dequeue(struct sk_buff_head *list);
1261 static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) 1261 static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
1262 { 1262 {
1263 struct sk_buff *skb = skb_peek(list); 1263 struct sk_buff *skb = skb_peek(list);
1264 if (skb) 1264 if (skb)
1265 __skb_unlink(skb, list); 1265 __skb_unlink(skb, list);
1266 return skb; 1266 return skb;
1267 } 1267 }
1268 1268
1269 /** 1269 /**
1270 * __skb_dequeue_tail - remove from the tail of the queue 1270 * __skb_dequeue_tail - remove from the tail of the queue
1271 * @list: list to dequeue from 1271 * @list: list to dequeue from
1272 * 1272 *
1273 * Remove the tail of the list. This function does not take any locks 1273 * Remove the tail of the list. This function does not take any locks
1274 * so must be used with appropriate locks held only. The tail item is 1274 * so must be used with appropriate locks held only. The tail item is
1275 * returned or %NULL if the list is empty. 1275 * returned or %NULL if the list is empty.
1276 */ 1276 */
1277 struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list); 1277 struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list);
1278 static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) 1278 static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list)
1279 { 1279 {
1280 struct sk_buff *skb = skb_peek_tail(list); 1280 struct sk_buff *skb = skb_peek_tail(list);
1281 if (skb) 1281 if (skb)
1282 __skb_unlink(skb, list); 1282 __skb_unlink(skb, list);
1283 return skb; 1283 return skb;
1284 } 1284 }
1285 1285
1286 1286
1287 static inline bool skb_is_nonlinear(const struct sk_buff *skb) 1287 static inline bool skb_is_nonlinear(const struct sk_buff *skb)
1288 { 1288 {
1289 return skb->data_len; 1289 return skb->data_len;
1290 } 1290 }
1291 1291
1292 static inline unsigned int skb_headlen(const struct sk_buff *skb) 1292 static inline unsigned int skb_headlen(const struct sk_buff *skb)
1293 { 1293 {
1294 return skb->len - skb->data_len; 1294 return skb->len - skb->data_len;
1295 } 1295 }
1296 1296
1297 static inline int skb_pagelen(const struct sk_buff *skb) 1297 static inline int skb_pagelen(const struct sk_buff *skb)
1298 { 1298 {
1299 int i, len = 0; 1299 int i, len = 0;
1300 1300
1301 for (i = (int)skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) 1301 for (i = (int)skb_shinfo(skb)->nr_frags - 1; i >= 0; i--)
1302 len += skb_frag_size(&skb_shinfo(skb)->frags[i]); 1302 len += skb_frag_size(&skb_shinfo(skb)->frags[i]);
1303 return len + skb_headlen(skb); 1303 return len + skb_headlen(skb);
1304 } 1304 }
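To make the linear/paged split concrete (sketch only; skb_linearize() is provided further down in this header), a path that cannot deal with paged data might do:

#include <linux/skbuff.h>

static int example_flatten(struct sk_buff *skb)
{
	/* Invariant: skb->len == skb_headlen(skb) + skb->data_len. */
	if (!skb_is_nonlinear(skb))
		return 0;		/* everything already in the linear area */

	/* Pull all paged fragments into the linear buffer. */
	return skb_linearize(skb);
}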
1305 1305
1306 /** 1306 /**
1307 * __skb_fill_page_desc - initialise a paged fragment in an skb 1307 * __skb_fill_page_desc - initialise a paged fragment in an skb
1308 * @skb: buffer containing fragment to be initialised 1308 * @skb: buffer containing fragment to be initialised
1309 * @i: paged fragment index to initialise 1309 * @i: paged fragment index to initialise
1310 * @page: the page to use for this fragment 1310 * @page: the page to use for this fragment
1311 * @off: the offset to the data within @page 1311 * @off: the offset to the data within @page
1312 * @size: the length of the data 1312 * @size: the length of the data
1313 * 1313 *
1314 * Initialises the @i'th fragment of @skb to point to @size bytes at 1314 * Initialises the @i'th fragment of @skb to point to @size bytes at
1315 * offset @off within @page. 1315 * offset @off within @page.
1316 * 1316 *
1317 * Does not take any additional reference on the fragment. 1317 * Does not take any additional reference on the fragment.
1318 */ 1318 */
1319 static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, 1319 static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
1320 struct page *page, int off, int size) 1320 struct page *page, int off, int size)
1321 { 1321 {
1322 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 1322 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1323 1323
1324 /* 1324 /*
1325 * Propagate page->pfmemalloc to the skb if we can. The problem is 1325 * Propagate page->pfmemalloc to the skb if we can. The problem is
1326 * that not all callers have unique ownership of the page. If 1326 * that not all callers have unique ownership of the page. If
1327 * pfmemalloc is set, we check the mapping as a mapping implies 1327 * pfmemalloc is set, we check the mapping as a mapping implies
1328 * page->index is set (index and pfmemalloc share space). 1328 * page->index is set (index and pfmemalloc share space).
1329 * If it's a valid mapping, we cannot use page->pfmemalloc but we 1329 * If it's a valid mapping, we cannot use page->pfmemalloc but we
1330 * do not lose pfmemalloc information as the pages would not be 1330 * do not lose pfmemalloc information as the pages would not be
1331 * allocated using __GFP_MEMALLOC. 1331 * allocated using __GFP_MEMALLOC.
1332 */ 1332 */
1333 frag->page.p = page; 1333 frag->page.p = page;
1334 frag->page_offset = off; 1334 frag->page_offset = off;
1335 skb_frag_size_set(frag, size); 1335 skb_frag_size_set(frag, size);
1336 1336
1337 page = compound_head(page); 1337 page = compound_head(page);
1338 if (page->pfmemalloc && !page->mapping) 1338 if (page->pfmemalloc && !page->mapping)
1339 skb->pfmemalloc = true; 1339 skb->pfmemalloc = true;
1340 } 1340 }
1341 1341
1342 /** 1342 /**
1343 * skb_fill_page_desc - initialise a paged fragment in an skb 1343 * skb_fill_page_desc - initialise a paged fragment in an skb
1344 * @skb: buffer containing fragment to be initialised 1344 * @skb: buffer containing fragment to be initialised
1345 * @i: paged fragment index to initialise 1345 * @i: paged fragment index to initialise
1346 * @page: the page to use for this fragment 1346 * @page: the page to use for this fragment
1347 * @off: the offset to the data within @page 1347 * @off: the offset to the data within @page
1348 * @size: the length of the data 1348 * @size: the length of the data
1349 * 1349 *
1350 * As per __skb_fill_page_desc() -- initialises the @i'th fragment of 1350 * As per __skb_fill_page_desc() -- initialises the @i'th fragment of
1351 * @skb to point to @size bytes at offset @off within @page. In 1351 * @skb to point to @size bytes at offset @off within @page. In
1352 * addition updates @skb such that @i is the last fragment. 1352 * addition updates @skb such that @i is the last fragment.
1353 * 1353 *
1354 * Does not take any additional reference on the fragment. 1354 * Does not take any additional reference on the fragment.
1355 */ 1355 */
1356 static inline void skb_fill_page_desc(struct sk_buff *skb, int i, 1356 static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
1357 struct page *page, int off, int size) 1357 struct page *page, int off, int size)
1358 { 1358 {
1359 __skb_fill_page_desc(skb, i, page, off, size); 1359 __skb_fill_page_desc(skb, i, page, off, size);
1360 skb_shinfo(skb)->nr_frags = i + 1; 1360 skb_shinfo(skb)->nr_frags = i + 1;
1361 } 1361 }
1362 1362
1363 void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, 1363 void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
1364 int size, unsigned int truesize); 1364 int size, unsigned int truesize);
1365 1365
1366 void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, 1366 void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
1367 unsigned int truesize); 1367 unsigned int truesize);
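For example (illustrative only), a receive path that places data straight into pages can attach them with skb_add_rx_frag(), which fills the fragment descriptor and also accounts skb->len, skb->data_len and skb->truesize; with the bare skb_fill_page_desc() the caller has to update those fields itself:

#include <linux/skbuff.h>

static void example_attach_page(struct sk_buff *skb, struct page *page,
				unsigned int len)
{
	int i = skb_shinfo(skb)->nr_frags;

	/* Append the page as fragment i; a truesize of PAGE_SIZE is assumed. */
	skb_add_rx_frag(skb, i, page, 0, len, PAGE_SIZE);
}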
1368 1368
1369 #define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags) 1369 #define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags)
1370 #define SKB_FRAG_ASSERT(skb) BUG_ON(skb_has_frag_list(skb)) 1370 #define SKB_FRAG_ASSERT(skb) BUG_ON(skb_has_frag_list(skb))
1371 #define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb)) 1371 #define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb))
1372 1372
1373 #ifdef NET_SKBUFF_DATA_USES_OFFSET 1373 #ifdef NET_SKBUFF_DATA_USES_OFFSET
1374 static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb) 1374 static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb)
1375 { 1375 {
1376 return skb->head + skb->tail; 1376 return skb->head + skb->tail;
1377 } 1377 }
1378 1378
1379 static inline void skb_reset_tail_pointer(struct sk_buff *skb) 1379 static inline void skb_reset_tail_pointer(struct sk_buff *skb)
1380 { 1380 {
1381 skb->tail = skb->data - skb->head; 1381 skb->tail = skb->data - skb->head;
1382 } 1382 }
1383 1383
1384 static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) 1384 static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset)
1385 { 1385 {
1386 skb_reset_tail_pointer(skb); 1386 skb_reset_tail_pointer(skb);
1387 skb->tail += offset; 1387 skb->tail += offset;
1388 } 1388 }
1389 1389
1390 #else /* NET_SKBUFF_DATA_USES_OFFSET */ 1390 #else /* NET_SKBUFF_DATA_USES_OFFSET */
1391 static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb) 1391 static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb)
1392 { 1392 {
1393 return skb->tail; 1393 return skb->tail;
1394 } 1394 }
1395 1395
1396 static inline void skb_reset_tail_pointer(struct sk_buff *skb) 1396 static inline void skb_reset_tail_pointer(struct sk_buff *skb)
1397 { 1397 {
1398 skb->tail = skb->data; 1398 skb->tail = skb->data;
1399 } 1399 }
1400 1400
1401 static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) 1401 static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset)
1402 { 1402 {
1403 skb->tail = skb->data + offset; 1403 skb->tail = skb->data + offset;
1404 } 1404 }
1405 1405
1406 #endif /* NET_SKBUFF_DATA_USES_OFFSET */ 1406 #endif /* NET_SKBUFF_DATA_USES_OFFSET */
1407 1407
1408 /* 1408 /*
1409 * Add data to an sk_buff 1409 * Add data to an sk_buff
1410 */ 1410 */
1411 unsigned char *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len); 1411 unsigned char *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len);
1412 unsigned char *skb_put(struct sk_buff *skb, unsigned int len); 1412 unsigned char *skb_put(struct sk_buff *skb, unsigned int len);
1413 static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) 1413 static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
1414 { 1414 {
1415 unsigned char *tmp = skb_tail_pointer(skb); 1415 unsigned char *tmp = skb_tail_pointer(skb);
1416 SKB_LINEAR_ASSERT(skb); 1416 SKB_LINEAR_ASSERT(skb);
1417 skb->tail += len; 1417 skb->tail += len;
1418 skb->len += len; 1418 skb->len += len;
1419 return tmp; 1419 return tmp;
1420 } 1420 }
1421 1421
1422 unsigned char *skb_push(struct sk_buff *skb, unsigned int len); 1422 unsigned char *skb_push(struct sk_buff *skb, unsigned int len);
1423 static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) 1423 static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
1424 { 1424 {
1425 skb->data -= len; 1425 skb->data -= len;
1426 skb->len += len; 1426 skb->len += len;
1427 return skb->data; 1427 return skb->data;
1428 } 1428 }
1429 1429
1430 unsigned char *skb_pull(struct sk_buff *skb, unsigned int len); 1430 unsigned char *skb_pull(struct sk_buff *skb, unsigned int len);
1431 static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len) 1431 static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len)
1432 { 1432 {
1433 skb->len -= len; 1433 skb->len -= len;
1434 BUG_ON(skb->len < skb->data_len); 1434 BUG_ON(skb->len < skb->data_len);
1435 return skb->data += len; 1435 return skb->data += len;
1436 } 1436 }
1437 1437
1438 static inline unsigned char *skb_pull_inline(struct sk_buff *skb, unsigned int len) 1438 static inline unsigned char *skb_pull_inline(struct sk_buff *skb, unsigned int len)
1439 { 1439 {
1440 return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len); 1440 return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len);
1441 } 1441 }
1442 1442
1443 unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta); 1443 unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta);
1444 1444
1445 static inline unsigned char *__pskb_pull(struct sk_buff *skb, unsigned int len) 1445 static inline unsigned char *__pskb_pull(struct sk_buff *skb, unsigned int len)
1446 { 1446 {
1447 if (len > skb_headlen(skb) && 1447 if (len > skb_headlen(skb) &&
1448 !__pskb_pull_tail(skb, len - skb_headlen(skb))) 1448 !__pskb_pull_tail(skb, len - skb_headlen(skb)))
1449 return NULL; 1449 return NULL;
1450 skb->len -= len; 1450 skb->len -= len;
1451 return skb->data += len; 1451 return skb->data += len;
1452 } 1452 }
1453 1453
1454 static inline unsigned char *pskb_pull(struct sk_buff *skb, unsigned int len) 1454 static inline unsigned char *pskb_pull(struct sk_buff *skb, unsigned int len)
1455 { 1455 {
1456 return unlikely(len > skb->len) ? NULL : __pskb_pull(skb, len); 1456 return unlikely(len > skb->len) ? NULL : __pskb_pull(skb, len);
1457 } 1457 }
1458 1458
1459 static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len) 1459 static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len)
1460 { 1460 {
1461 if (likely(len <= skb_headlen(skb))) 1461 if (likely(len <= skb_headlen(skb)))
1462 return 1; 1462 return 1;
1463 if (unlikely(len > skb->len)) 1463 if (unlikely(len > skb->len))
1464 return 0; 1464 return 0;
1465 return __pskb_pull_tail(skb, len - skb_headlen(skb)) != NULL; 1465 return __pskb_pull_tail(skb, len - skb_headlen(skb)) != NULL;
1466 } 1466 }
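A typical parsing sequence (sketch only) checks with pskb_may_pull() before dereferencing skb->data; note that a successful pull may reallocate the head, so pointers into the packet must be re-read afterwards:

#include <linux/ip.h>
#include <linux/skbuff.h>

static int example_check_iph(struct sk_buff *skb)
{
	const struct iphdr *iph;

	if (!pskb_may_pull(skb, sizeof(*iph)))
		return -EINVAL;			/* header not in linear data */

	iph = (const struct iphdr *)skb->data;
	if (!pskb_may_pull(skb, iph->ihl * 4))
		return -EINVAL;			/* IP options missing */

	/* Reload: __pskb_pull_tail() may have moved skb->head. */
	iph = (const struct iphdr *)skb->data;

	return iph->version == 4 ? 0 : -EINVAL;
}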
1467 1467
1468 /** 1468 /**
1469 * skb_headroom - bytes at buffer head 1469 * skb_headroom - bytes at buffer head
1470 * @skb: buffer to check 1470 * @skb: buffer to check
1471 * 1471 *
1472 * Return the number of bytes of free space at the head of an &sk_buff. 1472 * Return the number of bytes of free space at the head of an &sk_buff.
1473 */ 1473 */
1474 static inline unsigned int skb_headroom(const struct sk_buff *skb) 1474 static inline unsigned int skb_headroom(const struct sk_buff *skb)
1475 { 1475 {
1476 return skb->data - skb->head; 1476 return skb->data - skb->head;
1477 } 1477 }
1478 1478
1479 /** 1479 /**
1480 * skb_tailroom - bytes at buffer end 1480 * skb_tailroom - bytes at buffer end
1481 * @skb: buffer to check 1481 * @skb: buffer to check
1482 * 1482 *
1483 * Return the number of bytes of free space at the tail of an sk_buff 1483 * Return the number of bytes of free space at the tail of an sk_buff
1484 */ 1484 */
1485 static inline int skb_tailroom(const struct sk_buff *skb) 1485 static inline int skb_tailroom(const struct sk_buff *skb)
1486 { 1486 {
1487 return skb_is_nonlinear(skb) ? 0 : skb->end - skb->tail; 1487 return skb_is_nonlinear(skb) ? 0 : skb->end - skb->tail;
1488 } 1488 }
1489 1489
1490 /** 1490 /**
1491 * skb_availroom - bytes at buffer end 1491 * skb_availroom - bytes at buffer end
1492 * @skb: buffer to check 1492 * @skb: buffer to check
1493 * 1493 *
1494 * Return the number of bytes of free space at the tail of an sk_buff 1494 * Return the number of bytes of free space at the tail of an sk_buff
1495 * allocated by sk_stream_alloc() 1495 * allocated by sk_stream_alloc()
1496 */ 1496 */
1497 static inline int skb_availroom(const struct sk_buff *skb) 1497 static inline int skb_availroom(const struct sk_buff *skb)
1498 { 1498 {
1499 if (skb_is_nonlinear(skb)) 1499 if (skb_is_nonlinear(skb))
1500 return 0; 1500 return 0;
1501 1501
1502 return skb->end - skb->tail - skb->reserved_tailroom; 1502 return skb->end - skb->tail - skb->reserved_tailroom;
1503 } 1503 }
1504 1504
1505 /** 1505 /**
1506 * skb_reserve - adjust headroom 1506 * skb_reserve - adjust headroom
1507 * @skb: buffer to alter 1507 * @skb: buffer to alter
1508 * @len: bytes to move 1508 * @len: bytes to move
1509 * 1509 *
1510 * Increase the headroom of an empty &sk_buff by reducing the tail 1510 * Increase the headroom of an empty &sk_buff by reducing the tail
1511 * room. This is only allowed for an empty buffer. 1511 * room. This is only allowed for an empty buffer.
1512 */ 1512 */
1513 static inline void skb_reserve(struct sk_buff *skb, int len) 1513 static inline void skb_reserve(struct sk_buff *skb, int len)
1514 { 1514 {
1515 skb->data += len; 1515 skb->data += len;
1516 skb->tail += len; 1516 skb->tail += len;
1517 } 1517 }
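Putting skb_reserve() together with skb_put() and skb_push() from above, a minimal transmit-side sketch (struct example_hdr and all names are made up; host byte order is used for brevity):

#include <linux/skbuff.h>
#include <linux/string.h>
#include <linux/types.h>

struct example_hdr {			/* hypothetical protocol header */
	u16 type;
	u16 len;
};

static struct sk_buff *example_build(const void *payload, unsigned int plen)
{
	struct example_hdr *hdr;
	struct sk_buff *skb;

	skb = alloc_skb(sizeof(*hdr) + plen, GFP_ATOMIC);
	if (!skb)
		return NULL;

	skb_reserve(skb, sizeof(*hdr));		/* headroom for the header */
	memcpy(skb_put(skb, plen), payload, plen);

	hdr = (struct example_hdr *)skb_push(skb, sizeof(*hdr));
	hdr->type = 0;
	hdr->len = (u16)plen;

	return skb;
}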
1518 1518
1519 static inline void skb_reset_inner_headers(struct sk_buff *skb) 1519 static inline void skb_reset_inner_headers(struct sk_buff *skb)
1520 { 1520 {
1521 skb->inner_mac_header = skb->mac_header; 1521 skb->inner_mac_header = skb->mac_header;
1522 skb->inner_network_header = skb->network_header; 1522 skb->inner_network_header = skb->network_header;
1523 skb->inner_transport_header = skb->transport_header; 1523 skb->inner_transport_header = skb->transport_header;
1524 } 1524 }
1525 1525
1526 static inline void skb_reset_mac_len(struct sk_buff *skb) 1526 static inline void skb_reset_mac_len(struct sk_buff *skb)
1527 { 1527 {
1528 skb->mac_len = skb->network_header - skb->mac_header; 1528 skb->mac_len = skb->network_header - skb->mac_header;
1529 } 1529 }
1530 1530
1531 static inline unsigned char *skb_inner_transport_header(const struct sk_buff 1531 static inline unsigned char *skb_inner_transport_header(const struct sk_buff
1532 *skb) 1532 *skb)
1533 { 1533 {
1534 return skb->head + skb->inner_transport_header; 1534 return skb->head + skb->inner_transport_header;
1535 } 1535 }
1536 1536
1537 static inline void skb_reset_inner_transport_header(struct sk_buff *skb) 1537 static inline void skb_reset_inner_transport_header(struct sk_buff *skb)
1538 { 1538 {
1539 skb->inner_transport_header = skb->data - skb->head; 1539 skb->inner_transport_header = skb->data - skb->head;
1540 } 1540 }
1541 1541
1542 static inline void skb_set_inner_transport_header(struct sk_buff *skb, 1542 static inline void skb_set_inner_transport_header(struct sk_buff *skb,
1543 const int offset) 1543 const int offset)
1544 { 1544 {
1545 skb_reset_inner_transport_header(skb); 1545 skb_reset_inner_transport_header(skb);
1546 skb->inner_transport_header += offset; 1546 skb->inner_transport_header += offset;
1547 } 1547 }
1548 1548
1549 static inline unsigned char *skb_inner_network_header(const struct sk_buff *skb) 1549 static inline unsigned char *skb_inner_network_header(const struct sk_buff *skb)
1550 { 1550 {
1551 return skb->head + skb->inner_network_header; 1551 return skb->head + skb->inner_network_header;
1552 } 1552 }
1553 1553
1554 static inline void skb_reset_inner_network_header(struct sk_buff *skb) 1554 static inline void skb_reset_inner_network_header(struct sk_buff *skb)
1555 { 1555 {
1556 skb->inner_network_header = skb->data - skb->head; 1556 skb->inner_network_header = skb->data - skb->head;
1557 } 1557 }
1558 1558
1559 static inline void skb_set_inner_network_header(struct sk_buff *skb, 1559 static inline void skb_set_inner_network_header(struct sk_buff *skb,
1560 const int offset) 1560 const int offset)
1561 { 1561 {
1562 skb_reset_inner_network_header(skb); 1562 skb_reset_inner_network_header(skb);
1563 skb->inner_network_header += offset; 1563 skb->inner_network_header += offset;
1564 } 1564 }
1565 1565
1566 static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb) 1566 static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb)
1567 { 1567 {
1568 return skb->head + skb->inner_mac_header; 1568 return skb->head + skb->inner_mac_header;
1569 } 1569 }
1570 1570
1571 static inline void skb_reset_inner_mac_header(struct sk_buff *skb) 1571 static inline void skb_reset_inner_mac_header(struct sk_buff *skb)
1572 { 1572 {
1573 skb->inner_mac_header = skb->data - skb->head; 1573 skb->inner_mac_header = skb->data - skb->head;
1574 } 1574 }
1575 1575
1576 static inline void skb_set_inner_mac_header(struct sk_buff *skb, 1576 static inline void skb_set_inner_mac_header(struct sk_buff *skb,
1577 const int offset) 1577 const int offset)
1578 { 1578 {
1579 skb_reset_inner_mac_header(skb); 1579 skb_reset_inner_mac_header(skb);
1580 skb->inner_mac_header += offset; 1580 skb->inner_mac_header += offset;
1581 } 1581 }
1582 static inline bool skb_transport_header_was_set(const struct sk_buff *skb) 1582 static inline bool skb_transport_header_was_set(const struct sk_buff *skb)
1583 { 1583 {
1584 return skb->transport_header != (typeof(skb->transport_header))~0U; 1584 return skb->transport_header != (typeof(skb->transport_header))~0U;
1585 } 1585 }
1586 1586
1587 static inline unsigned char *skb_transport_header(const struct sk_buff *skb) 1587 static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
1588 { 1588 {
1589 return skb->head + skb->transport_header; 1589 return skb->head + skb->transport_header;
1590 } 1590 }
1591 1591
1592 static inline void skb_reset_transport_header(struct sk_buff *skb) 1592 static inline void skb_reset_transport_header(struct sk_buff *skb)
1593 { 1593 {
1594 skb->transport_header = skb->data - skb->head; 1594 skb->transport_header = skb->data - skb->head;
1595 } 1595 }
1596 1596
1597 static inline void skb_set_transport_header(struct sk_buff *skb, 1597 static inline void skb_set_transport_header(struct sk_buff *skb,
1598 const int offset) 1598 const int offset)
1599 { 1599 {
1600 skb_reset_transport_header(skb); 1600 skb_reset_transport_header(skb);
1601 skb->transport_header += offset; 1601 skb->transport_header += offset;
1602 } 1602 }
1603 1603
1604 static inline unsigned char *skb_network_header(const struct sk_buff *skb) 1604 static inline unsigned char *skb_network_header(const struct sk_buff *skb)
1605 { 1605 {
1606 return skb->head + skb->network_header; 1606 return skb->head + skb->network_header;
1607 } 1607 }
1608 1608
1609 static inline void skb_reset_network_header(struct sk_buff *skb) 1609 static inline void skb_reset_network_header(struct sk_buff *skb)
1610 { 1610 {
1611 skb->network_header = skb->data - skb->head; 1611 skb->network_header = skb->data - skb->head;
1612 } 1612 }
1613 1613
1614 static inline void skb_set_network_header(struct sk_buff *skb, const int offset) 1614 static inline void skb_set_network_header(struct sk_buff *skb, const int offset)
1615 { 1615 {
1616 skb_reset_network_header(skb); 1616 skb_reset_network_header(skb);
1617 skb->network_header += offset; 1617 skb->network_header += offset;
1618 } 1618 }
1619 1619
1620 static inline unsigned char *skb_mac_header(const struct sk_buff *skb) 1620 static inline unsigned char *skb_mac_header(const struct sk_buff *skb)
1621 { 1621 {
1622 return skb->head + skb->mac_header; 1622 return skb->head + skb->mac_header;
1623 } 1623 }
1624 1624
1625 static inline int skb_mac_header_was_set(const struct sk_buff *skb) 1625 static inline int skb_mac_header_was_set(const struct sk_buff *skb)
1626 { 1626 {
1627 return skb->mac_header != (typeof(skb->mac_header))~0U; 1627 return skb->mac_header != (typeof(skb->mac_header))~0U;
1628 } 1628 }
1629 1629
1630 static inline void skb_reset_mac_header(struct sk_buff *skb) 1630 static inline void skb_reset_mac_header(struct sk_buff *skb)
1631 { 1631 {
1632 skb->mac_header = skb->data - skb->head; 1632 skb->mac_header = skb->data - skb->head;
1633 } 1633 }
1634 1634
1635 static inline void skb_set_mac_header(struct sk_buff *skb, const int offset) 1635 static inline void skb_set_mac_header(struct sk_buff *skb, const int offset)
1636 { 1636 {
1637 skb_reset_mac_header(skb); 1637 skb_reset_mac_header(skb);
1638 skb->mac_header += offset; 1638 skb->mac_header += offset;
1639 } 1639 }
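As a sketch of how the reset/set helpers above are used while building or decapsulating a packet (offsets assume a plain Ethernet + IPv4 frame; illustrative only):

#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/skbuff.h>

static void example_mark_headers(struct sk_buff *skb)
{
	/* skb->data is assumed to point at the Ethernet header here. */
	skb_reset_mac_header(skb);
	skb_set_network_header(skb, ETH_HLEN);
	skb_set_transport_header(skb, ETH_HLEN + sizeof(struct iphdr));
	skb_reset_mac_len(skb);		/* mac_len = network - mac = ETH_HLEN */
}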
1640 1640
1641 static inline void skb_probe_transport_header(struct sk_buff *skb, 1641 static inline void skb_probe_transport_header(struct sk_buff *skb,
1642 const int offset_hint) 1642 const int offset_hint)
1643 { 1643 {
1644 struct flow_keys keys; 1644 struct flow_keys keys;
1645 1645
1646 if (skb_transport_header_was_set(skb)) 1646 if (skb_transport_header_was_set(skb))
1647 return; 1647 return;
1648 else if (skb_flow_dissect(skb, &keys)) 1648 else if (skb_flow_dissect(skb, &keys))
1649 skb_set_transport_header(skb, keys.thoff); 1649 skb_set_transport_header(skb, keys.thoff);
1650 else 1650 else
1651 skb_set_transport_header(skb, offset_hint); 1651 skb_set_transport_header(skb, offset_hint);
1652 } 1652 }
1653 1653
1654 static inline void skb_mac_header_rebuild(struct sk_buff *skb) 1654 static inline void skb_mac_header_rebuild(struct sk_buff *skb)
1655 { 1655 {
1656 if (skb_mac_header_was_set(skb)) { 1656 if (skb_mac_header_was_set(skb)) {
1657 const unsigned char *old_mac = skb_mac_header(skb); 1657 const unsigned char *old_mac = skb_mac_header(skb);
1658 1658
1659 skb_set_mac_header(skb, -skb->mac_len); 1659 skb_set_mac_header(skb, -skb->mac_len);
1660 memmove(skb_mac_header(skb), old_mac, skb->mac_len); 1660 memmove(skb_mac_header(skb), old_mac, skb->mac_len);
1661 } 1661 }
1662 } 1662 }
1663 1663
1664 static inline int skb_checksum_start_offset(const struct sk_buff *skb) 1664 static inline int skb_checksum_start_offset(const struct sk_buff *skb)
1665 { 1665 {
1666 return skb->csum_start - skb_headroom(skb); 1666 return skb->csum_start - skb_headroom(skb);
1667 } 1667 }
1668 1668
1669 static inline int skb_transport_offset(const struct sk_buff *skb) 1669 static inline int skb_transport_offset(const struct sk_buff *skb)
1670 { 1670 {
1671 return skb_transport_header(skb) - skb->data; 1671 return skb_transport_header(skb) - skb->data;
1672 } 1672 }
1673 1673
1674 static inline u32 skb_network_header_len(const struct sk_buff *skb) 1674 static inline u32 skb_network_header_len(const struct sk_buff *skb)
1675 { 1675 {
1676 return skb->transport_header - skb->network_header; 1676 return skb->transport_header - skb->network_header;
1677 } 1677 }
1678 1678
1679 static inline u32 skb_inner_network_header_len(const struct sk_buff *skb) 1679 static inline u32 skb_inner_network_header_len(const struct sk_buff *skb)
1680 { 1680 {
1681 return skb->inner_transport_header - skb->inner_network_header; 1681 return skb->inner_transport_header - skb->inner_network_header;
1682 } 1682 }
1683 1683
1684 static inline int skb_network_offset(const struct sk_buff *skb) 1684 static inline int skb_network_offset(const struct sk_buff *skb)
1685 { 1685 {
1686 return skb_network_header(skb) - skb->data; 1686 return skb_network_header(skb) - skb->data;
1687 } 1687 }
1688 1688
1689 static inline int skb_inner_network_offset(const struct sk_buff *skb) 1689 static inline int skb_inner_network_offset(const struct sk_buff *skb)
1690 { 1690 {
1691 return skb_inner_network_header(skb) - skb->data; 1691 return skb_inner_network_header(skb) - skb->data;
1692 } 1692 }
1693 1693
1694 static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len) 1694 static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len)
1695 { 1695 {
1696 return pskb_may_pull(skb, skb_network_offset(skb) + len); 1696 return pskb_may_pull(skb, skb_network_offset(skb) + len);
1697 } 1697 }
1698 1698
1699 /* 1699 /*
1700 * CPUs often take a performance hit when accessing unaligned memory 1700 * CPUs often take a performance hit when accessing unaligned memory
1701 * locations. The actual performance hit varies, it can be small if the 1701 * locations. The actual performance hit varies, it can be small if the
1702 * hardware handles it or large if we have to take an exception and fix it 1702 * hardware handles it or large if we have to take an exception and fix it
1703 * in software. 1703 * in software.
1704 * 1704 *
1705 * Since an ethernet header is 14 bytes network drivers often end up with 1705 * Since an ethernet header is 14 bytes network drivers often end up with
1706 * the IP header at an unaligned offset. The IP header can be aligned by 1706 * the IP header at an unaligned offset. The IP header can be aligned by
1707 * shifting the start of the packet by 2 bytes. Drivers should do this 1707 * shifting the start of the packet by 2 bytes. Drivers should do this
1708 * with: 1708 * with:
1709 * 1709 *
1710 * skb_reserve(skb, NET_IP_ALIGN); 1710 * skb_reserve(skb, NET_IP_ALIGN);
1711 * 1711 *
1712 * The downside to this alignment of the IP header is that the DMA is now 1712 * The downside to this alignment of the IP header is that the DMA is now
1713 * unaligned. On some architectures the cost of an unaligned DMA is high 1713 * unaligned. On some architectures the cost of an unaligned DMA is high
1714 * and this cost outweighs the gains made by aligning the IP header. 1714 * and this cost outweighs the gains made by aligning the IP header.
1715 * 1715 *
1716 * Since this trade off varies between architectures, we allow NET_IP_ALIGN 1716 * Since this trade off varies between architectures, we allow NET_IP_ALIGN
1717 * to be overridden. 1717 * to be overridden.
1718 */ 1718 */
1719 #ifndef NET_IP_ALIGN 1719 #ifndef NET_IP_ALIGN
1720 #define NET_IP_ALIGN 2 1720 #define NET_IP_ALIGN 2
1721 #endif 1721 #endif
1722 1722
1723 /* 1723 /*
1724 * The networking layer reserves some headroom in skb data (via 1724 * The networking layer reserves some headroom in skb data (via
1725 * dev_alloc_skb). This is used to avoid having to reallocate skb data when 1725 * dev_alloc_skb). This is used to avoid having to reallocate skb data when
1726 * the header has to grow. In the default case, if the header has to grow 1726 * the header has to grow. In the default case, if the header has to grow
1727 * 32 bytes or less we avoid the reallocation. 1727 * 32 bytes or less we avoid the reallocation.
1728 * 1728 *
1729 * Unfortunately this headroom changes the DMA alignment of the resulting 1729 * Unfortunately this headroom changes the DMA alignment of the resulting
1730 * network packet. As for NET_IP_ALIGN, this unaligned DMA is expensive 1730 * network packet. As for NET_IP_ALIGN, this unaligned DMA is expensive
1731 * on some architectures. An architecture can override this value, 1731 * on some architectures. An architecture can override this value,
1732 * perhaps setting it to a cacheline in size (since that will maintain 1732 * perhaps setting it to a cacheline in size (since that will maintain
1733 * cacheline alignment of the DMA). It must be a power of 2. 1733 * cacheline alignment of the DMA). It must be a power of 2.
1734 * 1734 *
1735 * Various parts of the networking layer expect at least 32 bytes of 1735 * Various parts of the networking layer expect at least 32 bytes of
1736 * headroom; you should not reduce this. 1736 * headroom; you should not reduce this.
1737 * 1737 *
1738 * Using max(32, L1_CACHE_BYTES) makes sense (especially with RPS) 1738 * Using max(32, L1_CACHE_BYTES) makes sense (especially with RPS)
1739 * to reduce average number of cache lines per packet. 1739 * to reduce average number of cache lines per packet.
1740 * get_rps_cpus() for example only accesses one 64-byte aligned block: 1740 * get_rps_cpus() for example only accesses one 64-byte aligned block:
1741 * NET_IP_ALIGN(2) + ethernet_header(14) + IP_header(20/40) + ports(8) 1741 * NET_IP_ALIGN(2) + ethernet_header(14) + IP_header(20/40) + ports(8)
1742 */ 1742 */
1743 #ifndef NET_SKB_PAD 1743 #ifndef NET_SKB_PAD
1744 #define NET_SKB_PAD max(32, L1_CACHE_BYTES) 1744 #define NET_SKB_PAD max(32, L1_CACHE_BYTES)
1745 #endif 1745 #endif
1746 1746
1747 int ___pskb_trim(struct sk_buff *skb, unsigned int len); 1747 int ___pskb_trim(struct sk_buff *skb, unsigned int len);
1748 1748
1749 static inline void __skb_trim(struct sk_buff *skb, unsigned int len) 1749 static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
1750 { 1750 {
1751 if (unlikely(skb_is_nonlinear(skb))) { 1751 if (unlikely(skb_is_nonlinear(skb))) {
1752 WARN_ON(1); 1752 WARN_ON(1);
1753 return; 1753 return;
1754 } 1754 }
1755 skb->len = len; 1755 skb->len = len;
1756 skb_set_tail_pointer(skb, len); 1756 skb_set_tail_pointer(skb, len);
1757 } 1757 }
1758 1758
1759 void skb_trim(struct sk_buff *skb, unsigned int len); 1759 void skb_trim(struct sk_buff *skb, unsigned int len);
1760 1760
1761 static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) 1761 static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
1762 { 1762 {
1763 if (skb->data_len) 1763 if (skb->data_len)
1764 return ___pskb_trim(skb, len); 1764 return ___pskb_trim(skb, len);
1765 __skb_trim(skb, len); 1765 __skb_trim(skb, len);
1766 return 0; 1766 return 0;
1767 } 1767 }
1768 1768
1769 static inline int pskb_trim(struct sk_buff *skb, unsigned int len) 1769 static inline int pskb_trim(struct sk_buff *skb, unsigned int len)
1770 { 1770 {
1771 return (len < skb->len) ? __pskb_trim(skb, len) : 0; 1771 return (len < skb->len) ? __pskb_trim(skb, len) : 0;
1772 } 1772 }
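A typical use of pskb_trim(), sketched with the assumption that the network header has already been set, is dropping link-layer padding beyond the length the IPv4 header advertises:

#include <linux/ip.h>
#include <linux/skbuff.h>

static int example_trim_padding(struct sk_buff *skb)
{
	unsigned int len = ntohs(ip_hdr(skb)->tot_len);

	if (skb->len > len)
		return pskb_trim(skb, len);	/* drop trailing padding */
	return 0;
}

When hardware may have computed a CHECKSUM_COMPLETE value, the pskb_trim_rcsum() variant in this header is the one to use so the stored checksum is not left stale.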
1773 1773
1774 /** 1774 /**
1775 * pskb_trim_unique - remove end from a paged unique (not cloned) buffer 1775 * pskb_trim_unique - remove end from a paged unique (not cloned) buffer
1776 * @skb: buffer to alter 1776 * @skb: buffer to alter
1777 * @len: new length 1777 * @len: new length
1778 * 1778 *
1779 * This is identical to pskb_trim except that the caller knows that 1779 * This is identical to pskb_trim except that the caller knows that
1780 * the skb is not cloned so we should never get an error due to out- 1780 * the skb is not cloned so we should never get an error due to out-
1781 * of-memory. 1781 * of-memory.
1782 */ 1782 */
1783 static inline void pskb_trim_unique(struct sk_buff *skb, unsigned int len) 1783 static inline void pskb_trim_unique(struct sk_buff *skb, unsigned int len)
1784 { 1784 {
1785 int err = pskb_trim(skb, len); 1785 int err = pskb_trim(skb, len);
1786 BUG_ON(err); 1786 BUG_ON(err);
1787 } 1787 }
1788 1788
1789 /** 1789 /**
1790 * skb_orphan - orphan a buffer 1790 * skb_orphan - orphan a buffer
1791 * @skb: buffer to orphan 1791 * @skb: buffer to orphan
1792 * 1792 *
1793 * If a buffer currently has an owner then we call the owner's 1793 * If a buffer currently has an owner then we call the owner's
1794 * destructor function and make the @skb unowned. The buffer continues 1794 * destructor function and make the @skb unowned. The buffer continues
1795 * to exist but is no longer charged to its former owner. 1795 * to exist but is no longer charged to its former owner.
1796 */ 1796 */
1797 static inline void skb_orphan(struct sk_buff *skb) 1797 static inline void skb_orphan(struct sk_buff *skb)
1798 { 1798 {
1799 if (skb->destructor) { 1799 if (skb->destructor) {
1800 skb->destructor(skb); 1800 skb->destructor(skb);
1801 skb->destructor = NULL; 1801 skb->destructor = NULL;
1802 skb->sk = NULL; 1802 skb->sk = NULL;
1803 } else { 1803 } else {
1804 BUG_ON(skb->sk); 1804 BUG_ON(skb->sk);
1805 } 1805 }
1806 } 1806 }
1807 1807
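A hedged sketch of a typical caller: code that is about to hold an skb for an unbounded time orphans it first so the sending socket's memory accounting is released. The parking list and function name are invented for the example.

/* Illustrative only: detach the skb from its owning socket, then park
 * it on a driver-private list. skb_queue_tail() takes the list lock.
 */
static void example_park_skb(struct sk_buff_head *park_list,
			     struct sk_buff *skb)
{
	skb_orphan(skb);		/* runs destructor, clears skb->sk */
	skb_queue_tail(park_list, skb);
}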
1808 /** 1808 /**
1809 * skb_orphan_frags - orphan the frags contained in a buffer 1809 * skb_orphan_frags - orphan the frags contained in a buffer
1810 * @skb: buffer to orphan frags from 1810 * @skb: buffer to orphan frags from
1811 * @gfp_mask: allocation mask for replacement pages 1811 * @gfp_mask: allocation mask for replacement pages
1812 * 1812 *
1813 * For each frag in the SKB which needs a destructor (i.e. has an 1813 * For each frag in the SKB which needs a destructor (i.e. has an
1814 * owner) create a copy of that frag and release the original 1814 * owner) create a copy of that frag and release the original
1815 * page by calling the destructor. 1815 * page by calling the destructor.
1816 */ 1816 */
1817 static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask) 1817 static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask)
1818 { 1818 {
1819 if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY))) 1819 if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY)))
1820 return 0; 1820 return 0;
1821 return skb_copy_ubufs(skb, gfp_mask); 1821 return skb_copy_ubufs(skb, gfp_mask);
1822 } 1822 }
1823 1823
1824 /** 1824 /**
1825 * __skb_queue_purge - empty a list 1825 * __skb_queue_purge - empty a list
1826 * @list: list to empty 1826 * @list: list to empty
1827 * 1827 *
1828 * Delete all buffers on an &sk_buff list. Each buffer is removed from 1828 * Delete all buffers on an &sk_buff list. Each buffer is removed from
1829 * the list and one reference dropped. This function does not take the 1829 * the list and one reference dropped. This function does not take the
1830 * list lock and the caller must hold the relevant locks to use it. 1830 * list lock and the caller must hold the relevant locks to use it.
1831 */ 1831 */
1832 void skb_queue_purge(struct sk_buff_head *list); 1832 void skb_queue_purge(struct sk_buff_head *list);
1833 static inline void __skb_queue_purge(struct sk_buff_head *list) 1833 static inline void __skb_queue_purge(struct sk_buff_head *list)
1834 { 1834 {
1835 struct sk_buff *skb; 1835 struct sk_buff *skb;
1836 while ((skb = __skb_dequeue(list)) != NULL) 1836 while ((skb = __skb_dequeue(list)) != NULL)
1837 kfree_skb(skb); 1837 kfree_skb(skb);
1838 } 1838 }
1839 1839
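For illustration, a teardown helper might choose between the two purge variants like this; the backlog list and its lock are assumptions of the example, not something this header mandates.

/* Sketch: free every skb on a private backlog. __skb_queue_purge()
 * is the lock-free variant, so the caller supplies the locking.
 */
static void example_flush_backlog(struct sk_buff_head *backlog,
				  spinlock_t *lock)
{
	spin_lock_bh(lock);
	__skb_queue_purge(backlog);
	spin_unlock_bh(lock);
}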
1840 #define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768) 1840 #define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768)
1841 #define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER) 1841 #define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
1842 #define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE 1842 #define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE
1843 1843
1844 void *netdev_alloc_frag(unsigned int fragsz); 1844 void *netdev_alloc_frag(unsigned int fragsz);
1845 1845
1846 struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, 1846 struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length,
1847 gfp_t gfp_mask); 1847 gfp_t gfp_mask);
1848 1848
1849 /** 1849 /**
1850 * netdev_alloc_skb - allocate an skbuff for rx on a specific device 1850 * netdev_alloc_skb - allocate an skbuff for rx on a specific device
1851 * @dev: network device to receive on 1851 * @dev: network device to receive on
1852 * @length: length to allocate 1852 * @length: length to allocate
1853 * 1853 *
1854 * Allocate a new &sk_buff and assign it a usage count of one. The 1854 * Allocate a new &sk_buff and assign it a usage count of one. The
1855 * buffer has unspecified headroom built in. Users should allocate 1855 * buffer has unspecified headroom built in. Users should allocate
1856 * the headroom they think they need without accounting for the 1856 * the headroom they think they need without accounting for the
1857 * built-in space. The built-in space is used for optimisations. 1857 * built-in space. The built-in space is used for optimisations.
1858 * 1858 *
1859 * %NULL is returned if there is no free memory. Although this function 1859 * %NULL is returned if there is no free memory. Although this function
1860 * allocates memory it can be called from an interrupt. 1860 * allocates memory it can be called from an interrupt.
1861 */ 1861 */
1862 static inline struct sk_buff *netdev_alloc_skb(struct net_device *dev, 1862 static inline struct sk_buff *netdev_alloc_skb(struct net_device *dev,
1863 unsigned int length) 1863 unsigned int length)
1864 { 1864 {
1865 return __netdev_alloc_skb(dev, length, GFP_ATOMIC); 1865 return __netdev_alloc_skb(dev, length, GFP_ATOMIC);
1866 } 1866 }
1867 1867
1868 /* legacy helper around __netdev_alloc_skb() */ 1868 /* legacy helper around __netdev_alloc_skb() */
1869 static inline struct sk_buff *__dev_alloc_skb(unsigned int length, 1869 static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
1870 gfp_t gfp_mask) 1870 gfp_t gfp_mask)
1871 { 1871 {
1872 return __netdev_alloc_skb(NULL, length, gfp_mask); 1872 return __netdev_alloc_skb(NULL, length, gfp_mask);
1873 } 1873 }
1874 1874
1875 /* legacy helper around netdev_alloc_skb() */ 1875 /* legacy helper around netdev_alloc_skb() */
1876 static inline struct sk_buff *dev_alloc_skb(unsigned int length) 1876 static inline struct sk_buff *dev_alloc_skb(unsigned int length)
1877 { 1877 {
1878 return netdev_alloc_skb(NULL, length); 1878 return netdev_alloc_skb(NULL, length);
1879 } 1879 }
1880 1880
1881 1881
1882 static inline struct sk_buff *__netdev_alloc_skb_ip_align(struct net_device *dev, 1882 static inline struct sk_buff *__netdev_alloc_skb_ip_align(struct net_device *dev,
1883 unsigned int length, gfp_t gfp) 1883 unsigned int length, gfp_t gfp)
1884 { 1884 {
1885 struct sk_buff *skb = __netdev_alloc_skb(dev, length + NET_IP_ALIGN, gfp); 1885 struct sk_buff *skb = __netdev_alloc_skb(dev, length + NET_IP_ALIGN, gfp);
1886 1886
1887 if (NET_IP_ALIGN && skb) 1887 if (NET_IP_ALIGN && skb)
1888 skb_reserve(skb, NET_IP_ALIGN); 1888 skb_reserve(skb, NET_IP_ALIGN);
1889 return skb; 1889 return skb;
1890 } 1890 }
1891 1891
1892 static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev, 1892 static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
1893 unsigned int length) 1893 unsigned int length)
1894 { 1894 {
1895 return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC); 1895 return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC);
1896 } 1896 }
1897 1897
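A rough sketch of a copying RX path built on these allocation helpers; the function name and the use of eth_type_trans() (from <linux/etherdevice.h>) are example choices. Reserving NET_IP_ALIGN (2 bytes) means that after the 14-byte Ethernet header the IP header starts at offset 16, i.e. on a 4-byte boundary.

/* Hypothetical copybreak-style receive: allocate an aligned skb and
 * copy the frame in. GFP_ATOMIC is implied by the helper.
 */
static struct sk_buff *example_rx_copy(struct net_device *dev,
				       const void *frame, unsigned int len)
{
	struct sk_buff *skb = netdev_alloc_skb_ip_align(dev, len);

	if (!skb)
		return NULL;			/* out of memory */
	memcpy(skb_put(skb, len), frame, len);	/* payload into linear area */
	skb->protocol = eth_type_trans(skb, dev);
	return skb;
}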
1898 /** 1898 /**
1899 * __skb_alloc_pages - allocate pages for ps-rx on a skb and preserve pfmemalloc data 1899 * __skb_alloc_pages - allocate pages for ps-rx on a skb and preserve pfmemalloc data
1900 * @gfp_mask: alloc_pages_node mask. Set __GFP_NOMEMALLOC if not for network packet RX 1900 * @gfp_mask: alloc_pages_node mask. Set __GFP_NOMEMALLOC if not for network packet RX
1901 * @skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used 1901 * @skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used
1902 * @order: size of the allocation 1902 * @order: size of the allocation
1903 * 1903 *
1904 * Allocate a new page. 1904 * Allocate a new page.
1905 * 1905 *
1906 * %NULL is returned if there is no free memory. 1906 * %NULL is returned if there is no free memory.
1907 */ 1907 */
1908 static inline struct page *__skb_alloc_pages(gfp_t gfp_mask, 1908 static inline struct page *__skb_alloc_pages(gfp_t gfp_mask,
1909 struct sk_buff *skb, 1909 struct sk_buff *skb,
1910 unsigned int order) 1910 unsigned int order)
1911 { 1911 {
1912 struct page *page; 1912 struct page *page;
1913 1913
1914 gfp_mask |= __GFP_COLD; 1914 gfp_mask |= __GFP_COLD;
1915 1915
1916 if (!(gfp_mask & __GFP_NOMEMALLOC)) 1916 if (!(gfp_mask & __GFP_NOMEMALLOC))
1917 gfp_mask |= __GFP_MEMALLOC; 1917 gfp_mask |= __GFP_MEMALLOC;
1918 1918
1919 page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); 1919 page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
1920 if (skb && page && page->pfmemalloc) 1920 if (skb && page && page->pfmemalloc)
1921 skb->pfmemalloc = true; 1921 skb->pfmemalloc = true;
1922 1922
1923 return page; 1923 return page;
1924 } 1924 }
1925 1925
1926 /** 1926 /**
1927 * __skb_alloc_page - allocate a page for ps-rx for a given skb and preserve pfmemalloc data 1927 * __skb_alloc_page - allocate a page for ps-rx for a given skb and preserve pfmemalloc data
1928 * @gfp_mask: alloc_pages_node mask. Set __GFP_NOMEMALLOC if not for network packet RX 1928 * @gfp_mask: alloc_pages_node mask. Set __GFP_NOMEMALLOC if not for network packet RX
1929 * @skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used 1929 * @skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used
1930 * 1930 *
1931 * Allocate a new page. 1931 * Allocate a new page.
1932 * 1932 *
1933 * %NULL is returned if there is no free memory. 1933 * %NULL is returned if there is no free memory.
1934 */ 1934 */
1935 static inline struct page *__skb_alloc_page(gfp_t gfp_mask, 1935 static inline struct page *__skb_alloc_page(gfp_t gfp_mask,
1936 struct sk_buff *skb) 1936 struct sk_buff *skb)
1937 { 1937 {
1938 return __skb_alloc_pages(gfp_mask, skb, 0); 1938 return __skb_alloc_pages(gfp_mask, skb, 0);
1939 } 1939 }
1940 1940
1941 /** 1941 /**
1942 * skb_propagate_pfmemalloc - Propagate pfmemalloc if skb is allocated after RX page 1942 * skb_propagate_pfmemalloc - Propagate pfmemalloc if skb is allocated after RX page
1943 * @page: The page that was allocated from skb_alloc_page 1943 * @page: The page that was allocated from skb_alloc_page
1944 * @skb: The skb that may need pfmemalloc set 1944 * @skb: The skb that may need pfmemalloc set
1945 */ 1945 */
1946 static inline void skb_propagate_pfmemalloc(struct page *page, 1946 static inline void skb_propagate_pfmemalloc(struct page *page,
1947 struct sk_buff *skb) 1947 struct sk_buff *skb)
1948 { 1948 {
1949 if (page && page->pfmemalloc) 1949 if (page && page->pfmemalloc)
1950 skb->pfmemalloc = true; 1950 skb->pfmemalloc = true;
1951 } 1951 }
1952 1952
1953 /** 1953 /**
1954 * skb_frag_page - retrieve the page referred to by a paged fragment 1954 * skb_frag_page - retrieve the page referred to by a paged fragment
1955 * @frag: the paged fragment 1955 * @frag: the paged fragment
1956 * 1956 *
1957 * Returns the &struct page associated with @frag. 1957 * Returns the &struct page associated with @frag.
1958 */ 1958 */
1959 static inline struct page *skb_frag_page(const skb_frag_t *frag) 1959 static inline struct page *skb_frag_page(const skb_frag_t *frag)
1960 { 1960 {
1961 return frag->page.p; 1961 return frag->page.p;
1962 } 1962 }
1963 1963
1964 /** 1964 /**
1965 * __skb_frag_ref - take an additional reference on a paged fragment. 1965 * __skb_frag_ref - take an additional reference on a paged fragment.
1966 * @frag: the paged fragment 1966 * @frag: the paged fragment
1967 * 1967 *
1968 * Takes an additional reference on the paged fragment @frag. 1968 * Takes an additional reference on the paged fragment @frag.
1969 */ 1969 */
1970 static inline void __skb_frag_ref(skb_frag_t *frag) 1970 static inline void __skb_frag_ref(skb_frag_t *frag)
1971 { 1971 {
1972 get_page(skb_frag_page(frag)); 1972 get_page(skb_frag_page(frag));
1973 } 1973 }
1974 1974
1975 /** 1975 /**
1976 * skb_frag_ref - take an additional reference on a paged fragment of an skb. 1976 * skb_frag_ref - take an additional reference on a paged fragment of an skb.
1977 * @skb: the buffer 1977 * @skb: the buffer
1978 * @f: the fragment offset. 1978 * @f: the fragment offset.
1979 * 1979 *
1980 * Takes an additional reference on the @f'th paged fragment of @skb. 1980 * Takes an additional reference on the @f'th paged fragment of @skb.
1981 */ 1981 */
1982 static inline void skb_frag_ref(struct sk_buff *skb, int f) 1982 static inline void skb_frag_ref(struct sk_buff *skb, int f)
1983 { 1983 {
1984 __skb_frag_ref(&skb_shinfo(skb)->frags[f]); 1984 __skb_frag_ref(&skb_shinfo(skb)->frags[f]);
1985 } 1985 }
1986 1986
1987 /** 1987 /**
1988 * __skb_frag_unref - release a reference on a paged fragment. 1988 * __skb_frag_unref - release a reference on a paged fragment.
1989 * @frag: the paged fragment 1989 * @frag: the paged fragment
1990 * 1990 *
1991 * Releases a reference on the paged fragment @frag. 1991 * Releases a reference on the paged fragment @frag.
1992 */ 1992 */
1993 static inline void __skb_frag_unref(skb_frag_t *frag) 1993 static inline void __skb_frag_unref(skb_frag_t *frag)
1994 { 1994 {
1995 put_page(skb_frag_page(frag)); 1995 put_page(skb_frag_page(frag));
1996 } 1996 }
1997 1997
1998 /** 1998 /**
1999 * skb_frag_unref - release a reference on a paged fragment of an skb. 1999 * skb_frag_unref - release a reference on a paged fragment of an skb.
2000 * @skb: the buffer 2000 * @skb: the buffer
2001 * @f: the fragment offset 2001 * @f: the fragment offset
2002 * 2002 *
2003 * Releases a reference on the @f'th paged fragment of @skb. 2003 * Releases a reference on the @f'th paged fragment of @skb.
2004 */ 2004 */
2005 static inline void skb_frag_unref(struct sk_buff *skb, int f) 2005 static inline void skb_frag_unref(struct sk_buff *skb, int f)
2006 { 2006 {
2007 __skb_frag_unref(&skb_shinfo(skb)->frags[f]); 2007 __skb_frag_unref(&skb_shinfo(skb)->frags[f]);
2008 } 2008 }
2009 2009
2010 /** 2010 /**
2011 * skb_frag_address - gets the address of the data contained in a paged fragment 2011 * skb_frag_address - gets the address of the data contained in a paged fragment
2012 * @frag: the paged fragment buffer 2012 * @frag: the paged fragment buffer
2013 * 2013 *
2014 * Returns the address of the data within @frag. The page must already 2014 * Returns the address of the data within @frag. The page must already
2015 * be mapped. 2015 * be mapped.
2016 */ 2016 */
2017 static inline void *skb_frag_address(const skb_frag_t *frag) 2017 static inline void *skb_frag_address(const skb_frag_t *frag)
2018 { 2018 {
2019 return page_address(skb_frag_page(frag)) + frag->page_offset; 2019 return page_address(skb_frag_page(frag)) + frag->page_offset;
2020 } 2020 }
2021 2021
2022 /** 2022 /**
2023 * skb_frag_address_safe - gets the address of the data contained in a paged fragment 2023 * skb_frag_address_safe - gets the address of the data contained in a paged fragment
2024 * @frag: the paged fragment buffer 2024 * @frag: the paged fragment buffer
2025 * 2025 *
2026 * Returns the address of the data within @frag. Checks that the page 2026 * Returns the address of the data within @frag. Checks that the page
2027 * is mapped and returns %NULL otherwise. 2027 * is mapped and returns %NULL otherwise.
2028 */ 2028 */
2029 static inline void *skb_frag_address_safe(const skb_frag_t *frag) 2029 static inline void *skb_frag_address_safe(const skb_frag_t *frag)
2030 { 2030 {
2031 void *ptr = page_address(skb_frag_page(frag)); 2031 void *ptr = page_address(skb_frag_page(frag));
2032 if (unlikely(!ptr)) 2032 if (unlikely(!ptr))
2033 return NULL; 2033 return NULL;
2034 2034
2035 return ptr + frag->page_offset; 2035 return ptr + frag->page_offset;
2036 } 2036 }
2037 2037
2038 /** 2038 /**
2039 * __skb_frag_set_page - sets the page contained in a paged fragment 2039 * __skb_frag_set_page - sets the page contained in a paged fragment
2040 * @frag: the paged fragment 2040 * @frag: the paged fragment
2041 * @page: the page to set 2041 * @page: the page to set
2042 * 2042 *
2043 * Sets the fragment @frag to contain @page. 2043 * Sets the fragment @frag to contain @page.
2044 */ 2044 */
2045 static inline void __skb_frag_set_page(skb_frag_t *frag, struct page *page) 2045 static inline void __skb_frag_set_page(skb_frag_t *frag, struct page *page)
2046 { 2046 {
2047 frag->page.p = page; 2047 frag->page.p = page;
2048 } 2048 }
2049 2049
2050 /** 2050 /**
2051 * skb_frag_set_page - sets the page contained in a paged fragment of an skb 2051 * skb_frag_set_page - sets the page contained in a paged fragment of an skb
2052 * @skb: the buffer 2052 * @skb: the buffer
2053 * @f: the fragment offset 2053 * @f: the fragment offset
2054 * @page: the page to set 2054 * @page: the page to set
2055 * 2055 *
2056 * Sets the @f'th fragment of @skb to contain @page. 2056 * Sets the @f'th fragment of @skb to contain @page.
2057 */ 2057 */
2058 static inline void skb_frag_set_page(struct sk_buff *skb, int f, 2058 static inline void skb_frag_set_page(struct sk_buff *skb, int f,
2059 struct page *page) 2059 struct page *page)
2060 { 2060 {
2061 __skb_frag_set_page(&skb_shinfo(skb)->frags[f], page); 2061 __skb_frag_set_page(&skb_shinfo(skb)->frags[f], page);
2062 } 2062 }
2063 2063
2064 bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio); 2064 bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio);
2065 2065
2066 /** 2066 /**
2067 * skb_frag_dma_map - maps a paged fragment via the DMA API 2067 * skb_frag_dma_map - maps a paged fragment via the DMA API
2068 * @dev: the device to map the fragment to 2068 * @dev: the device to map the fragment to
2069 * @frag: the paged fragment to map 2069 * @frag: the paged fragment to map
2070 * @offset: the offset within the fragment (starting at the 2070 * @offset: the offset within the fragment (starting at the
2071 * fragment's own offset) 2071 * fragment's own offset)
2072 * @size: the number of bytes to map 2072 * @size: the number of bytes to map
2073 * @dir: the direction of the mapping (%PCI_DMA_*) 2073 * @dir: the direction of the mapping (%PCI_DMA_*)
2074 * 2074 *
2075 * Maps the page associated with @frag to @dev. 2075 * Maps the page associated with @frag to @dev.
2076 */ 2076 */
2077 static inline dma_addr_t skb_frag_dma_map(struct device *dev, 2077 static inline dma_addr_t skb_frag_dma_map(struct device *dev,
2078 const skb_frag_t *frag, 2078 const skb_frag_t *frag,
2079 size_t offset, size_t size, 2079 size_t offset, size_t size,
2080 enum dma_data_direction dir) 2080 enum dma_data_direction dir)
2081 { 2081 {
2082 return dma_map_page(dev, skb_frag_page(frag), 2082 return dma_map_page(dev, skb_frag_page(frag),
2083 frag->page_offset + offset, size, dir); 2083 frag->page_offset + offset, size, dir);
2084 } 2084 }
2085 2085
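A hedged example of the intended use in a transmit path: map one paged fragment for DMA and check the result. The function name and the zero-as-error convention are assumptions of the sketch; real drivers track descriptors and unwind all previously mapped fragments on failure.

/* Sketch: DMA-map fragment @i of @skb for transmission. */
static dma_addr_t example_map_tx_frag(struct device *dev,
				      struct sk_buff *skb, int i)
{
	const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
	dma_addr_t addr;

	addr = skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag),
				DMA_TO_DEVICE);
	if (dma_mapping_error(dev, addr))
		return 0;	/* caller unmaps already-mapped frags */
	return addr;
}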
2086 static inline struct sk_buff *pskb_copy(struct sk_buff *skb, 2086 static inline struct sk_buff *pskb_copy(struct sk_buff *skb,
2087 gfp_t gfp_mask) 2087 gfp_t gfp_mask)
2088 { 2088 {
2089 return __pskb_copy(skb, skb_headroom(skb), gfp_mask); 2089 return __pskb_copy(skb, skb_headroom(skb), gfp_mask);
2090 } 2090 }
2091 2091
2092 /** 2092 /**
2093 * skb_clone_writable - is the header of a clone writable 2093 * skb_clone_writable - is the header of a clone writable
2094 * @skb: buffer to check 2094 * @skb: buffer to check
2095 * @len: length up to which to write 2095 * @len: length up to which to write
2096 * 2096 *
2097 * Returns true if modifying the header part of the cloned buffer 2097 * Returns true if modifying the header part of the cloned buffer
2098 * does not require the data to be copied. 2098 * does not require the data to be copied.
2099 */ 2099 */
2100 static inline int skb_clone_writable(const struct sk_buff *skb, unsigned int len) 2100 static inline int skb_clone_writable(const struct sk_buff *skb, unsigned int len)
2101 { 2101 {
2102 return !skb_header_cloned(skb) && 2102 return !skb_header_cloned(skb) &&
2103 skb_headroom(skb) + len <= skb->hdr_len; 2103 skb_headroom(skb) + len <= skb->hdr_len;
2104 } 2104 }
2105 2105
2106 static inline int __skb_cow(struct sk_buff *skb, unsigned int headroom, 2106 static inline int __skb_cow(struct sk_buff *skb, unsigned int headroom,
2107 int cloned) 2107 int cloned)
2108 { 2108 {
2109 int delta = 0; 2109 int delta = 0;
2110 2110
2111 if (headroom > skb_headroom(skb)) 2111 if (headroom > skb_headroom(skb))
2112 delta = headroom - skb_headroom(skb); 2112 delta = headroom - skb_headroom(skb);
2113 2113
2114 if (delta || cloned) 2114 if (delta || cloned)
2115 return pskb_expand_head(skb, ALIGN(delta, NET_SKB_PAD), 0, 2115 return pskb_expand_head(skb, ALIGN(delta, NET_SKB_PAD), 0,
2116 GFP_ATOMIC); 2116 GFP_ATOMIC);
2117 return 0; 2117 return 0;
2118 } 2118 }
2119 2119
2120 /** 2120 /**
2121 * skb_cow - copy header of skb when it is required 2121 * skb_cow - copy header of skb when it is required
2122 * @skb: buffer to cow 2122 * @skb: buffer to cow
2123 * @headroom: needed headroom 2123 * @headroom: needed headroom
2124 * 2124 *
2125 * If the skb passed lacks sufficient headroom or its data part 2125 * If the skb passed lacks sufficient headroom or its data part
2126 * is shared, data is reallocated. If reallocation fails, an error 2126 * is shared, data is reallocated. If reallocation fails, an error
2127 * is returned and the original skb is not changed. 2127 * is returned and the original skb is not changed.
2128 * 2128 *
2129 * The result is skb with writable area skb->head...skb->tail 2129 * The result is skb with writable area skb->head...skb->tail
2130 * and at least @headroom of space at head. 2130 * and at least @headroom of space at head.
2131 */ 2131 */
2132 static inline int skb_cow(struct sk_buff *skb, unsigned int headroom) 2132 static inline int skb_cow(struct sk_buff *skb, unsigned int headroom)
2133 { 2133 {
2134 return __skb_cow(skb, headroom, skb_cloned(skb)); 2134 return __skb_cow(skb, headroom, skb_cloned(skb));
2135 } 2135 }
2136 2136
2137 /** 2137 /**
2138 * skb_cow_head - skb_cow but only making the head writable 2138 * skb_cow_head - skb_cow but only making the head writable
2139 * @skb: buffer to cow 2139 * @skb: buffer to cow
2140 * @headroom: needed headroom 2140 * @headroom: needed headroom
2141 * 2141 *
2142 * This function is identical to skb_cow except that we replace the 2142 * This function is identical to skb_cow except that we replace the
2143 * skb_cloned check by skb_header_cloned. It should be used when 2143 * skb_cloned check by skb_header_cloned. It should be used when
2144 * you only need to push on some header and do not need to modify 2144 * you only need to push on some header and do not need to modify
2145 * the data. 2145 * the data.
2146 */ 2146 */
2147 static inline int skb_cow_head(struct sk_buff *skb, unsigned int headroom) 2147 static inline int skb_cow_head(struct sk_buff *skb, unsigned int headroom)
2148 { 2148 {
2149 return __skb_cow(skb, headroom, skb_header_cloned(skb)); 2149 return __skb_cow(skb, headroom, skb_header_cloned(skb));
2150 } 2150 }
2151 2151
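As a brief illustration (names invented for the example), encapsulation code that only needs to prepend a header would pair skb_cow_head() with skb_push():

/* Sketch: make sure @hlen bytes of writable headroom exist, then
 * prepend a zeroed header of that size.
 */
static int example_push_header(struct sk_buff *skb, unsigned int hlen)
{
	int err = skb_cow_head(skb, hlen);

	if (err)
		return err;			/* reallocation failed */
	memset(skb_push(skb, hlen), 0, hlen);	/* header fill omitted */
	return 0;
}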
2152 /** 2152 /**
2153 * skb_padto - pad an skbuff up to a minimal size 2153 * skb_padto - pad an skbuff up to a minimal size
2154 * @skb: buffer to pad 2154 * @skb: buffer to pad
2155 * @len: minimal length 2155 * @len: minimal length
2156 * 2156 *
2157 * Pads up a buffer to ensure the trailing bytes exist and are 2157 * Pads up a buffer to ensure the trailing bytes exist and are
2158 * blanked. If the buffer already contains sufficient data it 2158 * blanked. If the buffer already contains sufficient data it
2159 * is untouched. Otherwise it is extended. Returns zero on 2159 * is untouched. Otherwise it is extended. Returns zero on
2160 * success. The skb is freed on error. 2160 * success. The skb is freed on error.
2161 */ 2161 */
2162 2162
2163 static inline int skb_padto(struct sk_buff *skb, unsigned int len) 2163 static inline int skb_padto(struct sk_buff *skb, unsigned int len)
2164 { 2164 {
2165 unsigned int size = skb->len; 2165 unsigned int size = skb->len;
2166 if (likely(size >= len)) 2166 if (likely(size >= len))
2167 return 0; 2167 return 0;
2168 return skb_pad(skb, len - size); 2168 return skb_pad(skb, len - size);
2169 } 2169 }
2170 2170
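A short, hedged example of the usual transmit-side call; note the documented quirk that the skb is already freed when skb_padto() fails, so the caller must not touch it afterwards. ETH_ZLEN comes from <linux/if_ether.h> and is used here purely as an example minimum.

/* Sketch: pad a frame to the Ethernet minimum before handing it to
 * hardware that does not pad on its own.
 */
static int example_pad_min_frame(struct sk_buff *skb)
{
	if (skb_padto(skb, ETH_ZLEN))
		return -ENOMEM;		/* skb was freed by skb_padto() */
	return 0;
}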
2171 static inline int skb_add_data(struct sk_buff *skb, 2171 static inline int skb_add_data(struct sk_buff *skb,
2172 char __user *from, int copy) 2172 char __user *from, int copy)
2173 { 2173 {
2174 const int off = skb->len; 2174 const int off = skb->len;
2175 2175
2176 if (skb->ip_summed == CHECKSUM_NONE) { 2176 if (skb->ip_summed == CHECKSUM_NONE) {
2177 int err = 0; 2177 int err = 0;
2178 __wsum csum = csum_and_copy_from_user(from, skb_put(skb, copy), 2178 __wsum csum = csum_and_copy_from_user(from, skb_put(skb, copy),
2179 copy, 0, &err); 2179 copy, 0, &err);
2180 if (!err) { 2180 if (!err) {
2181 skb->csum = csum_block_add(skb->csum, csum, off); 2181 skb->csum = csum_block_add(skb->csum, csum, off);
2182 return 0; 2182 return 0;
2183 } 2183 }
2184 } else if (!copy_from_user(skb_put(skb, copy), from, copy)) 2184 } else if (!copy_from_user(skb_put(skb, copy), from, copy))
2185 return 0; 2185 return 0;
2186 2186
2187 __skb_trim(skb, off); 2187 __skb_trim(skb, off);
2188 return -EFAULT; 2188 return -EFAULT;
2189 } 2189 }
2190 2190
2191 static inline bool skb_can_coalesce(struct sk_buff *skb, int i, 2191 static inline bool skb_can_coalesce(struct sk_buff *skb, int i,
2192 const struct page *page, int off) 2192 const struct page *page, int off)
2193 { 2193 {
2194 if (i) { 2194 if (i) {
2195 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1]; 2195 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1];
2196 2196
2197 return page == skb_frag_page(frag) && 2197 return page == skb_frag_page(frag) &&
2198 off == frag->page_offset + skb_frag_size(frag); 2198 off == frag->page_offset + skb_frag_size(frag);
2199 } 2199 }
2200 return false; 2200 return false;
2201 } 2201 }
2202 2202
2203 static inline int __skb_linearize(struct sk_buff *skb) 2203 static inline int __skb_linearize(struct sk_buff *skb)
2204 { 2204 {
2205 return __pskb_pull_tail(skb, skb->data_len) ? 0 : -ENOMEM; 2205 return __pskb_pull_tail(skb, skb->data_len) ? 0 : -ENOMEM;
2206 } 2206 }
2207 2207
2208 /** 2208 /**
2209 * skb_linearize - convert paged skb to linear one 2209 * skb_linearize - convert paged skb to linear one
2210 * @skb: buffer to linearize 2210 * @skb: buffer to linearize
2211 * 2211 *
2212 * If there is no free memory -ENOMEM is returned, otherwise zero 2212 * If there is no free memory -ENOMEM is returned, otherwise zero
2213 * is returned and the old skb data released. 2213 * is returned and the old skb data released.
2214 */ 2214 */
2215 static inline int skb_linearize(struct sk_buff *skb) 2215 static inline int skb_linearize(struct sk_buff *skb)
2216 { 2216 {
2217 return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0; 2217 return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0;
2218 } 2218 }
2219 2219
2220 /** 2220 /**
2221 * skb_has_shared_frag - can any frag be overwritten 2221 * skb_has_shared_frag - can any frag be overwritten
2222 * @skb: buffer to test 2222 * @skb: buffer to test
2223 * 2223 *
2224 * Return true if the skb has at least one frag that might be modified 2224 * Return true if the skb has at least one frag that might be modified
2225 * by an external entity (as in vmsplice()/sendfile()). 2225 * by an external entity (as in vmsplice()/sendfile()).
2226 */ 2226 */
2227 static inline bool skb_has_shared_frag(const struct sk_buff *skb) 2227 static inline bool skb_has_shared_frag(const struct sk_buff *skb)
2228 { 2228 {
2229 return skb_is_nonlinear(skb) && 2229 return skb_is_nonlinear(skb) &&
2230 skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; 2230 skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG;
2231 } 2231 }
2232 2232
2233 /** 2233 /**
2234 * skb_linearize_cow - make sure skb is linear and writable 2234 * skb_linearize_cow - make sure skb is linear and writable
2235 * @skb: buffer to process 2235 * @skb: buffer to process
2236 * 2236 *
2237 * If there is no free memory -ENOMEM is returned, otherwise zero 2237 * If there is no free memory -ENOMEM is returned, otherwise zero
2238 * is returned and the old skb data released. 2238 * is returned and the old skb data released.
2239 */ 2239 */
2240 static inline int skb_linearize_cow(struct sk_buff *skb) 2240 static inline int skb_linearize_cow(struct sk_buff *skb)
2241 { 2241 {
2242 return skb_is_nonlinear(skb) || skb_cloned(skb) ? 2242 return skb_is_nonlinear(skb) || skb_cloned(skb) ?
2243 __skb_linearize(skb) : 0; 2243 __skb_linearize(skb) : 0;
2244 } 2244 }
2245 2245
2246 /** 2246 /**
2247 * skb_postpull_rcsum - update checksum for received skb after pull 2247 * skb_postpull_rcsum - update checksum for received skb after pull
2248 * @skb: buffer to update 2248 * @skb: buffer to update
2249 * @start: start of data before pull 2249 * @start: start of data before pull
2250 * @len: length of data pulled 2250 * @len: length of data pulled
2251 * 2251 *
2252 * After doing a pull on a received packet, you need to call this to 2252 * After doing a pull on a received packet, you need to call this to
2253 * update the CHECKSUM_COMPLETE checksum, or set ip_summed to 2253 * update the CHECKSUM_COMPLETE checksum, or set ip_summed to
2254 * CHECKSUM_NONE so that it can be recomputed from scratch. 2254 * CHECKSUM_NONE so that it can be recomputed from scratch.
2255 */ 2255 */
2256 2256
2257 static inline void skb_postpull_rcsum(struct sk_buff *skb, 2257 static inline void skb_postpull_rcsum(struct sk_buff *skb,
2258 const void *start, unsigned int len) 2258 const void *start, unsigned int len)
2259 { 2259 {
2260 if (skb->ip_summed == CHECKSUM_COMPLETE) 2260 if (skb->ip_summed == CHECKSUM_COMPLETE)
2261 skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); 2261 skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0));
2262 } 2262 }
2263 2263
2264 unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); 2264 unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len);
2265 2265
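For illustration, the open-coded pattern that the exported skb_pull_rcsum() roughly wraps looks like this; ETH_HLEN and the function name are assumptions of the sketch.

/* Sketch: pull the Ethernet header and keep a CHECKSUM_COMPLETE
 * value consistent with the shorter packet.
 */
static void example_pull_eth_header(struct sk_buff *skb)
{
	const void *start = skb->data;

	skb_pull(skb, ETH_HLEN);
	skb_postpull_rcsum(skb, start, ETH_HLEN);
}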
2266 /** 2266 /**
2267 * pskb_trim_rcsum - trim received skb and update checksum 2267 * pskb_trim_rcsum - trim received skb and update checksum
2268 * @skb: buffer to trim 2268 * @skb: buffer to trim
2269 * @len: new length 2269 * @len: new length
2270 * 2270 *
2271 * This is exactly the same as pskb_trim except that it ensures the 2271 * This is exactly the same as pskb_trim except that it ensures the
2272 * checksums of received packets are still valid after the operation. 2272 * checksums of received packets are still valid after the operation.
2273 */ 2273 */
2274 2274
2275 static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) 2275 static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
2276 { 2276 {
2277 if (likely(len >= skb->len)) 2277 if (likely(len >= skb->len))
2278 return 0; 2278 return 0;
2279 if (skb->ip_summed == CHECKSUM_COMPLETE) 2279 if (skb->ip_summed == CHECKSUM_COMPLETE)
2280 skb->ip_summed = CHECKSUM_NONE; 2280 skb->ip_summed = CHECKSUM_NONE;
2281 return __pskb_trim(skb, len); 2281 return __pskb_trim(skb, len);
2282 } 2282 }
2283 2283
2284 #define skb_queue_walk(queue, skb) \ 2284 #define skb_queue_walk(queue, skb) \
2285 for (skb = (queue)->next; \ 2285 for (skb = (queue)->next; \
2286 skb != (struct sk_buff *)(queue); \ 2286 skb != (struct sk_buff *)(queue); \
2287 skb = skb->next) 2287 skb = skb->next)
2288 2288
2289 #define skb_queue_walk_safe(queue, skb, tmp) \ 2289 #define skb_queue_walk_safe(queue, skb, tmp) \
2290 for (skb = (queue)->next, tmp = skb->next; \ 2290 for (skb = (queue)->next, tmp = skb->next; \
2291 skb != (struct sk_buff *)(queue); \ 2291 skb != (struct sk_buff *)(queue); \
2292 skb = tmp, tmp = skb->next) 2292 skb = tmp, tmp = skb->next)
2293 2293
2294 #define skb_queue_walk_from(queue, skb) \ 2294 #define skb_queue_walk_from(queue, skb) \
2295 for (; skb != (struct sk_buff *)(queue); \ 2295 for (; skb != (struct sk_buff *)(queue); \
2296 skb = skb->next) 2296 skb = skb->next)
2297 2297
2298 #define skb_queue_walk_from_safe(queue, skb, tmp) \ 2298 #define skb_queue_walk_from_safe(queue, skb, tmp) \
2299 for (tmp = skb->next; \ 2299 for (tmp = skb->next; \
2300 skb != (struct sk_buff *)(queue); \ 2300 skb != (struct sk_buff *)(queue); \
2301 skb = tmp, tmp = skb->next) 2301 skb = tmp, tmp = skb->next)
2302 2302
2303 #define skb_queue_reverse_walk(queue, skb) \ 2303 #define skb_queue_reverse_walk(queue, skb) \
2304 for (skb = (queue)->prev; \ 2304 for (skb = (queue)->prev; \
2305 skb != (struct sk_buff *)(queue); \ 2305 skb != (struct sk_buff *)(queue); \
2306 skb = skb->prev) 2306 skb = skb->prev)
2307 2307
2308 #define skb_queue_reverse_walk_safe(queue, skb, tmp) \ 2308 #define skb_queue_reverse_walk_safe(queue, skb, tmp) \
2309 for (skb = (queue)->prev, tmp = skb->prev; \ 2309 for (skb = (queue)->prev, tmp = skb->prev; \
2310 skb != (struct sk_buff *)(queue); \ 2310 skb != (struct sk_buff *)(queue); \
2311 skb = tmp, tmp = skb->prev) 2311 skb = tmp, tmp = skb->prev)
2312 2312
2313 #define skb_queue_reverse_walk_from_safe(queue, skb, tmp) \ 2313 #define skb_queue_reverse_walk_from_safe(queue, skb, tmp) \
2314 for (tmp = skb->prev; \ 2314 for (tmp = skb->prev; \
2315 skb != (struct sk_buff *)(queue); \ 2315 skb != (struct sk_buff *)(queue); \
2316 skb = tmp, tmp = skb->prev) 2316 skb = tmp, tmp = skb->prev)
2317 2317
2318 static inline bool skb_has_frag_list(const struct sk_buff *skb) 2318 static inline bool skb_has_frag_list(const struct sk_buff *skb)
2319 { 2319 {
2320 return skb_shinfo(skb)->frag_list != NULL; 2320 return skb_shinfo(skb)->frag_list != NULL;
2321 } 2321 }
2322 2322
2323 static inline void skb_frag_list_init(struct sk_buff *skb) 2323 static inline void skb_frag_list_init(struct sk_buff *skb)
2324 { 2324 {
2325 skb_shinfo(skb)->frag_list = NULL; 2325 skb_shinfo(skb)->frag_list = NULL;
2326 } 2326 }
2327 2327
2328 static inline void skb_frag_add_head(struct sk_buff *skb, struct sk_buff *frag) 2328 static inline void skb_frag_add_head(struct sk_buff *skb, struct sk_buff *frag)
2329 { 2329 {
2330 frag->next = skb_shinfo(skb)->frag_list; 2330 frag->next = skb_shinfo(skb)->frag_list;
2331 skb_shinfo(skb)->frag_list = frag; 2331 skb_shinfo(skb)->frag_list = frag;
2332 } 2332 }
2333 2333
2334 #define skb_walk_frags(skb, iter) \ 2334 #define skb_walk_frags(skb, iter) \
2335 for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next) 2335 for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next)
2336 2336
2337 struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, 2337 struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
2338 int *peeked, int *off, int *err); 2338 int *peeked, int *off, int *err);
2339 struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, 2339 struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock,
2340 int *err); 2340 int *err);
2341 unsigned int datagram_poll(struct file *file, struct socket *sock, 2341 unsigned int datagram_poll(struct file *file, struct socket *sock,
2342 struct poll_table_struct *wait); 2342 struct poll_table_struct *wait);
2343 int skb_copy_datagram_iovec(const struct sk_buff *from, int offset, 2343 int skb_copy_datagram_iovec(const struct sk_buff *from, int offset,
2344 struct iovec *to, int size); 2344 struct iovec *to, int size);
2345 int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, int hlen, 2345 int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, int hlen,
2346 struct iovec *iov); 2346 struct iovec *iov);
2347 int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, 2347 int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
2348 const struct iovec *from, int from_offset, 2348 const struct iovec *from, int from_offset,
2349 int len); 2349 int len);
2350 int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *frm, 2350 int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *frm,
2351 int offset, size_t count); 2351 int offset, size_t count);
2352 int skb_copy_datagram_const_iovec(const struct sk_buff *from, int offset, 2352 int skb_copy_datagram_const_iovec(const struct sk_buff *from, int offset,
2353 const struct iovec *to, int to_offset, 2353 const struct iovec *to, int to_offset,
2354 int size); 2354 int size);
2355 void skb_free_datagram(struct sock *sk, struct sk_buff *skb); 2355 void skb_free_datagram(struct sock *sk, struct sk_buff *skb);
2356 void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb); 2356 void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb);
2357 int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); 2357 int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags);
2358 int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); 2358 int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
2359 int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len); 2359 int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len);
2360 __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, 2360 __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to,
2361 int len, __wsum csum); 2361 int len, __wsum csum);
2362 int skb_splice_bits(struct sk_buff *skb, unsigned int offset, 2362 int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
2363 struct pipe_inode_info *pipe, unsigned int len, 2363 struct pipe_inode_info *pipe, unsigned int len,
2364 unsigned int flags); 2364 unsigned int flags);
2365 void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); 2365 void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
2366 void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); 2366 void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len);
2367 int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); 2367 int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen);
2368 void skb_scrub_packet(struct sk_buff *skb, bool xnet); 2368 void skb_scrub_packet(struct sk_buff *skb, bool xnet);
2369 struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); 2369 struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
2370 2370
2371 struct skb_checksum_ops { 2371 struct skb_checksum_ops {
2372 __wsum (*update)(const void *mem, int len, __wsum wsum); 2372 __wsum (*update)(const void *mem, int len, __wsum wsum);
2373 __wsum (*combine)(__wsum csum, __wsum csum2, int offset, int len); 2373 __wsum (*combine)(__wsum csum, __wsum csum2, int offset, int len);
2374 }; 2374 };
2375 2375
2376 __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, 2376 __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
2377 __wsum csum, const struct skb_checksum_ops *ops); 2377 __wsum csum, const struct skb_checksum_ops *ops);
2378 __wsum skb_checksum(const struct sk_buff *skb, int offset, int len, 2378 __wsum skb_checksum(const struct sk_buff *skb, int offset, int len,
2379 __wsum csum); 2379 __wsum csum);
2380 2380
2381 static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, 2381 static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
2382 int len, void *buffer) 2382 int len, void *buffer)
2383 { 2383 {
2384 int hlen = skb_headlen(skb); 2384 int hlen = skb_headlen(skb);
2385 2385
2386 if (hlen - offset >= len) 2386 if (hlen - offset >= len)
2387 return skb->data + offset; 2387 return skb->data + offset;
2388 2388
2389 if (skb_copy_bits(skb, offset, buffer, len) < 0) 2389 if (skb_copy_bits(skb, offset, buffer, len) < 0)
2390 return NULL; 2390 return NULL;
2391 2391
2392 return buffer; 2392 return buffer;
2393 } 2393 }
2394 2394
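A minimal sketch of the usual calling convention: pass a stack buffer large enough for the header and use whichever pointer comes back, whether it points into the linear area or into the copy. The UDP header (struct udphdr, from <linux/udp.h>) and the transport offset are example choices.

/* Sketch: fetch a UDP header that may be split across fragments. */
static const struct udphdr *example_udp_hdr(const struct sk_buff *skb,
					    struct udphdr *buf)
{
	return skb_header_pointer(skb, skb_transport_offset(skb),
				  sizeof(*buf), buf);
}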
2395 /**
2396 * skb_needs_linearize - check if we need to linearize a given skb
2397 * depending on the given device features.
2398 * @skb: socket buffer to check
2399 * @features: net device features
2400 *
2401 * Returns true if either:
2402 * 1. skb has frag_list and the device doesn't support FRAGLIST, or
2403 * 2. skb is fragmented and the device does not support SG.
2404 */
2405 static inline bool skb_needs_linearize(struct sk_buff *skb,
2406 netdev_features_t features)
2407 {
2408 return skb_is_nonlinear(skb) &&
2409 ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) ||
2410 (skb_shinfo(skb)->nr_frags && !(features & NETIF_F_SG)));
2411 }
2412
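A hedged sketch of the kind of call site this helper is being exported for (compare its existing user in net/core/dev.c): a transmit path tests the device's feature flags and linearizes only when the skb layout is unsupported. The wrapper name is invented for the example.

/* Sketch: flatten the skb if the device can handle neither its
 * frag_list nor its paged fragments.
 */
static int example_maybe_linearize(struct sk_buff *skb,
				   netdev_features_t features)
{
	if (skb_needs_linearize(skb, features) && __skb_linearize(skb))
		return -ENOMEM;
	return 0;
}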
2395 static inline void skb_copy_from_linear_data(const struct sk_buff *skb, 2413 static inline void skb_copy_from_linear_data(const struct sk_buff *skb,
2396 void *to, 2414 void *to,
2397 const unsigned int len) 2415 const unsigned int len)
2398 { 2416 {
2399 memcpy(to, skb->data, len); 2417 memcpy(to, skb->data, len);
2400 } 2418 }
2401 2419
2402 static inline void skb_copy_from_linear_data_offset(const struct sk_buff *skb, 2420 static inline void skb_copy_from_linear_data_offset(const struct sk_buff *skb,
2403 const int offset, void *to, 2421 const int offset, void *to,
2404 const unsigned int len) 2422 const unsigned int len)
2405 { 2423 {
2406 memcpy(to, skb->data + offset, len); 2424 memcpy(to, skb->data + offset, len);
2407 } 2425 }
2408 2426
2409 static inline void skb_copy_to_linear_data(struct sk_buff *skb, 2427 static inline void skb_copy_to_linear_data(struct sk_buff *skb,
2410 const void *from, 2428 const void *from,
2411 const unsigned int len) 2429 const unsigned int len)
2412 { 2430 {
2413 memcpy(skb->data, from, len); 2431 memcpy(skb->data, from, len);
2414 } 2432 }
2415 2433
2416 static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb, 2434 static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb,
2417 const int offset, 2435 const int offset,
2418 const void *from, 2436 const void *from,
2419 const unsigned int len) 2437 const unsigned int len)
2420 { 2438 {
2421 memcpy(skb->data + offset, from, len); 2439 memcpy(skb->data + offset, from, len);
2422 } 2440 }
2423 2441
2424 void skb_init(void); 2442 void skb_init(void);
2425 2443
2426 static inline ktime_t skb_get_ktime(const struct sk_buff *skb) 2444 static inline ktime_t skb_get_ktime(const struct sk_buff *skb)
2427 { 2445 {
2428 return skb->tstamp; 2446 return skb->tstamp;
2429 } 2447 }
2430 2448
2431 /** 2449 /**
2432 * skb_get_timestamp - get timestamp from a skb 2450 * skb_get_timestamp - get timestamp from a skb
2433 * @skb: skb to get stamp from 2451 * @skb: skb to get stamp from
2434 * @stamp: pointer to struct timeval to store stamp in 2452 * @stamp: pointer to struct timeval to store stamp in
2435 * 2453 *
2436 * Timestamps are stored in the skb as offsets to a base timestamp. 2454 * Timestamps are stored in the skb as offsets to a base timestamp.
2437 * This function converts the offset back to a struct timeval and stores 2455 * This function converts the offset back to a struct timeval and stores
2438 * it in stamp. 2456 * it in stamp.
2439 */ 2457 */
2440 static inline void skb_get_timestamp(const struct sk_buff *skb, 2458 static inline void skb_get_timestamp(const struct sk_buff *skb,
2441 struct timeval *stamp) 2459 struct timeval *stamp)
2442 { 2460 {
2443 *stamp = ktime_to_timeval(skb->tstamp); 2461 *stamp = ktime_to_timeval(skb->tstamp);
2444 } 2462 }
2445 2463
2446 static inline void skb_get_timestampns(const struct sk_buff *skb, 2464 static inline void skb_get_timestampns(const struct sk_buff *skb,
2447 struct timespec *stamp) 2465 struct timespec *stamp)
2448 { 2466 {
2449 *stamp = ktime_to_timespec(skb->tstamp); 2467 *stamp = ktime_to_timespec(skb->tstamp);
2450 } 2468 }
2451 2469
2452 static inline void __net_timestamp(struct sk_buff *skb) 2470 static inline void __net_timestamp(struct sk_buff *skb)
2453 { 2471 {
2454 skb->tstamp = ktime_get_real(); 2472 skb->tstamp = ktime_get_real();
2455 } 2473 }
2456 2474
2457 static inline ktime_t net_timedelta(ktime_t t) 2475 static inline ktime_t net_timedelta(ktime_t t)
2458 { 2476 {
2459 return ktime_sub(ktime_get_real(), t); 2477 return ktime_sub(ktime_get_real(), t);
2460 } 2478 }
2461 2479
2462 static inline ktime_t net_invalid_timestamp(void) 2480 static inline ktime_t net_invalid_timestamp(void)
2463 { 2481 {
2464 return ktime_set(0, 0); 2482 return ktime_set(0, 0);
2465 } 2483 }
2466 2484
2467 void skb_timestamping_init(void); 2485 void skb_timestamping_init(void);
2468 2486
2469 #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING 2487 #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
2470 2488
2471 void skb_clone_tx_timestamp(struct sk_buff *skb); 2489 void skb_clone_tx_timestamp(struct sk_buff *skb);
2472 bool skb_defer_rx_timestamp(struct sk_buff *skb); 2490 bool skb_defer_rx_timestamp(struct sk_buff *skb);
2473 2491
2474 #else /* CONFIG_NETWORK_PHY_TIMESTAMPING */ 2492 #else /* CONFIG_NETWORK_PHY_TIMESTAMPING */
2475 2493
2476 static inline void skb_clone_tx_timestamp(struct sk_buff *skb) 2494 static inline void skb_clone_tx_timestamp(struct sk_buff *skb)
2477 { 2495 {
2478 } 2496 }
2479 2497
2480 static inline bool skb_defer_rx_timestamp(struct sk_buff *skb) 2498 static inline bool skb_defer_rx_timestamp(struct sk_buff *skb)
2481 { 2499 {
2482 return false; 2500 return false;
2483 } 2501 }
2484 2502
2485 #endif /* !CONFIG_NETWORK_PHY_TIMESTAMPING */ 2503 #endif /* !CONFIG_NETWORK_PHY_TIMESTAMPING */
2486 2504
2487 /** 2505 /**
2488 * skb_complete_tx_timestamp() - deliver cloned skb with tx timestamps 2506 * skb_complete_tx_timestamp() - deliver cloned skb with tx timestamps
2489 * 2507 *
2490 * PHY drivers may accept clones of transmitted packets for 2508 * PHY drivers may accept clones of transmitted packets for
2491 * timestamping via their phy_driver.txtstamp method. These drivers 2509 * timestamping via their phy_driver.txtstamp method. These drivers
2492 * must call this function to return the skb back to the stack, with 2510 * must call this function to return the skb back to the stack, with
2493 * or without a timestamp. 2511 * or without a timestamp.
2494 * 2512 *
2495 * @skb: clone of the original outgoing packet 2513 * @skb: clone of the original outgoing packet
2496 * @hwtstamps: hardware time stamps, may be NULL if not available 2514 * @hwtstamps: hardware time stamps, may be NULL if not available
2497 * 2515 *
2498 */ 2516 */
2499 void skb_complete_tx_timestamp(struct sk_buff *skb, 2517 void skb_complete_tx_timestamp(struct sk_buff *skb,
2500 struct skb_shared_hwtstamps *hwtstamps); 2518 struct skb_shared_hwtstamps *hwtstamps);
2501 2519
2502 /** 2520 /**
2503 * skb_tstamp_tx - queue clone of skb with send time stamps 2521 * skb_tstamp_tx - queue clone of skb with send time stamps
2504 * @orig_skb: the original outgoing packet 2522 * @orig_skb: the original outgoing packet
2505 * @hwtstamps: hardware time stamps, may be NULL if not available 2523 * @hwtstamps: hardware time stamps, may be NULL if not available
2506 * 2524 *
2507 * If the skb has a socket associated, then this function clones the 2525 * If the skb has a socket associated, then this function clones the
2508 * skb (thus sharing the actual data and optional structures), stores 2526 * skb (thus sharing the actual data and optional structures), stores
2509 * the optional hardware time stamping information (if non-NULL) or 2527 * the optional hardware time stamping information (if non-NULL) or
2510 * generates a software time stamp (otherwise), then queues the clone 2528 * generates a software time stamp (otherwise), then queues the clone
2511 * to the error queue of the socket. Errors are silently ignored. 2529 * to the error queue of the socket. Errors are silently ignored.
2512 */ 2530 */
2513 void skb_tstamp_tx(struct sk_buff *orig_skb, 2531 void skb_tstamp_tx(struct sk_buff *orig_skb,
2514 struct skb_shared_hwtstamps *hwtstamps); 2532 struct skb_shared_hwtstamps *hwtstamps);
2515 2533
2516 static inline void sw_tx_timestamp(struct sk_buff *skb) 2534 static inline void sw_tx_timestamp(struct sk_buff *skb)
2517 { 2535 {
2518 if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP && 2536 if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP &&
2519 !(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) 2537 !(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))
2520 skb_tstamp_tx(skb, NULL); 2538 skb_tstamp_tx(skb, NULL);
2521 } 2539 }
2522 2540
2523 /** 2541 /**
2524 * skb_tx_timestamp() - Driver hook for transmit timestamping 2542 * skb_tx_timestamp() - Driver hook for transmit timestamping
2525 * 2543 *
2526 * Ethernet MAC Drivers should call this function in their hard_xmit() 2544 * Ethernet MAC Drivers should call this function in their hard_xmit()
2527 * function immediately before giving the sk_buff to the MAC hardware. 2545 * function immediately before giving the sk_buff to the MAC hardware.
2528 * 2546 *
2529 * @skb: A socket buffer. 2547 * @skb: A socket buffer.
2530 */ 2548 */
2531 static inline void skb_tx_timestamp(struct sk_buff *skb) 2549 static inline void skb_tx_timestamp(struct sk_buff *skb)
2532 { 2550 {
2533 skb_clone_tx_timestamp(skb); 2551 skb_clone_tx_timestamp(skb);
2534 sw_tx_timestamp(skb); 2552 sw_tx_timestamp(skb);
2535 } 2553 }
2536 2554
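As a brief illustration of the placement requirement, a hypothetical ndo_start_xmit() implementation (types from <linux/netdevice.h>, everything driver-specific elided) calls the hook just before the descriptor is handed to the hardware.

/* Sketch only: the timestamp hook goes after the skb can no longer
 * fail software checks, immediately before hardware submission.
 */
static netdev_tx_t example_start_xmit(struct sk_buff *skb,
				      struct net_device *dev)
{
	skb_tx_timestamp(skb);
	/* ... post @skb to the TX ring here ... */
	return NETDEV_TX_OK;
}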
2537 /** 2555 /**
2538 * skb_complete_wifi_ack - deliver skb with wifi status 2556 * skb_complete_wifi_ack - deliver skb with wifi status
2539 * 2557 *
2540 * @skb: the original outgoing packet 2558 * @skb: the original outgoing packet
2541 * @acked: ack status 2559 * @acked: ack status
2542 * 2560 *
2543 */ 2561 */
2544 void skb_complete_wifi_ack(struct sk_buff *skb, bool acked); 2562 void skb_complete_wifi_ack(struct sk_buff *skb, bool acked);
2545 2563
2546 __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len); 2564 __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len);
2547 __sum16 __skb_checksum_complete(struct sk_buff *skb); 2565 __sum16 __skb_checksum_complete(struct sk_buff *skb);
2548 2566
2549 static inline int skb_csum_unnecessary(const struct sk_buff *skb) 2567 static inline int skb_csum_unnecessary(const struct sk_buff *skb)
2550 { 2568 {
2551 return skb->ip_summed & CHECKSUM_UNNECESSARY; 2569 return skb->ip_summed & CHECKSUM_UNNECESSARY;
2552 } 2570 }
2553 2571
2554 /** 2572 /**
2555 * skb_checksum_complete - Calculate checksum of an entire packet 2573 * skb_checksum_complete - Calculate checksum of an entire packet
2556 * @skb: packet to process 2574 * @skb: packet to process
2557 * 2575 *
2558 * This function calculates the checksum over the entire packet plus 2576 * This function calculates the checksum over the entire packet plus
2559 * the value of skb->csum. The latter can be used to supply the 2577 * the value of skb->csum. The latter can be used to supply the
2560 * checksum of a pseudo header as used by TCP/UDP. It returns the 2578 * checksum of a pseudo header as used by TCP/UDP. It returns the
2561 * checksum. 2579 * checksum.
2562 * 2580 *
2563 * For protocols that contain complete checksums such as ICMP/TCP/UDP, 2581 * For protocols that contain complete checksums such as ICMP/TCP/UDP,
2564 * this function can be used to verify the checksum on received 2582 * this function can be used to verify the checksum on received
2565 * packets. In that case the function should return zero if the 2583 * packets. In that case the function should return zero if the
2566 * checksum is correct. In particular, this function will return zero 2584 * checksum is correct. In particular, this function will return zero
2567 * if skb->ip_summed is CHECKSUM_UNNECESSARY which indicates that the 2585 * if skb->ip_summed is CHECKSUM_UNNECESSARY which indicates that the
2568 * hardware has already verified the correctness of the checksum. 2586 * hardware has already verified the correctness of the checksum.
2569 */ 2587 */
2570 static inline __sum16 skb_checksum_complete(struct sk_buff *skb) 2588 static inline __sum16 skb_checksum_complete(struct sk_buff *skb)
2571 { 2589 {
2572 return skb_csum_unnecessary(skb) ? 2590 return skb_csum_unnecessary(skb) ?
2573 0 : __skb_checksum_complete(skb); 2591 0 : __skb_checksum_complete(skb);
2574 } 2592 }
2575 2593
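A short sketch of how a protocol receive routine typically consumes this; the helper name is invented, and any pseudo-header checksum is assumed to have been folded into skb->csum already.

/* Sketch: returns true when the packet checksum verifies, either by
 * software summation or because hardware already validated it
 * (CHECKSUM_UNNECESSARY).
 */
static bool example_checksum_ok(struct sk_buff *skb)
{
	return skb_checksum_complete(skb) == 0;
}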
2576 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 2594 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
2577 void nf_conntrack_destroy(struct nf_conntrack *nfct); 2595 void nf_conntrack_destroy(struct nf_conntrack *nfct);
2578 static inline void nf_conntrack_put(struct nf_conntrack *nfct) 2596 static inline void nf_conntrack_put(struct nf_conntrack *nfct)
2579 { 2597 {
2580 if (nfct && atomic_dec_and_test(&nfct->use)) 2598 if (nfct && atomic_dec_and_test(&nfct->use))
2581 nf_conntrack_destroy(nfct); 2599 nf_conntrack_destroy(nfct);
2582 } 2600 }
2583 static inline void nf_conntrack_get(struct nf_conntrack *nfct) 2601 static inline void nf_conntrack_get(struct nf_conntrack *nfct)
2584 { 2602 {
2585 if (nfct) 2603 if (nfct)
2586 atomic_inc(&nfct->use); 2604 atomic_inc(&nfct->use);
2587 } 2605 }
2588 #endif 2606 #endif
2589 #ifdef CONFIG_BRIDGE_NETFILTER 2607 #ifdef CONFIG_BRIDGE_NETFILTER
2590 static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge) 2608 static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
2591 { 2609 {
2592 if (nf_bridge && atomic_dec_and_test(&nf_bridge->use)) 2610 if (nf_bridge && atomic_dec_and_test(&nf_bridge->use))
2593 kfree(nf_bridge); 2611 kfree(nf_bridge);
2594 } 2612 }
2595 static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge) 2613 static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge)
2596 { 2614 {
2597 if (nf_bridge) 2615 if (nf_bridge)
2598 atomic_inc(&nf_bridge->use); 2616 atomic_inc(&nf_bridge->use);
2599 } 2617 }
2600 #endif /* CONFIG_BRIDGE_NETFILTER */ 2618 #endif /* CONFIG_BRIDGE_NETFILTER */
2601 static inline void nf_reset(struct sk_buff *skb) 2619 static inline void nf_reset(struct sk_buff *skb)
2602 { 2620 {
2603 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 2621 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
2604 nf_conntrack_put(skb->nfct); 2622 nf_conntrack_put(skb->nfct);
2605 skb->nfct = NULL; 2623 skb->nfct = NULL;
2606 #endif 2624 #endif
2607 #ifdef CONFIG_BRIDGE_NETFILTER 2625 #ifdef CONFIG_BRIDGE_NETFILTER
2608 nf_bridge_put(skb->nf_bridge); 2626 nf_bridge_put(skb->nf_bridge);
2609 skb->nf_bridge = NULL; 2627 skb->nf_bridge = NULL;
2610 #endif 2628 #endif
2611 } 2629 }
2612 2630
2613 static inline void nf_reset_trace(struct sk_buff *skb) 2631 static inline void nf_reset_trace(struct sk_buff *skb)
2614 { 2632 {
2615 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) 2633 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
2616 skb->nf_trace = 0; 2634 skb->nf_trace = 0;
2617 #endif 2635 #endif
2618 } 2636 }
2619 2637
2620 /* Note: This doesn't put any conntrack and bridge info in dst. */ 2638 /* Note: This doesn't put any conntrack and bridge info in dst. */
2621 static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src) 2639 static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
2622 { 2640 {
2623 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 2641 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
2624 dst->nfct = src->nfct; 2642 dst->nfct = src->nfct;
2625 nf_conntrack_get(src->nfct); 2643 nf_conntrack_get(src->nfct);
2626 dst->nfctinfo = src->nfctinfo; 2644 dst->nfctinfo = src->nfctinfo;
2627 #endif 2645 #endif
2628 #ifdef CONFIG_BRIDGE_NETFILTER 2646 #ifdef CONFIG_BRIDGE_NETFILTER
2629 dst->nf_bridge = src->nf_bridge; 2647 dst->nf_bridge = src->nf_bridge;
2630 nf_bridge_get(src->nf_bridge); 2648 nf_bridge_get(src->nf_bridge);
2631 #endif 2649 #endif
2632 } 2650 }
2633 2651
2634 static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src) 2652 static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
2635 { 2653 {
2636 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 2654 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
2637 nf_conntrack_put(dst->nfct); 2655 nf_conntrack_put(dst->nfct);
2638 #endif 2656 #endif
2639 #ifdef CONFIG_BRIDGE_NETFILTER 2657 #ifdef CONFIG_BRIDGE_NETFILTER
2640 nf_bridge_put(dst->nf_bridge); 2658 nf_bridge_put(dst->nf_bridge);
2641 #endif 2659 #endif
2642 __nf_copy(dst, src); 2660 __nf_copy(dst, src);
2643 } 2661 }
2644 2662
2645 #ifdef CONFIG_NETWORK_SECMARK 2663 #ifdef CONFIG_NETWORK_SECMARK
2646 static inline void skb_copy_secmark(struct sk_buff *to, const struct sk_buff *from) 2664 static inline void skb_copy_secmark(struct sk_buff *to, const struct sk_buff *from)
2647 { 2665 {
2648 to->secmark = from->secmark; 2666 to->secmark = from->secmark;
2649 } 2667 }
2650 2668
2651 static inline void skb_init_secmark(struct sk_buff *skb) 2669 static inline void skb_init_secmark(struct sk_buff *skb)
2652 { 2670 {
2653 skb->secmark = 0; 2671 skb->secmark = 0;
2654 } 2672 }
2655 #else 2673 #else
2656 static inline void skb_copy_secmark(struct sk_buff *to, const struct sk_buff *from) 2674 static inline void skb_copy_secmark(struct sk_buff *to, const struct sk_buff *from)
2657 { } 2675 { }
2658 2676
2659 static inline void skb_init_secmark(struct sk_buff *skb) 2677 static inline void skb_init_secmark(struct sk_buff *skb)
2660 { } 2678 { }
2661 #endif 2679 #endif
2662 2680
2663 static inline void skb_set_queue_mapping(struct sk_buff *skb, u16 queue_mapping) 2681 static inline void skb_set_queue_mapping(struct sk_buff *skb, u16 queue_mapping)
2664 { 2682 {
2665 skb->queue_mapping = queue_mapping; 2683 skb->queue_mapping = queue_mapping;
2666 } 2684 }
2667 2685
2668 static inline u16 skb_get_queue_mapping(const struct sk_buff *skb) 2686 static inline u16 skb_get_queue_mapping(const struct sk_buff *skb)
2669 { 2687 {
2670 return skb->queue_mapping; 2688 return skb->queue_mapping;
2671 } 2689 }
2672 2690
2673 static inline void skb_copy_queue_mapping(struct sk_buff *to, const struct sk_buff *from) 2691 static inline void skb_copy_queue_mapping(struct sk_buff *to, const struct sk_buff *from)
2674 { 2692 {
2675 to->queue_mapping = from->queue_mapping; 2693 to->queue_mapping = from->queue_mapping;
2676 } 2694 }
2677 2695
2678 static inline void skb_record_rx_queue(struct sk_buff *skb, u16 rx_queue) 2696 static inline void skb_record_rx_queue(struct sk_buff *skb, u16 rx_queue)
2679 { 2697 {
2680 skb->queue_mapping = rx_queue + 1; 2698 skb->queue_mapping = rx_queue + 1;
2681 } 2699 }
2682 2700
2683 static inline u16 skb_get_rx_queue(const struct sk_buff *skb) 2701 static inline u16 skb_get_rx_queue(const struct sk_buff *skb)
2684 { 2702 {
2685 return skb->queue_mapping - 1; 2703 return skb->queue_mapping - 1;
2686 } 2704 }
2687 2705
2688 static inline bool skb_rx_queue_recorded(const struct sk_buff *skb) 2706 static inline bool skb_rx_queue_recorded(const struct sk_buff *skb)
2689 { 2707 {
2690 return skb->queue_mapping != 0; 2708 return skb->queue_mapping != 0;
2691 } 2709 }
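
Because skb_record_rx_queue() stores the queue index offset by one, a queue_mapping of zero doubles as "no RX queue recorded", which is exactly what skb_rx_queue_recorded() tests. A minimal driver-style sketch of that pattern follows; the ring structure and its queue_index field are hypothetical placeholders, not part of this header:

    #include <linux/skbuff.h>

    struct my_rx_ring {                     /* hypothetical driver ring */
            u16 queue_index;
    };

    static void my_driver_rx(struct my_rx_ring *ring, struct sk_buff *skb)
    {
            /* Stores queue_index + 1, so 0 keeps meaning "not recorded". */
            skb_record_rx_queue(skb, ring->queue_index);

            if (skb_rx_queue_recorded(skb))
                    pr_debug("rx on queue %u\n", skb_get_rx_queue(skb));
    }
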
2692 2710
2693 u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, 2711 u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
2694 unsigned int num_tx_queues); 2712 unsigned int num_tx_queues);
2695 2713
2696 static inline struct sec_path *skb_sec_path(struct sk_buff *skb) 2714 static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
2697 { 2715 {
2698 #ifdef CONFIG_XFRM 2716 #ifdef CONFIG_XFRM
2699 return skb->sp; 2717 return skb->sp;
2700 #else 2718 #else
2701 return NULL; 2719 return NULL;
2702 #endif 2720 #endif
2703 } 2721 }
2704 2722
2705 /* Keeps track of mac header offset relative to skb->head. 2723 /* Keeps track of mac header offset relative to skb->head.
2706 * It is useful for TSO of tunneling protocols, e.g. GRE. 2724 * It is useful for TSO of tunneling protocols, e.g. GRE.
2707 * For non-tunnel skb it points to skb_mac_header() and for 2725 * For non-tunnel skb it points to skb_mac_header() and for
2708 * tunnel skb it points to outer mac header. 2726 * tunnel skb it points to outer mac header.
2709 * Keeps track of level of encapsulation of network headers. 2727 * Keeps track of level of encapsulation of network headers.
2710 */ 2728 */
2711 struct skb_gso_cb { 2729 struct skb_gso_cb {
2712 int mac_offset; 2730 int mac_offset;
2713 int encap_level; 2731 int encap_level;
2714 }; 2732 };
2715 #define SKB_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb) 2733 #define SKB_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb)
2716 2734
2717 static inline int skb_tnl_header_len(const struct sk_buff *inner_skb) 2735 static inline int skb_tnl_header_len(const struct sk_buff *inner_skb)
2718 { 2736 {
2719 return (skb_mac_header(inner_skb) - inner_skb->head) - 2737 return (skb_mac_header(inner_skb) - inner_skb->head) -
2720 SKB_GSO_CB(inner_skb)->mac_offset; 2738 SKB_GSO_CB(inner_skb)->mac_offset;
2721 } 2739 }
2722 2740
2723 static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra) 2741 static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra)
2724 { 2742 {
2725 int new_headroom, headroom; 2743 int new_headroom, headroom;
2726 int ret; 2744 int ret;
2727 2745
2728 headroom = skb_headroom(skb); 2746 headroom = skb_headroom(skb);
2729 ret = pskb_expand_head(skb, extra, 0, GFP_ATOMIC); 2747 ret = pskb_expand_head(skb, extra, 0, GFP_ATOMIC);
2730 if (ret) 2748 if (ret)
2731 return ret; 2749 return ret;
2732 2750
2733 new_headroom = skb_headroom(skb); 2751 new_headroom = skb_headroom(skb);
2734 SKB_GSO_CB(skb)->mac_offset += (new_headroom - headroom); 2752 SKB_GSO_CB(skb)->mac_offset += (new_headroom - headroom);
2735 return 0; 2753 return 0;
2736 } 2754 }
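
gso_pskb_expand_head() exists so that GSO and tunnel code can grow headroom without losing track of the outer MAC header: any shift of skb->head is folded back into SKB_GSO_CB(skb)->mac_offset, so a later skb_tnl_header_len() still returns the correct outer-header length. A hedged sketch of how a segmentation path might use it; the helper name and the "needed" amount are assumptions, not code from this file:

    static int my_ensure_gso_headroom(struct sk_buff *skb, unsigned int needed)
    {
            /* Grow headroom while keeping SKB_GSO_CB(skb)->mac_offset
             * consistent with the (possibly relocated) skb->head.
             */
            if (skb_headroom(skb) < needed)
                    return gso_pskb_expand_head(skb, needed - skb_headroom(skb));

            return 0;
    }
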
2737 2755
2738 static inline bool skb_is_gso(const struct sk_buff *skb) 2756 static inline bool skb_is_gso(const struct sk_buff *skb)
2739 { 2757 {
2740 return skb_shinfo(skb)->gso_size; 2758 return skb_shinfo(skb)->gso_size;
2741 } 2759 }
2742 2760
2743 /* Note: Should be called only if skb_is_gso(skb) is true */ 2761 /* Note: Should be called only if skb_is_gso(skb) is true */
2744 static inline bool skb_is_gso_v6(const struct sk_buff *skb) 2762 static inline bool skb_is_gso_v6(const struct sk_buff *skb)
2745 { 2763 {
2746 return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6; 2764 return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
2747 } 2765 }
2748 2766
2749 void __skb_warn_lro_forwarding(const struct sk_buff *skb); 2767 void __skb_warn_lro_forwarding(const struct sk_buff *skb);
2750 2768
2751 static inline bool skb_warn_if_lro(const struct sk_buff *skb) 2769 static inline bool skb_warn_if_lro(const struct sk_buff *skb)
2752 { 2770 {
2753 /* LRO sets gso_size but not gso_type, whereas if GSO is really 2771 /* LRO sets gso_size but not gso_type, whereas if GSO is really
2754 * wanted then gso_type will be set. */ 2772 * wanted then gso_type will be set. */
2755 const struct skb_shared_info *shinfo = skb_shinfo(skb); 2773 const struct skb_shared_info *shinfo = skb_shinfo(skb);
2756 2774
2757 if (skb_is_nonlinear(skb) && shinfo->gso_size != 0 && 2775 if (skb_is_nonlinear(skb) && shinfo->gso_size != 0 &&
2758 unlikely(shinfo->gso_type == 0)) { 2776 unlikely(shinfo->gso_type == 0)) {
2759 __skb_warn_lro_forwarding(skb); 2777 __skb_warn_lro_forwarding(skb);
2760 return true; 2778 return true;
2761 } 2779 }
2762 return false; 2780 return false;
2763 } 2781 }
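
skb_warn_if_lro() is meant for forwarding paths: an LRO-merged skb (gso_size set but gso_type clear) must not be forwarded, since it may exceed the output device's MTU and cannot be re-segmented. A minimal sketch of the usual call pattern, assuming a hypothetical forwarding function and the usual <linux/netdevice.h> return codes:

    static int my_forward(struct sk_buff *skb)
    {
            if (skb_warn_if_lro(skb))
                    goto drop;              /* warn once and refuse to forward */

            /* ... normal forwarding continues here ... */
            return 0;

    drop:
            kfree_skb(skb);
            return NET_RX_DROP;
    }
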
2764 2782
2765 static inline void skb_forward_csum(struct sk_buff *skb) 2783 static inline void skb_forward_csum(struct sk_buff *skb)
2766 { 2784 {
2767 /* Unfortunately we don't support this one. Any brave souls? */ 2785 /* Unfortunately we don't support this one. Any brave souls? */
2768 if (skb->ip_summed == CHECKSUM_COMPLETE) 2786 if (skb->ip_summed == CHECKSUM_COMPLETE)
2769 skb->ip_summed = CHECKSUM_NONE; 2787 skb->ip_summed = CHECKSUM_NONE;
2770 } 2788 }
2771 2789
2772 /** 2790 /**
2773 * skb_checksum_none_assert - make sure skb ip_summed is CHECKSUM_NONE 2791 * skb_checksum_none_assert - make sure skb ip_summed is CHECKSUM_NONE
2774 * @skb: skb to check 2792 * @skb: skb to check
2775 * 2793 *
2776 * fresh skbs have their ip_summed set to CHECKSUM_NONE. 2794 * fresh skbs have their ip_summed set to CHECKSUM_NONE.
2777 * Instead of forcing ip_summed to CHECKSUM_NONE, we can 2795 * Instead of forcing ip_summed to CHECKSUM_NONE, we can
2778 * use this helper, to document places where we make this assertion. 2796 * use this helper, to document places where we make this assertion.
2779 */ 2797 */
2780 static inline void skb_checksum_none_assert(const struct sk_buff *skb) 2798 static inline void skb_checksum_none_assert(const struct sk_buff *skb)
2781 { 2799 {
2782 #ifdef DEBUG 2800 #ifdef DEBUG
2783 BUG_ON(skb->ip_summed != CHECKSUM_NONE); 2801 BUG_ON(skb->ip_summed != CHECKSUM_NONE);
2784 #endif 2802 #endif
2785 } 2803 }
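
In practice this assertion appears in driver receive paths right before the driver decides whether to upgrade ip_summed based on hardware checksum status. A hedged driver-style sketch; the hw_csum_ok flag stands in for whatever the real descriptor reports:

    static void my_driver_rx_csum(struct sk_buff *skb, bool hw_csum_ok)
    {
            /* Freshly allocated skbs start out as CHECKSUM_NONE. */
            skb_checksum_none_assert(skb);

            if (hw_csum_ok)
                    skb->ip_summed = CHECKSUM_UNNECESSARY;
    }
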
2786 2804
2787 bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); 2805 bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);
2788 2806
2789 u32 __skb_get_poff(const struct sk_buff *skb); 2807 u32 __skb_get_poff(const struct sk_buff *skb);
2790 2808
2791 /** 2809 /**
2792 * skb_head_is_locked - Determine if the skb->head is locked down 2810 * skb_head_is_locked - Determine if the skb->head is locked down
2793 * @skb: skb to check 2811 * @skb: skb to check
2794 * 2812 *
2795 * The head on skbs built around a head frag can be removed if they are 2813 * The head on skbs built around a head frag can be removed if they are
2796 * not cloned. This function returns true if the skb head is locked down 2814 * not cloned. This function returns true if the skb head is locked down
2797 * due to either being allocated via kmalloc, or by being a clone with 2815 * due to either being allocated via kmalloc, or by being a clone with
2798 * multiple references to the head. 2816 * multiple references to the head.
2799 */ 2817 */
2800 static inline bool skb_head_is_locked(const struct sk_buff *skb) 2818 static inline bool skb_head_is_locked(const struct sk_buff *skb)
2801 { 2819 {
2802 return !skb->head_frag || skb_cloned(skb); 2820 return !skb->head_frag || skb_cloned(skb);
2803 } 2821 }
2804 #endif /* __KERNEL__ */ 2822 #endif /* __KERNEL__ */
2805 #endif /* _LINUX_SKBUFF_H */ 2823 #endif /* _LINUX_SKBUFF_H */
2806 2824
1 /* 1 /*
2 * NET3 Protocol independent device support routines. 2 * NET3 Protocol independent device support routines.
3 * 3 *
4 * This program is free software; you can redistribute it and/or 4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License 5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version 6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version. 7 * 2 of the License, or (at your option) any later version.
8 * 8 *
9 * Derived from the non IP parts of dev.c 1.0.19 9 * Derived from the non IP parts of dev.c 1.0.19
10 * Authors: Ross Biro 10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk> 12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
13 * 13 *
14 * Additional Authors: 14 * Additional Authors:
15 * Florian la Roche <rzsfl@rz.uni-sb.de> 15 * Florian la Roche <rzsfl@rz.uni-sb.de>
16 * Alan Cox <gw4pts@gw4pts.ampr.org> 16 * Alan Cox <gw4pts@gw4pts.ampr.org>
17 * David Hinds <dahinds@users.sourceforge.net> 17 * David Hinds <dahinds@users.sourceforge.net>
18 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> 18 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19 * Adam Sulmicki <adam@cfar.umd.edu> 19 * Adam Sulmicki <adam@cfar.umd.edu>
20 * Pekka Riikonen <priikone@poesidon.pspt.fi> 20 * Pekka Riikonen <priikone@poesidon.pspt.fi>
21 * 21 *
22 * Changes: 22 * Changes:
23 * D.J. Barrow : Fixed bug where dev->refcnt gets set 23 * D.J. Barrow : Fixed bug where dev->refcnt gets set
24 * to 2 if register_netdev gets called 24 * to 2 if register_netdev gets called
25 * before net_dev_init & also removed a 25 * before net_dev_init & also removed a
26 * few lines of code in the process. 26 * few lines of code in the process.
27 * Alan Cox : device private ioctl copies fields back. 27 * Alan Cox : device private ioctl copies fields back.
28 * Alan Cox : Transmit queue code does relevant 28 * Alan Cox : Transmit queue code does relevant
29 * stunts to keep the queue safe. 29 * stunts to keep the queue safe.
30 * Alan Cox : Fixed double lock. 30 * Alan Cox : Fixed double lock.
31 * Alan Cox : Fixed promisc NULL pointer trap 31 * Alan Cox : Fixed promisc NULL pointer trap
32 * ???????? : Support the full private ioctl range 32 * ???????? : Support the full private ioctl range
33 * Alan Cox : Moved ioctl permission check into 33 * Alan Cox : Moved ioctl permission check into
34 * drivers 34 * drivers
35 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI 35 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
36 * Alan Cox : 100 backlog just doesn't cut it when 36 * Alan Cox : 100 backlog just doesn't cut it when
37 * you start doing multicast video 8) 37 * you start doing multicast video 8)
38 * Alan Cox : Rewrote net_bh and list manager. 38 * Alan Cox : Rewrote net_bh and list manager.
39 * Alan Cox : Fix ETH_P_ALL echoback lengths. 39 * Alan Cox : Fix ETH_P_ALL echoback lengths.
40 * Alan Cox : Took out transmit every packet pass 40 * Alan Cox : Took out transmit every packet pass
41 * Saved a few bytes in the ioctl handler 41 * Saved a few bytes in the ioctl handler
42 * Alan Cox : Network driver sets packet type before 42 * Alan Cox : Network driver sets packet type before
43 * calling netif_rx. Saves a function 43 * calling netif_rx. Saves a function
44 * call a packet. 44 * call a packet.
45 * Alan Cox : Hashed net_bh() 45 * Alan Cox : Hashed net_bh()
46 * Richard Kooijman: Timestamp fixes. 46 * Richard Kooijman: Timestamp fixes.
47 * Alan Cox : Wrong field in SIOCGIFDSTADDR 47 * Alan Cox : Wrong field in SIOCGIFDSTADDR
48 * Alan Cox : Device lock protection. 48 * Alan Cox : Device lock protection.
49 * Alan Cox : Fixed nasty side effect of device close 49 * Alan Cox : Fixed nasty side effect of device close
50 * changes. 50 * changes.
51 * Rudi Cilibrasi : Pass the right thing to 51 * Rudi Cilibrasi : Pass the right thing to
52 * set_mac_address() 52 * set_mac_address()
53 * Dave Miller : 32bit quantity for the device lock to 53 * Dave Miller : 32bit quantity for the device lock to
54 * make it work out on a Sparc. 54 * make it work out on a Sparc.
55 * Bjorn Ekwall : Added KERNELD hack. 55 * Bjorn Ekwall : Added KERNELD hack.
56 * Alan Cox : Cleaned up the backlog initialise. 56 * Alan Cox : Cleaned up the backlog initialise.
57 * Craig Metz : SIOCGIFCONF fix if space for under 57 * Craig Metz : SIOCGIFCONF fix if space for under
58 * 1 device. 58 * 1 device.
59 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there 59 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
60 * is no device open function. 60 * is no device open function.
61 * Andi Kleen : Fix error reporting for SIOCGIFCONF 61 * Andi Kleen : Fix error reporting for SIOCGIFCONF
62 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF 62 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
63 * Cyrus Durgin : Cleaned for KMOD 63 * Cyrus Durgin : Cleaned for KMOD
64 * Adam Sulmicki : Bug Fix : Network Device Unload 64 * Adam Sulmicki : Bug Fix : Network Device Unload
65 * A network device unload needs to purge 65 * A network device unload needs to purge
66 * the backlog queue. 66 * the backlog queue.
67 * Paul Rusty Russell : SIOCSIFNAME 67 * Paul Rusty Russell : SIOCSIFNAME
68 * Pekka Riikonen : Netdev boot-time settings code 68 * Pekka Riikonen : Netdev boot-time settings code
69 * Andrew Morton : Make unregister_netdevice wait 69 * Andrew Morton : Make unregister_netdevice wait
70 * indefinitely on dev->refcnt 70 * indefinitely on dev->refcnt
71 * J Hadi Salim : - Backlog queue sampling 71 * J Hadi Salim : - Backlog queue sampling
72 * - netif_rx() feedback 72 * - netif_rx() feedback
73 */ 73 */
74 74
75 #include <asm/uaccess.h> 75 #include <asm/uaccess.h>
76 #include <linux/bitops.h> 76 #include <linux/bitops.h>
77 #include <linux/capability.h> 77 #include <linux/capability.h>
78 #include <linux/cpu.h> 78 #include <linux/cpu.h>
79 #include <linux/types.h> 79 #include <linux/types.h>
80 #include <linux/kernel.h> 80 #include <linux/kernel.h>
81 #include <linux/hash.h> 81 #include <linux/hash.h>
82 #include <linux/slab.h> 82 #include <linux/slab.h>
83 #include <linux/sched.h> 83 #include <linux/sched.h>
84 #include <linux/mutex.h> 84 #include <linux/mutex.h>
85 #include <linux/string.h> 85 #include <linux/string.h>
86 #include <linux/mm.h> 86 #include <linux/mm.h>
87 #include <linux/socket.h> 87 #include <linux/socket.h>
88 #include <linux/sockios.h> 88 #include <linux/sockios.h>
89 #include <linux/errno.h> 89 #include <linux/errno.h>
90 #include <linux/interrupt.h> 90 #include <linux/interrupt.h>
91 #include <linux/if_ether.h> 91 #include <linux/if_ether.h>
92 #include <linux/netdevice.h> 92 #include <linux/netdevice.h>
93 #include <linux/etherdevice.h> 93 #include <linux/etherdevice.h>
94 #include <linux/ethtool.h> 94 #include <linux/ethtool.h>
95 #include <linux/notifier.h> 95 #include <linux/notifier.h>
96 #include <linux/skbuff.h> 96 #include <linux/skbuff.h>
97 #include <net/net_namespace.h> 97 #include <net/net_namespace.h>
98 #include <net/sock.h> 98 #include <net/sock.h>
99 #include <linux/rtnetlink.h> 99 #include <linux/rtnetlink.h>
100 #include <linux/stat.h> 100 #include <linux/stat.h>
101 #include <net/dst.h> 101 #include <net/dst.h>
102 #include <net/pkt_sched.h> 102 #include <net/pkt_sched.h>
103 #include <net/checksum.h> 103 #include <net/checksum.h>
104 #include <net/xfrm.h> 104 #include <net/xfrm.h>
105 #include <linux/highmem.h> 105 #include <linux/highmem.h>
106 #include <linux/init.h> 106 #include <linux/init.h>
107 #include <linux/module.h> 107 #include <linux/module.h>
108 #include <linux/netpoll.h> 108 #include <linux/netpoll.h>
109 #include <linux/rcupdate.h> 109 #include <linux/rcupdate.h>
110 #include <linux/delay.h> 110 #include <linux/delay.h>
111 #include <net/iw_handler.h> 111 #include <net/iw_handler.h>
112 #include <asm/current.h> 112 #include <asm/current.h>
113 #include <linux/audit.h> 113 #include <linux/audit.h>
114 #include <linux/dmaengine.h> 114 #include <linux/dmaengine.h>
115 #include <linux/err.h> 115 #include <linux/err.h>
116 #include <linux/ctype.h> 116 #include <linux/ctype.h>
117 #include <linux/if_arp.h> 117 #include <linux/if_arp.h>
118 #include <linux/if_vlan.h> 118 #include <linux/if_vlan.h>
119 #include <linux/ip.h> 119 #include <linux/ip.h>
120 #include <net/ip.h> 120 #include <net/ip.h>
121 #include <linux/ipv6.h> 121 #include <linux/ipv6.h>
122 #include <linux/in.h> 122 #include <linux/in.h>
123 #include <linux/jhash.h> 123 #include <linux/jhash.h>
124 #include <linux/random.h> 124 #include <linux/random.h>
125 #include <trace/events/napi.h> 125 #include <trace/events/napi.h>
126 #include <trace/events/net.h> 126 #include <trace/events/net.h>
127 #include <trace/events/skb.h> 127 #include <trace/events/skb.h>
128 #include <linux/pci.h> 128 #include <linux/pci.h>
129 #include <linux/inetdevice.h> 129 #include <linux/inetdevice.h>
130 #include <linux/cpu_rmap.h> 130 #include <linux/cpu_rmap.h>
131 #include <linux/static_key.h> 131 #include <linux/static_key.h>
132 #include <linux/hashtable.h> 132 #include <linux/hashtable.h>
133 #include <linux/vmalloc.h> 133 #include <linux/vmalloc.h>
134 #include <linux/if_macvlan.h> 134 #include <linux/if_macvlan.h>
135 135
136 #include "net-sysfs.h" 136 #include "net-sysfs.h"
137 137
138 /* Instead of increasing this, you should create a hash table. */ 138 /* Instead of increasing this, you should create a hash table. */
139 #define MAX_GRO_SKBS 8 139 #define MAX_GRO_SKBS 8
140 140
141 /* This should be increased if a protocol with a bigger head is added. */ 141 /* This should be increased if a protocol with a bigger head is added. */
142 #define GRO_MAX_HEAD (MAX_HEADER + 128) 142 #define GRO_MAX_HEAD (MAX_HEADER + 128)
143 143
144 static DEFINE_SPINLOCK(ptype_lock); 144 static DEFINE_SPINLOCK(ptype_lock);
145 static DEFINE_SPINLOCK(offload_lock); 145 static DEFINE_SPINLOCK(offload_lock);
146 struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; 146 struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
147 struct list_head ptype_all __read_mostly; /* Taps */ 147 struct list_head ptype_all __read_mostly; /* Taps */
148 static struct list_head offload_base __read_mostly; 148 static struct list_head offload_base __read_mostly;
149 149
150 /* 150 /*
151 * The @dev_base_head list is protected by @dev_base_lock and the rtnl 151 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
152 * semaphore. 152 * semaphore.
153 * 153 *
154 * Pure readers hold dev_base_lock for reading, or rcu_read_lock() 154 * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
155 * 155 *
156 * Writers must hold the rtnl semaphore while they loop through the 156 * Writers must hold the rtnl semaphore while they loop through the
157 * dev_base_head list, and hold dev_base_lock for writing when they do the 157 * dev_base_head list, and hold dev_base_lock for writing when they do the
158 * actual updates. This allows pure readers to access the list even 158 * actual updates. This allows pure readers to access the list even
159 * while a writer is preparing to update it. 159 * while a writer is preparing to update it.
160 * 160 *
161 * To put it another way, dev_base_lock is held for writing only to 161 * To put it another way, dev_base_lock is held for writing only to
162 * protect against pure readers; the rtnl semaphore provides the 162 * protect against pure readers; the rtnl semaphore provides the
163 * protection against other writers. 163 * protection against other writers.
164 * 164 *
165 * See, for example usages, register_netdevice() and 165 * See, for example usages, register_netdevice() and
166 * unregister_netdevice(), which must be called with the rtnl 166 * unregister_netdevice(), which must be called with the rtnl
167 * semaphore held. 167 * semaphore held.
168 */ 168 */
169 DEFINE_RWLOCK(dev_base_lock); 169 DEFINE_RWLOCK(dev_base_lock);
170 EXPORT_SYMBOL(dev_base_lock); 170 EXPORT_SYMBOL(dev_base_lock);
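
Per the locking rules above, a pure reader may walk the device list under either read_lock(&dev_base_lock) or rcu_read_lock(); only writers need the RTNL plus write_lock_bh(&dev_base_lock). A minimal RCU-side sketch (the iteration macro comes from <linux/netdevice.h>); treat it as an illustration of the rule rather than code from this file:

    static void my_count_devices(struct net *net)
    {
            struct net_device *dev;
            unsigned int n = 0;

            rcu_read_lock();
            for_each_netdev_rcu(net, dev)   /* pure reader: no dev_base_lock */
                    n++;
            rcu_read_unlock();

            pr_info("%u devices in this netns\n", n);
    }
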
171 171
172 /* protects napi_hash addition/deletion and napi_gen_id */ 172 /* protects napi_hash addition/deletion and napi_gen_id */
173 static DEFINE_SPINLOCK(napi_hash_lock); 173 static DEFINE_SPINLOCK(napi_hash_lock);
174 174
175 static unsigned int napi_gen_id; 175 static unsigned int napi_gen_id;
176 static DEFINE_HASHTABLE(napi_hash, 8); 176 static DEFINE_HASHTABLE(napi_hash, 8);
177 177
178 static seqcount_t devnet_rename_seq; 178 static seqcount_t devnet_rename_seq;
179 179
180 static inline void dev_base_seq_inc(struct net *net) 180 static inline void dev_base_seq_inc(struct net *net)
181 { 181 {
182 while (++net->dev_base_seq == 0); 182 while (++net->dev_base_seq == 0);
183 } 183 }
184 184
185 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name) 185 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
186 { 186 {
187 unsigned int hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); 187 unsigned int hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
188 188
189 return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)]; 189 return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
190 } 190 }
191 191
192 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) 192 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
193 { 193 {
194 return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)]; 194 return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
195 } 195 }
196 196
197 static inline void rps_lock(struct softnet_data *sd) 197 static inline void rps_lock(struct softnet_data *sd)
198 { 198 {
199 #ifdef CONFIG_RPS 199 #ifdef CONFIG_RPS
200 spin_lock(&sd->input_pkt_queue.lock); 200 spin_lock(&sd->input_pkt_queue.lock);
201 #endif 201 #endif
202 } 202 }
203 203
204 static inline void rps_unlock(struct softnet_data *sd) 204 static inline void rps_unlock(struct softnet_data *sd)
205 { 205 {
206 #ifdef CONFIG_RPS 206 #ifdef CONFIG_RPS
207 spin_unlock(&sd->input_pkt_queue.lock); 207 spin_unlock(&sd->input_pkt_queue.lock);
208 #endif 208 #endif
209 } 209 }
210 210
211 /* Device list insertion */ 211 /* Device list insertion */
212 static void list_netdevice(struct net_device *dev) 212 static void list_netdevice(struct net_device *dev)
213 { 213 {
214 struct net *net = dev_net(dev); 214 struct net *net = dev_net(dev);
215 215
216 ASSERT_RTNL(); 216 ASSERT_RTNL();
217 217
218 write_lock_bh(&dev_base_lock); 218 write_lock_bh(&dev_base_lock);
219 list_add_tail_rcu(&dev->dev_list, &net->dev_base_head); 219 list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
220 hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name)); 220 hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
221 hlist_add_head_rcu(&dev->index_hlist, 221 hlist_add_head_rcu(&dev->index_hlist,
222 dev_index_hash(net, dev->ifindex)); 222 dev_index_hash(net, dev->ifindex));
223 write_unlock_bh(&dev_base_lock); 223 write_unlock_bh(&dev_base_lock);
224 224
225 dev_base_seq_inc(net); 225 dev_base_seq_inc(net);
226 } 226 }
227 227
228 /* Device list removal 228 /* Device list removal
229 * caller must respect a RCU grace period before freeing/reusing dev 229 * caller must respect a RCU grace period before freeing/reusing dev
230 */ 230 */
231 static void unlist_netdevice(struct net_device *dev) 231 static void unlist_netdevice(struct net_device *dev)
232 { 232 {
233 ASSERT_RTNL(); 233 ASSERT_RTNL();
234 234
235 /* Unlink dev from the device chain */ 235 /* Unlink dev from the device chain */
236 write_lock_bh(&dev_base_lock); 236 write_lock_bh(&dev_base_lock);
237 list_del_rcu(&dev->dev_list); 237 list_del_rcu(&dev->dev_list);
238 hlist_del_rcu(&dev->name_hlist); 238 hlist_del_rcu(&dev->name_hlist);
239 hlist_del_rcu(&dev->index_hlist); 239 hlist_del_rcu(&dev->index_hlist);
240 write_unlock_bh(&dev_base_lock); 240 write_unlock_bh(&dev_base_lock);
241 241
242 dev_base_seq_inc(dev_net(dev)); 242 dev_base_seq_inc(dev_net(dev));
243 } 243 }
244 244
245 /* 245 /*
246 * Our notifier list 246 * Our notifier list
247 */ 247 */
248 248
249 static RAW_NOTIFIER_HEAD(netdev_chain); 249 static RAW_NOTIFIER_HEAD(netdev_chain);
250 250
251 /* 251 /*
252 * Device drivers call our routines to queue packets here. We empty the 252 * Device drivers call our routines to queue packets here. We empty the
253 * queue in the local softnet handler. 253 * queue in the local softnet handler.
254 */ 254 */
255 255
256 DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); 256 DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
257 EXPORT_PER_CPU_SYMBOL(softnet_data); 257 EXPORT_PER_CPU_SYMBOL(softnet_data);
258 258
259 #ifdef CONFIG_LOCKDEP 259 #ifdef CONFIG_LOCKDEP
260 /* 260 /*
261 * register_netdevice() inits txq->_xmit_lock and sets lockdep class 261 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
262 * according to dev->type 262 * according to dev->type
263 */ 263 */
264 static const unsigned short netdev_lock_type[] = 264 static const unsigned short netdev_lock_type[] =
265 {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25, 265 {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
266 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET, 266 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
267 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM, 267 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
268 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP, 268 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
269 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD, 269 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
270 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25, 270 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
271 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP, 271 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
272 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD, 272 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
273 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI, 273 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
274 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE, 274 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
275 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET, 275 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
276 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, 276 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
277 ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM, 277 ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
278 ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE, 278 ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
279 ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE}; 279 ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};
280 280
281 static const char *const netdev_lock_name[] = 281 static const char *const netdev_lock_name[] =
282 {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", 282 {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
283 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET", 283 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
284 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM", 284 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
285 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP", 285 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
286 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD", 286 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
287 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25", 287 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
288 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP", 288 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
289 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD", 289 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
290 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI", 290 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
291 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE", 291 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
292 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET", 292 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
293 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", 293 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
294 "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM", 294 "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
295 "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE", 295 "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
296 "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"}; 296 "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
297 297
298 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; 298 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
299 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; 299 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
300 300
301 static inline unsigned short netdev_lock_pos(unsigned short dev_type) 301 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
302 { 302 {
303 int i; 303 int i;
304 304
305 for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++) 305 for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
306 if (netdev_lock_type[i] == dev_type) 306 if (netdev_lock_type[i] == dev_type)
307 return i; 307 return i;
308 /* the last key is used by default */ 308 /* the last key is used by default */
309 return ARRAY_SIZE(netdev_lock_type) - 1; 309 return ARRAY_SIZE(netdev_lock_type) - 1;
310 } 310 }
311 311
312 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, 312 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
313 unsigned short dev_type) 313 unsigned short dev_type)
314 { 314 {
315 int i; 315 int i;
316 316
317 i = netdev_lock_pos(dev_type); 317 i = netdev_lock_pos(dev_type);
318 lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i], 318 lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
319 netdev_lock_name[i]); 319 netdev_lock_name[i]);
320 } 320 }
321 321
322 static inline void netdev_set_addr_lockdep_class(struct net_device *dev) 322 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
323 { 323 {
324 int i; 324 int i;
325 325
326 i = netdev_lock_pos(dev->type); 326 i = netdev_lock_pos(dev->type);
327 lockdep_set_class_and_name(&dev->addr_list_lock, 327 lockdep_set_class_and_name(&dev->addr_list_lock,
328 &netdev_addr_lock_key[i], 328 &netdev_addr_lock_key[i],
329 netdev_lock_name[i]); 329 netdev_lock_name[i]);
330 } 330 }
331 #else 331 #else
332 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, 332 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
333 unsigned short dev_type) 333 unsigned short dev_type)
334 { 334 {
335 } 335 }
336 static inline void netdev_set_addr_lockdep_class(struct net_device *dev) 336 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
337 { 337 {
338 } 338 }
339 #endif 339 #endif
340 340
341 /******************************************************************************* 341 /*******************************************************************************
342 342
343 Protocol management and registration routines 343 Protocol management and registration routines
344 344
345 *******************************************************************************/ 345 *******************************************************************************/
346 346
347 /* 347 /*
348 * Add a protocol ID to the list. Now that the input handler is 348 * Add a protocol ID to the list. Now that the input handler is
349 * smarter we can dispense with all the messy stuff that used to be 349 * smarter we can dispense with all the messy stuff that used to be
350 * here. 350 * here.
351 * 351 *
352 * BEWARE!!! Protocol handlers, mangling input packets, 352 * BEWARE!!! Protocol handlers, mangling input packets,
353 * MUST BE last in hash buckets and checking protocol handlers 353 * MUST BE last in hash buckets and checking protocol handlers
354 * MUST start from promiscuous ptype_all chain in net_bh. 354 * MUST start from promiscuous ptype_all chain in net_bh.
355 * It is true now, do not change it. 355 * It is true now, do not change it.
356 * Explanation follows: if protocol handler, mangling packet, will 356 * Explanation follows: if protocol handler, mangling packet, will
357 * be the first on list, it is not able to sense, that packet 357 * be the first on list, it is not able to sense, that packet
358 * is cloned and should be copied-on-write, so that it will 358 * is cloned and should be copied-on-write, so that it will
359 * change it and subsequent readers will get broken packet. 359 * change it and subsequent readers will get broken packet.
360 * --ANK (980803) 360 * --ANK (980803)
361 */ 361 */
362 362
363 static inline struct list_head *ptype_head(const struct packet_type *pt) 363 static inline struct list_head *ptype_head(const struct packet_type *pt)
364 { 364 {
365 if (pt->type == htons(ETH_P_ALL)) 365 if (pt->type == htons(ETH_P_ALL))
366 return &ptype_all; 366 return &ptype_all;
367 else 367 else
368 return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; 368 return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
369 } 369 }
370 370
371 /** 371 /**
372 * dev_add_pack - add packet handler 372 * dev_add_pack - add packet handler
373 * @pt: packet type declaration 373 * @pt: packet type declaration
374 * 374 *
375 * Add a protocol handler to the networking stack. The passed &packet_type 375 * Add a protocol handler to the networking stack. The passed &packet_type
376 * is linked into kernel lists and may not be freed until it has been 376 * is linked into kernel lists and may not be freed until it has been
377 * removed from the kernel lists. 377 * removed from the kernel lists.
378 * 378 *
379 * This call does not sleep, therefore it cannot 379 * This call does not sleep, therefore it cannot
380 * guarantee that all CPUs in the middle of receiving packets 380 * guarantee that all CPUs in the middle of receiving packets
381 * will see the new packet type (until the next received packet). 381 * will see the new packet type (until the next received packet).
382 */ 382 */
383 383
384 void dev_add_pack(struct packet_type *pt) 384 void dev_add_pack(struct packet_type *pt)
385 { 385 {
386 struct list_head *head = ptype_head(pt); 386 struct list_head *head = ptype_head(pt);
387 387
388 spin_lock(&ptype_lock); 388 spin_lock(&ptype_lock);
389 list_add_rcu(&pt->list, head); 389 list_add_rcu(&pt->list, head);
390 spin_unlock(&ptype_lock); 390 spin_unlock(&ptype_lock);
391 } 391 }
392 EXPORT_SYMBOL(dev_add_pack); 392 EXPORT_SYMBOL(dev_add_pack);
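
A handler registered this way ends up on ptype_all (for ETH_P_ALL taps) or in the ptype_base hash, exactly as ptype_head() decides above. The following is a hedged sketch of a module-style tap with hypothetical names; each tap is handed its own reference on the skb and must consume it:

    static int my_tap_rcv(struct sk_buff *skb, struct net_device *dev,
                          struct packet_type *pt, struct net_device *orig_dev)
    {
            pr_debug("tap: %u bytes on %s\n", skb->len, dev->name);
            kfree_skb(skb);         /* consume the reference given to the tap */
            return 0;
    }

    static struct packet_type my_tap __read_mostly = {
            .type   = htons(ETH_P_ALL),     /* see ptype_head(): goes on ptype_all */
            .func   = my_tap_rcv,
    };

    /* dev_add_pack(&my_tap) at module init, dev_remove_pack(&my_tap) at exit. */
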
393 393
394 /** 394 /**
395 * __dev_remove_pack - remove packet handler 395 * __dev_remove_pack - remove packet handler
396 * @pt: packet type declaration 396 * @pt: packet type declaration
397 * 397 *
398 * Remove a protocol handler that was previously added to the kernel 398 * Remove a protocol handler that was previously added to the kernel
399 * protocol handlers by dev_add_pack(). The passed &packet_type is removed 399 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
400 * from the kernel lists and can be freed or reused once this function 400 * from the kernel lists and can be freed or reused once this function
401 * returns. 401 * returns.
402 * 402 *
403 * The packet type might still be in use by receivers 403 * The packet type might still be in use by receivers
404 * and must not be freed until after all the CPU's have gone 404 * and must not be freed until after all the CPU's have gone
405 * through a quiescent state. 405 * through a quiescent state.
406 */ 406 */
407 void __dev_remove_pack(struct packet_type *pt) 407 void __dev_remove_pack(struct packet_type *pt)
408 { 408 {
409 struct list_head *head = ptype_head(pt); 409 struct list_head *head = ptype_head(pt);
410 struct packet_type *pt1; 410 struct packet_type *pt1;
411 411
412 spin_lock(&ptype_lock); 412 spin_lock(&ptype_lock);
413 413
414 list_for_each_entry(pt1, head, list) { 414 list_for_each_entry(pt1, head, list) {
415 if (pt == pt1) { 415 if (pt == pt1) {
416 list_del_rcu(&pt->list); 416 list_del_rcu(&pt->list);
417 goto out; 417 goto out;
418 } 418 }
419 } 419 }
420 420
421 pr_warn("dev_remove_pack: %p not found\n", pt); 421 pr_warn("dev_remove_pack: %p not found\n", pt);
422 out: 422 out:
423 spin_unlock(&ptype_lock); 423 spin_unlock(&ptype_lock);
424 } 424 }
425 EXPORT_SYMBOL(__dev_remove_pack); 425 EXPORT_SYMBOL(__dev_remove_pack);
426 426
427 /** 427 /**
428 * dev_remove_pack - remove packet handler 428 * dev_remove_pack - remove packet handler
429 * @pt: packet type declaration 429 * @pt: packet type declaration
430 * 430 *
431 * Remove a protocol handler that was previously added to the kernel 431 * Remove a protocol handler that was previously added to the kernel
432 * protocol handlers by dev_add_pack(). The passed &packet_type is removed 432 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
433 * from the kernel lists and can be freed or reused once this function 433 * from the kernel lists and can be freed or reused once this function
434 * returns. 434 * returns.
435 * 435 *
436 * This call sleeps to guarantee that no CPU is looking at the packet 436 * This call sleeps to guarantee that no CPU is looking at the packet
437 * type after return. 437 * type after return.
438 */ 438 */
439 void dev_remove_pack(struct packet_type *pt) 439 void dev_remove_pack(struct packet_type *pt)
440 { 440 {
441 __dev_remove_pack(pt); 441 __dev_remove_pack(pt);
442 442
443 synchronize_net(); 443 synchronize_net();
444 } 444 }
445 EXPORT_SYMBOL(dev_remove_pack); 445 EXPORT_SYMBOL(dev_remove_pack);
446 446
447 447
448 /** 448 /**
449 * dev_add_offload - register offload handlers 449 * dev_add_offload - register offload handlers
450 * @po: protocol offload declaration 450 * @po: protocol offload declaration
451 * 451 *
452 * Add protocol offload handlers to the networking stack. The passed 452 * Add protocol offload handlers to the networking stack. The passed
453 * &proto_offload is linked into kernel lists and may not be freed until 453 * &proto_offload is linked into kernel lists and may not be freed until
454 * it has been removed from the kernel lists. 454 * it has been removed from the kernel lists.
455 * 455 *
456 * This call does not sleep, therefore it cannot 456 * This call does not sleep, therefore it cannot
457 * guarantee that all CPUs in the middle of receiving packets 457 * guarantee that all CPUs in the middle of receiving packets
458 * will see the new offload handlers (until the next received packet). 458 * will see the new offload handlers (until the next received packet).
459 */ 459 */
460 void dev_add_offload(struct packet_offload *po) 460 void dev_add_offload(struct packet_offload *po)
461 { 461 {
462 struct list_head *head = &offload_base; 462 struct list_head *head = &offload_base;
463 463
464 spin_lock(&offload_lock); 464 spin_lock(&offload_lock);
465 list_add_rcu(&po->list, head); 465 list_add_rcu(&po->list, head);
466 spin_unlock(&offload_lock); 466 spin_unlock(&offload_lock);
467 } 467 }
468 EXPORT_SYMBOL(dev_add_offload); 468 EXPORT_SYMBOL(dev_add_offload);
469 469
470 /** 470 /**
471 * __dev_remove_offload - remove offload handler 471 * __dev_remove_offload - remove offload handler
472 * @po: packet offload declaration 472 * @po: packet offload declaration
473 * 473 *
474 * Remove a protocol offload handler that was previously added to the 474 * Remove a protocol offload handler that was previously added to the
475 * kernel offload handlers by dev_add_offload(). The passed &offload_type 475 * kernel offload handlers by dev_add_offload(). The passed &offload_type
476 * is removed from the kernel lists and can be freed or reused once this 476 * is removed from the kernel lists and can be freed or reused once this
477 * function returns. 477 * function returns.
478 * 478 *
479 * The packet type might still be in use by receivers 479 * The packet type might still be in use by receivers
480 * and must not be freed until after all the CPU's have gone 480 * and must not be freed until after all the CPU's have gone
481 * through a quiescent state. 481 * through a quiescent state.
482 */ 482 */
483 void __dev_remove_offload(struct packet_offload *po) 483 void __dev_remove_offload(struct packet_offload *po)
484 { 484 {
485 struct list_head *head = &offload_base; 485 struct list_head *head = &offload_base;
486 struct packet_offload *po1; 486 struct packet_offload *po1;
487 487
488 spin_lock(&offload_lock); 488 spin_lock(&offload_lock);
489 489
490 list_for_each_entry(po1, head, list) { 490 list_for_each_entry(po1, head, list) {
491 if (po == po1) { 491 if (po == po1) {
492 list_del_rcu(&po->list); 492 list_del_rcu(&po->list);
493 goto out; 493 goto out;
494 } 494 }
495 } 495 }
496 496
497 pr_warn("dev_remove_offload: %p not found\n", po); 497 pr_warn("dev_remove_offload: %p not found\n", po);
498 out: 498 out:
499 spin_unlock(&offload_lock); 499 spin_unlock(&offload_lock);
500 } 500 }
501 EXPORT_SYMBOL(__dev_remove_offload); 501 EXPORT_SYMBOL(__dev_remove_offload);
502 502
503 /** 503 /**
504 * dev_remove_offload - remove packet offload handler 504 * dev_remove_offload - remove packet offload handler
505 * @po: packet offload declaration 505 * @po: packet offload declaration
506 * 506 *
507 * Remove a packet offload handler that was previously added to the kernel 507 * Remove a packet offload handler that was previously added to the kernel
508 * offload handlers by dev_add_offload(). The passed &offload_type is 508 * offload handlers by dev_add_offload(). The passed &offload_type is
509 * removed from the kernel lists and can be freed or reused once this 509 * removed from the kernel lists and can be freed or reused once this
510 * function returns. 510 * function returns.
511 * 511 *
512 * This call sleeps to guarantee that no CPU is looking at the packet 512 * This call sleeps to guarantee that no CPU is looking at the packet
513 * type after return. 513 * type after return.
514 */ 514 */
515 void dev_remove_offload(struct packet_offload *po) 515 void dev_remove_offload(struct packet_offload *po)
516 { 516 {
517 __dev_remove_offload(po); 517 __dev_remove_offload(po);
518 518
519 synchronize_net(); 519 synchronize_net();
520 } 520 }
521 EXPORT_SYMBOL(dev_remove_offload); 521 EXPORT_SYMBOL(dev_remove_offload);
522 522
523 /****************************************************************************** 523 /******************************************************************************
524 524
525 Device Boot-time Settings Routines 525 Device Boot-time Settings Routines
526 526
527 *******************************************************************************/ 527 *******************************************************************************/
528 528
529 /* Boot time configuration table */ 529 /* Boot time configuration table */
530 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX]; 530 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
531 531
532 /** 532 /**
533 * netdev_boot_setup_add - add new setup entry 533 * netdev_boot_setup_add - add new setup entry
534 * @name: name of the device 534 * @name: name of the device
535 * @map: configured settings for the device 535 * @map: configured settings for the device
536 * 536 *
537 * Adds new setup entry to the dev_boot_setup list. The function 537 * Adds new setup entry to the dev_boot_setup list. The function
538 * returns 0 on error and 1 on success. This is a generic routine to 538 * returns 0 on error and 1 on success. This is a generic routine to
539 * all netdevices. 539 * all netdevices.
540 */ 540 */
541 static int netdev_boot_setup_add(char *name, struct ifmap *map) 541 static int netdev_boot_setup_add(char *name, struct ifmap *map)
542 { 542 {
543 struct netdev_boot_setup *s; 543 struct netdev_boot_setup *s;
544 int i; 544 int i;
545 545
546 s = dev_boot_setup; 546 s = dev_boot_setup;
547 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { 547 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
548 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { 548 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
549 memset(s[i].name, 0, sizeof(s[i].name)); 549 memset(s[i].name, 0, sizeof(s[i].name));
550 strlcpy(s[i].name, name, IFNAMSIZ); 550 strlcpy(s[i].name, name, IFNAMSIZ);
551 memcpy(&s[i].map, map, sizeof(s[i].map)); 551 memcpy(&s[i].map, map, sizeof(s[i].map));
552 break; 552 break;
553 } 553 }
554 } 554 }
555 555
556 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1; 556 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
557 } 557 }
558 558
559 /** 559 /**
560 * netdev_boot_setup_check - check boot time settings 560 * netdev_boot_setup_check - check boot time settings
561 * @dev: the netdevice 561 * @dev: the netdevice
562 * 562 *
563 * Check boot time settings for the device. 563 * Check boot time settings for the device.
564 * The found settings are set for the device to be used 564 * The found settings are set for the device to be used
565 * later in the device probing. 565 * later in the device probing.
566 * Returns 0 if no settings found, 1 if they are. 566 * Returns 0 if no settings found, 1 if they are.
567 */ 567 */
568 int netdev_boot_setup_check(struct net_device *dev) 568 int netdev_boot_setup_check(struct net_device *dev)
569 { 569 {
570 struct netdev_boot_setup *s = dev_boot_setup; 570 struct netdev_boot_setup *s = dev_boot_setup;
571 int i; 571 int i;
572 572
573 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { 573 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
574 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && 574 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
575 !strcmp(dev->name, s[i].name)) { 575 !strcmp(dev->name, s[i].name)) {
576 dev->irq = s[i].map.irq; 576 dev->irq = s[i].map.irq;
577 dev->base_addr = s[i].map.base_addr; 577 dev->base_addr = s[i].map.base_addr;
578 dev->mem_start = s[i].map.mem_start; 578 dev->mem_start = s[i].map.mem_start;
579 dev->mem_end = s[i].map.mem_end; 579 dev->mem_end = s[i].map.mem_end;
580 return 1; 580 return 1;
581 } 581 }
582 } 582 }
583 return 0; 583 return 0;
584 } 584 }
585 EXPORT_SYMBOL(netdev_boot_setup_check); 585 EXPORT_SYMBOL(netdev_boot_setup_check);
586 586
587 587
588 /** 588 /**
589 * netdev_boot_base - get address from boot time settings 589 * netdev_boot_base - get address from boot time settings
590 * @prefix: prefix for network device 590 * @prefix: prefix for network device
591 * @unit: id for network device 591 * @unit: id for network device
592 * 592 *
593 * Check boot time settings for the base address of device. 593 * Check boot time settings for the base address of device.
594 * The found settings are set for the device to be used 594 * The found settings are set for the device to be used
595 * later in the device probing. 595 * later in the device probing.
596 * Returns 0 if no settings found. 596 * Returns 0 if no settings found.
597 */ 597 */
598 unsigned long netdev_boot_base(const char *prefix, int unit) 598 unsigned long netdev_boot_base(const char *prefix, int unit)
599 { 599 {
600 const struct netdev_boot_setup *s = dev_boot_setup; 600 const struct netdev_boot_setup *s = dev_boot_setup;
601 char name[IFNAMSIZ]; 601 char name[IFNAMSIZ];
602 int i; 602 int i;
603 603
604 sprintf(name, "%s%d", prefix, unit); 604 sprintf(name, "%s%d", prefix, unit);
605 605
606 /* 606 /*
607 * If device already registered then return base of 1 607 * If device already registered then return base of 1
608 * to indicate not to probe for this interface 608 * to indicate not to probe for this interface
609 */ 609 */
610 if (__dev_get_by_name(&init_net, name)) 610 if (__dev_get_by_name(&init_net, name))
611 return 1; 611 return 1;
612 612
613 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) 613 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
614 if (!strcmp(name, s[i].name)) 614 if (!strcmp(name, s[i].name))
615 return s[i].map.base_addr; 615 return s[i].map.base_addr;
616 return 0; 616 return 0;
617 } 617 }
618 618
619 /* 619 /*
620 * Saves at boot time configured settings for any netdevice. 620 * Saves at boot time configured settings for any netdevice.
621 */ 621 */
622 int __init netdev_boot_setup(char *str) 622 int __init netdev_boot_setup(char *str)
623 { 623 {
624 int ints[5]; 624 int ints[5];
625 struct ifmap map; 625 struct ifmap map;
626 626
627 str = get_options(str, ARRAY_SIZE(ints), ints); 627 str = get_options(str, ARRAY_SIZE(ints), ints);
628 if (!str || !*str) 628 if (!str || !*str)
629 return 0; 629 return 0;
630 630
631 /* Save settings */ 631 /* Save settings */
632 memset(&map, 0, sizeof(map)); 632 memset(&map, 0, sizeof(map));
633 if (ints[0] > 0) 633 if (ints[0] > 0)
634 map.irq = ints[1]; 634 map.irq = ints[1];
635 if (ints[0] > 1) 635 if (ints[0] > 1)
636 map.base_addr = ints[2]; 636 map.base_addr = ints[2];
637 if (ints[0] > 2) 637 if (ints[0] > 2)
638 map.mem_start = ints[3]; 638 map.mem_start = ints[3];
639 if (ints[0] > 3) 639 if (ints[0] > 3)
640 map.mem_end = ints[4]; 640 map.mem_end = ints[4];
641 641
642 /* Add new entry to the list */ 642 /* Add new entry to the list */
643 return netdev_boot_setup_add(str, &map); 643 return netdev_boot_setup_add(str, &map);
644 } 644 }
645 645
646 __setup("netdev=", netdev_boot_setup); 646 __setup("netdev=", netdev_boot_setup);
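
Reading netdev_boot_setup() together with netdev_boot_setup_add() above gives the shape of the accepted kernel command-line option: up to four integers (irq, base I/O address, memory start, memory end) followed by the interface name. For example (values are illustrative only):

    netdev=9,0x300,0,0,eth0

A later netdev_boot_setup_check() for a device named eth0 would then copy irq 9 and I/O base 0x300 into that device before probing.
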
647 647
648 /******************************************************************************* 648 /*******************************************************************************
649 649
650 Device Interface Subroutines 650 Device Interface Subroutines
651 651
652 *******************************************************************************/ 652 *******************************************************************************/
653 653
654 /** 654 /**
655 * __dev_get_by_name - find a device by its name 655 * __dev_get_by_name - find a device by its name
656 * @net: the applicable net namespace 656 * @net: the applicable net namespace
657 * @name: name to find 657 * @name: name to find
658 * 658 *
659 * Find an interface by name. Must be called under RTNL semaphore 659 * Find an interface by name. Must be called under RTNL semaphore
660 * or @dev_base_lock. If the name is found a pointer to the device 660 * or @dev_base_lock. If the name is found a pointer to the device
661 * is returned. If the name is not found then %NULL is returned. The 661 * is returned. If the name is not found then %NULL is returned. The
662 * reference counters are not incremented so the caller must be 662 * reference counters are not incremented so the caller must be
663 * careful with locks. 663 * careful with locks.
664 */ 664 */
665 665
666 struct net_device *__dev_get_by_name(struct net *net, const char *name) 666 struct net_device *__dev_get_by_name(struct net *net, const char *name)
667 { 667 {
668 struct net_device *dev; 668 struct net_device *dev;
669 struct hlist_head *head = dev_name_hash(net, name); 669 struct hlist_head *head = dev_name_hash(net, name);
670 670
671 hlist_for_each_entry(dev, head, name_hlist) 671 hlist_for_each_entry(dev, head, name_hlist)
672 if (!strncmp(dev->name, name, IFNAMSIZ)) 672 if (!strncmp(dev->name, name, IFNAMSIZ))
673 return dev; 673 return dev;
674 674
675 return NULL; 675 return NULL;
676 } 676 }
677 EXPORT_SYMBOL(__dev_get_by_name); 677 EXPORT_SYMBOL(__dev_get_by_name);
678 678
679 /** 679 /**
680 * dev_get_by_name_rcu - find a device by its name 680 * dev_get_by_name_rcu - find a device by its name
681 * @net: the applicable net namespace 681 * @net: the applicable net namespace
682 * @name: name to find 682 * @name: name to find
683 * 683 *
684 * Find an interface by name. 684 * Find an interface by name.
685 * If the name is found a pointer to the device is returned. 685 * If the name is found a pointer to the device is returned.
686 * If the name is not found then %NULL is returned. 686 * If the name is not found then %NULL is returned.
687 * The reference counters are not incremented so the caller must be 687 * The reference counters are not incremented so the caller must be
688 * careful with locks. The caller must hold RCU lock. 688 * careful with locks. The caller must hold RCU lock.
689 */ 689 */
690 690
691 struct net_device *dev_get_by_name_rcu(struct net *net, const char *name) 691 struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
692 { 692 {
693 struct net_device *dev; 693 struct net_device *dev;
694 struct hlist_head *head = dev_name_hash(net, name); 694 struct hlist_head *head = dev_name_hash(net, name);
695 695
696 hlist_for_each_entry_rcu(dev, head, name_hlist) 696 hlist_for_each_entry_rcu(dev, head, name_hlist)
697 if (!strncmp(dev->name, name, IFNAMSIZ)) 697 if (!strncmp(dev->name, name, IFNAMSIZ))
698 return dev; 698 return dev;
699 699
700 return NULL; 700 return NULL;
701 } 701 }
702 EXPORT_SYMBOL(dev_get_by_name_rcu); 702 EXPORT_SYMBOL(dev_get_by_name_rcu);
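
Because this variant takes no reference, the returned pointer is only guaranteed to remain valid inside the RCU read-side critical section. A minimal usage sketch under that assumption (function name is hypothetical):

    static int my_ifindex_by_name(struct net *net, const char *name)
    {
            struct net_device *dev;
            int ifindex = 0;

            rcu_read_lock();
            dev = dev_get_by_name_rcu(net, name);
            if (dev)
                    ifindex = dev->ifindex; /* valid only inside the RCU section */
            rcu_read_unlock();

            return ifindex;
    }
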
703 703
704 /** 704 /**
705 * dev_get_by_name - find a device by its name 705 * dev_get_by_name - find a device by its name
706 * @net: the applicable net namespace 706 * @net: the applicable net namespace
707 * @name: name to find 707 * @name: name to find
708 * 708 *
709 * Find an interface by name. This can be called from any 709 * Find an interface by name. This can be called from any
710 * context and does its own locking. The returned handle has 710 * context and does its own locking. The returned handle has
711 * the usage count incremented and the caller must use dev_put() to 711 * the usage count incremented and the caller must use dev_put() to
712 * release it when it is no longer needed. %NULL is returned if no 712 * release it when it is no longer needed. %NULL is returned if no
713 * matching device is found. 713 * matching device is found.
714 */ 714 */
715 715
716 struct net_device *dev_get_by_name(struct net *net, const char *name) 716 struct net_device *dev_get_by_name(struct net *net, const char *name)
717 { 717 {
718 struct net_device *dev; 718 struct net_device *dev;
719 719
720 rcu_read_lock(); 720 rcu_read_lock();
721 dev = dev_get_by_name_rcu(net, name); 721 dev = dev_get_by_name_rcu(net, name);
722 if (dev) 722 if (dev)
723 dev_hold(dev); 723 dev_hold(dev);
724 rcu_read_unlock(); 724 rcu_read_unlock();
725 return dev; 725 return dev;
726 } 726 }
727 EXPORT_SYMBOL(dev_get_by_name); 727 EXPORT_SYMBOL(dev_get_by_name);
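
The refcounted variant is the one to use when the device must outlive any single RCU section; the pattern is always lookup, use, then dev_put(). A hedged sketch with a hypothetical caller:

    static void my_use_device(struct net *net, const char *name)
    {
            struct net_device *dev = dev_get_by_name(net, name);

            if (!dev)
                    return;

            pr_info("%s: mtu %u\n", dev->name, dev->mtu);

            dev_put(dev);           /* drop the reference taken by dev_get_by_name() */
    }
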
728 728
729 /** 729 /**
730 * __dev_get_by_index - find a device by its ifindex 730 * __dev_get_by_index - find a device by its ifindex
731 * @net: the applicable net namespace 731 * @net: the applicable net namespace
732 * @ifindex: index of device 732 * @ifindex: index of device
733 * 733 *
734 * Search for an interface by index. Returns %NULL if the device 734 * Search for an interface by index. Returns %NULL if the device
735 * is not found or a pointer to the device. The device has not 735 * is not found or a pointer to the device. The device has not
736 * had its reference counter increased so the caller must be careful 736 * had its reference counter increased so the caller must be careful
737 * about locking. The caller must hold either the RTNL semaphore 737 * about locking. The caller must hold either the RTNL semaphore
738 * or @dev_base_lock. 738 * or @dev_base_lock.
739 */ 739 */
740 740
741 struct net_device *__dev_get_by_index(struct net *net, int ifindex) 741 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
742 { 742 {
743 struct net_device *dev; 743 struct net_device *dev;
744 struct hlist_head *head = dev_index_hash(net, ifindex); 744 struct hlist_head *head = dev_index_hash(net, ifindex);
745 745
746 hlist_for_each_entry(dev, head, index_hlist) 746 hlist_for_each_entry(dev, head, index_hlist)
747 if (dev->ifindex == ifindex) 747 if (dev->ifindex == ifindex)
748 return dev; 748 return dev;
749 749
750 return NULL; 750 return NULL;
751 } 751 }
752 EXPORT_SYMBOL(__dev_get_by_index); 752 EXPORT_SYMBOL(__dev_get_by_index);
753 753
754 /** 754 /**
755 * dev_get_by_index_rcu - find a device by its ifindex 755 * dev_get_by_index_rcu - find a device by its ifindex
756 * @net: the applicable net namespace 756 * @net: the applicable net namespace
757 * @ifindex: index of device 757 * @ifindex: index of device
758 * 758 *
759 * Search for an interface by index. Returns %NULL if the device 759 * Search for an interface by index. Returns %NULL if the device
760 * is not found or a pointer to the device. The device has not 760 * is not found or a pointer to the device. The device has not
761 * had its reference counter increased so the caller must be careful 761 * had its reference counter increased so the caller must be careful
762 * about locking. The caller must hold RCU lock. 762 * about locking. The caller must hold RCU lock.
763 */ 763 */
764 764
765 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex) 765 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
766 { 766 {
767 struct net_device *dev; 767 struct net_device *dev;
768 struct hlist_head *head = dev_index_hash(net, ifindex); 768 struct hlist_head *head = dev_index_hash(net, ifindex);
769 769
770 hlist_for_each_entry_rcu(dev, head, index_hlist) 770 hlist_for_each_entry_rcu(dev, head, index_hlist)
771 if (dev->ifindex == ifindex) 771 if (dev->ifindex == ifindex)
772 return dev; 772 return dev;
773 773
774 return NULL; 774 return NULL;
775 } 775 }
776 EXPORT_SYMBOL(dev_get_by_index_rcu); 776 EXPORT_SYMBOL(dev_get_by_index_rcu);
777 777
778 778
779 /** 779 /**
780 * dev_get_by_index - find a device by its ifindex 780 * dev_get_by_index - find a device by its ifindex
781 * @net: the applicable net namespace 781 * @net: the applicable net namespace
782 * @ifindex: index of device 782 * @ifindex: index of device
783 * 783 *
784 * Search for an interface by index. Returns NULL if the device 784 * Search for an interface by index. Returns NULL if the device
785 * is not found or a pointer to the device. The device returned has 785 * is not found or a pointer to the device. The device returned has
786 * had a reference added and the pointer is safe until the user calls 786 * had a reference added and the pointer is safe until the user calls
787 * dev_put to indicate they have finished with it. 787 * dev_put to indicate they have finished with it.
788 */ 788 */
789 789
790 struct net_device *dev_get_by_index(struct net *net, int ifindex) 790 struct net_device *dev_get_by_index(struct net *net, int ifindex)
791 { 791 {
792 struct net_device *dev; 792 struct net_device *dev;
793 793
794 rcu_read_lock(); 794 rcu_read_lock();
795 dev = dev_get_by_index_rcu(net, ifindex); 795 dev = dev_get_by_index_rcu(net, ifindex);
796 if (dev) 796 if (dev)
797 dev_hold(dev); 797 dev_hold(dev);
798 rcu_read_unlock(); 798 rcu_read_unlock();
799 return dev; 799 return dev;
800 } 800 }
801 EXPORT_SYMBOL(dev_get_by_index); 801 EXPORT_SYMBOL(dev_get_by_index);
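/*
 * Editor's note: illustrative sketch, not part of this commit. It shows the
 * refcounted dev_get_by_index() in contrast to __dev_get_by_index(), which
 * (per its kernel-doc) relies on the caller holding RTNL or dev_base_lock.
 * init_net is an example assumption.
 */
static void example_lookup_by_index(int ifindex)
{
        struct net_device *dev = dev_get_by_index(&init_net, ifindex);

        if (!dev)
                return;
        pr_info("ifindex %d is %s\n", ifindex, dev->name);
        dev_put(dev);
}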
802 802
803 /** 803 /**
804 * netdev_get_name - get a netdevice name, knowing its ifindex. 804 * netdev_get_name - get a netdevice name, knowing its ifindex.
805 * @net: network namespace 805 * @net: network namespace
806 * @name: a pointer to the buffer where the name will be stored. 806 * @name: a pointer to the buffer where the name will be stored.
807 * @ifindex: the ifindex of the interface to get the name from. 807 * @ifindex: the ifindex of the interface to get the name from.
808 * 808 *
809 * The use of raw_seqcount_begin() and cond_resched() before 809 * The use of raw_seqcount_begin() and cond_resched() before
810 * retrying is required as we want to give the writers a chance 810 * retrying is required as we want to give the writers a chance
811 * to complete when CONFIG_PREEMPT is not set. 811 * to complete when CONFIG_PREEMPT is not set.
812 */ 812 */
813 int netdev_get_name(struct net *net, char *name, int ifindex) 813 int netdev_get_name(struct net *net, char *name, int ifindex)
814 { 814 {
815 struct net_device *dev; 815 struct net_device *dev;
816 unsigned int seq; 816 unsigned int seq;
817 817
818 retry: 818 retry:
819 seq = raw_seqcount_begin(&devnet_rename_seq); 819 seq = raw_seqcount_begin(&devnet_rename_seq);
820 rcu_read_lock(); 820 rcu_read_lock();
821 dev = dev_get_by_index_rcu(net, ifindex); 821 dev = dev_get_by_index_rcu(net, ifindex);
822 if (!dev) { 822 if (!dev) {
823 rcu_read_unlock(); 823 rcu_read_unlock();
824 return -ENODEV; 824 return -ENODEV;
825 } 825 }
826 826
827 strcpy(name, dev->name); 827 strcpy(name, dev->name);
828 rcu_read_unlock(); 828 rcu_read_unlock();
829 if (read_seqcount_retry(&devnet_rename_seq, seq)) { 829 if (read_seqcount_retry(&devnet_rename_seq, seq)) {
830 cond_resched(); 830 cond_resched();
831 goto retry; 831 goto retry;
832 } 832 }
833 833
834 return 0; 834 return 0;
835 } 835 }
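/*
 * Editor's note: illustrative sketch, not part of this commit.
 * netdev_get_name() copies the name into a caller-supplied IFNAMSIZ buffer
 * and retries internally if a rename races with the lookup, so the caller
 * needs no locking of its own.
 */
static void example_print_name(struct net *net, int ifindex)
{
        char name[IFNAMSIZ];

        if (netdev_get_name(net, name, ifindex) == 0)
                pr_info("ifindex %d -> %s\n", ifindex, name);
}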
836 836
837 /** 837 /**
838 * dev_getbyhwaddr_rcu - find a device by its hardware address 838 * dev_getbyhwaddr_rcu - find a device by its hardware address
839 * @net: the applicable net namespace 839 * @net: the applicable net namespace
840 * @type: media type of device 840 * @type: media type of device
841 * @ha: hardware address 841 * @ha: hardware address
842 * 842 *
843 * Search for an interface by MAC address. Returns NULL if the device 843 * Search for an interface by MAC address. Returns NULL if the device
844 * is not found or a pointer to the device. 844 * is not found or a pointer to the device.
845 * The caller must hold RCU or RTNL. 845 * The caller must hold RCU or RTNL.
846 * The returned device has not had its ref count increased 846 * The returned device has not had its ref count increased
847 * and the caller must therefore be careful about locking 847 * and the caller must therefore be careful about locking
848 * 848 *
849 */ 849 */
850 850
851 struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, 851 struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
852 const char *ha) 852 const char *ha)
853 { 853 {
854 struct net_device *dev; 854 struct net_device *dev;
855 855
856 for_each_netdev_rcu(net, dev) 856 for_each_netdev_rcu(net, dev)
857 if (dev->type == type && 857 if (dev->type == type &&
858 !memcmp(dev->dev_addr, ha, dev->addr_len)) 858 !memcmp(dev->dev_addr, ha, dev->addr_len))
859 return dev; 859 return dev;
860 860
861 return NULL; 861 return NULL;
862 } 862 }
863 EXPORT_SYMBOL(dev_getbyhwaddr_rcu); 863 EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
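/*
 * Editor's note: illustrative sketch, not part of this commit. As the
 * kernel-doc above states, the caller must hold RCU (or RTNL) and the
 * returned pointer is not refcounted. The Ethernet type and the sample
 * address are example assumptions.
 */
static void example_find_by_mac(struct net *net)
{
        static const char mac[ETH_ALEN] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
        struct net_device *dev;

        rcu_read_lock();
        dev = dev_getbyhwaddr_rcu(net, ARPHRD_ETHER, mac);
        if (dev)
                pr_info("MAC matches %s\n", dev->name);
        rcu_read_unlock();
}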
864 864
865 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) 865 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
866 { 866 {
867 struct net_device *dev; 867 struct net_device *dev;
868 868
869 ASSERT_RTNL(); 869 ASSERT_RTNL();
870 for_each_netdev(net, dev) 870 for_each_netdev(net, dev)
871 if (dev->type == type) 871 if (dev->type == type)
872 return dev; 872 return dev;
873 873
874 return NULL; 874 return NULL;
875 } 875 }
876 EXPORT_SYMBOL(__dev_getfirstbyhwtype); 876 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
877 877
878 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) 878 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
879 { 879 {
880 struct net_device *dev, *ret = NULL; 880 struct net_device *dev, *ret = NULL;
881 881
882 rcu_read_lock(); 882 rcu_read_lock();
883 for_each_netdev_rcu(net, dev) 883 for_each_netdev_rcu(net, dev)
884 if (dev->type == type) { 884 if (dev->type == type) {
885 dev_hold(dev); 885 dev_hold(dev);
886 ret = dev; 886 ret = dev;
887 break; 887 break;
888 } 888 }
889 rcu_read_unlock(); 889 rcu_read_unlock();
890 return ret; 890 return ret;
891 } 891 }
892 EXPORT_SYMBOL(dev_getfirstbyhwtype); 892 EXPORT_SYMBOL(dev_getfirstbyhwtype);
893 893
894 /** 894 /**
895 * dev_get_by_flags_rcu - find any device with given flags 895 * dev_get_by_flags_rcu - find any device with given flags
896 * @net: the applicable net namespace 896 * @net: the applicable net namespace
897 * @if_flags: IFF_* values 897 * @if_flags: IFF_* values
898 * @mask: bitmask of bits in if_flags to check 898 * @mask: bitmask of bits in if_flags to check
899 * 899 *
900 * Search for any interface with the given flags. Returns NULL if a device 900 * Search for any interface with the given flags. Returns NULL if a device
901 * is not found or a pointer to the device. Must be called inside 901 * is not found or a pointer to the device. Must be called inside
902 * rcu_read_lock(), and result refcount is unchanged. 902 * rcu_read_lock(), and result refcount is unchanged.
903 */ 903 */
904 904
905 struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags, 905 struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
906 unsigned short mask) 906 unsigned short mask)
907 { 907 {
908 struct net_device *dev, *ret; 908 struct net_device *dev, *ret;
909 909
910 ret = NULL; 910 ret = NULL;
911 for_each_netdev_rcu(net, dev) { 911 for_each_netdev_rcu(net, dev) {
912 if (((dev->flags ^ if_flags) & mask) == 0) { 912 if (((dev->flags ^ if_flags) & mask) == 0) {
913 ret = dev; 913 ret = dev;
914 break; 914 break;
915 } 915 }
916 } 916 }
917 return ret; 917 return ret;
918 } 918 }
919 EXPORT_SYMBOL(dev_get_by_flags_rcu); 919 EXPORT_SYMBOL(dev_get_by_flags_rcu);
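/*
 * Editor's note: illustrative sketch, not part of this commit. It follows the
 * documented contract of dev_get_by_flags_rcu(): call inside rcu_read_lock()
 * and do not rely on the result once the lock is dropped, since the refcount
 * is untouched. Searching for a loopback device is an example assumption.
 */
static bool example_has_loopback(struct net *net)
{
        struct net_device *dev;
        bool found;

        rcu_read_lock();
        dev = dev_get_by_flags_rcu(net, IFF_LOOPBACK, IFF_LOOPBACK);
        found = dev != NULL;
        rcu_read_unlock();

        return found;
}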
920 920
921 /** 921 /**
922 * dev_valid_name - check if name is okay for network device 922 * dev_valid_name - check if name is okay for network device
923 * @name: name string 923 * @name: name string
924 * 924 *
925 * Network device names need to be valid file names 925 * Network device names need to be valid file names
926 * to allow sysfs to work. We also disallow any kind of 926 * to allow sysfs to work. We also disallow any kind of
927 * whitespace. 927 * whitespace.
928 */ 928 */
929 bool dev_valid_name(const char *name) 929 bool dev_valid_name(const char *name)
930 { 930 {
931 if (*name == '\0') 931 if (*name == '\0')
932 return false; 932 return false;
933 if (strlen(name) >= IFNAMSIZ) 933 if (strlen(name) >= IFNAMSIZ)
934 return false; 934 return false;
935 if (!strcmp(name, ".") || !strcmp(name, "..")) 935 if (!strcmp(name, ".") || !strcmp(name, ".."))
936 return false; 936 return false;
937 937
938 while (*name) { 938 while (*name) {
939 if (*name == '/' || isspace(*name)) 939 if (*name == '/' || isspace(*name))
940 return false; 940 return false;
941 name++; 941 name++;
942 } 942 }
943 return true; 943 return true;
944 } 944 }
945 EXPORT_SYMBOL(dev_valid_name); 945 EXPORT_SYMBOL(dev_valid_name);
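/*
 * Editor's note: illustrative sketch, not part of this commit. It exercises
 * the rules dev_valid_name() implements above: empty names, names of
 * IFNAMSIZ or more characters, ".", "..", '/' and whitespace are rejected.
 */
static void example_check_names(void)
{
        pr_info("\"uplink0\": %d\n", dev_valid_name("uplink0"));        /* true */
        pr_info("\"bad name\": %d\n", dev_valid_name("bad name"));      /* false: space */
        pr_info("\"..\": %d\n", dev_valid_name(".."));                  /* false */
}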
946 946
947 /** 947 /**
948 * __dev_alloc_name - allocate a name for a device 948 * __dev_alloc_name - allocate a name for a device
949 * @net: network namespace to allocate the device name in 949 * @net: network namespace to allocate the device name in
950 * @name: name format string 950 * @name: name format string
951 * @buf: scratch buffer and result name string 951 * @buf: scratch buffer and result name string
952 * 952 *
953 * Passed a format string - eg "lt%d" it will try and find a suitable 953 * Passed a format string - eg "lt%d" it will try and find a suitable
954 * id. It scans list of devices to build up a free map, then chooses 954 * id. It scans list of devices to build up a free map, then chooses
955 * the first empty slot. The caller must hold the dev_base or rtnl lock 955 * the first empty slot. The caller must hold the dev_base or rtnl lock
956 * while allocating the name and adding the device in order to avoid 956 * while allocating the name and adding the device in order to avoid
957 * duplicates. 957 * duplicates.
958 * Limited to bits_per_byte * page size devices (ie 32K on most platforms). 958 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
959 * Returns the number of the unit assigned or a negative errno code. 959 * Returns the number of the unit assigned or a negative errno code.
960 */ 960 */
961 961
962 static int __dev_alloc_name(struct net *net, const char *name, char *buf) 962 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
963 { 963 {
964 int i = 0; 964 int i = 0;
965 const char *p; 965 const char *p;
966 const int max_netdevices = 8*PAGE_SIZE; 966 const int max_netdevices = 8*PAGE_SIZE;
967 unsigned long *inuse; 967 unsigned long *inuse;
968 struct net_device *d; 968 struct net_device *d;
969 969
970 p = strnchr(name, IFNAMSIZ-1, '%'); 970 p = strnchr(name, IFNAMSIZ-1, '%');
971 if (p) { 971 if (p) {
972 /* 972 /*
973 * Verify the string as this thing may have come from 973 * Verify the string as this thing may have come from
974 * the user. There must be either one "%d" and no other "%" 974 * the user. There must be either one "%d" and no other "%"
975 * characters. 975 * characters.
976 */ 976 */
977 if (p[1] != 'd' || strchr(p + 2, '%')) 977 if (p[1] != 'd' || strchr(p + 2, '%'))
978 return -EINVAL; 978 return -EINVAL;
979 979
980 /* Use one page as a bit array of possible slots */ 980 /* Use one page as a bit array of possible slots */
981 inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC); 981 inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
982 if (!inuse) 982 if (!inuse)
983 return -ENOMEM; 983 return -ENOMEM;
984 984
985 for_each_netdev(net, d) { 985 for_each_netdev(net, d) {
986 if (!sscanf(d->name, name, &i)) 986 if (!sscanf(d->name, name, &i))
987 continue; 987 continue;
988 if (i < 0 || i >= max_netdevices) 988 if (i < 0 || i >= max_netdevices)
989 continue; 989 continue;
990 990
991 /* avoid cases where sscanf is not exact inverse of printf */ 991 /* avoid cases where sscanf is not exact inverse of printf */
992 snprintf(buf, IFNAMSIZ, name, i); 992 snprintf(buf, IFNAMSIZ, name, i);
993 if (!strncmp(buf, d->name, IFNAMSIZ)) 993 if (!strncmp(buf, d->name, IFNAMSIZ))
994 set_bit(i, inuse); 994 set_bit(i, inuse);
995 } 995 }
996 996
997 i = find_first_zero_bit(inuse, max_netdevices); 997 i = find_first_zero_bit(inuse, max_netdevices);
998 free_page((unsigned long) inuse); 998 free_page((unsigned long) inuse);
999 } 999 }
1000 1000
1001 if (buf != name) 1001 if (buf != name)
1002 snprintf(buf, IFNAMSIZ, name, i); 1002 snprintf(buf, IFNAMSIZ, name, i);
1003 if (!__dev_get_by_name(net, buf)) 1003 if (!__dev_get_by_name(net, buf))
1004 return i; 1004 return i;
1005 1005
1006 /* It is possible to run out of possible slots 1006 /* It is possible to run out of possible slots
1007 * when the name is long and there isn't enough space left 1007 * when the name is long and there isn't enough space left
1008 * for the digits, or if all bits are used. 1008 * for the digits, or if all bits are used.
1009 */ 1009 */
1010 return -ENFILE; 1010 return -ENFILE;
1011 } 1011 }
1012 1012
1013 /** 1013 /**
1014 * dev_alloc_name - allocate a name for a device 1014 * dev_alloc_name - allocate a name for a device
1015 * @dev: device 1015 * @dev: device
1016 * @name: name format string 1016 * @name: name format string
1017 * 1017 *
1018 * Passed a format string - eg "lt%d" it will try and find a suitable 1018 * Passed a format string - eg "lt%d" it will try and find a suitable
1019 * id. It scans list of devices to build up a free map, then chooses 1019 * id. It scans list of devices to build up a free map, then chooses
1020 * the first empty slot. The caller must hold the dev_base or rtnl lock 1020 * the first empty slot. The caller must hold the dev_base or rtnl lock
1021 * while allocating the name and adding the device in order to avoid 1021 * while allocating the name and adding the device in order to avoid
1022 * duplicates. 1022 * duplicates.
1023 * Limited to bits_per_byte * page size devices (ie 32K on most platforms). 1023 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
1024 * Returns the number of the unit assigned or a negative errno code. 1024 * Returns the number of the unit assigned or a negative errno code.
1025 */ 1025 */
1026 1026
1027 int dev_alloc_name(struct net_device *dev, const char *name) 1027 int dev_alloc_name(struct net_device *dev, const char *name)
1028 { 1028 {
1029 char buf[IFNAMSIZ]; 1029 char buf[IFNAMSIZ];
1030 struct net *net; 1030 struct net *net;
1031 int ret; 1031 int ret;
1032 1032
1033 BUG_ON(!dev_net(dev)); 1033 BUG_ON(!dev_net(dev));
1034 net = dev_net(dev); 1034 net = dev_net(dev);
1035 ret = __dev_alloc_name(net, name, buf); 1035 ret = __dev_alloc_name(net, name, buf);
1036 if (ret >= 0) 1036 if (ret >= 0)
1037 strlcpy(dev->name, buf, IFNAMSIZ); 1037 strlcpy(dev->name, buf, IFNAMSIZ);
1038 return ret; 1038 return ret;
1039 } 1039 }
1040 EXPORT_SYMBOL(dev_alloc_name); 1040 EXPORT_SYMBOL(dev_alloc_name);
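/*
 * Editor's note: illustrative sketch, not part of this commit. A driver
 * registering a device can hand a "%d" pattern to dev_alloc_name(); the
 * "myeth%d" pattern and the surrounding helper are example assumptions.
 * As the kernel-doc above requires, the caller holds RTNL.
 */
static int example_name_device(struct net_device *dev)
{
        int unit;

        ASSERT_RTNL();
        unit = dev_alloc_name(dev, "myeth%d");  /* picks the first free myethN slot */
        if (unit < 0)
                return unit;                    /* -EINVAL or -ENFILE per the kernel-doc */
        pr_info("assigned %s (unit %d)\n", dev->name, unit);
        return 0;
}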
1041 1041
1042 static int dev_alloc_name_ns(struct net *net, 1042 static int dev_alloc_name_ns(struct net *net,
1043 struct net_device *dev, 1043 struct net_device *dev,
1044 const char *name) 1044 const char *name)
1045 { 1045 {
1046 char buf[IFNAMSIZ]; 1046 char buf[IFNAMSIZ];
1047 int ret; 1047 int ret;
1048 1048
1049 ret = __dev_alloc_name(net, name, buf); 1049 ret = __dev_alloc_name(net, name, buf);
1050 if (ret >= 0) 1050 if (ret >= 0)
1051 strlcpy(dev->name, buf, IFNAMSIZ); 1051 strlcpy(dev->name, buf, IFNAMSIZ);
1052 return ret; 1052 return ret;
1053 } 1053 }
1054 1054
1055 static int dev_get_valid_name(struct net *net, 1055 static int dev_get_valid_name(struct net *net,
1056 struct net_device *dev, 1056 struct net_device *dev,
1057 const char *name) 1057 const char *name)
1058 { 1058 {
1059 BUG_ON(!net); 1059 BUG_ON(!net);
1060 1060
1061 if (!dev_valid_name(name)) 1061 if (!dev_valid_name(name))
1062 return -EINVAL; 1062 return -EINVAL;
1063 1063
1064 if (strchr(name, '%')) 1064 if (strchr(name, '%'))
1065 return dev_alloc_name_ns(net, dev, name); 1065 return dev_alloc_name_ns(net, dev, name);
1066 else if (__dev_get_by_name(net, name)) 1066 else if (__dev_get_by_name(net, name))
1067 return -EEXIST; 1067 return -EEXIST;
1068 else if (dev->name != name) 1068 else if (dev->name != name)
1069 strlcpy(dev->name, name, IFNAMSIZ); 1069 strlcpy(dev->name, name, IFNAMSIZ);
1070 1070
1071 return 0; 1071 return 0;
1072 } 1072 }
1073 1073
1074 /** 1074 /**
1075 * dev_change_name - change name of a device 1075 * dev_change_name - change name of a device
1076 * @dev: device 1076 * @dev: device
1077 * @newname: name (or format string) must be at least IFNAMSIZ 1077 * @newname: name (or format string) must be at least IFNAMSIZ
1078 * 1078 *
1079 * Change name of a device, can pass format strings "eth%d" 1079 * Change name of a device, can pass format strings "eth%d"
1080 * for wildcarding. 1080 * for wildcarding.
1081 */ 1081 */
1082 int dev_change_name(struct net_device *dev, const char *newname) 1082 int dev_change_name(struct net_device *dev, const char *newname)
1083 { 1083 {
1084 char oldname[IFNAMSIZ]; 1084 char oldname[IFNAMSIZ];
1085 int err = 0; 1085 int err = 0;
1086 int ret; 1086 int ret;
1087 struct net *net; 1087 struct net *net;
1088 1088
1089 ASSERT_RTNL(); 1089 ASSERT_RTNL();
1090 BUG_ON(!dev_net(dev)); 1090 BUG_ON(!dev_net(dev));
1091 1091
1092 net = dev_net(dev); 1092 net = dev_net(dev);
1093 if (dev->flags & IFF_UP) 1093 if (dev->flags & IFF_UP)
1094 return -EBUSY; 1094 return -EBUSY;
1095 1095
1096 write_seqcount_begin(&devnet_rename_seq); 1096 write_seqcount_begin(&devnet_rename_seq);
1097 1097
1098 if (strncmp(newname, dev->name, IFNAMSIZ) == 0) { 1098 if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
1099 write_seqcount_end(&devnet_rename_seq); 1099 write_seqcount_end(&devnet_rename_seq);
1100 return 0; 1100 return 0;
1101 } 1101 }
1102 1102
1103 memcpy(oldname, dev->name, IFNAMSIZ); 1103 memcpy(oldname, dev->name, IFNAMSIZ);
1104 1104
1105 err = dev_get_valid_name(net, dev, newname); 1105 err = dev_get_valid_name(net, dev, newname);
1106 if (err < 0) { 1106 if (err < 0) {
1107 write_seqcount_end(&devnet_rename_seq); 1107 write_seqcount_end(&devnet_rename_seq);
1108 return err; 1108 return err;
1109 } 1109 }
1110 1110
1111 rollback: 1111 rollback:
1112 ret = device_rename(&dev->dev, dev->name); 1112 ret = device_rename(&dev->dev, dev->name);
1113 if (ret) { 1113 if (ret) {
1114 memcpy(dev->name, oldname, IFNAMSIZ); 1114 memcpy(dev->name, oldname, IFNAMSIZ);
1115 write_seqcount_end(&devnet_rename_seq); 1115 write_seqcount_end(&devnet_rename_seq);
1116 return ret; 1116 return ret;
1117 } 1117 }
1118 1118
1119 write_seqcount_end(&devnet_rename_seq); 1119 write_seqcount_end(&devnet_rename_seq);
1120 1120
1121 write_lock_bh(&dev_base_lock); 1121 write_lock_bh(&dev_base_lock);
1122 hlist_del_rcu(&dev->name_hlist); 1122 hlist_del_rcu(&dev->name_hlist);
1123 write_unlock_bh(&dev_base_lock); 1123 write_unlock_bh(&dev_base_lock);
1124 1124
1125 synchronize_rcu(); 1125 synchronize_rcu();
1126 1126
1127 write_lock_bh(&dev_base_lock); 1127 write_lock_bh(&dev_base_lock);
1128 hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name)); 1128 hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
1129 write_unlock_bh(&dev_base_lock); 1129 write_unlock_bh(&dev_base_lock);
1130 1130
1131 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev); 1131 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
1132 ret = notifier_to_errno(ret); 1132 ret = notifier_to_errno(ret);
1133 1133
1134 if (ret) { 1134 if (ret) {
1135 /* err >= 0 after dev_alloc_name() or stores the first errno */ 1135 /* err >= 0 after dev_alloc_name() or stores the first errno */
1136 if (err >= 0) { 1136 if (err >= 0) {
1137 err = ret; 1137 err = ret;
1138 write_seqcount_begin(&devnet_rename_seq); 1138 write_seqcount_begin(&devnet_rename_seq);
1139 memcpy(dev->name, oldname, IFNAMSIZ); 1139 memcpy(dev->name, oldname, IFNAMSIZ);
1140 goto rollback; 1140 goto rollback;
1141 } else { 1141 } else {
1142 pr_err("%s: name change rollback failed: %d\n", 1142 pr_err("%s: name change rollback failed: %d\n",
1143 dev->name, ret); 1143 dev->name, ret);
1144 } 1144 }
1145 } 1145 }
1146 1146
1147 return err; 1147 return err;
1148 } 1148 }
1149 1149
1150 /** 1150 /**
1151 * dev_set_alias - change ifalias of a device 1151 * dev_set_alias - change ifalias of a device
1152 * @dev: device 1152 * @dev: device
1153 * @alias: name up to IFALIASZ 1153 * @alias: name up to IFALIASZ
1154 * @len: limit of bytes to copy from info 1154 * @len: limit of bytes to copy from info
1155 * 1155 *
1156 * Set ifalias for a device. 1156 * Set ifalias for a device.
1157 */ 1157 */
1158 int dev_set_alias(struct net_device *dev, const char *alias, size_t len) 1158 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
1159 { 1159 {
1160 char *new_ifalias; 1160 char *new_ifalias;
1161 1161
1162 ASSERT_RTNL(); 1162 ASSERT_RTNL();
1163 1163
1164 if (len >= IFALIASZ) 1164 if (len >= IFALIASZ)
1165 return -EINVAL; 1165 return -EINVAL;
1166 1166
1167 if (!len) { 1167 if (!len) {
1168 kfree(dev->ifalias); 1168 kfree(dev->ifalias);
1169 dev->ifalias = NULL; 1169 dev->ifalias = NULL;
1170 return 0; 1170 return 0;
1171 } 1171 }
1172 1172
1173 new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); 1173 new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
1174 if (!new_ifalias) 1174 if (!new_ifalias)
1175 return -ENOMEM; 1175 return -ENOMEM;
1176 dev->ifalias = new_ifalias; 1176 dev->ifalias = new_ifalias;
1177 1177
1178 strlcpy(dev->ifalias, alias, len+1); 1178 strlcpy(dev->ifalias, alias, len+1);
1179 return len; 1179 return len;
1180 } 1180 }
1181 1181
1182 1182
1183 /** 1183 /**
1184 * netdev_features_change - device changes features 1184 * netdev_features_change - device changes features
1185 * @dev: device to cause notification 1185 * @dev: device to cause notification
1186 * 1186 *
1187 * Called to indicate a device has changed features. 1187 * Called to indicate a device has changed features.
1188 */ 1188 */
1189 void netdev_features_change(struct net_device *dev) 1189 void netdev_features_change(struct net_device *dev)
1190 { 1190 {
1191 call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev); 1191 call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
1192 } 1192 }
1193 EXPORT_SYMBOL(netdev_features_change); 1193 EXPORT_SYMBOL(netdev_features_change);
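/*
 * Editor's note: illustrative sketch, not part of this commit. A driver whose
 * offload capabilities change at runtime (firmware reconfiguration is the
 * assumed trigger here) reports that via netdev_features_change(), which
 * fires the NETDEV_FEAT_CHANGE notifier chain shown above.
 */
static void example_offloads_changed(struct net_device *dev)
{
        netdev_features_change(dev);
}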
1194 1194
1195 /** 1195 /**
1196 * netdev_state_change - device changes state 1196 * netdev_state_change - device changes state
1197 * @dev: device to cause notification 1197 * @dev: device to cause notification
1198 * 1198 *
1199 * Called to indicate a device has changed state. This function calls 1199 * Called to indicate a device has changed state. This function calls
1200 * the notifier chains for netdev_chain and sends a NEWLINK message 1200 * the notifier chains for netdev_chain and sends a NEWLINK message
1201 * to the routing socket. 1201 * to the routing socket.
1202 */ 1202 */
1203 void netdev_state_change(struct net_device *dev) 1203 void netdev_state_change(struct net_device *dev)
1204 { 1204 {
1205 if (dev->flags & IFF_UP) { 1205 if (dev->flags & IFF_UP) {
1206 call_netdevice_notifiers(NETDEV_CHANGE, dev); 1206 call_netdevice_notifiers(NETDEV_CHANGE, dev);
1207 rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); 1207 rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
1208 } 1208 }
1209 } 1209 }
1210 EXPORT_SYMBOL(netdev_state_change); 1210 EXPORT_SYMBOL(netdev_state_change);
1211 1211
1212 /** 1212 /**
1213 * netdev_notify_peers - notify network peers about existence of @dev 1213 * netdev_notify_peers - notify network peers about existence of @dev
1214 * @dev: network device 1214 * @dev: network device
1215 * 1215 *
1216 * Generate traffic such that interested network peers are aware of 1216 * Generate traffic such that interested network peers are aware of
1217 * @dev, such as by generating a gratuitous ARP. This may be used when 1217 * @dev, such as by generating a gratuitous ARP. This may be used when
1218 * a device wants to inform the rest of the network about some sort of 1218 * a device wants to inform the rest of the network about some sort of
1219 * reconfiguration such as a failover event or virtual machine 1219 * reconfiguration such as a failover event or virtual machine
1220 * migration. 1220 * migration.
1221 */ 1221 */
1222 void netdev_notify_peers(struct net_device *dev) 1222 void netdev_notify_peers(struct net_device *dev)
1223 { 1223 {
1224 rtnl_lock(); 1224 rtnl_lock();
1225 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev); 1225 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
1226 rtnl_unlock(); 1226 rtnl_unlock();
1227 } 1227 }
1228 EXPORT_SYMBOL(netdev_notify_peers); 1228 EXPORT_SYMBOL(netdev_notify_peers);
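/*
 * Editor's note: illustrative sketch, not part of this commit. A failover or
 * migration handler (hypothetical here) calls netdev_notify_peers() on the
 * newly active device so peers relearn its location, e.g. via gratuitous ARP.
 * The function takes rtnl_lock() itself, so it must not be called with RTNL
 * already held.
 */
static void example_after_failover(struct net_device *active)
{
        netdev_notify_peers(active);
}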
1229 1229
1230 static int __dev_open(struct net_device *dev) 1230 static int __dev_open(struct net_device *dev)
1231 { 1231 {
1232 const struct net_device_ops *ops = dev->netdev_ops; 1232 const struct net_device_ops *ops = dev->netdev_ops;
1233 int ret; 1233 int ret;
1234 1234
1235 ASSERT_RTNL(); 1235 ASSERT_RTNL();
1236 1236
1237 if (!netif_device_present(dev)) 1237 if (!netif_device_present(dev))
1238 return -ENODEV; 1238 return -ENODEV;
1239 1239
1240 /* Block netpoll from trying to do any rx path servicing. 1240 /* Block netpoll from trying to do any rx path servicing.
1241 * If we don't do this there is a chance ndo_poll_controller 1241 * If we don't do this there is a chance ndo_poll_controller
1242 * or ndo_poll may be running while we open the device 1242 * or ndo_poll may be running while we open the device
1243 */ 1243 */
1244 netpoll_rx_disable(dev); 1244 netpoll_rx_disable(dev);
1245 1245
1246 ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev); 1246 ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
1247 ret = notifier_to_errno(ret); 1247 ret = notifier_to_errno(ret);
1248 if (ret) 1248 if (ret)
1249 return ret; 1249 return ret;
1250 1250
1251 set_bit(__LINK_STATE_START, &dev->state); 1251 set_bit(__LINK_STATE_START, &dev->state);
1252 1252
1253 if (ops->ndo_validate_addr) 1253 if (ops->ndo_validate_addr)
1254 ret = ops->ndo_validate_addr(dev); 1254 ret = ops->ndo_validate_addr(dev);
1255 1255
1256 if (!ret && ops->ndo_open) 1256 if (!ret && ops->ndo_open)
1257 ret = ops->ndo_open(dev); 1257 ret = ops->ndo_open(dev);
1258 1258
1259 netpoll_rx_enable(dev); 1259 netpoll_rx_enable(dev);
1260 1260
1261 if (ret) 1261 if (ret)
1262 clear_bit(__LINK_STATE_START, &dev->state); 1262 clear_bit(__LINK_STATE_START, &dev->state);
1263 else { 1263 else {
1264 dev->flags |= IFF_UP; 1264 dev->flags |= IFF_UP;
1265 net_dmaengine_get(); 1265 net_dmaengine_get();
1266 dev_set_rx_mode(dev); 1266 dev_set_rx_mode(dev);
1267 dev_activate(dev); 1267 dev_activate(dev);
1268 add_device_randomness(dev->dev_addr, dev->addr_len); 1268 add_device_randomness(dev->dev_addr, dev->addr_len);
1269 } 1269 }
1270 1270
1271 return ret; 1271 return ret;
1272 } 1272 }
1273 1273
1274 /** 1274 /**
1275 * dev_open - prepare an interface for use. 1275 * dev_open - prepare an interface for use.
1276 * @dev: device to open 1276 * @dev: device to open
1277 * 1277 *
1278 * Takes a device from down to up state. The device's private open 1278 * Takes a device from down to up state. The device's private open
1279 * function is invoked and then the multicast lists are loaded. Finally 1279 * function is invoked and then the multicast lists are loaded. Finally
1280 * the device is moved into the up state and a %NETDEV_UP message is 1280 * the device is moved into the up state and a %NETDEV_UP message is
1281 * sent to the netdev notifier chain. 1281 * sent to the netdev notifier chain.
1282 * 1282 *
1283 * Calling this function on an active interface is a nop. On a failure 1283 * Calling this function on an active interface is a nop. On a failure
1284 * a negative errno code is returned. 1284 * a negative errno code is returned.
1285 */ 1285 */
1286 int dev_open(struct net_device *dev) 1286 int dev_open(struct net_device *dev)
1287 { 1287 {
1288 int ret; 1288 int ret;
1289 1289
1290 if (dev->flags & IFF_UP) 1290 if (dev->flags & IFF_UP)
1291 return 0; 1291 return 0;
1292 1292
1293 ret = __dev_open(dev); 1293 ret = __dev_open(dev);
1294 if (ret < 0) 1294 if (ret < 0)
1295 return ret; 1295 return ret;
1296 1296
1297 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL); 1297 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
1298 call_netdevice_notifiers(NETDEV_UP, dev); 1298 call_netdevice_notifiers(NETDEV_UP, dev);
1299 1299
1300 return ret; 1300 return ret;
1301 } 1301 }
1302 EXPORT_SYMBOL(dev_open); 1302 EXPORT_SYMBOL(dev_open);
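/*
 * Editor's note: illustrative sketch, not part of this commit. dev_open() and
 * dev_close() are RTNL-protected operations (note the ASSERT_RTNL() in
 * __dev_open() above), so an in-kernel caller brackets them with rtnl_lock().
 */
static int example_bring_up(struct net_device *dev)
{
        int err;

        rtnl_lock();
        err = dev_open(dev);    /* a no-op returning 0 if the interface is already up */
        rtnl_unlock();

        return err;
}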
1303 1303
1304 static int __dev_close_many(struct list_head *head) 1304 static int __dev_close_many(struct list_head *head)
1305 { 1305 {
1306 struct net_device *dev; 1306 struct net_device *dev;
1307 1307
1308 ASSERT_RTNL(); 1308 ASSERT_RTNL();
1309 might_sleep(); 1309 might_sleep();
1310 1310
1311 list_for_each_entry(dev, head, close_list) { 1311 list_for_each_entry(dev, head, close_list) {
1312 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); 1312 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1313 1313
1314 clear_bit(__LINK_STATE_START, &dev->state); 1314 clear_bit(__LINK_STATE_START, &dev->state);
1315 1315
1316 /* Synchronize to scheduled poll. We cannot touch poll list, it 1316 /* Synchronize to scheduled poll. We cannot touch poll list, it
1317 * can be even on different cpu. So just clear netif_running(). 1317 * can be even on different cpu. So just clear netif_running().
1318 * 1318 *
1319 * dev->stop() will invoke napi_disable() on all of its 1319 * dev->stop() will invoke napi_disable() on all of its
1320 * napi_struct instances on this device. 1320 * napi_struct instances on this device.
1321 */ 1321 */
1322 smp_mb__after_clear_bit(); /* Commit netif_running(). */ 1322 smp_mb__after_clear_bit(); /* Commit netif_running(). */
1323 } 1323 }
1324 1324
1325 dev_deactivate_many(head); 1325 dev_deactivate_many(head);
1326 1326
1327 list_for_each_entry(dev, head, close_list) { 1327 list_for_each_entry(dev, head, close_list) {
1328 const struct net_device_ops *ops = dev->netdev_ops; 1328 const struct net_device_ops *ops = dev->netdev_ops;
1329 1329
1330 /* 1330 /*
1331 * Call the device specific close. This cannot fail. 1331 * Call the device specific close. This cannot fail.
1332 * Only if device is UP 1332 * Only if device is UP
1333 * 1333 *
1334 * We allow it to be called even after a DETACH hot-plug 1334 * We allow it to be called even after a DETACH hot-plug
1335 * event. 1335 * event.
1336 */ 1336 */
1337 if (ops->ndo_stop) 1337 if (ops->ndo_stop)
1338 ops->ndo_stop(dev); 1338 ops->ndo_stop(dev);
1339 1339
1340 dev->flags &= ~IFF_UP; 1340 dev->flags &= ~IFF_UP;
1341 net_dmaengine_put(); 1341 net_dmaengine_put();
1342 } 1342 }
1343 1343
1344 return 0; 1344 return 0;
1345 } 1345 }
1346 1346
1347 static int __dev_close(struct net_device *dev) 1347 static int __dev_close(struct net_device *dev)
1348 { 1348 {
1349 int retval; 1349 int retval;
1350 LIST_HEAD(single); 1350 LIST_HEAD(single);
1351 1351
1352 /* Temporarily disable netpoll until the interface is down */ 1352 /* Temporarily disable netpoll until the interface is down */
1353 netpoll_rx_disable(dev); 1353 netpoll_rx_disable(dev);
1354 1354
1355 list_add(&dev->close_list, &single); 1355 list_add(&dev->close_list, &single);
1356 retval = __dev_close_many(&single); 1356 retval = __dev_close_many(&single);
1357 list_del(&single); 1357 list_del(&single);
1358 1358
1359 netpoll_rx_enable(dev); 1359 netpoll_rx_enable(dev);
1360 return retval; 1360 return retval;
1361 } 1361 }
1362 1362
1363 static int dev_close_many(struct list_head *head) 1363 static int dev_close_many(struct list_head *head)
1364 { 1364 {
1365 struct net_device *dev, *tmp; 1365 struct net_device *dev, *tmp;
1366 1366
1367 /* Remove the devices that don't need to be closed */ 1367 /* Remove the devices that don't need to be closed */
1368 list_for_each_entry_safe(dev, tmp, head, close_list) 1368 list_for_each_entry_safe(dev, tmp, head, close_list)
1369 if (!(dev->flags & IFF_UP)) 1369 if (!(dev->flags & IFF_UP))
1370 list_del_init(&dev->close_list); 1370 list_del_init(&dev->close_list);
1371 1371
1372 __dev_close_many(head); 1372 __dev_close_many(head);
1373 1373
1374 list_for_each_entry_safe(dev, tmp, head, close_list) { 1374 list_for_each_entry_safe(dev, tmp, head, close_list) {
1375 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL); 1375 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
1376 call_netdevice_notifiers(NETDEV_DOWN, dev); 1376 call_netdevice_notifiers(NETDEV_DOWN, dev);
1377 list_del_init(&dev->close_list); 1377 list_del_init(&dev->close_list);
1378 } 1378 }
1379 1379
1380 return 0; 1380 return 0;
1381 } 1381 }
1382 1382
1383 /** 1383 /**
1384 * dev_close - shutdown an interface. 1384 * dev_close - shutdown an interface.
1385 * @dev: device to shutdown 1385 * @dev: device to shutdown
1386 * 1386 *
1387 * This function moves an active device into down state. A 1387 * This function moves an active device into down state. A
1388 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device 1388 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1389 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier 1389 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1390 * chain. 1390 * chain.
1391 */ 1391 */
1392 int dev_close(struct net_device *dev) 1392 int dev_close(struct net_device *dev)
1393 { 1393 {
1394 if (dev->flags & IFF_UP) { 1394 if (dev->flags & IFF_UP) {
1395 LIST_HEAD(single); 1395 LIST_HEAD(single);
1396 1396
1397 /* Block netpoll rx while the interface is going down */ 1397 /* Block netpoll rx while the interface is going down */
1398 netpoll_rx_disable(dev); 1398 netpoll_rx_disable(dev);
1399 1399
1400 list_add(&dev->close_list, &single); 1400 list_add(&dev->close_list, &single);
1401 dev_close_many(&single); 1401 dev_close_many(&single);
1402 list_del(&single); 1402 list_del(&single);
1403 1403
1404 netpoll_rx_enable(dev); 1404 netpoll_rx_enable(dev);
1405 } 1405 }
1406 return 0; 1406 return 0;
1407 } 1407 }
1408 EXPORT_SYMBOL(dev_close); 1408 EXPORT_SYMBOL(dev_close);
1409 1409
1410 1410
1411 /** 1411 /**
1412 * dev_disable_lro - disable Large Receive Offload on a device 1412 * dev_disable_lro - disable Large Receive Offload on a device
1413 * @dev: device 1413 * @dev: device
1414 * 1414 *
1415 * Disable Large Receive Offload (LRO) on a net device. Must be 1415 * Disable Large Receive Offload (LRO) on a net device. Must be
1416 * called under RTNL. This is needed if received packets may be 1416 * called under RTNL. This is needed if received packets may be
1417 * forwarded to another interface. 1417 * forwarded to another interface.
1418 */ 1418 */
1419 void dev_disable_lro(struct net_device *dev) 1419 void dev_disable_lro(struct net_device *dev)
1420 { 1420 {
1421 /* 1421 /*
1422 * If we're trying to disable lro on a vlan device 1422 * If we're trying to disable lro on a vlan device
1423 * use the underlying physical device instead 1423 * use the underlying physical device instead
1424 */ 1424 */
1425 if (is_vlan_dev(dev)) 1425 if (is_vlan_dev(dev))
1426 dev = vlan_dev_real_dev(dev); 1426 dev = vlan_dev_real_dev(dev);
1427 1427
1428 /* the same for macvlan devices */ 1428 /* the same for macvlan devices */
1429 if (netif_is_macvlan(dev)) 1429 if (netif_is_macvlan(dev))
1430 dev = macvlan_dev_real_dev(dev); 1430 dev = macvlan_dev_real_dev(dev);
1431 1431
1432 dev->wanted_features &= ~NETIF_F_LRO; 1432 dev->wanted_features &= ~NETIF_F_LRO;
1433 netdev_update_features(dev); 1433 netdev_update_features(dev);
1434 1434
1435 if (unlikely(dev->features & NETIF_F_LRO)) 1435 if (unlikely(dev->features & NETIF_F_LRO))
1436 netdev_WARN(dev, "failed to disable LRO!\n"); 1436 netdev_WARN(dev, "failed to disable LRO!\n");
1437 } 1437 }
1438 EXPORT_SYMBOL(dev_disable_lro); 1438 EXPORT_SYMBOL(dev_disable_lro);
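/*
 * Editor's note: illustrative sketch, not part of this commit. Per the
 * kernel-doc above, LRO must be switched off under RTNL when received
 * packets may be forwarded; enabling forwarding on the device is the
 * assumed trigger here.
 */
static void example_prepare_for_forwarding(struct net_device *dev)
{
        ASSERT_RTNL();                  /* dev_disable_lro() must run under RTNL */
        dev_disable_lro(dev);           /* also resolves vlan/macvlan upper devices */
}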
1439 1439
1440 static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val, 1440 static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
1441 struct net_device *dev) 1441 struct net_device *dev)
1442 { 1442 {
1443 struct netdev_notifier_info info; 1443 struct netdev_notifier_info info;
1444 1444
1445 netdev_notifier_info_init(&info, dev); 1445 netdev_notifier_info_init(&info, dev);
1446 return nb->notifier_call(nb, val, &info); 1446 return nb->notifier_call(nb, val, &info);
1447 } 1447 }
1448 1448
1449 static int dev_boot_phase = 1; 1449 static int dev_boot_phase = 1;
1450 1450
1451 /** 1451 /**
1452 * register_netdevice_notifier - register a network notifier block 1452 * register_netdevice_notifier - register a network notifier block
1453 * @nb: notifier 1453 * @nb: notifier
1454 * 1454 *
1455 * Register a notifier to be called when network device events occur. 1455 * Register a notifier to be called when network device events occur.
1456 * The notifier passed is linked into the kernel structures and must 1456 * The notifier passed is linked into the kernel structures and must
1457 * not be reused until it has been unregistered. A negative errno code 1457 * not be reused until it has been unregistered. A negative errno code
1458 * is returned on a failure. 1458 * is returned on a failure.
1459 * 1459 *
1460 * When registered all registration and up events are replayed 1460 * When registered all registration and up events are replayed
1461 * to the new notifier to allow the device to have a race-free 1461 * to the new notifier to allow the device to have a race-free
1462 * view of the network device list. 1462 * view of the network device list.
1463 */ 1463 */
1464 1464
1465 int register_netdevice_notifier(struct notifier_block *nb) 1465 int register_netdevice_notifier(struct notifier_block *nb)
1466 { 1466 {
1467 struct net_device *dev; 1467 struct net_device *dev;
1468 struct net_device *last; 1468 struct net_device *last;
1469 struct net *net; 1469 struct net *net;
1470 int err; 1470 int err;
1471 1471
1472 rtnl_lock(); 1472 rtnl_lock();
1473 err = raw_notifier_chain_register(&netdev_chain, nb); 1473 err = raw_notifier_chain_register(&netdev_chain, nb);
1474 if (err) 1474 if (err)
1475 goto unlock; 1475 goto unlock;
1476 if (dev_boot_phase) 1476 if (dev_boot_phase)
1477 goto unlock; 1477 goto unlock;
1478 for_each_net(net) { 1478 for_each_net(net) {
1479 for_each_netdev(net, dev) { 1479 for_each_netdev(net, dev) {
1480 err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev); 1480 err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
1481 err = notifier_to_errno(err); 1481 err = notifier_to_errno(err);
1482 if (err) 1482 if (err)
1483 goto rollback; 1483 goto rollback;
1484 1484
1485 if (!(dev->flags & IFF_UP)) 1485 if (!(dev->flags & IFF_UP))
1486 continue; 1486 continue;
1487 1487
1488 call_netdevice_notifier(nb, NETDEV_UP, dev); 1488 call_netdevice_notifier(nb, NETDEV_UP, dev);
1489 } 1489 }
1490 } 1490 }
1491 1491
1492 unlock: 1492 unlock:
1493 rtnl_unlock(); 1493 rtnl_unlock();
1494 return err; 1494 return err;
1495 1495
1496 rollback: 1496 rollback:
1497 last = dev; 1497 last = dev;
1498 for_each_net(net) { 1498 for_each_net(net) {
1499 for_each_netdev(net, dev) { 1499 for_each_netdev(net, dev) {
1500 if (dev == last) 1500 if (dev == last)
1501 goto outroll; 1501 goto outroll;
1502 1502
1503 if (dev->flags & IFF_UP) { 1503 if (dev->flags & IFF_UP) {
1504 call_netdevice_notifier(nb, NETDEV_GOING_DOWN, 1504 call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
1505 dev); 1505 dev);
1506 call_netdevice_notifier(nb, NETDEV_DOWN, dev); 1506 call_netdevice_notifier(nb, NETDEV_DOWN, dev);
1507 } 1507 }
1508 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); 1508 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
1509 } 1509 }
1510 } 1510 }
1511 1511
1512 outroll: 1512 outroll:
1513 raw_notifier_chain_unregister(&netdev_chain, nb); 1513 raw_notifier_chain_unregister(&netdev_chain, nb);
1514 goto unlock; 1514 goto unlock;
1515 } 1515 }
1516 EXPORT_SYMBOL(register_netdevice_notifier); 1516 EXPORT_SYMBOL(register_netdevice_notifier);
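/*
 * Editor's note: illustrative sketch, not part of this commit. It shows a
 * minimal notifier block. Registration replays NETDEV_REGISTER and NETDEV_UP
 * for already-existing devices, as the kernel-doc above states, and the
 * netdev_notifier_info_to_dev() accessor matches the info structure passed
 * by call_netdevice_notifier() in this file.
 */
static int example_netdev_event(struct notifier_block *nb,
                                unsigned long event, void *ptr)
{
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);

        if (event == NETDEV_UP)
                pr_info("%s is up\n", dev->name);
        return NOTIFY_DONE;
}

static struct notifier_block example_nb = {
        .notifier_call = example_netdev_event,
};

/* register_netdevice_notifier(&example_nb) from module init,
 * unregister_netdevice_notifier(&example_nb) from module exit.
 */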
1517 1517
1518 /** 1518 /**
1519 * unregister_netdevice_notifier - unregister a network notifier block 1519 * unregister_netdevice_notifier - unregister a network notifier block
1520 * @nb: notifier 1520 * @nb: notifier
1521 * 1521 *
1522 * Unregister a notifier previously registered by 1522 * Unregister a notifier previously registered by
1523 * register_netdevice_notifier(). The notifier is unlinked from the 1523 * register_netdevice_notifier(). The notifier is unlinked from the
1524 * kernel structures and may then be reused. A negative errno code 1524 * kernel structures and may then be reused. A negative errno code
1525 * is returned on a failure. 1525 * is returned on a failure.
1526 * 1526 *
1527 * After unregistering unregister and down device events are synthesized 1527 * After unregistering unregister and down device events are synthesized
1528 * for all devices on the device list to the removed notifier to remove 1528 * for all devices on the device list to the removed notifier to remove
1529 * the need for special case cleanup code. 1529 * the need for special case cleanup code.
1530 */ 1530 */
1531 1531
1532 int unregister_netdevice_notifier(struct notifier_block *nb) 1532 int unregister_netdevice_notifier(struct notifier_block *nb)
1533 { 1533 {
1534 struct net_device *dev; 1534 struct net_device *dev;
1535 struct net *net; 1535 struct net *net;
1536 int err; 1536 int err;
1537 1537
1538 rtnl_lock(); 1538 rtnl_lock();
1539 err = raw_notifier_chain_unregister(&netdev_chain, nb); 1539 err = raw_notifier_chain_unregister(&netdev_chain, nb);
1540 if (err) 1540 if (err)
1541 goto unlock; 1541 goto unlock;
1542 1542
1543 for_each_net(net) { 1543 for_each_net(net) {
1544 for_each_netdev(net, dev) { 1544 for_each_netdev(net, dev) {
1545 if (dev->flags & IFF_UP) { 1545 if (dev->flags & IFF_UP) {
1546 call_netdevice_notifier(nb, NETDEV_GOING_DOWN, 1546 call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
1547 dev); 1547 dev);
1548 call_netdevice_notifier(nb, NETDEV_DOWN, dev); 1548 call_netdevice_notifier(nb, NETDEV_DOWN, dev);
1549 } 1549 }
1550 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); 1550 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
1551 } 1551 }
1552 } 1552 }
1553 unlock: 1553 unlock:
1554 rtnl_unlock(); 1554 rtnl_unlock();
1555 return err; 1555 return err;
1556 } 1556 }
1557 EXPORT_SYMBOL(unregister_netdevice_notifier); 1557 EXPORT_SYMBOL(unregister_netdevice_notifier);
1558 1558
1559 /** 1559 /**
1560 * call_netdevice_notifiers_info - call all network notifier blocks 1560 * call_netdevice_notifiers_info - call all network notifier blocks
1561 * @val: value passed unmodified to notifier function 1561 * @val: value passed unmodified to notifier function
1562 * @dev: net_device pointer passed unmodified to notifier function 1562 * @dev: net_device pointer passed unmodified to notifier function
1563 * @info: notifier information data 1563 * @info: notifier information data
1564 * 1564 *
1565 * Call all network notifier blocks. Parameters and return value 1565 * Call all network notifier blocks. Parameters and return value
1566 * are as for raw_notifier_call_chain(). 1566 * are as for raw_notifier_call_chain().
1567 */ 1567 */
1568 1568
1569 int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev, 1569 int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev,
1570 struct netdev_notifier_info *info) 1570 struct netdev_notifier_info *info)
1571 { 1571 {
1572 ASSERT_RTNL(); 1572 ASSERT_RTNL();
1573 netdev_notifier_info_init(info, dev); 1573 netdev_notifier_info_init(info, dev);
1574 return raw_notifier_call_chain(&netdev_chain, val, info); 1574 return raw_notifier_call_chain(&netdev_chain, val, info);
1575 } 1575 }
1576 EXPORT_SYMBOL(call_netdevice_notifiers_info); 1576 EXPORT_SYMBOL(call_netdevice_notifiers_info);
1577 1577
1578 /** 1578 /**
1579 * call_netdevice_notifiers - call all network notifier blocks 1579 * call_netdevice_notifiers - call all network notifier blocks
1580 * @val: value passed unmodified to notifier function 1580 * @val: value passed unmodified to notifier function
1581 * @dev: net_device pointer passed unmodified to notifier function 1581 * @dev: net_device pointer passed unmodified to notifier function
1582 * 1582 *
1583 * Call all network notifier blocks. Parameters and return value 1583 * Call all network notifier blocks. Parameters and return value
1584 * are as for raw_notifier_call_chain(). 1584 * are as for raw_notifier_call_chain().
1585 */ 1585 */
1586 1586
1587 int call_netdevice_notifiers(unsigned long val, struct net_device *dev) 1587 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1588 { 1588 {
1589 struct netdev_notifier_info info; 1589 struct netdev_notifier_info info;
1590 1590
1591 return call_netdevice_notifiers_info(val, dev, &info); 1591 return call_netdevice_notifiers_info(val, dev, &info);
1592 } 1592 }
1593 EXPORT_SYMBOL(call_netdevice_notifiers); 1593 EXPORT_SYMBOL(call_netdevice_notifiers);
1594 1594
1595 static struct static_key netstamp_needed __read_mostly; 1595 static struct static_key netstamp_needed __read_mostly;
1596 #ifdef HAVE_JUMP_LABEL 1596 #ifdef HAVE_JUMP_LABEL
1597 /* We are not allowed to call static_key_slow_dec() from irq context 1597 /* We are not allowed to call static_key_slow_dec() from irq context
1598 * If net_disable_timestamp() is called from irq context, defer the 1598 * If net_disable_timestamp() is called from irq context, defer the
1599 * static_key_slow_dec() calls. 1599 * static_key_slow_dec() calls.
1600 */ 1600 */
1601 static atomic_t netstamp_needed_deferred; 1601 static atomic_t netstamp_needed_deferred;
1602 #endif 1602 #endif
1603 1603
1604 void net_enable_timestamp(void) 1604 void net_enable_timestamp(void)
1605 { 1605 {
1606 #ifdef HAVE_JUMP_LABEL 1606 #ifdef HAVE_JUMP_LABEL
1607 int deferred = atomic_xchg(&netstamp_needed_deferred, 0); 1607 int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
1608 1608
1609 if (deferred) { 1609 if (deferred) {
1610 while (--deferred) 1610 while (--deferred)
1611 static_key_slow_dec(&netstamp_needed); 1611 static_key_slow_dec(&netstamp_needed);
1612 return; 1612 return;
1613 } 1613 }
1614 #endif 1614 #endif
1615 static_key_slow_inc(&netstamp_needed); 1615 static_key_slow_inc(&netstamp_needed);
1616 } 1616 }
1617 EXPORT_SYMBOL(net_enable_timestamp); 1617 EXPORT_SYMBOL(net_enable_timestamp);
1618 1618
1619 void net_disable_timestamp(void) 1619 void net_disable_timestamp(void)
1620 { 1620 {
1621 #ifdef HAVE_JUMP_LABEL 1621 #ifdef HAVE_JUMP_LABEL
1622 if (in_interrupt()) { 1622 if (in_interrupt()) {
1623 atomic_inc(&netstamp_needed_deferred); 1623 atomic_inc(&netstamp_needed_deferred);
1624 return; 1624 return;
1625 } 1625 }
1626 #endif 1626 #endif
1627 static_key_slow_dec(&netstamp_needed); 1627 static_key_slow_dec(&netstamp_needed);
1628 } 1628 }
1629 EXPORT_SYMBOL(net_disable_timestamp); 1629 EXPORT_SYMBOL(net_disable_timestamp);
1630 1630
1631 static inline void net_timestamp_set(struct sk_buff *skb) 1631 static inline void net_timestamp_set(struct sk_buff *skb)
1632 { 1632 {
1633 skb->tstamp.tv64 = 0; 1633 skb->tstamp.tv64 = 0;
1634 if (static_key_false(&netstamp_needed)) 1634 if (static_key_false(&netstamp_needed))
1635 __net_timestamp(skb); 1635 __net_timestamp(skb);
1636 } 1636 }
1637 1637
1638 #define net_timestamp_check(COND, SKB) \ 1638 #define net_timestamp_check(COND, SKB) \
1639 if (static_key_false(&netstamp_needed)) { \ 1639 if (static_key_false(&netstamp_needed)) { \
1640 if ((COND) && !(SKB)->tstamp.tv64) \ 1640 if ((COND) && !(SKB)->tstamp.tv64) \
1641 __net_timestamp(SKB); \ 1641 __net_timestamp(SKB); \
1642 } \ 1642 } \
1643 1643
1644 static inline bool is_skb_forwardable(struct net_device *dev, 1644 static inline bool is_skb_forwardable(struct net_device *dev,
1645 struct sk_buff *skb) 1645 struct sk_buff *skb)
1646 { 1646 {
1647 unsigned int len; 1647 unsigned int len;
1648 1648
1649 if (!(dev->flags & IFF_UP)) 1649 if (!(dev->flags & IFF_UP))
1650 return false; 1650 return false;
1651 1651
1652 len = dev->mtu + dev->hard_header_len + VLAN_HLEN; 1652 len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
1653 if (skb->len <= len) 1653 if (skb->len <= len)
1654 return true; 1654 return true;
1655 1655
1656 /* if TSO is enabled, we don't care about the length as the packet 1656 /* if TSO is enabled, we don't care about the length as the packet
1657 * could be forwarded without being segmented before 1657 * could be forwarded without being segmented before
1658 */ 1658 */
1659 if (skb_is_gso(skb)) 1659 if (skb_is_gso(skb))
1660 return true; 1660 return true;
1661 1661
1662 return false; 1662 return false;
1663 } 1663 }
1664 1664
1665 /** 1665 /**
1666 * dev_forward_skb - loopback an skb to another netif 1666 * dev_forward_skb - loopback an skb to another netif
1667 * 1667 *
1668 * @dev: destination network device 1668 * @dev: destination network device
1669 * @skb: buffer to forward 1669 * @skb: buffer to forward
1670 * 1670 *
1671 * return values: 1671 * return values:
1672 * NET_RX_SUCCESS (no congestion) 1672 * NET_RX_SUCCESS (no congestion)
1673 * NET_RX_DROP (packet was dropped, but freed) 1673 * NET_RX_DROP (packet was dropped, but freed)
1674 * 1674 *
1675 * dev_forward_skb can be used for injecting an skb from the 1675 * dev_forward_skb can be used for injecting an skb from the
1676 * start_xmit function of one device into the receive queue 1676 * start_xmit function of one device into the receive queue
1677 * of another device. 1677 * of another device.
1678 * 1678 *
1679 * The receiving device may be in another namespace, so 1679 * The receiving device may be in another namespace, so
1680 * we have to clear all information in the skb that could 1680 * we have to clear all information in the skb that could
1681 * impact namespace isolation. 1681 * impact namespace isolation.
1682 */ 1682 */
1683 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) 1683 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1684 { 1684 {
1685 if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { 1685 if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
1686 if (skb_copy_ubufs(skb, GFP_ATOMIC)) { 1686 if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
1687 atomic_long_inc(&dev->rx_dropped); 1687 atomic_long_inc(&dev->rx_dropped);
1688 kfree_skb(skb); 1688 kfree_skb(skb);
1689 return NET_RX_DROP; 1689 return NET_RX_DROP;
1690 } 1690 }
1691 } 1691 }
1692 1692
1693 if (unlikely(!is_skb_forwardable(dev, skb))) { 1693 if (unlikely(!is_skb_forwardable(dev, skb))) {
1694 atomic_long_inc(&dev->rx_dropped); 1694 atomic_long_inc(&dev->rx_dropped);
1695 kfree_skb(skb); 1695 kfree_skb(skb);
1696 return NET_RX_DROP; 1696 return NET_RX_DROP;
1697 } 1697 }
1698 1698
1699 skb_scrub_packet(skb, true); 1699 skb_scrub_packet(skb, true);
1700 skb->protocol = eth_type_trans(skb, dev); 1700 skb->protocol = eth_type_trans(skb, dev);
1701 1701
1702 return netif_rx(skb); 1702 return netif_rx(skb);
1703 } 1703 }
1704 EXPORT_SYMBOL_GPL(dev_forward_skb); 1704 EXPORT_SYMBOL_GPL(dev_forward_skb);
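/*
 * Editor's note: illustrative sketch, not part of this commit. This is the
 * pattern the kernel-doc above describes: a virtual driver's ndo_start_xmit
 * injects the skb into its peer's receive path. example_get_peer() is a
 * hypothetical helper standing in for however the driver tracks its peer
 * (veth, for instance, keeps it in its private data).
 */
static netdev_tx_t example_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct net_device *peer = example_get_peer(dev);        /* hypothetical */
        unsigned int len = skb->len;    /* sample before the skb is handed off */

        if (dev_forward_skb(peer, skb) == NET_RX_SUCCESS) {
                dev->stats.tx_packets++;
                dev->stats.tx_bytes += len;
        }
        /* on NET_RX_DROP dev_forward_skb() has already freed the skb */
        return NETDEV_TX_OK;
}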
1705 1705
1706 static inline int deliver_skb(struct sk_buff *skb, 1706 static inline int deliver_skb(struct sk_buff *skb,
1707 struct packet_type *pt_prev, 1707 struct packet_type *pt_prev,
1708 struct net_device *orig_dev) 1708 struct net_device *orig_dev)
1709 { 1709 {
1710 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) 1710 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
1711 return -ENOMEM; 1711 return -ENOMEM;
1712 atomic_inc(&skb->users); 1712 atomic_inc(&skb->users);
1713 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); 1713 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1714 } 1714 }
1715 1715
1716 static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb) 1716 static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
1717 { 1717 {
1718 if (!ptype->af_packet_priv || !skb->sk) 1718 if (!ptype->af_packet_priv || !skb->sk)
1719 return false; 1719 return false;
1720 1720
1721 if (ptype->id_match) 1721 if (ptype->id_match)
1722 return ptype->id_match(ptype, skb->sk); 1722 return ptype->id_match(ptype, skb->sk);
1723 else if ((struct sock *)ptype->af_packet_priv == skb->sk) 1723 else if ((struct sock *)ptype->af_packet_priv == skb->sk)
1724 return true; 1724 return true;
1725 1725
1726 return false; 1726 return false;
1727 } 1727 }
1728 1728
1729 /* 1729 /*
1730 * Support routine. Sends outgoing frames to any network 1730 * Support routine. Sends outgoing frames to any network
1731 * taps currently in use. 1731 * taps currently in use.
1732 */ 1732 */
1733 1733
1734 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) 1734 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1735 { 1735 {
1736 struct packet_type *ptype; 1736 struct packet_type *ptype;
1737 struct sk_buff *skb2 = NULL; 1737 struct sk_buff *skb2 = NULL;
1738 struct packet_type *pt_prev = NULL; 1738 struct packet_type *pt_prev = NULL;
1739 1739
1740 rcu_read_lock(); 1740 rcu_read_lock();
1741 list_for_each_entry_rcu(ptype, &ptype_all, list) { 1741 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1742 /* Never send packets back to the socket 1742 /* Never send packets back to the socket
1743 * they originated from - MvS (miquels@drinkel.ow.org) 1743 * they originated from - MvS (miquels@drinkel.ow.org)
1744 */ 1744 */
1745 if ((ptype->dev == dev || !ptype->dev) && 1745 if ((ptype->dev == dev || !ptype->dev) &&
1746 (!skb_loop_sk(ptype, skb))) { 1746 (!skb_loop_sk(ptype, skb))) {
1747 if (pt_prev) { 1747 if (pt_prev) {
1748 deliver_skb(skb2, pt_prev, skb->dev); 1748 deliver_skb(skb2, pt_prev, skb->dev);
1749 pt_prev = ptype; 1749 pt_prev = ptype;
1750 continue; 1750 continue;
1751 } 1751 }
1752 1752
1753 skb2 = skb_clone(skb, GFP_ATOMIC); 1753 skb2 = skb_clone(skb, GFP_ATOMIC);
1754 if (!skb2) 1754 if (!skb2)
1755 break; 1755 break;
1756 1756
1757 net_timestamp_set(skb2); 1757 net_timestamp_set(skb2);
1758 1758
1759 /* skb->nh should be correctly set 1759 /* skb->nh should be correctly set
1760 * by the sender, so that the check below is 1760 * by the sender, so that the check below is
1761 * just protection against buggy protocols. 1761 * just protection against buggy protocols.
1762 */ 1762 */
1763 skb_reset_mac_header(skb2); 1763 skb_reset_mac_header(skb2);
1764 1764
1765 if (skb_network_header(skb2) < skb2->data || 1765 if (skb_network_header(skb2) < skb2->data ||
1766 skb_network_header(skb2) > skb_tail_pointer(skb2)) { 1766 skb_network_header(skb2) > skb_tail_pointer(skb2)) {
1767 net_crit_ratelimited("protocol %04x is buggy, dev %s\n", 1767 net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
1768 ntohs(skb2->protocol), 1768 ntohs(skb2->protocol),
1769 dev->name); 1769 dev->name);
1770 skb_reset_network_header(skb2); 1770 skb_reset_network_header(skb2);
1771 } 1771 }
1772 1772
1773 skb2->transport_header = skb2->network_header; 1773 skb2->transport_header = skb2->network_header;
1774 skb2->pkt_type = PACKET_OUTGOING; 1774 skb2->pkt_type = PACKET_OUTGOING;
1775 pt_prev = ptype; 1775 pt_prev = ptype;
1776 } 1776 }
1777 } 1777 }
1778 if (pt_prev) 1778 if (pt_prev)
1779 pt_prev->func(skb2, skb->dev, pt_prev, skb->dev); 1779 pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
1780 rcu_read_unlock(); 1780 rcu_read_unlock();
1781 } 1781 }
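For context, a minimal sketch (not part of this file or commit) of a tap that would receive the PACKET_OUTGOING clones produced above; the names sample_tap_rcv and sample_tap are hypothetical.

#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <linux/skbuff.h>

/* Hypothetical ETH_P_ALL tap: registered on ptype_all, so it sees the
 * PACKET_OUTGOING clones handed out by dev_queue_xmit_nit(). */
static int sample_tap_rcv(struct sk_buff *skb, struct net_device *dev,
			  struct packet_type *pt, struct net_device *orig_dev)
{
	if (skb->pkt_type == PACKET_OUTGOING)
		pr_debug("tx tap: %s len=%u\n", dev->name, skb->len);
	kfree_skb(skb);
	return 0;
}

static struct packet_type sample_tap __read_mostly = {
	.type = htons(ETH_P_ALL),
	.func = sample_tap_rcv,
};

/* dev_add_pack(&sample_tap) on module init, dev_remove_pack(&sample_tap)
 * on exit. */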
1782 1782
1783 /** 1783 /**
1784 * netif_setup_tc - Handle tc mappings on real_num_tx_queues change 1784 * netif_setup_tc - Handle tc mappings on real_num_tx_queues change
1785 * @dev: Network device 1785 * @dev: Network device
1786 * @txq: number of queues available 1786 * @txq: number of queues available
1787 * 1787 *
1788 * If real_num_tx_queues is changed, the tc mappings may no longer be 1788 * If real_num_tx_queues is changed, the tc mappings may no longer be
1789 * valid. To resolve this, verify that the tc mapping remains valid and, 1789 * valid. To resolve this, verify that the tc mapping remains valid and,
1790 * if not, NULL the mapping. With no priorities mapping to this 1790 * if not, NULL the mapping. With no priorities mapping to this
1791 * offset/count pair it will no longer be used. In the worst case, if 1791 * offset/count pair it will no longer be used. In the worst case, if
1792 * TC0 is invalid, nothing can be done, so disable priority mappings. 1792 * TC0 is invalid, nothing can be done, so disable priority mappings.
1793 * It is expected that drivers will fix this mapping if they can before 1793 * It is expected that drivers will fix this mapping if they can before
1794 * calling netif_set_real_num_tx_queues. 1794 * calling netif_set_real_num_tx_queues.
1795 */ 1795 */
1796 static void netif_setup_tc(struct net_device *dev, unsigned int txq) 1796 static void netif_setup_tc(struct net_device *dev, unsigned int txq)
1797 { 1797 {
1798 int i; 1798 int i;
1799 struct netdev_tc_txq *tc = &dev->tc_to_txq[0]; 1799 struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
1800 1800
1801 /* If TC0 is invalidated disable TC mapping */ 1801 /* If TC0 is invalidated disable TC mapping */
1802 if (tc->offset + tc->count > txq) { 1802 if (tc->offset + tc->count > txq) {
1803 pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n"); 1803 pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
1804 dev->num_tc = 0; 1804 dev->num_tc = 0;
1805 return; 1805 return;
1806 } 1806 }
1807 1807
1808 /* Invalidated prio to tc mappings set to TC0 */ 1808 /* Invalidated prio to tc mappings set to TC0 */
1809 for (i = 1; i < TC_BITMASK + 1; i++) { 1809 for (i = 1; i < TC_BITMASK + 1; i++) {
1810 int q = netdev_get_prio_tc_map(dev, i); 1810 int q = netdev_get_prio_tc_map(dev, i);
1811 1811
1812 tc = &dev->tc_to_txq[q]; 1812 tc = &dev->tc_to_txq[q];
1813 if (tc->offset + tc->count > txq) { 1813 if (tc->offset + tc->count > txq) {
1814 pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n", 1814 pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
1815 i, q); 1815 i, q);
1816 netdev_set_prio_tc_map(dev, i, 0); 1816 netdev_set_prio_tc_map(dev, i, 0);
1817 } 1817 }
1818 } 1818 }
1819 } 1819 }
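A hedged illustration of the driver-side setup this helper protects, with assumed names (sample_setup_tc) and an assumed 8-queue device: the driver partitions its tx queues into two traffic classes before setting real_num_tx_queues.

#include <linux/netdevice.h>

/* Hypothetical: 2 traffic classes over 8 tx queues; priorities 0-3 map
 * to TC0, the rest to TC1. Shrinking the queue count later triggers
 * netif_setup_tc() above to repair or disable these mappings. */
static int sample_setup_tc(struct net_device *dev)
{
	int prio;

	netdev_set_num_tc(dev, 2);
	netdev_set_tc_queue(dev, 0, 4, 0);	/* TC0: queues 0-3 */
	netdev_set_tc_queue(dev, 1, 4, 4);	/* TC1: queues 4-7 */
	for (prio = 0; prio <= TC_BITMASK; prio++)
		netdev_set_prio_tc_map(dev, prio, prio < 4 ? 0 : 1);

	return netif_set_real_num_tx_queues(dev, 8);
}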
1820 1820
1821 #ifdef CONFIG_XPS 1821 #ifdef CONFIG_XPS
1822 static DEFINE_MUTEX(xps_map_mutex); 1822 static DEFINE_MUTEX(xps_map_mutex);
1823 #define xmap_dereference(P) \ 1823 #define xmap_dereference(P) \
1824 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) 1824 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
1825 1825
1826 static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps, 1826 static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps,
1827 int cpu, u16 index) 1827 int cpu, u16 index)
1828 { 1828 {
1829 struct xps_map *map = NULL; 1829 struct xps_map *map = NULL;
1830 int pos; 1830 int pos;
1831 1831
1832 if (dev_maps) 1832 if (dev_maps)
1833 map = xmap_dereference(dev_maps->cpu_map[cpu]); 1833 map = xmap_dereference(dev_maps->cpu_map[cpu]);
1834 1834
1835 for (pos = 0; map && pos < map->len; pos++) { 1835 for (pos = 0; map && pos < map->len; pos++) {
1836 if (map->queues[pos] == index) { 1836 if (map->queues[pos] == index) {
1837 if (map->len > 1) { 1837 if (map->len > 1) {
1838 map->queues[pos] = map->queues[--map->len]; 1838 map->queues[pos] = map->queues[--map->len];
1839 } else { 1839 } else {
1840 RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL); 1840 RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL);
1841 kfree_rcu(map, rcu); 1841 kfree_rcu(map, rcu);
1842 map = NULL; 1842 map = NULL;
1843 } 1843 }
1844 break; 1844 break;
1845 } 1845 }
1846 } 1846 }
1847 1847
1848 return map; 1848 return map;
1849 } 1849 }
1850 1850
1851 static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index) 1851 static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
1852 { 1852 {
1853 struct xps_dev_maps *dev_maps; 1853 struct xps_dev_maps *dev_maps;
1854 int cpu, i; 1854 int cpu, i;
1855 bool active = false; 1855 bool active = false;
1856 1856
1857 mutex_lock(&xps_map_mutex); 1857 mutex_lock(&xps_map_mutex);
1858 dev_maps = xmap_dereference(dev->xps_maps); 1858 dev_maps = xmap_dereference(dev->xps_maps);
1859 1859
1860 if (!dev_maps) 1860 if (!dev_maps)
1861 goto out_no_maps; 1861 goto out_no_maps;
1862 1862
1863 for_each_possible_cpu(cpu) { 1863 for_each_possible_cpu(cpu) {
1864 for (i = index; i < dev->num_tx_queues; i++) { 1864 for (i = index; i < dev->num_tx_queues; i++) {
1865 if (!remove_xps_queue(dev_maps, cpu, i)) 1865 if (!remove_xps_queue(dev_maps, cpu, i))
1866 break; 1866 break;
1867 } 1867 }
1868 if (i == dev->num_tx_queues) 1868 if (i == dev->num_tx_queues)
1869 active = true; 1869 active = true;
1870 } 1870 }
1871 1871
1872 if (!active) { 1872 if (!active) {
1873 RCU_INIT_POINTER(dev->xps_maps, NULL); 1873 RCU_INIT_POINTER(dev->xps_maps, NULL);
1874 kfree_rcu(dev_maps, rcu); 1874 kfree_rcu(dev_maps, rcu);
1875 } 1875 }
1876 1876
1877 for (i = index; i < dev->num_tx_queues; i++) 1877 for (i = index; i < dev->num_tx_queues; i++)
1878 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i), 1878 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
1879 NUMA_NO_NODE); 1879 NUMA_NO_NODE);
1880 1880
1881 out_no_maps: 1881 out_no_maps:
1882 mutex_unlock(&xps_map_mutex); 1882 mutex_unlock(&xps_map_mutex);
1883 } 1883 }
1884 1884
1885 static struct xps_map *expand_xps_map(struct xps_map *map, 1885 static struct xps_map *expand_xps_map(struct xps_map *map,
1886 int cpu, u16 index) 1886 int cpu, u16 index)
1887 { 1887 {
1888 struct xps_map *new_map; 1888 struct xps_map *new_map;
1889 int alloc_len = XPS_MIN_MAP_ALLOC; 1889 int alloc_len = XPS_MIN_MAP_ALLOC;
1890 int i, pos; 1890 int i, pos;
1891 1891
1892 for (pos = 0; map && pos < map->len; pos++) { 1892 for (pos = 0; map && pos < map->len; pos++) {
1893 if (map->queues[pos] != index) 1893 if (map->queues[pos] != index)
1894 continue; 1894 continue;
1895 return map; 1895 return map;
1896 } 1896 }
1897 1897
1898 /* Need to add queue to this CPU's existing map */ 1898 /* Need to add queue to this CPU's existing map */
1899 if (map) { 1899 if (map) {
1900 if (pos < map->alloc_len) 1900 if (pos < map->alloc_len)
1901 return map; 1901 return map;
1902 1902
1903 alloc_len = map->alloc_len * 2; 1903 alloc_len = map->alloc_len * 2;
1904 } 1904 }
1905 1905
1906 /* Need to allocate new map to store queue on this CPU's map */ 1906 /* Need to allocate new map to store queue on this CPU's map */
1907 new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL, 1907 new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
1908 cpu_to_node(cpu)); 1908 cpu_to_node(cpu));
1909 if (!new_map) 1909 if (!new_map)
1910 return NULL; 1910 return NULL;
1911 1911
1912 for (i = 0; i < pos; i++) 1912 for (i = 0; i < pos; i++)
1913 new_map->queues[i] = map->queues[i]; 1913 new_map->queues[i] = map->queues[i];
1914 new_map->alloc_len = alloc_len; 1914 new_map->alloc_len = alloc_len;
1915 new_map->len = pos; 1915 new_map->len = pos;
1916 1916
1917 return new_map; 1917 return new_map;
1918 } 1918 }
1919 1919
1920 int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, 1920 int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
1921 u16 index) 1921 u16 index)
1922 { 1922 {
1923 struct xps_dev_maps *dev_maps, *new_dev_maps = NULL; 1923 struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
1924 struct xps_map *map, *new_map; 1924 struct xps_map *map, *new_map;
1925 int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES); 1925 int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES);
1926 int cpu, numa_node_id = -2; 1926 int cpu, numa_node_id = -2;
1927 bool active = false; 1927 bool active = false;
1928 1928
1929 mutex_lock(&xps_map_mutex); 1929 mutex_lock(&xps_map_mutex);
1930 1930
1931 dev_maps = xmap_dereference(dev->xps_maps); 1931 dev_maps = xmap_dereference(dev->xps_maps);
1932 1932
1933 /* allocate memory for queue storage */ 1933 /* allocate memory for queue storage */
1934 for_each_online_cpu(cpu) { 1934 for_each_online_cpu(cpu) {
1935 if (!cpumask_test_cpu(cpu, mask)) 1935 if (!cpumask_test_cpu(cpu, mask))
1936 continue; 1936 continue;
1937 1937
1938 if (!new_dev_maps) 1938 if (!new_dev_maps)
1939 new_dev_maps = kzalloc(maps_sz, GFP_KERNEL); 1939 new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
1940 if (!new_dev_maps) { 1940 if (!new_dev_maps) {
1941 mutex_unlock(&xps_map_mutex); 1941 mutex_unlock(&xps_map_mutex);
1942 return -ENOMEM; 1942 return -ENOMEM;
1943 } 1943 }
1944 1944
1945 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : 1945 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
1946 NULL; 1946 NULL;
1947 1947
1948 map = expand_xps_map(map, cpu, index); 1948 map = expand_xps_map(map, cpu, index);
1949 if (!map) 1949 if (!map)
1950 goto error; 1950 goto error;
1951 1951
1952 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); 1952 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
1953 } 1953 }
1954 1954
1955 if (!new_dev_maps) 1955 if (!new_dev_maps)
1956 goto out_no_new_maps; 1956 goto out_no_new_maps;
1957 1957
1958 for_each_possible_cpu(cpu) { 1958 for_each_possible_cpu(cpu) {
1959 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) { 1959 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
1960 /* add queue to CPU maps */ 1960 /* add queue to CPU maps */
1961 int pos = 0; 1961 int pos = 0;
1962 1962
1963 map = xmap_dereference(new_dev_maps->cpu_map[cpu]); 1963 map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
1964 while ((pos < map->len) && (map->queues[pos] != index)) 1964 while ((pos < map->len) && (map->queues[pos] != index))
1965 pos++; 1965 pos++;
1966 1966
1967 if (pos == map->len) 1967 if (pos == map->len)
1968 map->queues[map->len++] = index; 1968 map->queues[map->len++] = index;
1969 #ifdef CONFIG_NUMA 1969 #ifdef CONFIG_NUMA
1970 if (numa_node_id == -2) 1970 if (numa_node_id == -2)
1971 numa_node_id = cpu_to_node(cpu); 1971 numa_node_id = cpu_to_node(cpu);
1972 else if (numa_node_id != cpu_to_node(cpu)) 1972 else if (numa_node_id != cpu_to_node(cpu))
1973 numa_node_id = -1; 1973 numa_node_id = -1;
1974 #endif 1974 #endif
1975 } else if (dev_maps) { 1975 } else if (dev_maps) {
1976 /* fill in the new device map from the old device map */ 1976 /* fill in the new device map from the old device map */
1977 map = xmap_dereference(dev_maps->cpu_map[cpu]); 1977 map = xmap_dereference(dev_maps->cpu_map[cpu]);
1978 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); 1978 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
1979 } 1979 }
1980 1980
1981 } 1981 }
1982 1982
1983 rcu_assign_pointer(dev->xps_maps, new_dev_maps); 1983 rcu_assign_pointer(dev->xps_maps, new_dev_maps);
1984 1984
1985 /* Cleanup old maps */ 1985 /* Cleanup old maps */
1986 if (dev_maps) { 1986 if (dev_maps) {
1987 for_each_possible_cpu(cpu) { 1987 for_each_possible_cpu(cpu) {
1988 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); 1988 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
1989 map = xmap_dereference(dev_maps->cpu_map[cpu]); 1989 map = xmap_dereference(dev_maps->cpu_map[cpu]);
1990 if (map && map != new_map) 1990 if (map && map != new_map)
1991 kfree_rcu(map, rcu); 1991 kfree_rcu(map, rcu);
1992 } 1992 }
1993 1993
1994 kfree_rcu(dev_maps, rcu); 1994 kfree_rcu(dev_maps, rcu);
1995 } 1995 }
1996 1996
1997 dev_maps = new_dev_maps; 1997 dev_maps = new_dev_maps;
1998 active = true; 1998 active = true;
1999 1999
2000 out_no_new_maps: 2000 out_no_new_maps:
2001 /* update Tx queue numa node */ 2001 /* update Tx queue numa node */
2002 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index), 2002 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
2003 (numa_node_id >= 0) ? numa_node_id : 2003 (numa_node_id >= 0) ? numa_node_id :
2004 NUMA_NO_NODE); 2004 NUMA_NO_NODE);
2005 2005
2006 if (!dev_maps) 2006 if (!dev_maps)
2007 goto out_no_maps; 2007 goto out_no_maps;
2008 2008
2009 /* removes queue from unused CPUs */ 2009 /* removes queue from unused CPUs */
2010 for_each_possible_cpu(cpu) { 2010 for_each_possible_cpu(cpu) {
2011 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) 2011 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu))
2012 continue; 2012 continue;
2013 2013
2014 if (remove_xps_queue(dev_maps, cpu, index)) 2014 if (remove_xps_queue(dev_maps, cpu, index))
2015 active = true; 2015 active = true;
2016 } 2016 }
2017 2017
2018 /* free map if not active */ 2018 /* free map if not active */
2019 if (!active) { 2019 if (!active) {
2020 RCU_INIT_POINTER(dev->xps_maps, NULL); 2020 RCU_INIT_POINTER(dev->xps_maps, NULL);
2021 kfree_rcu(dev_maps, rcu); 2021 kfree_rcu(dev_maps, rcu);
2022 } 2022 }
2023 2023
2024 out_no_maps: 2024 out_no_maps:
2025 mutex_unlock(&xps_map_mutex); 2025 mutex_unlock(&xps_map_mutex);
2026 2026
2027 return 0; 2027 return 0;
2028 error: 2028 error:
2029 /* remove any maps that we added */ 2029 /* remove any maps that we added */
2030 for_each_possible_cpu(cpu) { 2030 for_each_possible_cpu(cpu) {
2031 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); 2031 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
2032 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : 2032 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
2033 NULL; 2033 NULL;
2034 if (new_map && new_map != map) 2034 if (new_map && new_map != map)
2035 kfree(new_map); 2035 kfree(new_map);
2036 } 2036 }
2037 2037
2038 mutex_unlock(&xps_map_mutex); 2038 mutex_unlock(&xps_map_mutex);
2039 2039
2040 kfree(new_dev_maps); 2040 kfree(new_dev_maps);
2041 return -ENOMEM; 2041 return -ENOMEM;
2042 } 2042 }
2043 EXPORT_SYMBOL(netif_set_xps_queue); 2043 EXPORT_SYMBOL(netif_set_xps_queue);
2044 2044
2045 #endif 2045 #endif
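A minimal usage sketch for the exported netif_set_xps_queue() above, assuming a hypothetical driver helper sample_set_xps() that pins each tx queue to one online CPU; without CONFIG_XPS the call is a stub returning 0.

#include <linux/netdevice.h>
#include <linux/cpumask.h>

static void sample_set_xps(struct net_device *dev)
{
	u16 q = 0;
	int cpu, err;

	for_each_online_cpu(cpu) {
		if (q >= dev->real_num_tx_queues)
			break;
		err = netif_set_xps_queue(dev, cpumask_of(cpu), q);
		if (err)
			netdev_warn(dev, "XPS for queue %u failed: %d\n",
				    q, err);
		q++;
	}
}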
2046 /* 2046 /*
2047 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues 2047 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
2048 * greater than real_num_tx_queues, stale skbs on the qdisc must be flushed. 2048 * greater than real_num_tx_queues, stale skbs on the qdisc must be flushed.
2049 */ 2049 */
2050 int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) 2050 int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
2051 { 2051 {
2052 int rc; 2052 int rc;
2053 2053
2054 if (txq < 1 || txq > dev->num_tx_queues) 2054 if (txq < 1 || txq > dev->num_tx_queues)
2055 return -EINVAL; 2055 return -EINVAL;
2056 2056
2057 if (dev->reg_state == NETREG_REGISTERED || 2057 if (dev->reg_state == NETREG_REGISTERED ||
2058 dev->reg_state == NETREG_UNREGISTERING) { 2058 dev->reg_state == NETREG_UNREGISTERING) {
2059 ASSERT_RTNL(); 2059 ASSERT_RTNL();
2060 2060
2061 rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, 2061 rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
2062 txq); 2062 txq);
2063 if (rc) 2063 if (rc)
2064 return rc; 2064 return rc;
2065 2065
2066 if (dev->num_tc) 2066 if (dev->num_tc)
2067 netif_setup_tc(dev, txq); 2067 netif_setup_tc(dev, txq);
2068 2068
2069 if (txq < dev->real_num_tx_queues) { 2069 if (txq < dev->real_num_tx_queues) {
2070 qdisc_reset_all_tx_gt(dev, txq); 2070 qdisc_reset_all_tx_gt(dev, txq);
2071 #ifdef CONFIG_XPS 2071 #ifdef CONFIG_XPS
2072 netif_reset_xps_queues_gt(dev, txq); 2072 netif_reset_xps_queues_gt(dev, txq);
2073 #endif 2073 #endif
2074 } 2074 }
2075 } 2075 }
2076 2076
2077 dev->real_num_tx_queues = txq; 2077 dev->real_num_tx_queues = txq;
2078 return 0; 2078 return 0;
2079 } 2079 }
2080 EXPORT_SYMBOL(netif_set_real_num_tx_queues); 2080 EXPORT_SYMBOL(netif_set_real_num_tx_queues);
2081 2081
2082 #ifdef CONFIG_RPS 2082 #ifdef CONFIG_RPS
2083 /** 2083 /**
2084 * netif_set_real_num_rx_queues - set actual number of RX queues used 2084 * netif_set_real_num_rx_queues - set actual number of RX queues used
2085 * @dev: Network device 2085 * @dev: Network device
2086 * @rxq: Actual number of RX queues 2086 * @rxq: Actual number of RX queues
2087 * 2087 *
2088 * This must be called either with the rtnl_lock held or before 2088 * This must be called either with the rtnl_lock held or before
2089 * registration of the net device. Returns 0 on success, or a 2089 * registration of the net device. Returns 0 on success, or a
2090 * negative error code. If called before registration, it always 2090 * negative error code. If called before registration, it always
2091 * succeeds. 2091 * succeeds.
2092 */ 2092 */
2093 int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq) 2093 int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
2094 { 2094 {
2095 int rc; 2095 int rc;
2096 2096
2097 if (rxq < 1 || rxq > dev->num_rx_queues) 2097 if (rxq < 1 || rxq > dev->num_rx_queues)
2098 return -EINVAL; 2098 return -EINVAL;
2099 2099
2100 if (dev->reg_state == NETREG_REGISTERED) { 2100 if (dev->reg_state == NETREG_REGISTERED) {
2101 ASSERT_RTNL(); 2101 ASSERT_RTNL();
2102 2102
2103 rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues, 2103 rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
2104 rxq); 2104 rxq);
2105 if (rc) 2105 if (rc)
2106 return rc; 2106 return rc;
2107 } 2107 }
2108 2108
2109 dev->real_num_rx_queues = rxq; 2109 dev->real_num_rx_queues = rxq;
2110 return 0; 2110 return 0;
2111 } 2111 }
2112 EXPORT_SYMBOL(netif_set_real_num_rx_queues); 2112 EXPORT_SYMBOL(netif_set_real_num_rx_queues);
2113 #endif 2113 #endif
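A hedged sketch of a caller adjusting both queue counts together, e.g. from a channel-reconfiguration path; sample_set_channels() is a hypothetical name, and the rtnl requirement from the kernel-doc above is expressed with ASSERT_RTNL().

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static int sample_set_channels(struct net_device *dev, unsigned int n)
{
	int err;

	ASSERT_RTNL();	/* required once the device is registered */

	err = netif_set_real_num_tx_queues(dev, n);
	if (err)
		return err;

	/* Without CONFIG_RPS this is a stub that returns 0. */
	return netif_set_real_num_rx_queues(dev, n);
}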
2114 2114
2115 /** 2115 /**
2116 * netif_get_num_default_rss_queues - default number of RSS queues 2116 * netif_get_num_default_rss_queues - default number of RSS queues
2117 * 2117 *
2118 * This routine should set an upper limit on the number of RSS queues 2118 * This routine should set an upper limit on the number of RSS queues
2119 * used by default by multiqueue devices. 2119 * used by default by multiqueue devices.
2120 */ 2120 */
2121 int netif_get_num_default_rss_queues(void) 2121 int netif_get_num_default_rss_queues(void)
2122 { 2122 {
2123 return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus()); 2123 return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
2124 } 2124 }
2125 EXPORT_SYMBOL(netif_get_num_default_rss_queues); 2125 EXPORT_SYMBOL(netif_get_num_default_rss_queues);
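For illustration only: a hypothetical driver capping its queue count with the helper above; the hardware maximum of 16 is an assumed value.

#include <linux/kernel.h>
#include <linux/netdevice.h>

#define SAMPLE_HW_MAX_QUEUES 16	/* assumed hardware limit */

static unsigned int sample_pick_num_queues(void)
{
	return min_t(unsigned int, SAMPLE_HW_MAX_QUEUES,
		     netif_get_num_default_rss_queues());
}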
2126 2126
2127 static inline void __netif_reschedule(struct Qdisc *q) 2127 static inline void __netif_reschedule(struct Qdisc *q)
2128 { 2128 {
2129 struct softnet_data *sd; 2129 struct softnet_data *sd;
2130 unsigned long flags; 2130 unsigned long flags;
2131 2131
2132 local_irq_save(flags); 2132 local_irq_save(flags);
2133 sd = &__get_cpu_var(softnet_data); 2133 sd = &__get_cpu_var(softnet_data);
2134 q->next_sched = NULL; 2134 q->next_sched = NULL;
2135 *sd->output_queue_tailp = q; 2135 *sd->output_queue_tailp = q;
2136 sd->output_queue_tailp = &q->next_sched; 2136 sd->output_queue_tailp = &q->next_sched;
2137 raise_softirq_irqoff(NET_TX_SOFTIRQ); 2137 raise_softirq_irqoff(NET_TX_SOFTIRQ);
2138 local_irq_restore(flags); 2138 local_irq_restore(flags);
2139 } 2139 }
2140 2140
2141 void __netif_schedule(struct Qdisc *q) 2141 void __netif_schedule(struct Qdisc *q)
2142 { 2142 {
2143 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) 2143 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
2144 __netif_reschedule(q); 2144 __netif_reschedule(q);
2145 } 2145 }
2146 EXPORT_SYMBOL(__netif_schedule); 2146 EXPORT_SYMBOL(__netif_schedule);
2147 2147
2148 struct dev_kfree_skb_cb { 2148 struct dev_kfree_skb_cb {
2149 enum skb_free_reason reason; 2149 enum skb_free_reason reason;
2150 }; 2150 };
2151 2151
2152 static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb) 2152 static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
2153 { 2153 {
2154 return (struct dev_kfree_skb_cb *)skb->cb; 2154 return (struct dev_kfree_skb_cb *)skb->cb;
2155 } 2155 }
2156 2156
2157 void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) 2157 void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
2158 { 2158 {
2159 unsigned long flags; 2159 unsigned long flags;
2160 2160
2161 if (likely(atomic_read(&skb->users) == 1)) { 2161 if (likely(atomic_read(&skb->users) == 1)) {
2162 smp_rmb(); 2162 smp_rmb();
2163 atomic_set(&skb->users, 0); 2163 atomic_set(&skb->users, 0);
2164 } else if (likely(!atomic_dec_and_test(&skb->users))) { 2164 } else if (likely(!atomic_dec_and_test(&skb->users))) {
2165 return; 2165 return;
2166 } 2166 }
2167 get_kfree_skb_cb(skb)->reason = reason; 2167 get_kfree_skb_cb(skb)->reason = reason;
2168 local_irq_save(flags); 2168 local_irq_save(flags);
2169 skb->next = __this_cpu_read(softnet_data.completion_queue); 2169 skb->next = __this_cpu_read(softnet_data.completion_queue);
2170 __this_cpu_write(softnet_data.completion_queue, skb); 2170 __this_cpu_write(softnet_data.completion_queue, skb);
2171 raise_softirq_irqoff(NET_TX_SOFTIRQ); 2171 raise_softirq_irqoff(NET_TX_SOFTIRQ);
2172 local_irq_restore(flags); 2172 local_irq_restore(flags);
2173 } 2173 }
2174 EXPORT_SYMBOL(__dev_kfree_skb_irq); 2174 EXPORT_SYMBOL(__dev_kfree_skb_irq);
2175 2175
2176 void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason) 2176 void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason)
2177 { 2177 {
2178 if (in_irq() || irqs_disabled()) 2178 if (in_irq() || irqs_disabled())
2179 __dev_kfree_skb_irq(skb, reason); 2179 __dev_kfree_skb_irq(skb, reason);
2180 else 2180 else
2181 dev_kfree_skb(skb); 2181 dev_kfree_skb(skb);
2182 } 2182 }
2183 EXPORT_SYMBOL(__dev_kfree_skb_any); 2183 EXPORT_SYMBOL(__dev_kfree_skb_any);
2184 2184
2185 2185
2186 /** 2186 /**
2187 * netif_device_detach - mark device as removed 2187 * netif_device_detach - mark device as removed
2188 * @dev: network device 2188 * @dev: network device
2189 * 2189 *
2190 * Mark device as removed from the system and therefore no longer available. 2190 * Mark device as removed from the system and therefore no longer available.
2191 */ 2191 */
2192 void netif_device_detach(struct net_device *dev) 2192 void netif_device_detach(struct net_device *dev)
2193 { 2193 {
2194 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && 2194 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
2195 netif_running(dev)) { 2195 netif_running(dev)) {
2196 netif_tx_stop_all_queues(dev); 2196 netif_tx_stop_all_queues(dev);
2197 } 2197 }
2198 } 2198 }
2199 EXPORT_SYMBOL(netif_device_detach); 2199 EXPORT_SYMBOL(netif_device_detach);
2200 2200
2201 /** 2201 /**
2202 * netif_device_attach - mark device as attached 2202 * netif_device_attach - mark device as attached
2203 * @dev: network device 2203 * @dev: network device
2204 * 2204 *
2205 * Mark device as attached to the system and restart if needed. 2205 * Mark device as attached to the system and restart if needed.
2206 */ 2206 */
2207 void netif_device_attach(struct net_device *dev) 2207 void netif_device_attach(struct net_device *dev)
2208 { 2208 {
2209 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && 2209 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
2210 netif_running(dev)) { 2210 netif_running(dev)) {
2211 netif_tx_wake_all_queues(dev); 2211 netif_tx_wake_all_queues(dev);
2212 __netdev_watchdog_up(dev); 2212 __netdev_watchdog_up(dev);
2213 } 2213 }
2214 } 2214 }
2215 EXPORT_SYMBOL(netif_device_attach); 2215 EXPORT_SYMBOL(netif_device_attach);
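A minimal suspend/resume sketch showing the intended pairing of the two helpers above; the callbacks and names (sample_suspend, sample_resume) are hypothetical and the hardware handling is elided.

#include <linux/netdevice.h>
#include <linux/device.h>

static int sample_suspend(struct device *d)
{
	struct net_device *netdev = dev_get_drvdata(d);

	netif_device_detach(netdev);	/* stops all tx queues if running */
	/* ... quiesce DMA and power the hardware down ... */
	return 0;
}

static int sample_resume(struct device *d)
{
	struct net_device *netdev = dev_get_drvdata(d);

	/* ... power the hardware up and restore its state ... */
	netif_device_attach(netdev);	/* wakes queues and the watchdog */
	return 0;
}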
2216 2216
2217 static void skb_warn_bad_offload(const struct sk_buff *skb) 2217 static void skb_warn_bad_offload(const struct sk_buff *skb)
2218 { 2218 {
2219 static const netdev_features_t null_features = 0; 2219 static const netdev_features_t null_features = 0;
2220 struct net_device *dev = skb->dev; 2220 struct net_device *dev = skb->dev;
2221 const char *driver = ""; 2221 const char *driver = "";
2222 2222
2223 if (!net_ratelimit()) 2223 if (!net_ratelimit())
2224 return; 2224 return;
2225 2225
2226 if (dev && dev->dev.parent) 2226 if (dev && dev->dev.parent)
2227 driver = dev_driver_string(dev->dev.parent); 2227 driver = dev_driver_string(dev->dev.parent);
2228 2228
2229 WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d " 2229 WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
2230 "gso_type=%d ip_summed=%d\n", 2230 "gso_type=%d ip_summed=%d\n",
2231 driver, dev ? &dev->features : &null_features, 2231 driver, dev ? &dev->features : &null_features,
2232 skb->sk ? &skb->sk->sk_route_caps : &null_features, 2232 skb->sk ? &skb->sk->sk_route_caps : &null_features,
2233 skb->len, skb->data_len, skb_shinfo(skb)->gso_size, 2233 skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
2234 skb_shinfo(skb)->gso_type, skb->ip_summed); 2234 skb_shinfo(skb)->gso_type, skb->ip_summed);
2235 } 2235 }
2236 2236
2237 /* 2237 /*
2238 * Invalidate hardware checksum when packet is to be mangled, and 2238 * Invalidate hardware checksum when packet is to be mangled, and
2239 * complete checksum manually on outgoing path. 2239 * complete checksum manually on outgoing path.
2240 */ 2240 */
2241 int skb_checksum_help(struct sk_buff *skb) 2241 int skb_checksum_help(struct sk_buff *skb)
2242 { 2242 {
2243 __wsum csum; 2243 __wsum csum;
2244 int ret = 0, offset; 2244 int ret = 0, offset;
2245 2245
2246 if (skb->ip_summed == CHECKSUM_COMPLETE) 2246 if (skb->ip_summed == CHECKSUM_COMPLETE)
2247 goto out_set_summed; 2247 goto out_set_summed;
2248 2248
2249 if (unlikely(skb_shinfo(skb)->gso_size)) { 2249 if (unlikely(skb_shinfo(skb)->gso_size)) {
2250 skb_warn_bad_offload(skb); 2250 skb_warn_bad_offload(skb);
2251 return -EINVAL; 2251 return -EINVAL;
2252 } 2252 }
2253 2253
2254 /* Before computing a checksum, we should make sure no frag could 2254 /* Before computing a checksum, we should make sure no frag could
2255 * be modified by an external entity: the checksum could be wrong. 2255 * be modified by an external entity: the checksum could be wrong.
2256 */ 2256 */
2257 if (skb_has_shared_frag(skb)) { 2257 if (skb_has_shared_frag(skb)) {
2258 ret = __skb_linearize(skb); 2258 ret = __skb_linearize(skb);
2259 if (ret) 2259 if (ret)
2260 goto out; 2260 goto out;
2261 } 2261 }
2262 2262
2263 offset = skb_checksum_start_offset(skb); 2263 offset = skb_checksum_start_offset(skb);
2264 BUG_ON(offset >= skb_headlen(skb)); 2264 BUG_ON(offset >= skb_headlen(skb));
2265 csum = skb_checksum(skb, offset, skb->len - offset, 0); 2265 csum = skb_checksum(skb, offset, skb->len - offset, 0);
2266 2266
2267 offset += skb->csum_offset; 2267 offset += skb->csum_offset;
2268 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb)); 2268 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
2269 2269
2270 if (skb_cloned(skb) && 2270 if (skb_cloned(skb) &&
2271 !skb_clone_writable(skb, offset + sizeof(__sum16))) { 2271 !skb_clone_writable(skb, offset + sizeof(__sum16))) {
2272 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); 2272 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
2273 if (ret) 2273 if (ret)
2274 goto out; 2274 goto out;
2275 } 2275 }
2276 2276
2277 *(__sum16 *)(skb->data + offset) = csum_fold(csum); 2277 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
2278 out_set_summed: 2278 out_set_summed:
2279 skb->ip_summed = CHECKSUM_NONE; 2279 skb->ip_summed = CHECKSUM_NONE;
2280 out: 2280 out:
2281 return ret; 2281 return ret;
2282 } 2282 }
2283 EXPORT_SYMBOL(skb_checksum_help); 2283 EXPORT_SYMBOL(skb_checksum_help);
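A hedged example of the situation described in the comment above: a hypothetical tx fixup (sample_tx_fixup) resolves a pending CHECKSUM_PARTIAL in software when the device cannot offload the checksum.

#include <linux/netdevice.h>
#include <linux/skbuff.h>

static int sample_tx_fixup(struct sk_buff *skb, struct net_device *dev)
{
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    !(dev->features & NETIF_F_ALL_CSUM))
		return skb_checksum_help(skb);	/* 0 or a negative error */
	return 0;
}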
2284 2284
2285 __be16 skb_network_protocol(struct sk_buff *skb) 2285 __be16 skb_network_protocol(struct sk_buff *skb)
2286 { 2286 {
2287 __be16 type = skb->protocol; 2287 __be16 type = skb->protocol;
2288 int vlan_depth = ETH_HLEN; 2288 int vlan_depth = ETH_HLEN;
2289 2289
2290 /* Tunnel gso handlers can set protocol to ethernet. */ 2290 /* Tunnel gso handlers can set protocol to ethernet. */
2291 if (type == htons(ETH_P_TEB)) { 2291 if (type == htons(ETH_P_TEB)) {
2292 struct ethhdr *eth; 2292 struct ethhdr *eth;
2293 2293
2294 if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) 2294 if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
2295 return 0; 2295 return 0;
2296 2296
2297 eth = (struct ethhdr *)skb_mac_header(skb); 2297 eth = (struct ethhdr *)skb_mac_header(skb);
2298 type = eth->h_proto; 2298 type = eth->h_proto;
2299 } 2299 }
2300 2300
2301 while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { 2301 while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
2302 struct vlan_hdr *vh; 2302 struct vlan_hdr *vh;
2303 2303
2304 if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) 2304 if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
2305 return 0; 2305 return 0;
2306 2306
2307 vh = (struct vlan_hdr *)(skb->data + vlan_depth); 2307 vh = (struct vlan_hdr *)(skb->data + vlan_depth);
2308 type = vh->h_vlan_encapsulated_proto; 2308 type = vh->h_vlan_encapsulated_proto;
2309 vlan_depth += VLAN_HLEN; 2309 vlan_depth += VLAN_HLEN;
2310 } 2310 }
2311 2311
2312 return type; 2312 return type;
2313 } 2313 }
2314 2314
2315 /** 2315 /**
2316 * skb_mac_gso_segment - mac layer segmentation handler. 2316 * skb_mac_gso_segment - mac layer segmentation handler.
2317 * @skb: buffer to segment 2317 * @skb: buffer to segment
2318 * @features: features for the output path (see dev->features) 2318 * @features: features for the output path (see dev->features)
2319 */ 2319 */
2320 struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, 2320 struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
2321 netdev_features_t features) 2321 netdev_features_t features)
2322 { 2322 {
2323 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); 2323 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
2324 struct packet_offload *ptype; 2324 struct packet_offload *ptype;
2325 __be16 type = skb_network_protocol(skb); 2325 __be16 type = skb_network_protocol(skb);
2326 2326
2327 if (unlikely(!type)) 2327 if (unlikely(!type))
2328 return ERR_PTR(-EINVAL); 2328 return ERR_PTR(-EINVAL);
2329 2329
2330 __skb_pull(skb, skb->mac_len); 2330 __skb_pull(skb, skb->mac_len);
2331 2331
2332 rcu_read_lock(); 2332 rcu_read_lock();
2333 list_for_each_entry_rcu(ptype, &offload_base, list) { 2333 list_for_each_entry_rcu(ptype, &offload_base, list) {
2334 if (ptype->type == type && ptype->callbacks.gso_segment) { 2334 if (ptype->type == type && ptype->callbacks.gso_segment) {
2335 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { 2335 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
2336 int err; 2336 int err;
2337 2337
2338 err = ptype->callbacks.gso_send_check(skb); 2338 err = ptype->callbacks.gso_send_check(skb);
2339 segs = ERR_PTR(err); 2339 segs = ERR_PTR(err);
2340 if (err || skb_gso_ok(skb, features)) 2340 if (err || skb_gso_ok(skb, features))
2341 break; 2341 break;
2342 __skb_push(skb, (skb->data - 2342 __skb_push(skb, (skb->data -
2343 skb_network_header(skb))); 2343 skb_network_header(skb)));
2344 } 2344 }
2345 segs = ptype->callbacks.gso_segment(skb, features); 2345 segs = ptype->callbacks.gso_segment(skb, features);
2346 break; 2346 break;
2347 } 2347 }
2348 } 2348 }
2349 rcu_read_unlock(); 2349 rcu_read_unlock();
2350 2350
2351 __skb_push(skb, skb->data - skb_mac_header(skb)); 2351 __skb_push(skb, skb->data - skb_mac_header(skb));
2352 2352
2353 return segs; 2353 return segs;
2354 } 2354 }
2355 EXPORT_SYMBOL(skb_mac_gso_segment); 2355 EXPORT_SYMBOL(skb_mac_gso_segment);
2356 2356
2357 2357
2358 /* openvswitch calls this on rx path, so we need a different check. 2358 /* openvswitch calls this on rx path, so we need a different check.
2359 */ 2359 */
2360 static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) 2360 static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
2361 { 2361 {
2362 if (tx_path) 2362 if (tx_path)
2363 return skb->ip_summed != CHECKSUM_PARTIAL; 2363 return skb->ip_summed != CHECKSUM_PARTIAL;
2364 else 2364 else
2365 return skb->ip_summed == CHECKSUM_NONE; 2365 return skb->ip_summed == CHECKSUM_NONE;
2366 } 2366 }
2367 2367
2368 /** 2368 /**
2369 * __skb_gso_segment - Perform segmentation on skb. 2369 * __skb_gso_segment - Perform segmentation on skb.
2370 * @skb: buffer to segment 2370 * @skb: buffer to segment
2371 * @features: features for the output path (see dev->features) 2371 * @features: features for the output path (see dev->features)
2372 * @tx_path: whether it is called in TX path 2372 * @tx_path: whether it is called in TX path
2373 * 2373 *
2374 * This function segments the given skb and returns a list of segments. 2374 * This function segments the given skb and returns a list of segments.
2375 * 2375 *
2376 * It may return NULL if the skb requires no segmentation. This is 2376 * It may return NULL if the skb requires no segmentation. This is
2377 * only possible when GSO is used for verifying header integrity. 2377 * only possible when GSO is used for verifying header integrity.
2378 */ 2378 */
2379 struct sk_buff *__skb_gso_segment(struct sk_buff *skb, 2379 struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
2380 netdev_features_t features, bool tx_path) 2380 netdev_features_t features, bool tx_path)
2381 { 2381 {
2382 if (unlikely(skb_needs_check(skb, tx_path))) { 2382 if (unlikely(skb_needs_check(skb, tx_path))) {
2383 int err; 2383 int err;
2384 2384
2385 skb_warn_bad_offload(skb); 2385 skb_warn_bad_offload(skb);
2386 2386
2387 if (skb_header_cloned(skb) && 2387 if (skb_header_cloned(skb) &&
2388 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) 2388 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
2389 return ERR_PTR(err); 2389 return ERR_PTR(err);
2390 } 2390 }
2391 2391
2392 SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb); 2392 SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
2393 SKB_GSO_CB(skb)->encap_level = 0; 2393 SKB_GSO_CB(skb)->encap_level = 0;
2394 2394
2395 skb_reset_mac_header(skb); 2395 skb_reset_mac_header(skb);
2396 skb_reset_mac_len(skb); 2396 skb_reset_mac_len(skb);
2397 2397
2398 return skb_mac_gso_segment(skb, features); 2398 return skb_mac_gso_segment(skb, features);
2399 } 2399 }
2400 EXPORT_SYMBOL(__skb_gso_segment); 2400 EXPORT_SYMBOL(__skb_gso_segment);
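A minimal caller sketch for the function documented above, assuming a hypothetical software path (sample_segment_and_send) that would transmit each resulting segment; the actual transmit step is elided and a kfree_skb() stands in for it.

#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/err.h>

static int sample_segment_and_send(struct sk_buff *skb,
				   netdev_features_t features)
{
	struct sk_buff *segs, *nskb;

	segs = __skb_gso_segment(skb, features, true);
	if (IS_ERR(segs))
		return PTR_ERR(segs);	/* caller still owns skb */
	if (!segs)
		return 0;	/* header check only; send skb unchanged (not shown) */

	consume_skb(skb);
	while (segs) {
		nskb = segs;
		segs = segs->next;
		nskb->next = NULL;
		/* ... hand nskb to the device; freed here as a placeholder ... */
		kfree_skb(nskb);
	}
	return 0;
}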
2401 2401
2402 /* Take action when hardware reception checksum errors are detected. */ 2402 /* Take action when hardware reception checksum errors are detected. */
2403 #ifdef CONFIG_BUG 2403 #ifdef CONFIG_BUG
2404 void netdev_rx_csum_fault(struct net_device *dev) 2404 void netdev_rx_csum_fault(struct net_device *dev)
2405 { 2405 {
2406 if (net_ratelimit()) { 2406 if (net_ratelimit()) {
2407 pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>"); 2407 pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
2408 dump_stack(); 2408 dump_stack();
2409 } 2409 }
2410 } 2410 }
2411 EXPORT_SYMBOL(netdev_rx_csum_fault); 2411 EXPORT_SYMBOL(netdev_rx_csum_fault);
2412 #endif 2412 #endif
2413 2413
2414 /* Actually, we should eliminate this check as soon as we know that: 2414 /* Actually, we should eliminate this check as soon as we know that:
2415 * 1. An IOMMU is present and allows mapping all the memory. 2415 * 1. An IOMMU is present and allows mapping all the memory.
2416 * 2. No high memory really exists on this machine. 2416 * 2. No high memory really exists on this machine.
2417 */ 2417 */
2418 2418
2419 static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) 2419 static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
2420 { 2420 {
2421 #ifdef CONFIG_HIGHMEM 2421 #ifdef CONFIG_HIGHMEM
2422 int i; 2422 int i;
2423 if (!(dev->features & NETIF_F_HIGHDMA)) { 2423 if (!(dev->features & NETIF_F_HIGHDMA)) {
2424 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 2424 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2425 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 2425 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2426 if (PageHighMem(skb_frag_page(frag))) 2426 if (PageHighMem(skb_frag_page(frag)))
2427 return 1; 2427 return 1;
2428 } 2428 }
2429 } 2429 }
2430 2430
2431 if (PCI_DMA_BUS_IS_PHYS) { 2431 if (PCI_DMA_BUS_IS_PHYS) {
2432 struct device *pdev = dev->dev.parent; 2432 struct device *pdev = dev->dev.parent;
2433 2433
2434 if (!pdev) 2434 if (!pdev)
2435 return 0; 2435 return 0;
2436 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 2436 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2437 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 2437 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2438 dma_addr_t addr = page_to_phys(skb_frag_page(frag)); 2438 dma_addr_t addr = page_to_phys(skb_frag_page(frag));
2439 if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask) 2439 if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
2440 return 1; 2440 return 1;
2441 } 2441 }
2442 } 2442 }
2443 #endif 2443 #endif
2444 return 0; 2444 return 0;
2445 } 2445 }
2446 2446
2447 struct dev_gso_cb { 2447 struct dev_gso_cb {
2448 void (*destructor)(struct sk_buff *skb); 2448 void (*destructor)(struct sk_buff *skb);
2449 }; 2449 };
2450 2450
2451 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb) 2451 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
2452 2452
2453 static void dev_gso_skb_destructor(struct sk_buff *skb) 2453 static void dev_gso_skb_destructor(struct sk_buff *skb)
2454 { 2454 {
2455 struct dev_gso_cb *cb; 2455 struct dev_gso_cb *cb;
2456 2456
2457 do { 2457 do {
2458 struct sk_buff *nskb = skb->next; 2458 struct sk_buff *nskb = skb->next;
2459 2459
2460 skb->next = nskb->next; 2460 skb->next = nskb->next;
2461 nskb->next = NULL; 2461 nskb->next = NULL;
2462 kfree_skb(nskb); 2462 kfree_skb(nskb);
2463 } while (skb->next); 2463 } while (skb->next);
2464 2464
2465 cb = DEV_GSO_CB(skb); 2465 cb = DEV_GSO_CB(skb);
2466 if (cb->destructor) 2466 if (cb->destructor)
2467 cb->destructor(skb); 2467 cb->destructor(skb);
2468 } 2468 }
2469 2469
2470 /** 2470 /**
2471 * dev_gso_segment - Perform emulated hardware segmentation on skb. 2471 * dev_gso_segment - Perform emulated hardware segmentation on skb.
2472 * @skb: buffer to segment 2472 * @skb: buffer to segment
2473 * @features: device features as applicable to this skb 2473 * @features: device features as applicable to this skb
2474 * 2474 *
2475 * This function segments the given skb and stores the list of segments 2475 * This function segments the given skb and stores the list of segments
2476 * in skb->next. 2476 * in skb->next.
2477 */ 2477 */
2478 static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) 2478 static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
2479 { 2479 {
2480 struct sk_buff *segs; 2480 struct sk_buff *segs;
2481 2481
2482 segs = skb_gso_segment(skb, features); 2482 segs = skb_gso_segment(skb, features);
2483 2483
2484 /* Verifying header integrity only. */ 2484 /* Verifying header integrity only. */
2485 if (!segs) 2485 if (!segs)
2486 return 0; 2486 return 0;
2487 2487
2488 if (IS_ERR(segs)) 2488 if (IS_ERR(segs))
2489 return PTR_ERR(segs); 2489 return PTR_ERR(segs);
2490 2490
2491 skb->next = segs; 2491 skb->next = segs;
2492 DEV_GSO_CB(skb)->destructor = skb->destructor; 2492 DEV_GSO_CB(skb)->destructor = skb->destructor;
2493 skb->destructor = dev_gso_skb_destructor; 2493 skb->destructor = dev_gso_skb_destructor;
2494 2494
2495 return 0; 2495 return 0;
2496 } 2496 }
2497 2497
2498 static netdev_features_t harmonize_features(struct sk_buff *skb, 2498 static netdev_features_t harmonize_features(struct sk_buff *skb,
2499 netdev_features_t features) 2499 netdev_features_t features)
2500 { 2500 {
2501 if (skb->ip_summed != CHECKSUM_NONE && 2501 if (skb->ip_summed != CHECKSUM_NONE &&
2502 !can_checksum_protocol(features, skb_network_protocol(skb))) { 2502 !can_checksum_protocol(features, skb_network_protocol(skb))) {
2503 features &= ~NETIF_F_ALL_CSUM; 2503 features &= ~NETIF_F_ALL_CSUM;
2504 } else if (illegal_highdma(skb->dev, skb)) { 2504 } else if (illegal_highdma(skb->dev, skb)) {
2505 features &= ~NETIF_F_SG; 2505 features &= ~NETIF_F_SG;
2506 } 2506 }
2507 2507
2508 return features; 2508 return features;
2509 } 2509 }
2510 2510
2511 netdev_features_t netif_skb_features(struct sk_buff *skb) 2511 netdev_features_t netif_skb_features(struct sk_buff *skb)
2512 { 2512 {
2513 __be16 protocol = skb->protocol; 2513 __be16 protocol = skb->protocol;
2514 netdev_features_t features = skb->dev->features; 2514 netdev_features_t features = skb->dev->features;
2515 2515
2516 if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) 2516 if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
2517 features &= ~NETIF_F_GSO_MASK; 2517 features &= ~NETIF_F_GSO_MASK;
2518 2518
2519 if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) { 2519 if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
2520 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; 2520 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
2521 protocol = veh->h_vlan_encapsulated_proto; 2521 protocol = veh->h_vlan_encapsulated_proto;
2522 } else if (!vlan_tx_tag_present(skb)) { 2522 } else if (!vlan_tx_tag_present(skb)) {
2523 return harmonize_features(skb, features); 2523 return harmonize_features(skb, features);
2524 } 2524 }
2525 2525
2526 features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | 2526 features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
2527 NETIF_F_HW_VLAN_STAG_TX); 2527 NETIF_F_HW_VLAN_STAG_TX);
2528 2528
2529 if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) 2529 if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
2530 features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | 2530 features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
2531 NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX | 2531 NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
2532 NETIF_F_HW_VLAN_STAG_TX; 2532 NETIF_F_HW_VLAN_STAG_TX;
2533 2533
2534 return harmonize_features(skb, features); 2534 return harmonize_features(skb, features);
2535 } 2535 }
2536 EXPORT_SYMBOL(netif_skb_features); 2536 EXPORT_SYMBOL(netif_skb_features);
2537 2537
2538 /*
2539 * Returns true if either:
2540 * 1. skb has frag_list and the device doesn't support FRAGLIST, or
2541 * 2. skb is fragmented and the device does not support SG.
2542 */
2543 static inline int skb_needs_linearize(struct sk_buff *skb,
2544 netdev_features_t features)
2545 {
2546 return skb_is_nonlinear(skb) &&
2547 ((skb_has_frag_list(skb) &&
2548 !(features & NETIF_F_FRAGLIST)) ||
2549 (skb_shinfo(skb)->nr_frags &&
2550 !(features & NETIF_F_SG)));
2551 }
2552
2553 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, 2538 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2554 struct netdev_queue *txq, void *accel_priv) 2539 struct netdev_queue *txq, void *accel_priv)
2555 { 2540 {
2556 const struct net_device_ops *ops = dev->netdev_ops; 2541 const struct net_device_ops *ops = dev->netdev_ops;
2557 int rc = NETDEV_TX_OK; 2542 int rc = NETDEV_TX_OK;
2558 unsigned int skb_len; 2543 unsigned int skb_len;
2559 2544
2560 if (likely(!skb->next)) { 2545 if (likely(!skb->next)) {
2561 netdev_features_t features; 2546 netdev_features_t features;
2562 2547
2563 /* 2548 /*
2564 * If the device doesn't need skb->dst, release it right now while 2549 * If the device doesn't need skb->dst, release it right now while
2565 * it's still hot in this CPU's cache 2550 * it's still hot in this CPU's cache
2566 */ 2551 */
2567 if (dev->priv_flags & IFF_XMIT_DST_RELEASE) 2552 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
2568 skb_dst_drop(skb); 2553 skb_dst_drop(skb);
2569 2554
2570 features = netif_skb_features(skb); 2555 features = netif_skb_features(skb);
2571 2556
2572 if (vlan_tx_tag_present(skb) && 2557 if (vlan_tx_tag_present(skb) &&
2573 !vlan_hw_offload_capable(features, skb->vlan_proto)) { 2558 !vlan_hw_offload_capable(features, skb->vlan_proto)) {
2574 skb = __vlan_put_tag(skb, skb->vlan_proto, 2559 skb = __vlan_put_tag(skb, skb->vlan_proto,
2575 vlan_tx_tag_get(skb)); 2560 vlan_tx_tag_get(skb));
2576 if (unlikely(!skb)) 2561 if (unlikely(!skb))
2577 goto out; 2562 goto out;
2578 2563
2579 skb->vlan_tci = 0; 2564 skb->vlan_tci = 0;
2580 } 2565 }
2581 2566
2582 /* If this is an encapsulation offload request, verify we are testing 2567 /* If this is an encapsulation offload request, verify we are testing
2583 * hardware encapsulation features instead of standard 2568 * hardware encapsulation features instead of standard
2584 * features for the netdev 2569 * features for the netdev
2585 */ 2570 */
2586 if (skb->encapsulation) 2571 if (skb->encapsulation)
2587 features &= dev->hw_enc_features; 2572 features &= dev->hw_enc_features;
2588 2573
2589 if (netif_needs_gso(skb, features)) { 2574 if (netif_needs_gso(skb, features)) {
2590 if (unlikely(dev_gso_segment(skb, features))) 2575 if (unlikely(dev_gso_segment(skb, features)))
2591 goto out_kfree_skb; 2576 goto out_kfree_skb;
2592 if (skb->next) 2577 if (skb->next)
2593 goto gso; 2578 goto gso;
2594 } else { 2579 } else {
2595 if (skb_needs_linearize(skb, features) && 2580 if (skb_needs_linearize(skb, features) &&
2596 __skb_linearize(skb)) 2581 __skb_linearize(skb))
2597 goto out_kfree_skb; 2582 goto out_kfree_skb;
2598 2583
2599 /* If the packet is not checksummed and the device does not 2584 /* If the packet is not checksummed and the device does not
2600 * support checksumming for this protocol, complete 2585 * support checksumming for this protocol, complete
2601 * checksumming here. 2586 * checksumming here.
2602 */ 2587 */
2603 if (skb->ip_summed == CHECKSUM_PARTIAL) { 2588 if (skb->ip_summed == CHECKSUM_PARTIAL) {
2604 if (skb->encapsulation) 2589 if (skb->encapsulation)
2605 skb_set_inner_transport_header(skb, 2590 skb_set_inner_transport_header(skb,
2606 skb_checksum_start_offset(skb)); 2591 skb_checksum_start_offset(skb));
2607 else 2592 else
2608 skb_set_transport_header(skb, 2593 skb_set_transport_header(skb,
2609 skb_checksum_start_offset(skb)); 2594 skb_checksum_start_offset(skb));
2610 if (!(features & NETIF_F_ALL_CSUM) && 2595 if (!(features & NETIF_F_ALL_CSUM) &&
2611 skb_checksum_help(skb)) 2596 skb_checksum_help(skb))
2612 goto out_kfree_skb; 2597 goto out_kfree_skb;
2613 } 2598 }
2614 } 2599 }
2615 2600
2616 if (!list_empty(&ptype_all)) 2601 if (!list_empty(&ptype_all))
2617 dev_queue_xmit_nit(skb, dev); 2602 dev_queue_xmit_nit(skb, dev);
2618 2603
2619 skb_len = skb->len; 2604 skb_len = skb->len;
2620 if (accel_priv) 2605 if (accel_priv)
2621 rc = ops->ndo_dfwd_start_xmit(skb, dev, accel_priv); 2606 rc = ops->ndo_dfwd_start_xmit(skb, dev, accel_priv);
2622 else 2607 else
2623 rc = ops->ndo_start_xmit(skb, dev); 2608 rc = ops->ndo_start_xmit(skb, dev);
2624 2609
2625 trace_net_dev_xmit(skb, rc, dev, skb_len); 2610 trace_net_dev_xmit(skb, rc, dev, skb_len);
2626 if (rc == NETDEV_TX_OK && txq) 2611 if (rc == NETDEV_TX_OK && txq)
2627 txq_trans_update(txq); 2612 txq_trans_update(txq);
2628 return rc; 2613 return rc;
2629 } 2614 }
2630 2615
2631 gso: 2616 gso:
2632 do { 2617 do {
2633 struct sk_buff *nskb = skb->next; 2618 struct sk_buff *nskb = skb->next;
2634 2619
2635 skb->next = nskb->next; 2620 skb->next = nskb->next;
2636 nskb->next = NULL; 2621 nskb->next = NULL;
2637 2622
2638 if (!list_empty(&ptype_all)) 2623 if (!list_empty(&ptype_all))
2639 dev_queue_xmit_nit(nskb, dev); 2624 dev_queue_xmit_nit(nskb, dev);
2640 2625
2641 skb_len = nskb->len; 2626 skb_len = nskb->len;
2642 if (accel_priv) 2627 if (accel_priv)
2643 rc = ops->ndo_dfwd_start_xmit(nskb, dev, accel_priv); 2628 rc = ops->ndo_dfwd_start_xmit(nskb, dev, accel_priv);
2644 else 2629 else
2645 rc = ops->ndo_start_xmit(nskb, dev); 2630 rc = ops->ndo_start_xmit(nskb, dev);
2646 trace_net_dev_xmit(nskb, rc, dev, skb_len); 2631 trace_net_dev_xmit(nskb, rc, dev, skb_len);
2647 if (unlikely(rc != NETDEV_TX_OK)) { 2632 if (unlikely(rc != NETDEV_TX_OK)) {
2648 if (rc & ~NETDEV_TX_MASK) 2633 if (rc & ~NETDEV_TX_MASK)
2649 goto out_kfree_gso_skb; 2634 goto out_kfree_gso_skb;
2650 nskb->next = skb->next; 2635 nskb->next = skb->next;
2651 skb->next = nskb; 2636 skb->next = nskb;
2652 return rc; 2637 return rc;
2653 } 2638 }
2654 txq_trans_update(txq); 2639 txq_trans_update(txq);
2655 if (unlikely(netif_xmit_stopped(txq) && skb->next)) 2640 if (unlikely(netif_xmit_stopped(txq) && skb->next))
2656 return NETDEV_TX_BUSY; 2641 return NETDEV_TX_BUSY;
2657 } while (skb->next); 2642 } while (skb->next);
2658 2643
2659 out_kfree_gso_skb: 2644 out_kfree_gso_skb:
2660 if (likely(skb->next == NULL)) { 2645 if (likely(skb->next == NULL)) {
2661 skb->destructor = DEV_GSO_CB(skb)->destructor; 2646 skb->destructor = DEV_GSO_CB(skb)->destructor;
2662 consume_skb(skb); 2647 consume_skb(skb);
2663 return rc; 2648 return rc;
2664 } 2649 }
2665 out_kfree_skb: 2650 out_kfree_skb:
2666 kfree_skb(skb); 2651 kfree_skb(skb);
2667 out: 2652 out:
2668 return rc; 2653 return rc;
2669 } 2654 }
2670 EXPORT_SYMBOL_GPL(dev_hard_start_xmit); 2655 EXPORT_SYMBOL_GPL(dev_hard_start_xmit);
2671 2656
2672 static void qdisc_pkt_len_init(struct sk_buff *skb) 2657 static void qdisc_pkt_len_init(struct sk_buff *skb)
2673 { 2658 {
2674 const struct skb_shared_info *shinfo = skb_shinfo(skb); 2659 const struct skb_shared_info *shinfo = skb_shinfo(skb);
2675 2660
2676 qdisc_skb_cb(skb)->pkt_len = skb->len; 2661 qdisc_skb_cb(skb)->pkt_len = skb->len;
2677 2662
2678 /* To get a more precise estimate of the bytes sent on the wire, 2663 /* To get a more precise estimate of the bytes sent on the wire,
2679 * we add the header size of all segments to pkt_len 2664 * we add the header size of all segments to pkt_len
2680 */ 2665 */
2681 if (shinfo->gso_size) { 2666 if (shinfo->gso_size) {
2682 unsigned int hdr_len; 2667 unsigned int hdr_len;
2683 u16 gso_segs = shinfo->gso_segs; 2668 u16 gso_segs = shinfo->gso_segs;
2684 2669
2685 /* mac layer + network layer */ 2670 /* mac layer + network layer */
2686 hdr_len = skb_transport_header(skb) - skb_mac_header(skb); 2671 hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
2687 2672
2688 /* + transport layer */ 2673 /* + transport layer */
2689 if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) 2674 if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
2690 hdr_len += tcp_hdrlen(skb); 2675 hdr_len += tcp_hdrlen(skb);
2691 else 2676 else
2692 hdr_len += sizeof(struct udphdr); 2677 hdr_len += sizeof(struct udphdr);
2693 2678
2694 if (shinfo->gso_type & SKB_GSO_DODGY) 2679 if (shinfo->gso_type & SKB_GSO_DODGY)
2695 gso_segs = DIV_ROUND_UP(skb->len - hdr_len, 2680 gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
2696 shinfo->gso_size); 2681 shinfo->gso_size);
2697 2682
2698 qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len; 2683 qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len;
2699 } 2684 }
2700 } 2685 }
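As a worked example with assumed numbers: a TCP GSO skb with 66 bytes of MAC+IP+TCP headers, gso_size 1448 and ten segments has skb->len = 66 + 10 * 1448 = 14546, so pkt_len becomes 14546 + (10 - 1) * 66 = 15140 bytes, matching the ten on-wire frames of 66 + 1448 bytes each.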
2701 2686
2702 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, 2687 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2703 struct net_device *dev, 2688 struct net_device *dev,
2704 struct netdev_queue *txq) 2689 struct netdev_queue *txq)
2705 { 2690 {
2706 spinlock_t *root_lock = qdisc_lock(q); 2691 spinlock_t *root_lock = qdisc_lock(q);
2707 bool contended; 2692 bool contended;
2708 int rc; 2693 int rc;
2709 2694
2710 qdisc_pkt_len_init(skb); 2695 qdisc_pkt_len_init(skb);
2711 qdisc_calculate_pkt_len(skb, q); 2696 qdisc_calculate_pkt_len(skb, q);
2712 /* 2697 /*
2713 * Heuristic to force contended enqueues to serialize on a 2698 * Heuristic to force contended enqueues to serialize on a
2714 * separate lock before trying to get the qdisc main lock. 2699 * separate lock before trying to get the qdisc main lock.
2715 * This permits the __QDISC_STATE_RUNNING owner to get the lock more often 2700 * This permits the __QDISC_STATE_RUNNING owner to get the lock more often
2716 * and dequeue packets faster. 2701 * and dequeue packets faster.
2717 */ 2702 */
2718 contended = qdisc_is_running(q); 2703 contended = qdisc_is_running(q);
2719 if (unlikely(contended)) 2704 if (unlikely(contended))
2720 spin_lock(&q->busylock); 2705 spin_lock(&q->busylock);
2721 2706
2722 spin_lock(root_lock); 2707 spin_lock(root_lock);
2723 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { 2708 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
2724 kfree_skb(skb); 2709 kfree_skb(skb);
2725 rc = NET_XMIT_DROP; 2710 rc = NET_XMIT_DROP;
2726 } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) && 2711 } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
2727 qdisc_run_begin(q)) { 2712 qdisc_run_begin(q)) {
2728 /* 2713 /*
2729 * This is a work-conserving queue; there are no old skbs 2714 * This is a work-conserving queue; there are no old skbs
2730 * waiting to be sent out; and the qdisc is not running - 2715 * waiting to be sent out; and the qdisc is not running -
2731 * xmit the skb directly. 2716 * xmit the skb directly.
2732 */ 2717 */
2733 if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) 2718 if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
2734 skb_dst_force(skb); 2719 skb_dst_force(skb);
2735 2720
2736 qdisc_bstats_update(q, skb); 2721 qdisc_bstats_update(q, skb);
2737 2722
2738 if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { 2723 if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
2739 if (unlikely(contended)) { 2724 if (unlikely(contended)) {
2740 spin_unlock(&q->busylock); 2725 spin_unlock(&q->busylock);
2741 contended = false; 2726 contended = false;
2742 } 2727 }
2743 __qdisc_run(q); 2728 __qdisc_run(q);
2744 } else 2729 } else
2745 qdisc_run_end(q); 2730 qdisc_run_end(q);
2746 2731
2747 rc = NET_XMIT_SUCCESS; 2732 rc = NET_XMIT_SUCCESS;
2748 } else { 2733 } else {
2749 skb_dst_force(skb); 2734 skb_dst_force(skb);
2750 rc = q->enqueue(skb, q) & NET_XMIT_MASK; 2735 rc = q->enqueue(skb, q) & NET_XMIT_MASK;
2751 if (qdisc_run_begin(q)) { 2736 if (qdisc_run_begin(q)) {
2752 if (unlikely(contended)) { 2737 if (unlikely(contended)) {
2753 spin_unlock(&q->busylock); 2738 spin_unlock(&q->busylock);
2754 contended = false; 2739 contended = false;
2755 } 2740 }
2756 __qdisc_run(q); 2741 __qdisc_run(q);
2757 } 2742 }
2758 } 2743 }
2759 spin_unlock(root_lock); 2744 spin_unlock(root_lock);
2760 if (unlikely(contended)) 2745 if (unlikely(contended))
2761 spin_unlock(&q->busylock); 2746 spin_unlock(&q->busylock);
2762 return rc; 2747 return rc;
2763 } 2748 }
2764 2749
2765 #if IS_ENABLED(CONFIG_NETPRIO_CGROUP) 2750 #if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
2766 static void skb_update_prio(struct sk_buff *skb) 2751 static void skb_update_prio(struct sk_buff *skb)
2767 { 2752 {
2768 struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); 2753 struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
2769 2754
2770 if (!skb->priority && skb->sk && map) { 2755 if (!skb->priority && skb->sk && map) {
2771 unsigned int prioidx = skb->sk->sk_cgrp_prioidx; 2756 unsigned int prioidx = skb->sk->sk_cgrp_prioidx;
2772 2757
2773 if (prioidx < map->priomap_len) 2758 if (prioidx < map->priomap_len)
2774 skb->priority = map->priomap[prioidx]; 2759 skb->priority = map->priomap[prioidx];
2775 } 2760 }
2776 } 2761 }
2777 #else 2762 #else
2778 #define skb_update_prio(skb) 2763 #define skb_update_prio(skb)
2779 #endif 2764 #endif
2780 2765
2781 static DEFINE_PER_CPU(int, xmit_recursion); 2766 static DEFINE_PER_CPU(int, xmit_recursion);
2782 #define RECURSION_LIMIT 10 2767 #define RECURSION_LIMIT 10
2783 2768
2784 /** 2769 /**
2785 * dev_loopback_xmit - loop back @skb 2770 * dev_loopback_xmit - loop back @skb
2786 * @skb: buffer to transmit 2771 * @skb: buffer to transmit
2787 */ 2772 */
2788 int dev_loopback_xmit(struct sk_buff *skb) 2773 int dev_loopback_xmit(struct sk_buff *skb)
2789 { 2774 {
2790 skb_reset_mac_header(skb); 2775 skb_reset_mac_header(skb);
2791 __skb_pull(skb, skb_network_offset(skb)); 2776 __skb_pull(skb, skb_network_offset(skb));
2792 skb->pkt_type = PACKET_LOOPBACK; 2777 skb->pkt_type = PACKET_LOOPBACK;
2793 skb->ip_summed = CHECKSUM_UNNECESSARY; 2778 skb->ip_summed = CHECKSUM_UNNECESSARY;
2794 WARN_ON(!skb_dst(skb)); 2779 WARN_ON(!skb_dst(skb));
2795 skb_dst_force(skb); 2780 skb_dst_force(skb);
2796 netif_rx_ni(skb); 2781 netif_rx_ni(skb);
2797 return 0; 2782 return 0;
2798 } 2783 }
2799 EXPORT_SYMBOL(dev_loopback_xmit); 2784 EXPORT_SYMBOL(dev_loopback_xmit);
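/*
 * A hedged sketch of one way an output path might use dev_loopback_xmit():
 * clone the outgoing skb and feed the copy back into the local receive path.
 * The example_ name is hypothetical; only skb_clone() and dev_loopback_xmit()
 * are real APIs, the skb is assumed to already carry a dst
 * (dev_loopback_xmit() warns otherwise), and the usual
 * <linux/skbuff.h>/<linux/netdevice.h> includes are assumed.
 */
static void example_loop_copy_to_local_stack(struct sk_buff *skb)
{
	struct sk_buff *copy = skb_clone(skb, GFP_ATOMIC);

	if (copy)
		dev_loopback_xmit(copy);	/* marks PACKET_LOOPBACK and hands it to netif_rx_ni() */
}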
2800 2785
2801 /** 2786 /**
2802 * dev_queue_xmit - transmit a buffer 2787 * dev_queue_xmit - transmit a buffer
2803 * @skb: buffer to transmit 2788 * @skb: buffer to transmit
2804 * 2789 *
2805 * Queue a buffer for transmission to a network device. The caller must 2790 * Queue a buffer for transmission to a network device. The caller must
2806 * have set the device and priority and built the buffer before calling 2791 * have set the device and priority and built the buffer before calling
2807 * this function. The function can be called from an interrupt. 2792 * this function. The function can be called from an interrupt.
2808 * 2793 *
2809 * A negative errno code is returned on a failure. A success does not 2794 * A negative errno code is returned on a failure. A success does not
2810 * guarantee the frame will be transmitted as it may be dropped due 2795 * guarantee the frame will be transmitted as it may be dropped due
2811 * to congestion or traffic shaping. 2796 * to congestion or traffic shaping.
2812 * 2797 *
2813 * ----------------------------------------------------------------------------------- 2798 * -----------------------------------------------------------------------------------
2814 * I notice this method can also return errors from the queue disciplines, 2799 * I notice this method can also return errors from the queue disciplines,
2815 * including NET_XMIT_DROP, which is a positive value. So, errors can also 2800 * including NET_XMIT_DROP, which is a positive value. So, errors can also
2816 * be positive. 2801 * be positive.
2817 * 2802 *
2818 * Regardless of the return value, the skb is consumed, so it is currently 2803 * Regardless of the return value, the skb is consumed, so it is currently
2819 * difficult to retry a send to this method. (You can bump the ref count 2804 * difficult to retry a send to this method. (You can bump the ref count
2820 * before sending to hold a reference for retry if you are careful.) 2805 * before sending to hold a reference for retry if you are careful.)
2821 * 2806 *
2822 * When calling this method, interrupts MUST be enabled. This is because 2807 * When calling this method, interrupts MUST be enabled. This is because
2823 * the BH enable code must have IRQs enabled so that it will not deadlock. 2808 * the BH enable code must have IRQs enabled so that it will not deadlock.
2824 * --BLG 2809 * --BLG
2825 */ 2810 */
2826 int dev_queue_xmit(struct sk_buff *skb) 2811 int dev_queue_xmit(struct sk_buff *skb)
2827 { 2812 {
2828 struct net_device *dev = skb->dev; 2813 struct net_device *dev = skb->dev;
2829 struct netdev_queue *txq; 2814 struct netdev_queue *txq;
2830 struct Qdisc *q; 2815 struct Qdisc *q;
2831 int rc = -ENOMEM; 2816 int rc = -ENOMEM;
2832 2817
2833 skb_reset_mac_header(skb); 2818 skb_reset_mac_header(skb);
2834 2819
2835 /* Disable soft irqs for various locks below. Also 2820 /* Disable soft irqs for various locks below. Also
2836 * stops preemption for RCU. 2821 * stops preemption for RCU.
2837 */ 2822 */
2838 rcu_read_lock_bh(); 2823 rcu_read_lock_bh();
2839 2824
2840 skb_update_prio(skb); 2825 skb_update_prio(skb);
2841 2826
2842 txq = netdev_pick_tx(dev, skb); 2827 txq = netdev_pick_tx(dev, skb);
2843 q = rcu_dereference_bh(txq->qdisc); 2828 q = rcu_dereference_bh(txq->qdisc);
2844 2829
2845 #ifdef CONFIG_NET_CLS_ACT 2830 #ifdef CONFIG_NET_CLS_ACT
2846 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); 2831 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
2847 #endif 2832 #endif
2848 trace_net_dev_queue(skb); 2833 trace_net_dev_queue(skb);
2849 if (q->enqueue) { 2834 if (q->enqueue) {
2850 rc = __dev_xmit_skb(skb, q, dev, txq); 2835 rc = __dev_xmit_skb(skb, q, dev, txq);
2851 goto out; 2836 goto out;
2852 } 2837 }
2853 2838
2854 /* The device has no queue. Common case for software devices: 2839 /* The device has no queue. Common case for software devices:
2855 loopback, all the sorts of tunnels... 2840 loopback, all the sorts of tunnels...
2856 2841
2857 Really, it is unlikely that netif_tx_lock protection is necessary 2842 Really, it is unlikely that netif_tx_lock protection is necessary
2858 here. (e.g. loopback and IP tunnels are clean, ignoring statistics 2843 here. (e.g. loopback and IP tunnels are clean, ignoring statistics
2859 counters.) 2844 counters.)
2860 However, it is possible that they rely on the protection 2845 However, it is possible that they rely on the protection
2861 we provide here. 2846 we provide here.
2862 2847
2863 Check this and take the lock. It is not prone to deadlocks. 2848 Check this and take the lock. It is not prone to deadlocks.
2864 Or use the noqueue qdisc path, which is even simpler 8) 2849 Or use the noqueue qdisc path, which is even simpler 8)
2865 */ 2850 */
2866 if (dev->flags & IFF_UP) { 2851 if (dev->flags & IFF_UP) {
2867 int cpu = smp_processor_id(); /* ok because BHs are off */ 2852 int cpu = smp_processor_id(); /* ok because BHs are off */
2868 2853
2869 if (txq->xmit_lock_owner != cpu) { 2854 if (txq->xmit_lock_owner != cpu) {
2870 2855
2871 if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT) 2856 if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
2872 goto recursion_alert; 2857 goto recursion_alert;
2873 2858
2874 HARD_TX_LOCK(dev, txq, cpu); 2859 HARD_TX_LOCK(dev, txq, cpu);
2875 2860
2876 if (!netif_xmit_stopped(txq)) { 2861 if (!netif_xmit_stopped(txq)) {
2877 __this_cpu_inc(xmit_recursion); 2862 __this_cpu_inc(xmit_recursion);
2878 rc = dev_hard_start_xmit(skb, dev, txq, NULL); 2863 rc = dev_hard_start_xmit(skb, dev, txq, NULL);
2879 __this_cpu_dec(xmit_recursion); 2864 __this_cpu_dec(xmit_recursion);
2880 if (dev_xmit_complete(rc)) { 2865 if (dev_xmit_complete(rc)) {
2881 HARD_TX_UNLOCK(dev, txq); 2866 HARD_TX_UNLOCK(dev, txq);
2882 goto out; 2867 goto out;
2883 } 2868 }
2884 } 2869 }
2885 HARD_TX_UNLOCK(dev, txq); 2870 HARD_TX_UNLOCK(dev, txq);
2886 net_crit_ratelimited("Virtual device %s asks to queue packet!\n", 2871 net_crit_ratelimited("Virtual device %s asks to queue packet!\n",
2887 dev->name); 2872 dev->name);
2888 } else { 2873 } else {
2889 /* Recursion is detected! It is possible, 2874 /* Recursion is detected! It is possible,
2890 * unfortunately 2875 * unfortunately
2891 */ 2876 */
2892 recursion_alert: 2877 recursion_alert:
2893 net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n", 2878 net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
2894 dev->name); 2879 dev->name);
2895 } 2880 }
2896 } 2881 }
2897 2882
2898 rc = -ENETDOWN; 2883 rc = -ENETDOWN;
2899 rcu_read_unlock_bh(); 2884 rcu_read_unlock_bh();
2900 2885
2901 kfree_skb(skb); 2886 kfree_skb(skb);
2902 return rc; 2887 return rc;
2903 out: 2888 out:
2904 rcu_read_unlock_bh(); 2889 rcu_read_unlock_bh();
2905 return rc; 2890 return rc;
2906 } 2891 }
2907 EXPORT_SYMBOL(dev_queue_xmit); 2892 EXPORT_SYMBOL(dev_queue_xmit);
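/*
 * A minimal sketch of a dev_queue_xmit() caller, following the kernel-doc
 * above: the caller sets skb->dev and skb->priority, builds the frame, and
 * must expect both negative errno values and positive NET_XMIT_* codes. The
 * example_ name is hypothetical, frame is assumed to hold a complete
 * link-layer frame, and the usual networking includes are assumed.
 */
static int example_send_frame(struct net_device *dev,
			      const void *frame, unsigned int len)
{
	struct sk_buff *skb;
	int rc;

	skb = alloc_skb(len, GFP_ATOMIC);
	if (!skb)
		return -ENOMEM;

	memcpy(skb_put(skb, len), frame, len);
	skb->dev = dev;
	skb->priority = TC_PRIO_CONTROL;	/* whatever priority the caller wants */

	rc = dev_queue_xmit(skb);		/* consumes skb regardless of the outcome */
	if (rc < 0)
		return rc;			/* negative errno */
	return rc == NET_XMIT_SUCCESS ? 0 : -ENOBUFS;	/* positive NET_XMIT_* code */
}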
2908 2893
2909 2894
2910 /*======================================================================= 2895 /*=======================================================================
2911 Receiver routines 2896 Receiver routines
2912 =======================================================================*/ 2897 =======================================================================*/
2913 2898
2914 int netdev_max_backlog __read_mostly = 1000; 2899 int netdev_max_backlog __read_mostly = 1000;
2915 EXPORT_SYMBOL(netdev_max_backlog); 2900 EXPORT_SYMBOL(netdev_max_backlog);
2916 2901
2917 int netdev_tstamp_prequeue __read_mostly = 1; 2902 int netdev_tstamp_prequeue __read_mostly = 1;
2918 int netdev_budget __read_mostly = 300; 2903 int netdev_budget __read_mostly = 300;
2919 int weight_p __read_mostly = 64; /* old backlog weight */ 2904 int weight_p __read_mostly = 64; /* old backlog weight */
2920 2905
2921 /* Called with irq disabled */ 2906 /* Called with irq disabled */
2922 static inline void ____napi_schedule(struct softnet_data *sd, 2907 static inline void ____napi_schedule(struct softnet_data *sd,
2923 struct napi_struct *napi) 2908 struct napi_struct *napi)
2924 { 2909 {
2925 list_add_tail(&napi->poll_list, &sd->poll_list); 2910 list_add_tail(&napi->poll_list, &sd->poll_list);
2926 __raise_softirq_irqoff(NET_RX_SOFTIRQ); 2911 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2927 } 2912 }
2928 2913
2929 #ifdef CONFIG_RPS 2914 #ifdef CONFIG_RPS
2930 2915
2931 /* One global table that all flow-based protocols share. */ 2916 /* One global table that all flow-based protocols share. */
2932 struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; 2917 struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
2933 EXPORT_SYMBOL(rps_sock_flow_table); 2918 EXPORT_SYMBOL(rps_sock_flow_table);
2934 2919
2935 struct static_key rps_needed __read_mostly; 2920 struct static_key rps_needed __read_mostly;
2936 2921
2937 static struct rps_dev_flow * 2922 static struct rps_dev_flow *
2938 set_rps_cpu(struct net_device *dev, struct sk_buff *skb, 2923 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2939 struct rps_dev_flow *rflow, u16 next_cpu) 2924 struct rps_dev_flow *rflow, u16 next_cpu)
2940 { 2925 {
2941 if (next_cpu != RPS_NO_CPU) { 2926 if (next_cpu != RPS_NO_CPU) {
2942 #ifdef CONFIG_RFS_ACCEL 2927 #ifdef CONFIG_RFS_ACCEL
2943 struct netdev_rx_queue *rxqueue; 2928 struct netdev_rx_queue *rxqueue;
2944 struct rps_dev_flow_table *flow_table; 2929 struct rps_dev_flow_table *flow_table;
2945 struct rps_dev_flow *old_rflow; 2930 struct rps_dev_flow *old_rflow;
2946 u32 flow_id; 2931 u32 flow_id;
2947 u16 rxq_index; 2932 u16 rxq_index;
2948 int rc; 2933 int rc;
2949 2934
2950 /* Should we steer this flow to a different hardware queue? */ 2935 /* Should we steer this flow to a different hardware queue? */
2951 if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap || 2936 if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
2952 !(dev->features & NETIF_F_NTUPLE)) 2937 !(dev->features & NETIF_F_NTUPLE))
2953 goto out; 2938 goto out;
2954 rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu); 2939 rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
2955 if (rxq_index == skb_get_rx_queue(skb)) 2940 if (rxq_index == skb_get_rx_queue(skb))
2956 goto out; 2941 goto out;
2957 2942
2958 rxqueue = dev->_rx + rxq_index; 2943 rxqueue = dev->_rx + rxq_index;
2959 flow_table = rcu_dereference(rxqueue->rps_flow_table); 2944 flow_table = rcu_dereference(rxqueue->rps_flow_table);
2960 if (!flow_table) 2945 if (!flow_table)
2961 goto out; 2946 goto out;
2962 flow_id = skb->rxhash & flow_table->mask; 2947 flow_id = skb->rxhash & flow_table->mask;
2963 rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, 2948 rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
2964 rxq_index, flow_id); 2949 rxq_index, flow_id);
2965 if (rc < 0) 2950 if (rc < 0)
2966 goto out; 2951 goto out;
2967 old_rflow = rflow; 2952 old_rflow = rflow;
2968 rflow = &flow_table->flows[flow_id]; 2953 rflow = &flow_table->flows[flow_id];
2969 rflow->filter = rc; 2954 rflow->filter = rc;
2970 if (old_rflow->filter == rflow->filter) 2955 if (old_rflow->filter == rflow->filter)
2971 old_rflow->filter = RPS_NO_FILTER; 2956 old_rflow->filter = RPS_NO_FILTER;
2972 out: 2957 out:
2973 #endif 2958 #endif
2974 rflow->last_qtail = 2959 rflow->last_qtail =
2975 per_cpu(softnet_data, next_cpu).input_queue_head; 2960 per_cpu(softnet_data, next_cpu).input_queue_head;
2976 } 2961 }
2977 2962
2978 rflow->cpu = next_cpu; 2963 rflow->cpu = next_cpu;
2979 return rflow; 2964 return rflow;
2980 } 2965 }
2981 2966
2982 /* 2967 /*
2983 * get_rps_cpu is called from netif_receive_skb and returns the target 2968 * get_rps_cpu is called from netif_receive_skb and returns the target
2984 * CPU from the RPS map of the receiving queue for a given skb. 2969 * CPU from the RPS map of the receiving queue for a given skb.
2985 * rcu_read_lock must be held on entry. 2970 * rcu_read_lock must be held on entry.
2986 */ 2971 */
2987 static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, 2972 static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2988 struct rps_dev_flow **rflowp) 2973 struct rps_dev_flow **rflowp)
2989 { 2974 {
2990 struct netdev_rx_queue *rxqueue; 2975 struct netdev_rx_queue *rxqueue;
2991 struct rps_map *map; 2976 struct rps_map *map;
2992 struct rps_dev_flow_table *flow_table; 2977 struct rps_dev_flow_table *flow_table;
2993 struct rps_sock_flow_table *sock_flow_table; 2978 struct rps_sock_flow_table *sock_flow_table;
2994 int cpu = -1; 2979 int cpu = -1;
2995 u16 tcpu; 2980 u16 tcpu;
2996 2981
2997 if (skb_rx_queue_recorded(skb)) { 2982 if (skb_rx_queue_recorded(skb)) {
2998 u16 index = skb_get_rx_queue(skb); 2983 u16 index = skb_get_rx_queue(skb);
2999 if (unlikely(index >= dev->real_num_rx_queues)) { 2984 if (unlikely(index >= dev->real_num_rx_queues)) {
3000 WARN_ONCE(dev->real_num_rx_queues > 1, 2985 WARN_ONCE(dev->real_num_rx_queues > 1,
3001 "%s received packet on queue %u, but number " 2986 "%s received packet on queue %u, but number "
3002 "of RX queues is %u\n", 2987 "of RX queues is %u\n",
3003 dev->name, index, dev->real_num_rx_queues); 2988 dev->name, index, dev->real_num_rx_queues);
3004 goto done; 2989 goto done;
3005 } 2990 }
3006 rxqueue = dev->_rx + index; 2991 rxqueue = dev->_rx + index;
3007 } else 2992 } else
3008 rxqueue = dev->_rx; 2993 rxqueue = dev->_rx;
3009 2994
3010 map = rcu_dereference(rxqueue->rps_map); 2995 map = rcu_dereference(rxqueue->rps_map);
3011 if (map) { 2996 if (map) {
3012 if (map->len == 1 && 2997 if (map->len == 1 &&
3013 !rcu_access_pointer(rxqueue->rps_flow_table)) { 2998 !rcu_access_pointer(rxqueue->rps_flow_table)) {
3014 tcpu = map->cpus[0]; 2999 tcpu = map->cpus[0];
3015 if (cpu_online(tcpu)) 3000 if (cpu_online(tcpu))
3016 cpu = tcpu; 3001 cpu = tcpu;
3017 goto done; 3002 goto done;
3018 } 3003 }
3019 } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) { 3004 } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) {
3020 goto done; 3005 goto done;
3021 } 3006 }
3022 3007
3023 skb_reset_network_header(skb); 3008 skb_reset_network_header(skb);
3024 if (!skb_get_rxhash(skb)) 3009 if (!skb_get_rxhash(skb))
3025 goto done; 3010 goto done;
3026 3011
3027 flow_table = rcu_dereference(rxqueue->rps_flow_table); 3012 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3028 sock_flow_table = rcu_dereference(rps_sock_flow_table); 3013 sock_flow_table = rcu_dereference(rps_sock_flow_table);
3029 if (flow_table && sock_flow_table) { 3014 if (flow_table && sock_flow_table) {
3030 u16 next_cpu; 3015 u16 next_cpu;
3031 struct rps_dev_flow *rflow; 3016 struct rps_dev_flow *rflow;
3032 3017
3033 rflow = &flow_table->flows[skb->rxhash & flow_table->mask]; 3018 rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
3034 tcpu = rflow->cpu; 3019 tcpu = rflow->cpu;
3035 3020
3036 next_cpu = sock_flow_table->ents[skb->rxhash & 3021 next_cpu = sock_flow_table->ents[skb->rxhash &
3037 sock_flow_table->mask]; 3022 sock_flow_table->mask];
3038 3023
3039 /* 3024 /*
3040 * If the desired CPU (where last recvmsg was done) is 3025 * If the desired CPU (where last recvmsg was done) is
3041 * different from current CPU (one in the rx-queue flow 3026 * different from current CPU (one in the rx-queue flow
3042 * table entry), switch if one of the following holds: 3027 * table entry), switch if one of the following holds:
3043 * - Current CPU is unset (equal to RPS_NO_CPU). 3028 * - Current CPU is unset (equal to RPS_NO_CPU).
3044 * - Current CPU is offline. 3029 * - Current CPU is offline.
3045 * - The current CPU's queue tail has advanced beyond the 3030 * - The current CPU's queue tail has advanced beyond the
3046 * last packet that was enqueued using this table entry. 3031 * last packet that was enqueued using this table entry.
3047 * This guarantees that all previous packets for the flow 3032 * This guarantees that all previous packets for the flow
3048 * have been dequeued, thus preserving in order delivery. 3033 * have been dequeued, thus preserving in order delivery.
3049 */ 3034 */
3050 if (unlikely(tcpu != next_cpu) && 3035 if (unlikely(tcpu != next_cpu) &&
3051 (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || 3036 (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
3052 ((int)(per_cpu(softnet_data, tcpu).input_queue_head - 3037 ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
3053 rflow->last_qtail)) >= 0)) { 3038 rflow->last_qtail)) >= 0)) {
3054 tcpu = next_cpu; 3039 tcpu = next_cpu;
3055 rflow = set_rps_cpu(dev, skb, rflow, next_cpu); 3040 rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
3056 } 3041 }
3057 3042
3058 if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { 3043 if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
3059 *rflowp = rflow; 3044 *rflowp = rflow;
3060 cpu = tcpu; 3045 cpu = tcpu;
3061 goto done; 3046 goto done;
3062 } 3047 }
3063 } 3048 }
3064 3049
3065 if (map) { 3050 if (map) {
3066 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; 3051 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
3067 3052
3068 if (cpu_online(tcpu)) { 3053 if (cpu_online(tcpu)) {
3069 cpu = tcpu; 3054 cpu = tcpu;
3070 goto done; 3055 goto done;
3071 } 3056 }
3072 } 3057 }
3073 3058
3074 done: 3059 done:
3075 return cpu; 3060 return cpu;
3076 } 3061 }
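/*
 * A standalone illustration of the "(int)(head - last_qtail) >= 0" test used
 * above (and again in rps_may_expire_flow() below): subtracting two
 * free-running unsigned counters and casting to signed int gives a
 * wraparound-safe "has the queue head advanced past last_qtail?" check.
 * Assumes 32-bit unsigned int.
 */
#include <stdio.h>

int main(void)
{
	unsigned int head = 5;			/* input_queue_head after wrapping past UINT_MAX */
	unsigned int last_qtail = 4294967280u;	/* 0xfffffff0, recorded just before the wrap */

	/* head < last_qtail numerically, but the signed difference is +21,
	 * so the head is correctly seen as having advanced past last_qtail.
	 */
	printf("advanced: %d\n", (int)(head - last_qtail) >= 0);
	return 0;
}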
3077 3062
3078 #ifdef CONFIG_RFS_ACCEL 3063 #ifdef CONFIG_RFS_ACCEL
3079 3064
3080 /** 3065 /**
3081 * rps_may_expire_flow - check whether an RFS hardware filter may be removed 3066 * rps_may_expire_flow - check whether an RFS hardware filter may be removed
3082 * @dev: Device on which the filter was set 3067 * @dev: Device on which the filter was set
3083 * @rxq_index: RX queue index 3068 * @rxq_index: RX queue index
3084 * @flow_id: Flow ID passed to ndo_rx_flow_steer() 3069 * @flow_id: Flow ID passed to ndo_rx_flow_steer()
3085 * @filter_id: Filter ID returned by ndo_rx_flow_steer() 3070 * @filter_id: Filter ID returned by ndo_rx_flow_steer()
3086 * 3071 *
3087 * Drivers that implement ndo_rx_flow_steer() should periodically call 3072 * Drivers that implement ndo_rx_flow_steer() should periodically call
3088 * this function for each installed filter and remove the filters for 3073 * this function for each installed filter and remove the filters for
3089 * which it returns %true. 3074 * which it returns %true.
3090 */ 3075 */
3091 bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, 3076 bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
3092 u32 flow_id, u16 filter_id) 3077 u32 flow_id, u16 filter_id)
3093 { 3078 {
3094 struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index; 3079 struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
3095 struct rps_dev_flow_table *flow_table; 3080 struct rps_dev_flow_table *flow_table;
3096 struct rps_dev_flow *rflow; 3081 struct rps_dev_flow *rflow;
3097 bool expire = true; 3082 bool expire = true;
3098 int cpu; 3083 int cpu;
3099 3084
3100 rcu_read_lock(); 3085 rcu_read_lock();
3101 flow_table = rcu_dereference(rxqueue->rps_flow_table); 3086 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3102 if (flow_table && flow_id <= flow_table->mask) { 3087 if (flow_table && flow_id <= flow_table->mask) {
3103 rflow = &flow_table->flows[flow_id]; 3088 rflow = &flow_table->flows[flow_id];
3104 cpu = ACCESS_ONCE(rflow->cpu); 3089 cpu = ACCESS_ONCE(rflow->cpu);
3105 if (rflow->filter == filter_id && cpu != RPS_NO_CPU && 3090 if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
3106 ((int)(per_cpu(softnet_data, cpu).input_queue_head - 3091 ((int)(per_cpu(softnet_data, cpu).input_queue_head -
3107 rflow->last_qtail) < 3092 rflow->last_qtail) <
3108 (int)(10 * flow_table->mask))) 3093 (int)(10 * flow_table->mask)))
3109 expire = false; 3094 expire = false;
3110 } 3095 }
3111 rcu_read_unlock(); 3096 rcu_read_unlock();
3112 return expire; 3097 return expire;
3113 } 3098 }
3114 EXPORT_SYMBOL(rps_may_expire_flow); 3099 EXPORT_SYMBOL(rps_may_expire_flow);
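/*
 * A sketch of the periodic expiry scan that the kernel-doc above asks drivers
 * for. The filter bookkeeping (struct example_filter and its fields) is
 * hypothetical; only rps_may_expire_flow() and its argument order are taken
 * from the code above, and the usual kernel includes are assumed.
 */
struct example_filter {
	bool	inserted;
	u16	rxq_index;	/* RX queue the flow was steered to */
	u32	flow_id;	/* flow_id passed to ndo_rx_flow_steer() */
	u16	filter_id;	/* value returned from ndo_rx_flow_steer() */
};

static void example_expire_filters(struct net_device *dev,
				   struct example_filter *filters,
				   unsigned int num_filters)
{
	unsigned int i;

	for (i = 0; i < num_filters; i++) {
		if (!filters[i].inserted)
			continue;
		if (rps_may_expire_flow(dev, filters[i].rxq_index,
					filters[i].flow_id,
					filters[i].filter_id)) {
			/* ... remove the hardware filter, then ... */
			filters[i].inserted = false;
		}
	}
}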
3115 3100
3116 #endif /* CONFIG_RFS_ACCEL */ 3101 #endif /* CONFIG_RFS_ACCEL */
3117 3102
3118 /* Called from hardirq (IPI) context */ 3103 /* Called from hardirq (IPI) context */
3119 static void rps_trigger_softirq(void *data) 3104 static void rps_trigger_softirq(void *data)
3120 { 3105 {
3121 struct softnet_data *sd = data; 3106 struct softnet_data *sd = data;
3122 3107
3123 ____napi_schedule(sd, &sd->backlog); 3108 ____napi_schedule(sd, &sd->backlog);
3124 sd->received_rps++; 3109 sd->received_rps++;
3125 } 3110 }
3126 3111
3127 #endif /* CONFIG_RPS */ 3112 #endif /* CONFIG_RPS */
3128 3113
3129 /* 3114 /*
3130 * Check if this softnet_data structure belongs to another CPU. 3115 * Check if this softnet_data structure belongs to another CPU.
3131 * If so, queue it on our IPI list and return 1. 3116 * If so, queue it on our IPI list and return 1.
3132 * Otherwise, return 0. 3117 * Otherwise, return 0.
3133 */ 3118 */
3134 static int rps_ipi_queued(struct softnet_data *sd) 3119 static int rps_ipi_queued(struct softnet_data *sd)
3135 { 3120 {
3136 #ifdef CONFIG_RPS 3121 #ifdef CONFIG_RPS
3137 struct softnet_data *mysd = &__get_cpu_var(softnet_data); 3122 struct softnet_data *mysd = &__get_cpu_var(softnet_data);
3138 3123
3139 if (sd != mysd) { 3124 if (sd != mysd) {
3140 sd->rps_ipi_next = mysd->rps_ipi_list; 3125 sd->rps_ipi_next = mysd->rps_ipi_list;
3141 mysd->rps_ipi_list = sd; 3126 mysd->rps_ipi_list = sd;
3142 3127
3143 __raise_softirq_irqoff(NET_RX_SOFTIRQ); 3128 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
3144 return 1; 3129 return 1;
3145 } 3130 }
3146 #endif /* CONFIG_RPS */ 3131 #endif /* CONFIG_RPS */
3147 return 0; 3132 return 0;
3148 } 3133 }
3149 3134
3150 #ifdef CONFIG_NET_FLOW_LIMIT 3135 #ifdef CONFIG_NET_FLOW_LIMIT
3151 int netdev_flow_limit_table_len __read_mostly = (1 << 12); 3136 int netdev_flow_limit_table_len __read_mostly = (1 << 12);
3152 #endif 3137 #endif
3153 3138
3154 static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen) 3139 static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
3155 { 3140 {
3156 #ifdef CONFIG_NET_FLOW_LIMIT 3141 #ifdef CONFIG_NET_FLOW_LIMIT
3157 struct sd_flow_limit *fl; 3142 struct sd_flow_limit *fl;
3158 struct softnet_data *sd; 3143 struct softnet_data *sd;
3159 unsigned int old_flow, new_flow; 3144 unsigned int old_flow, new_flow;
3160 3145
3161 if (qlen < (netdev_max_backlog >> 1)) 3146 if (qlen < (netdev_max_backlog >> 1))
3162 return false; 3147 return false;
3163 3148
3164 sd = &__get_cpu_var(softnet_data); 3149 sd = &__get_cpu_var(softnet_data);
3165 3150
3166 rcu_read_lock(); 3151 rcu_read_lock();
3167 fl = rcu_dereference(sd->flow_limit); 3152 fl = rcu_dereference(sd->flow_limit);
3168 if (fl) { 3153 if (fl) {
3169 new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1); 3154 new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1);
3170 old_flow = fl->history[fl->history_head]; 3155 old_flow = fl->history[fl->history_head];
3171 fl->history[fl->history_head] = new_flow; 3156 fl->history[fl->history_head] = new_flow;
3172 3157
3173 fl->history_head++; 3158 fl->history_head++;
3174 fl->history_head &= FLOW_LIMIT_HISTORY - 1; 3159 fl->history_head &= FLOW_LIMIT_HISTORY - 1;
3175 3160
3176 if (likely(fl->buckets[old_flow])) 3161 if (likely(fl->buckets[old_flow]))
3177 fl->buckets[old_flow]--; 3162 fl->buckets[old_flow]--;
3178 3163
3179 if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) { 3164 if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) {
3180 fl->count++; 3165 fl->count++;
3181 rcu_read_unlock(); 3166 rcu_read_unlock();
3182 return true; 3167 return true;
3183 } 3168 }
3184 } 3169 }
3185 rcu_read_unlock(); 3170 rcu_read_unlock();
3186 #endif 3171 #endif
3187 return false; 3172 return false;
3188 } 3173 }
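/*
 * A standalone sketch of the bookkeeping skb_flow_limit() performs above: a
 * fixed-size history of recent flow buckets plus per-bucket counters, so a
 * single flow that owns more than half of the recent history gets flagged.
 * Sizes and names are made up for illustration and the struct is assumed to
 * start out zeroed; this is not the kernel data structure.
 */
#include <stdbool.h>

#define DEMO_HISTORY_LEN	128	/* must be a power of two */
#define DEMO_NUM_BUCKETS	64	/* must be a power of two */

struct flow_limit_demo {
	unsigned int	history_head;
	unsigned short	history[DEMO_HISTORY_LEN];	/* ring of recent bucket indices */
	unsigned short	buckets[DEMO_NUM_BUCKETS];	/* occurrences of each bucket in the ring */
};

static bool demo_flow_over_limit(struct flow_limit_demo *fl, unsigned int flow_hash)
{
	unsigned int new_flow = flow_hash & (DEMO_NUM_BUCKETS - 1);
	unsigned int old_flow = fl->history[fl->history_head];

	/* Replace the oldest history entry with the new flow's bucket. */
	fl->history[fl->history_head] = new_flow;
	fl->history_head = (fl->history_head + 1) & (DEMO_HISTORY_LEN - 1);

	if (fl->buckets[old_flow])
		fl->buckets[old_flow]--;

	/* Flag the flow once it accounts for more than half of the history. */
	return ++fl->buckets[new_flow] > (DEMO_HISTORY_LEN >> 1);
}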
3189 3174
3190 /* 3175 /*
3191 * enqueue_to_backlog is called to queue an skb to a per CPU backlog 3176 * enqueue_to_backlog is called to queue an skb to a per CPU backlog
3192 * queue (may be a remote CPU queue). 3177 * queue (may be a remote CPU queue).
3193 */ 3178 */
3194 static int enqueue_to_backlog(struct sk_buff *skb, int cpu, 3179 static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
3195 unsigned int *qtail) 3180 unsigned int *qtail)
3196 { 3181 {
3197 struct softnet_data *sd; 3182 struct softnet_data *sd;
3198 unsigned long flags; 3183 unsigned long flags;
3199 unsigned int qlen; 3184 unsigned int qlen;
3200 3185
3201 sd = &per_cpu(softnet_data, cpu); 3186 sd = &per_cpu(softnet_data, cpu);
3202 3187
3203 local_irq_save(flags); 3188 local_irq_save(flags);
3204 3189
3205 rps_lock(sd); 3190 rps_lock(sd);
3206 qlen = skb_queue_len(&sd->input_pkt_queue); 3191 qlen = skb_queue_len(&sd->input_pkt_queue);
3207 if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) { 3192 if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
3208 if (skb_queue_len(&sd->input_pkt_queue)) { 3193 if (skb_queue_len(&sd->input_pkt_queue)) {
3209 enqueue: 3194 enqueue:
3210 __skb_queue_tail(&sd->input_pkt_queue, skb); 3195 __skb_queue_tail(&sd->input_pkt_queue, skb);
3211 input_queue_tail_incr_save(sd, qtail); 3196 input_queue_tail_incr_save(sd, qtail);
3212 rps_unlock(sd); 3197 rps_unlock(sd);
3213 local_irq_restore(flags); 3198 local_irq_restore(flags);
3214 return NET_RX_SUCCESS; 3199 return NET_RX_SUCCESS;
3215 } 3200 }
3216 3201
3217 /* Schedule NAPI for backlog device 3202 /* Schedule NAPI for backlog device
3218 * We can use a non-atomic operation since we own the queue lock 3203 * We can use a non-atomic operation since we own the queue lock
3219 */ 3204 */
3220 if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) { 3205 if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
3221 if (!rps_ipi_queued(sd)) 3206 if (!rps_ipi_queued(sd))
3222 ____napi_schedule(sd, &sd->backlog); 3207 ____napi_schedule(sd, &sd->backlog);
3223 } 3208 }
3224 goto enqueue; 3209 goto enqueue;
3225 } 3210 }
3226 3211
3227 sd->dropped++; 3212 sd->dropped++;
3228 rps_unlock(sd); 3213 rps_unlock(sd);
3229 3214
3230 local_irq_restore(flags); 3215 local_irq_restore(flags);
3231 3216
3232 atomic_long_inc(&skb->dev->rx_dropped); 3217 atomic_long_inc(&skb->dev->rx_dropped);
3233 kfree_skb(skb); 3218 kfree_skb(skb);
3234 return NET_RX_DROP; 3219 return NET_RX_DROP;
3235 } 3220 }
3236 3221
3237 /** 3222 /**
3238 * netif_rx - post buffer to the network code 3223 * netif_rx - post buffer to the network code
3239 * @skb: buffer to post 3224 * @skb: buffer to post
3240 * 3225 *
3241 * This function receives a packet from a device driver and queues it for 3226 * This function receives a packet from a device driver and queues it for
3242 * the upper (protocol) levels to process. It always succeeds. The buffer 3227 * the upper (protocol) levels to process. It always succeeds. The buffer
3243 * may be dropped during processing for congestion control or by the 3228 * may be dropped during processing for congestion control or by the
3244 * protocol layers. 3229 * protocol layers.
3245 * 3230 *
3246 * return values: 3231 * return values:
3247 * NET_RX_SUCCESS (no congestion) 3232 * NET_RX_SUCCESS (no congestion)
3248 * NET_RX_DROP (packet was dropped) 3233 * NET_RX_DROP (packet was dropped)
3249 * 3234 *
3250 */ 3235 */
3251 3236
3252 int netif_rx(struct sk_buff *skb) 3237 int netif_rx(struct sk_buff *skb)
3253 { 3238 {
3254 int ret; 3239 int ret;
3255 3240
3256 /* if netpoll wants it, pretend we never saw it */ 3241 /* if netpoll wants it, pretend we never saw it */
3257 if (netpoll_rx(skb)) 3242 if (netpoll_rx(skb))
3258 return NET_RX_DROP; 3243 return NET_RX_DROP;
3259 3244
3260 net_timestamp_check(netdev_tstamp_prequeue, skb); 3245 net_timestamp_check(netdev_tstamp_prequeue, skb);
3261 3246
3262 trace_netif_rx(skb); 3247 trace_netif_rx(skb);
3263 #ifdef CONFIG_RPS 3248 #ifdef CONFIG_RPS
3264 if (static_key_false(&rps_needed)) { 3249 if (static_key_false(&rps_needed)) {
3265 struct rps_dev_flow voidflow, *rflow = &voidflow; 3250 struct rps_dev_flow voidflow, *rflow = &voidflow;
3266 int cpu; 3251 int cpu;
3267 3252
3268 preempt_disable(); 3253 preempt_disable();
3269 rcu_read_lock(); 3254 rcu_read_lock();
3270 3255
3271 cpu = get_rps_cpu(skb->dev, skb, &rflow); 3256 cpu = get_rps_cpu(skb->dev, skb, &rflow);
3272 if (cpu < 0) 3257 if (cpu < 0)
3273 cpu = smp_processor_id(); 3258 cpu = smp_processor_id();
3274 3259
3275 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); 3260 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3276 3261
3277 rcu_read_unlock(); 3262 rcu_read_unlock();
3278 preempt_enable(); 3263 preempt_enable();
3279 } else 3264 } else
3280 #endif 3265 #endif
3281 { 3266 {
3282 unsigned int qtail; 3267 unsigned int qtail;
3283 ret = enqueue_to_backlog(skb, get_cpu(), &qtail); 3268 ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
3284 put_cpu(); 3269 put_cpu();
3285 } 3270 }
3286 return ret; 3271 return ret;
3287 } 3272 }
3288 EXPORT_SYMBOL(netif_rx); 3273 EXPORT_SYMBOL(netif_rx);
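/*
 * A sketch of the classic netif_rx() caller: a non-NAPI driver copying a
 * received frame out of its RX buffer from the interrupt handler. The
 * example_ name is hypothetical; netdev_alloc_skb(), skb_put(),
 * eth_type_trans() and netif_rx() are the real APIs (usual includes assumed).
 */
static void example_rx_interrupt(struct net_device *dev,
				 const void *rx_buf, unsigned int pkt_len)
{
	struct sk_buff *skb;

	skb = netdev_alloc_skb(dev, pkt_len + NET_IP_ALIGN);
	if (!skb) {
		dev->stats.rx_dropped++;
		return;
	}

	skb_reserve(skb, NET_IP_ALIGN);			/* align the IP header */
	memcpy(skb_put(skb, pkt_len), rx_buf, pkt_len);
	skb->protocol = eth_type_trans(skb, dev);	/* also sets skb->dev and pkt_type */

	netif_rx(skb);					/* queue for the backlog / RPS target CPU */
}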
3289 3274
3290 int netif_rx_ni(struct sk_buff *skb) 3275 int netif_rx_ni(struct sk_buff *skb)
3291 { 3276 {
3292 int err; 3277 int err;
3293 3278
3294 preempt_disable(); 3279 preempt_disable();
3295 err = netif_rx(skb); 3280 err = netif_rx(skb);
3296 if (local_softirq_pending()) 3281 if (local_softirq_pending())
3297 do_softirq(); 3282 do_softirq();
3298 preempt_enable(); 3283 preempt_enable();
3299 3284
3300 return err; 3285 return err;
3301 } 3286 }
3302 EXPORT_SYMBOL(netif_rx_ni); 3287 EXPORT_SYMBOL(netif_rx_ni);
3303 3288
3304 static void net_tx_action(struct softirq_action *h) 3289 static void net_tx_action(struct softirq_action *h)
3305 { 3290 {
3306 struct softnet_data *sd = &__get_cpu_var(softnet_data); 3291 struct softnet_data *sd = &__get_cpu_var(softnet_data);
3307 3292
3308 if (sd->completion_queue) { 3293 if (sd->completion_queue) {
3309 struct sk_buff *clist; 3294 struct sk_buff *clist;
3310 3295
3311 local_irq_disable(); 3296 local_irq_disable();
3312 clist = sd->completion_queue; 3297 clist = sd->completion_queue;
3313 sd->completion_queue = NULL; 3298 sd->completion_queue = NULL;
3314 local_irq_enable(); 3299 local_irq_enable();
3315 3300
3316 while (clist) { 3301 while (clist) {
3317 struct sk_buff *skb = clist; 3302 struct sk_buff *skb = clist;
3318 clist = clist->next; 3303 clist = clist->next;
3319 3304
3320 WARN_ON(atomic_read(&skb->users)); 3305 WARN_ON(atomic_read(&skb->users));
3321 if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED)) 3306 if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
3322 trace_consume_skb(skb); 3307 trace_consume_skb(skb);
3323 else 3308 else
3324 trace_kfree_skb(skb, net_tx_action); 3309 trace_kfree_skb(skb, net_tx_action);
3325 __kfree_skb(skb); 3310 __kfree_skb(skb);
3326 } 3311 }
3327 } 3312 }
3328 3313
3329 if (sd->output_queue) { 3314 if (sd->output_queue) {
3330 struct Qdisc *head; 3315 struct Qdisc *head;
3331 3316
3332 local_irq_disable(); 3317 local_irq_disable();
3333 head = sd->output_queue; 3318 head = sd->output_queue;
3334 sd->output_queue = NULL; 3319 sd->output_queue = NULL;
3335 sd->output_queue_tailp = &sd->output_queue; 3320 sd->output_queue_tailp = &sd->output_queue;
3336 local_irq_enable(); 3321 local_irq_enable();
3337 3322
3338 while (head) { 3323 while (head) {
3339 struct Qdisc *q = head; 3324 struct Qdisc *q = head;
3340 spinlock_t *root_lock; 3325 spinlock_t *root_lock;
3341 3326
3342 head = head->next_sched; 3327 head = head->next_sched;
3343 3328
3344 root_lock = qdisc_lock(q); 3329 root_lock = qdisc_lock(q);
3345 if (spin_trylock(root_lock)) { 3330 if (spin_trylock(root_lock)) {
3346 smp_mb__before_clear_bit(); 3331 smp_mb__before_clear_bit();
3347 clear_bit(__QDISC_STATE_SCHED, 3332 clear_bit(__QDISC_STATE_SCHED,
3348 &q->state); 3333 &q->state);
3349 qdisc_run(q); 3334 qdisc_run(q);
3350 spin_unlock(root_lock); 3335 spin_unlock(root_lock);
3351 } else { 3336 } else {
3352 if (!test_bit(__QDISC_STATE_DEACTIVATED, 3337 if (!test_bit(__QDISC_STATE_DEACTIVATED,
3353 &q->state)) { 3338 &q->state)) {
3354 __netif_reschedule(q); 3339 __netif_reschedule(q);
3355 } else { 3340 } else {
3356 smp_mb__before_clear_bit(); 3341 smp_mb__before_clear_bit();
3357 clear_bit(__QDISC_STATE_SCHED, 3342 clear_bit(__QDISC_STATE_SCHED,
3358 &q->state); 3343 &q->state);
3359 } 3344 }
3360 } 3345 }
3361 } 3346 }
3362 } 3347 }
3363 } 3348 }
3364 3349
3365 #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \ 3350 #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
3366 (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)) 3351 (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
3367 /* This hook is defined here for ATM LANE */ 3352 /* This hook is defined here for ATM LANE */
3368 int (*br_fdb_test_addr_hook)(struct net_device *dev, 3353 int (*br_fdb_test_addr_hook)(struct net_device *dev,
3369 unsigned char *addr) __read_mostly; 3354 unsigned char *addr) __read_mostly;
3370 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); 3355 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
3371 #endif 3356 #endif
3372 3357
3373 #ifdef CONFIG_NET_CLS_ACT 3358 #ifdef CONFIG_NET_CLS_ACT
3374 /* TODO: Maybe we should just force sch_ingress to be compiled in 3359 /* TODO: Maybe we should just force sch_ingress to be compiled in
3375 * when CONFIG_NET_CLS_ACT is? Otherwise we pay for some useless instructions 3360 * when CONFIG_NET_CLS_ACT is? Otherwise we pay for some useless instructions
3376 * (a compare and 2 extra stores) right now if we don't have it on 3361 * (a compare and 2 extra stores) right now if we don't have it on
3377 * but do have CONFIG_NET_CLS_ACT. 3362 * but do have CONFIG_NET_CLS_ACT.
3378 * NOTE: This doesn't stop any functionality; if you don't have 3363 * NOTE: This doesn't stop any functionality; if you don't have
3379 * the ingress scheduler, you just can't add policies on ingress. 3364 * the ingress scheduler, you just can't add policies on ingress.
3380 * 3365 *
3381 */ 3366 */
3382 static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq) 3367 static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
3383 { 3368 {
3384 struct net_device *dev = skb->dev; 3369 struct net_device *dev = skb->dev;
3385 u32 ttl = G_TC_RTTL(skb->tc_verd); 3370 u32 ttl = G_TC_RTTL(skb->tc_verd);
3386 int result = TC_ACT_OK; 3371 int result = TC_ACT_OK;
3387 struct Qdisc *q; 3372 struct Qdisc *q;
3388 3373
3389 if (unlikely(MAX_RED_LOOP < ttl++)) { 3374 if (unlikely(MAX_RED_LOOP < ttl++)) {
3390 net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n", 3375 net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n",
3391 skb->skb_iif, dev->ifindex); 3376 skb->skb_iif, dev->ifindex);
3392 return TC_ACT_SHOT; 3377 return TC_ACT_SHOT;
3393 } 3378 }
3394 3379
3395 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); 3380 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
3396 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); 3381 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
3397 3382
3398 q = rxq->qdisc; 3383 q = rxq->qdisc;
3399 if (q != &noop_qdisc) { 3384 if (q != &noop_qdisc) {
3400 spin_lock(qdisc_lock(q)); 3385 spin_lock(qdisc_lock(q));
3401 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) 3386 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
3402 result = qdisc_enqueue_root(skb, q); 3387 result = qdisc_enqueue_root(skb, q);
3403 spin_unlock(qdisc_lock(q)); 3388 spin_unlock(qdisc_lock(q));
3404 } 3389 }
3405 3390
3406 return result; 3391 return result;
3407 } 3392 }
3408 3393
3409 static inline struct sk_buff *handle_ing(struct sk_buff *skb, 3394 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
3410 struct packet_type **pt_prev, 3395 struct packet_type **pt_prev,
3411 int *ret, struct net_device *orig_dev) 3396 int *ret, struct net_device *orig_dev)
3412 { 3397 {
3413 struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue); 3398 struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
3414 3399
3415 if (!rxq || rxq->qdisc == &noop_qdisc) 3400 if (!rxq || rxq->qdisc == &noop_qdisc)
3416 goto out; 3401 goto out;
3417 3402
3418 if (*pt_prev) { 3403 if (*pt_prev) {
3419 *ret = deliver_skb(skb, *pt_prev, orig_dev); 3404 *ret = deliver_skb(skb, *pt_prev, orig_dev);
3420 *pt_prev = NULL; 3405 *pt_prev = NULL;
3421 } 3406 }
3422 3407
3423 switch (ing_filter(skb, rxq)) { 3408 switch (ing_filter(skb, rxq)) {
3424 case TC_ACT_SHOT: 3409 case TC_ACT_SHOT:
3425 case TC_ACT_STOLEN: 3410 case TC_ACT_STOLEN:
3426 kfree_skb(skb); 3411 kfree_skb(skb);
3427 return NULL; 3412 return NULL;
3428 } 3413 }
3429 3414
3430 out: 3415 out:
3431 skb->tc_verd = 0; 3416 skb->tc_verd = 0;
3432 return skb; 3417 return skb;
3433 } 3418 }
3434 #endif 3419 #endif
3435 3420
3436 /** 3421 /**
3437 * netdev_rx_handler_register - register receive handler 3422 * netdev_rx_handler_register - register receive handler
3438 * @dev: device to register a handler for 3423 * @dev: device to register a handler for
3439 * @rx_handler: receive handler to register 3424 * @rx_handler: receive handler to register
3440 * @rx_handler_data: data pointer that is used by rx handler 3425 * @rx_handler_data: data pointer that is used by rx handler
3441 * 3426 *
3442 * Register a receive handler for a device. This handler will then be 3427 * Register a receive handler for a device. This handler will then be
3443 * called from __netif_receive_skb. A negative errno code is returned 3428 * called from __netif_receive_skb. A negative errno code is returned
3444 * on a failure. 3429 * on a failure.
3445 * 3430 *
3446 * The caller must hold the rtnl_mutex. 3431 * The caller must hold the rtnl_mutex.
3447 * 3432 *
3448 * For a general description of rx_handler, see enum rx_handler_result. 3433 * For a general description of rx_handler, see enum rx_handler_result.
3449 */ 3434 */
3450 int netdev_rx_handler_register(struct net_device *dev, 3435 int netdev_rx_handler_register(struct net_device *dev,
3451 rx_handler_func_t *rx_handler, 3436 rx_handler_func_t *rx_handler,
3452 void *rx_handler_data) 3437 void *rx_handler_data)
3453 { 3438 {
3454 ASSERT_RTNL(); 3439 ASSERT_RTNL();
3455 3440
3456 if (dev->rx_handler) 3441 if (dev->rx_handler)
3457 return -EBUSY; 3442 return -EBUSY;
3458 3443
3459 /* Note: rx_handler_data must be set before rx_handler */ 3444 /* Note: rx_handler_data must be set before rx_handler */
3460 rcu_assign_pointer(dev->rx_handler_data, rx_handler_data); 3445 rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
3461 rcu_assign_pointer(dev->rx_handler, rx_handler); 3446 rcu_assign_pointer(dev->rx_handler, rx_handler);
3462 3447
3463 return 0; 3448 return 0;
3464 } 3449 }
3465 EXPORT_SYMBOL_GPL(netdev_rx_handler_register); 3450 EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
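/*
 * A sketch of registering an rx_handler as described above. The handler
 * signature (struct sk_buff **pskb returning rx_handler_result_t) and the
 * RTNL requirement come from the code and kernel-doc above; the example_
 * names and the private data layout are hypothetical.
 */
static rx_handler_result_t example_rx_handler(struct sk_buff **pskb)
{
	struct sk_buff *skb = *pskb;
	void *priv = rcu_dereference(skb->dev->rx_handler_data);

	/* ... inspect or steal skb, possibly replacing *pskb ... */
	(void)priv;

	return RX_HANDLER_PASS;		/* let __netif_receive_skb_core() continue */
}

static int example_attach(struct net_device *dev, void *priv)
{
	int err;

	rtnl_lock();
	err = netdev_rx_handler_register(dev, example_rx_handler, priv);
	rtnl_unlock();

	return err;	/* -EBUSY if another handler is already registered */
}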
3466 3451
3467 /** 3452 /**
3468 * netdev_rx_handler_unregister - unregister receive handler 3453 * netdev_rx_handler_unregister - unregister receive handler
3469 * @dev: device to unregister a handler from 3454 * @dev: device to unregister a handler from
3470 * 3455 *
3471 * Unregister a receive handler from a device. 3456 * Unregister a receive handler from a device.
3472 * 3457 *
3473 * The caller must hold the rtnl_mutex. 3458 * The caller must hold the rtnl_mutex.
3474 */ 3459 */
3475 void netdev_rx_handler_unregister(struct net_device *dev) 3460 void netdev_rx_handler_unregister(struct net_device *dev)
3476 { 3461 {
3477 3462
3478 ASSERT_RTNL(); 3463 ASSERT_RTNL();
3479 RCU_INIT_POINTER(dev->rx_handler, NULL); 3464 RCU_INIT_POINTER(dev->rx_handler, NULL);
3480 /* a reader seeing a non-NULL rx_handler in an rcu_read_lock() 3465 /* a reader seeing a non-NULL rx_handler in an rcu_read_lock()
3481 * section is guaranteed to see a non-NULL rx_handler_data 3466 * section is guaranteed to see a non-NULL rx_handler_data
3482 * as well. 3467 * as well.
3483 */ 3468 */
3484 synchronize_net(); 3469 synchronize_net();
3485 RCU_INIT_POINTER(dev->rx_handler_data, NULL); 3470 RCU_INIT_POINTER(dev->rx_handler_data, NULL);
3486 } 3471 }
3487 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); 3472 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
3488 3473
3489 /* 3474 /*
3490 * Limit the use of PFMEMALLOC reserves to those protocols that implement 3475 * Limit the use of PFMEMALLOC reserves to those protocols that implement
3491 * the special handling of PFMEMALLOC skbs. 3476 * the special handling of PFMEMALLOC skbs.
3492 */ 3477 */
3493 static bool skb_pfmemalloc_protocol(struct sk_buff *skb) 3478 static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
3494 { 3479 {
3495 switch (skb->protocol) { 3480 switch (skb->protocol) {
3496 case __constant_htons(ETH_P_ARP): 3481 case __constant_htons(ETH_P_ARP):
3497 case __constant_htons(ETH_P_IP): 3482 case __constant_htons(ETH_P_IP):
3498 case __constant_htons(ETH_P_IPV6): 3483 case __constant_htons(ETH_P_IPV6):
3499 case __constant_htons(ETH_P_8021Q): 3484 case __constant_htons(ETH_P_8021Q):
3500 case __constant_htons(ETH_P_8021AD): 3485 case __constant_htons(ETH_P_8021AD):
3501 return true; 3486 return true;
3502 default: 3487 default:
3503 return false; 3488 return false;
3504 } 3489 }
3505 } 3490 }
3506 3491
3507 static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) 3492 static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
3508 { 3493 {
3509 struct packet_type *ptype, *pt_prev; 3494 struct packet_type *ptype, *pt_prev;
3510 rx_handler_func_t *rx_handler; 3495 rx_handler_func_t *rx_handler;
3511 struct net_device *orig_dev; 3496 struct net_device *orig_dev;
3512 struct net_device *null_or_dev; 3497 struct net_device *null_or_dev;
3513 bool deliver_exact = false; 3498 bool deliver_exact = false;
3514 int ret = NET_RX_DROP; 3499 int ret = NET_RX_DROP;
3515 __be16 type; 3500 __be16 type;
3516 3501
3517 net_timestamp_check(!netdev_tstamp_prequeue, skb); 3502 net_timestamp_check(!netdev_tstamp_prequeue, skb);
3518 3503
3519 trace_netif_receive_skb(skb); 3504 trace_netif_receive_skb(skb);
3520 3505
3521 /* if we've gotten here through NAPI, check netpoll */ 3506 /* if we've gotten here through NAPI, check netpoll */
3522 if (netpoll_receive_skb(skb)) 3507 if (netpoll_receive_skb(skb))
3523 goto out; 3508 goto out;
3524 3509
3525 orig_dev = skb->dev; 3510 orig_dev = skb->dev;
3526 3511
3527 skb_reset_network_header(skb); 3512 skb_reset_network_header(skb);
3528 if (!skb_transport_header_was_set(skb)) 3513 if (!skb_transport_header_was_set(skb))
3529 skb_reset_transport_header(skb); 3514 skb_reset_transport_header(skb);
3530 skb_reset_mac_len(skb); 3515 skb_reset_mac_len(skb);
3531 3516
3532 pt_prev = NULL; 3517 pt_prev = NULL;
3533 3518
3534 rcu_read_lock(); 3519 rcu_read_lock();
3535 3520
3536 another_round: 3521 another_round:
3537 skb->skb_iif = skb->dev->ifindex; 3522 skb->skb_iif = skb->dev->ifindex;
3538 3523
3539 __this_cpu_inc(softnet_data.processed); 3524 __this_cpu_inc(softnet_data.processed);
3540 3525
3541 if (skb->protocol == cpu_to_be16(ETH_P_8021Q) || 3526 if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
3542 skb->protocol == cpu_to_be16(ETH_P_8021AD)) { 3527 skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
3543 skb = vlan_untag(skb); 3528 skb = vlan_untag(skb);
3544 if (unlikely(!skb)) 3529 if (unlikely(!skb))
3545 goto unlock; 3530 goto unlock;
3546 } 3531 }
3547 3532
3548 #ifdef CONFIG_NET_CLS_ACT 3533 #ifdef CONFIG_NET_CLS_ACT
3549 if (skb->tc_verd & TC_NCLS) { 3534 if (skb->tc_verd & TC_NCLS) {
3550 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); 3535 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
3551 goto ncls; 3536 goto ncls;
3552 } 3537 }
3553 #endif 3538 #endif
3554 3539
3555 if (pfmemalloc) 3540 if (pfmemalloc)
3556 goto skip_taps; 3541 goto skip_taps;
3557 3542
3558 list_for_each_entry_rcu(ptype, &ptype_all, list) { 3543 list_for_each_entry_rcu(ptype, &ptype_all, list) {
3559 if (!ptype->dev || ptype->dev == skb->dev) { 3544 if (!ptype->dev || ptype->dev == skb->dev) {
3560 if (pt_prev) 3545 if (pt_prev)
3561 ret = deliver_skb(skb, pt_prev, orig_dev); 3546 ret = deliver_skb(skb, pt_prev, orig_dev);
3562 pt_prev = ptype; 3547 pt_prev = ptype;
3563 } 3548 }
3564 } 3549 }
3565 3550
3566 skip_taps: 3551 skip_taps:
3567 #ifdef CONFIG_NET_CLS_ACT 3552 #ifdef CONFIG_NET_CLS_ACT
3568 skb = handle_ing(skb, &pt_prev, &ret, orig_dev); 3553 skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
3569 if (!skb) 3554 if (!skb)
3570 goto unlock; 3555 goto unlock;
3571 ncls: 3556 ncls:
3572 #endif 3557 #endif
3573 3558
3574 if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) 3559 if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
3575 goto drop; 3560 goto drop;
3576 3561
3577 if (vlan_tx_tag_present(skb)) { 3562 if (vlan_tx_tag_present(skb)) {
3578 if (pt_prev) { 3563 if (pt_prev) {
3579 ret = deliver_skb(skb, pt_prev, orig_dev); 3564 ret = deliver_skb(skb, pt_prev, orig_dev);
3580 pt_prev = NULL; 3565 pt_prev = NULL;
3581 } 3566 }
3582 if (vlan_do_receive(&skb)) 3567 if (vlan_do_receive(&skb))
3583 goto another_round; 3568 goto another_round;
3584 else if (unlikely(!skb)) 3569 else if (unlikely(!skb))
3585 goto unlock; 3570 goto unlock;
3586 } 3571 }
3587 3572
3588 rx_handler = rcu_dereference(skb->dev->rx_handler); 3573 rx_handler = rcu_dereference(skb->dev->rx_handler);
3589 if (rx_handler) { 3574 if (rx_handler) {
3590 if (pt_prev) { 3575 if (pt_prev) {
3591 ret = deliver_skb(skb, pt_prev, orig_dev); 3576 ret = deliver_skb(skb, pt_prev, orig_dev);
3592 pt_prev = NULL; 3577 pt_prev = NULL;
3593 } 3578 }
3594 switch (rx_handler(&skb)) { 3579 switch (rx_handler(&skb)) {
3595 case RX_HANDLER_CONSUMED: 3580 case RX_HANDLER_CONSUMED:
3596 ret = NET_RX_SUCCESS; 3581 ret = NET_RX_SUCCESS;
3597 goto unlock; 3582 goto unlock;
3598 case RX_HANDLER_ANOTHER: 3583 case RX_HANDLER_ANOTHER:
3599 goto another_round; 3584 goto another_round;
3600 case RX_HANDLER_EXACT: 3585 case RX_HANDLER_EXACT:
3601 deliver_exact = true; 3586 deliver_exact = true;
3602 case RX_HANDLER_PASS: 3587 case RX_HANDLER_PASS:
3603 break; 3588 break;
3604 default: 3589 default:
3605 BUG(); 3590 BUG();
3606 } 3591 }
3607 } 3592 }
3608 3593
3609 if (unlikely(vlan_tx_tag_present(skb))) { 3594 if (unlikely(vlan_tx_tag_present(skb))) {
3610 if (vlan_tx_tag_get_id(skb)) 3595 if (vlan_tx_tag_get_id(skb))
3611 skb->pkt_type = PACKET_OTHERHOST; 3596 skb->pkt_type = PACKET_OTHERHOST;
3612 /* Note: we might in the future use prio bits 3597 /* Note: we might in the future use prio bits
3613 * and set skb->priority like in vlan_do_receive() 3598 * and set skb->priority like in vlan_do_receive()
3614 * For the time being, just ignore Priority Code Point 3599 * For the time being, just ignore Priority Code Point
3615 */ 3600 */
3616 skb->vlan_tci = 0; 3601 skb->vlan_tci = 0;
3617 } 3602 }
3618 3603
3619 /* deliver only exact match when indicated */ 3604 /* deliver only exact match when indicated */
3620 null_or_dev = deliver_exact ? skb->dev : NULL; 3605 null_or_dev = deliver_exact ? skb->dev : NULL;
3621 3606
3622 type = skb->protocol; 3607 type = skb->protocol;
3623 list_for_each_entry_rcu(ptype, 3608 list_for_each_entry_rcu(ptype,
3624 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { 3609 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
3625 if (ptype->type == type && 3610 if (ptype->type == type &&
3626 (ptype->dev == null_or_dev || ptype->dev == skb->dev || 3611 (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
3627 ptype->dev == orig_dev)) { 3612 ptype->dev == orig_dev)) {
3628 if (pt_prev) 3613 if (pt_prev)
3629 ret = deliver_skb(skb, pt_prev, orig_dev); 3614 ret = deliver_skb(skb, pt_prev, orig_dev);
3630 pt_prev = ptype; 3615 pt_prev = ptype;
3631 } 3616 }
3632 } 3617 }
3633 3618
3634 if (pt_prev) { 3619 if (pt_prev) {
3635 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) 3620 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
3636 goto drop; 3621 goto drop;
3637 else 3622 else
3638 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); 3623 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
3639 } else { 3624 } else {
3640 drop: 3625 drop:
3641 atomic_long_inc(&skb->dev->rx_dropped); 3626 atomic_long_inc(&skb->dev->rx_dropped);
3642 kfree_skb(skb); 3627 kfree_skb(skb);
3643 /* Jamal, now you will not be able to escape explaining 3628 /* Jamal, now you will not be able to escape explaining
3644 * to me how you were going to use this. :-) 3629 * to me how you were going to use this. :-)
3645 */ 3630 */
3646 ret = NET_RX_DROP; 3631 ret = NET_RX_DROP;
3647 } 3632 }
3648 3633
3649 unlock: 3634 unlock:
3650 rcu_read_unlock(); 3635 rcu_read_unlock();
3651 out: 3636 out:
3652 return ret; 3637 return ret;
3653 } 3638 }
3654 3639
3655 static int __netif_receive_skb(struct sk_buff *skb) 3640 static int __netif_receive_skb(struct sk_buff *skb)
3656 { 3641 {
3657 int ret; 3642 int ret;
3658 3643
3659 if (sk_memalloc_socks() && skb_pfmemalloc(skb)) { 3644 if (sk_memalloc_socks() && skb_pfmemalloc(skb)) {
3660 unsigned long pflags = current->flags; 3645 unsigned long pflags = current->flags;
3661 3646
3662 /* 3647 /*
3663 * PFMEMALLOC skbs are special, they should 3648 * PFMEMALLOC skbs are special, they should
3664 * - be delivered to SOCK_MEMALLOC sockets only 3649 * - be delivered to SOCK_MEMALLOC sockets only
3665 * - stay away from userspace 3650 * - stay away from userspace
3666 * - have bounded memory usage 3651 * - have bounded memory usage
3667 * 3652 *
3668 * Use PF_MEMALLOC as this saves us from propagating the allocation 3653 * Use PF_MEMALLOC as this saves us from propagating the allocation
3669 * context down to all allocation sites. 3654 * context down to all allocation sites.
3670 */ 3655 */
3671 current->flags |= PF_MEMALLOC; 3656 current->flags |= PF_MEMALLOC;
3672 ret = __netif_receive_skb_core(skb, true); 3657 ret = __netif_receive_skb_core(skb, true);
3673 tsk_restore_flags(current, pflags, PF_MEMALLOC); 3658 tsk_restore_flags(current, pflags, PF_MEMALLOC);
3674 } else 3659 } else
3675 ret = __netif_receive_skb_core(skb, false); 3660 ret = __netif_receive_skb_core(skb, false);
3676 3661
3677 return ret; 3662 return ret;
3678 } 3663 }
3679 3664
3680 /** 3665 /**
3681 * netif_receive_skb - process receive buffer from network 3666 * netif_receive_skb - process receive buffer from network
3682 * @skb: buffer to process 3667 * @skb: buffer to process
3683 * 3668 *
3684 * netif_receive_skb() is the main receive data processing function. 3669 * netif_receive_skb() is the main receive data processing function.
3685 * It always succeeds. The buffer may be dropped during processing 3670 * It always succeeds. The buffer may be dropped during processing
3686 * for congestion control or by the protocol layers. 3671 * for congestion control or by the protocol layers.
3687 * 3672 *
3688 * This function may only be called from softirq context and interrupts 3673 * This function may only be called from softirq context and interrupts
3689 * should be enabled. 3674 * should be enabled.
3690 * 3675 *
3691 * Return values (usually ignored): 3676 * Return values (usually ignored):
3692 * NET_RX_SUCCESS: no congestion 3677 * NET_RX_SUCCESS: no congestion
3693 * NET_RX_DROP: packet was dropped 3678 * NET_RX_DROP: packet was dropped
3694 */ 3679 */
3695 int netif_receive_skb(struct sk_buff *skb) 3680 int netif_receive_skb(struct sk_buff *skb)
3696 { 3681 {
3697 net_timestamp_check(netdev_tstamp_prequeue, skb); 3682 net_timestamp_check(netdev_tstamp_prequeue, skb);
3698 3683
3699 if (skb_defer_rx_timestamp(skb)) 3684 if (skb_defer_rx_timestamp(skb))
3700 return NET_RX_SUCCESS; 3685 return NET_RX_SUCCESS;
3701 3686
3702 #ifdef CONFIG_RPS 3687 #ifdef CONFIG_RPS
3703 if (static_key_false(&rps_needed)) { 3688 if (static_key_false(&rps_needed)) {
3704 struct rps_dev_flow voidflow, *rflow = &voidflow; 3689 struct rps_dev_flow voidflow, *rflow = &voidflow;
3705 int cpu, ret; 3690 int cpu, ret;
3706 3691
3707 rcu_read_lock(); 3692 rcu_read_lock();
3708 3693
3709 cpu = get_rps_cpu(skb->dev, skb, &rflow); 3694 cpu = get_rps_cpu(skb->dev, skb, &rflow);
3710 3695
3711 if (cpu >= 0) { 3696 if (cpu >= 0) {
3712 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); 3697 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3713 rcu_read_unlock(); 3698 rcu_read_unlock();
3714 return ret; 3699 return ret;
3715 } 3700 }
3716 rcu_read_unlock(); 3701 rcu_read_unlock();
3717 } 3702 }
3718 #endif 3703 #endif
3719 return __netif_receive_skb(skb); 3704 return __netif_receive_skb(skb);
3720 } 3705 }
3721 EXPORT_SYMBOL(netif_receive_skb); 3706 EXPORT_SYMBOL(netif_receive_skb);
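/*
 * A sketch of the NAPI-side counterpart of netif_rx(): a driver poll routine
 * handing completed frames to netif_receive_skb() from softirq context, as
 * the kernel-doc above requires. example_ring_next_skb() and the example_
 * names are hypothetical; napi_complete(), eth_type_trans() and
 * netif_receive_skb() are the real APIs (usual includes assumed).
 */
static struct sk_buff *example_ring_next_skb(struct net_device *dev);	/* hypothetical RX ring helper */

static int example_napi_poll(struct napi_struct *napi, int budget)
{
	struct net_device *dev = napi->dev;
	int work_done = 0;

	while (work_done < budget) {
		struct sk_buff *skb = example_ring_next_skb(dev);

		if (!skb)
			break;

		skb->protocol = eth_type_trans(skb, dev);
		netif_receive_skb(skb);	/* or napi_gro_receive(napi, skb) to feed GRO */
		work_done++;
	}

	if (work_done < budget) {
		napi_complete(napi);
		/* ... re-enable RX interrupts on the device here ... */
	}

	return work_done;
}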
3722 3707
3723 /* Network device is going away, flush any packets still pending 3708 /* Network device is going away, flush any packets still pending
3724 * Called with irqs disabled. 3709 * Called with irqs disabled.
3725 */ 3710 */
3726 static void flush_backlog(void *arg) 3711 static void flush_backlog(void *arg)
3727 { 3712 {
3728 struct net_device *dev = arg; 3713 struct net_device *dev = arg;
3729 struct softnet_data *sd = &__get_cpu_var(softnet_data); 3714 struct softnet_data *sd = &__get_cpu_var(softnet_data);
3730 struct sk_buff *skb, *tmp; 3715 struct sk_buff *skb, *tmp;
3731 3716
3732 rps_lock(sd); 3717 rps_lock(sd);
3733 skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { 3718 skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
3734 if (skb->dev == dev) { 3719 if (skb->dev == dev) {
3735 __skb_unlink(skb, &sd->input_pkt_queue); 3720 __skb_unlink(skb, &sd->input_pkt_queue);
3736 kfree_skb(skb); 3721 kfree_skb(skb);
3737 input_queue_head_incr(sd); 3722 input_queue_head_incr(sd);
3738 } 3723 }
3739 } 3724 }
3740 rps_unlock(sd); 3725 rps_unlock(sd);
3741 3726
3742 skb_queue_walk_safe(&sd->process_queue, skb, tmp) { 3727 skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
3743 if (skb->dev == dev) { 3728 if (skb->dev == dev) {
3744 __skb_unlink(skb, &sd->process_queue); 3729 __skb_unlink(skb, &sd->process_queue);
3745 kfree_skb(skb); 3730 kfree_skb(skb);
3746 input_queue_head_incr(sd); 3731 input_queue_head_incr(sd);
3747 } 3732 }
3748 } 3733 }
3749 } 3734 }
3750 3735
3751 static int napi_gro_complete(struct sk_buff *skb) 3736 static int napi_gro_complete(struct sk_buff *skb)
3752 { 3737 {
3753 struct packet_offload *ptype; 3738 struct packet_offload *ptype;
3754 __be16 type = skb->protocol; 3739 __be16 type = skb->protocol;
3755 struct list_head *head = &offload_base; 3740 struct list_head *head = &offload_base;
3756 int err = -ENOENT; 3741 int err = -ENOENT;
3757 3742
3758 BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb)); 3743 BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
3759 3744
3760 if (NAPI_GRO_CB(skb)->count == 1) { 3745 if (NAPI_GRO_CB(skb)->count == 1) {
3761 skb_shinfo(skb)->gso_size = 0; 3746 skb_shinfo(skb)->gso_size = 0;
3762 goto out; 3747 goto out;
3763 } 3748 }
3764 3749
3765 rcu_read_lock(); 3750 rcu_read_lock();
3766 list_for_each_entry_rcu(ptype, head, list) { 3751 list_for_each_entry_rcu(ptype, head, list) {
3767 if (ptype->type != type || !ptype->callbacks.gro_complete) 3752 if (ptype->type != type || !ptype->callbacks.gro_complete)
3768 continue; 3753 continue;
3769 3754
3770 err = ptype->callbacks.gro_complete(skb); 3755 err = ptype->callbacks.gro_complete(skb);
3771 break; 3756 break;
3772 } 3757 }
3773 rcu_read_unlock(); 3758 rcu_read_unlock();
3774 3759
3775 if (err) { 3760 if (err) {
3776 WARN_ON(&ptype->list == head); 3761 WARN_ON(&ptype->list == head);
3777 kfree_skb(skb); 3762 kfree_skb(skb);
3778 return NET_RX_SUCCESS; 3763 return NET_RX_SUCCESS;
3779 } 3764 }
3780 3765
3781 out: 3766 out:
3782 return netif_receive_skb(skb); 3767 return netif_receive_skb(skb);
3783 } 3768 }
3784 3769
3785 /* napi->gro_list contains packets ordered by age. 3770 /* napi->gro_list contains packets ordered by age.
3786 * The youngest packets are at its head. 3771 * The youngest packets are at its head.
3787 * Complete skbs in reverse order to reduce latencies. 3772 * Complete skbs in reverse order to reduce latencies.
3788 */ 3773 */
3789 void napi_gro_flush(struct napi_struct *napi, bool flush_old) 3774 void napi_gro_flush(struct napi_struct *napi, bool flush_old)
3790 { 3775 {
3791 struct sk_buff *skb, *prev = NULL; 3776 struct sk_buff *skb, *prev = NULL;
3792 3777
3793 /* scan list and build reverse chain */ 3778 /* scan list and build reverse chain */
3794 for (skb = napi->gro_list; skb != NULL; skb = skb->next) { 3779 for (skb = napi->gro_list; skb != NULL; skb = skb->next) {
3795 skb->prev = prev; 3780 skb->prev = prev;
3796 prev = skb; 3781 prev = skb;
3797 } 3782 }
3798 3783
3799 for (skb = prev; skb; skb = prev) { 3784 for (skb = prev; skb; skb = prev) {
3800 skb->next = NULL; 3785 skb->next = NULL;
3801 3786
3802 if (flush_old && NAPI_GRO_CB(skb)->age == jiffies) 3787 if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
3803 return; 3788 return;
3804 3789
3805 prev = skb->prev; 3790 prev = skb->prev;
3806 napi_gro_complete(skb); 3791 napi_gro_complete(skb);
3807 napi->gro_count--; 3792 napi->gro_count--;
3808 } 3793 }
3809 3794
3810 napi->gro_list = NULL; 3795 napi->gro_list = NULL;
3811 } 3796 }
3812 EXPORT_SYMBOL(napi_gro_flush); 3797 EXPORT_SYMBOL(napi_gro_flush);
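
The two-pass walk in napi_gro_flush() above is a generic way to visit a singly linked list from tail to head without allocating: first thread a prev pointer through the list, then follow it backwards so the oldest entries are completed first. A minimal userspace sketch of the same idea follows; the struct node type and the printing are purely illustrative, not kernel code.

#include <stdio.h>

struct node {
	int val;
	struct node *next;
	struct node *prev;	/* scratch pointer, filled in by the walk */
};

/* Visit the list tail-to-head, mirroring the two passes in napi_gro_flush(). */
static void visit_in_reverse(struct node *head)
{
	struct node *n, *prev = NULL;

	/* pass 1: thread a reverse chain through ->prev */
	for (n = head; n; n = n->next) {
		n->prev = prev;
		prev = n;
	}

	/* pass 2: 'prev' is now the tail; walk back towards the head */
	for (n = prev; n; n = prev) {
		prev = n->prev;
		printf("%d\n", n->val);	/* the entry farthest from the head comes out first */
	}
}

int main(void)
{
	struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };

	visit_in_reverse(&a);		/* prints 3, 2, 1 */
	return 0;
}
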
3813 3798
3814 static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) 3799 static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
3815 { 3800 {
3816 struct sk_buff *p; 3801 struct sk_buff *p;
3817 unsigned int maclen = skb->dev->hard_header_len; 3802 unsigned int maclen = skb->dev->hard_header_len;
3818 3803
3819 for (p = napi->gro_list; p; p = p->next) { 3804 for (p = napi->gro_list; p; p = p->next) {
3820 unsigned long diffs; 3805 unsigned long diffs;
3821 3806
3822 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; 3807 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
3823 diffs |= p->vlan_tci ^ skb->vlan_tci; 3808 diffs |= p->vlan_tci ^ skb->vlan_tci;
3824 if (maclen == ETH_HLEN) 3809 if (maclen == ETH_HLEN)
3825 diffs |= compare_ether_header(skb_mac_header(p), 3810 diffs |= compare_ether_header(skb_mac_header(p),
3826 skb_gro_mac_header(skb)); 3811 skb_gro_mac_header(skb));
3827 else if (!diffs) 3812 else if (!diffs)
3828 diffs = memcmp(skb_mac_header(p), 3813 diffs = memcmp(skb_mac_header(p),
3829 skb_gro_mac_header(skb), 3814 skb_gro_mac_header(skb),
3830 maclen); 3815 maclen);
3831 NAPI_GRO_CB(p)->same_flow = !diffs; 3816 NAPI_GRO_CB(p)->same_flow = !diffs;
3832 NAPI_GRO_CB(p)->flush = 0; 3817 NAPI_GRO_CB(p)->flush = 0;
3833 } 3818 }
3834 } 3819 }
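
gro_list_prepare() classifies a held packet as belonging to the same flow by accumulating every possible difference into one value and checking that it stays zero: the device pointers are XOR-ed, the VLAN tags are XOR-ed, and the MAC headers are compared bytewise. Below is a hedged standalone sketch of that accumulate-the-differences idiom; struct flow_key and its fields are invented for the example and are not kernel structures.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MAC_HDR_LEN 12	/* destination + source MAC, as compared by GRO */

/* Stand-in for the handful of fields gro_list_prepare() compares per packet. */
struct flow_key {
	const void *dev;			/* receiving device identity */
	uint16_t vlan_tci;
	unsigned char mac[MAC_HDR_LEN];
};

/* Returns nonzero when a and b belong to the same flow. */
static int same_flow(const struct flow_key *a, const struct flow_key *b)
{
	unsigned long diffs;

	diffs  = (unsigned long)a->dev ^ (unsigned long)b->dev;
	diffs |= a->vlan_tci ^ b->vlan_tci;
	diffs |= !!memcmp(a->mac, b->mac, sizeof(a->mac));

	return !diffs;		/* zero accumulated difference means same flow */
}

int main(void)
{
	static int dev;				/* any unique address works as a device id */
	struct flow_key a = { &dev, 0, { 2, 0, 0, 0, 0, 1,
					 2, 0, 0, 0, 0, 2 } };
	struct flow_key b = a;

	printf("%d\n", same_flow(&a, &b));	/* 1: identical keys */
	b.vlan_tci = 42;
	printf("%d\n", same_flow(&a, &b));	/* 0: VLAN tag differs */
	return 0;
}
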
3835 3820
3836 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 3821 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3837 { 3822 {
3838 struct sk_buff **pp = NULL; 3823 struct sk_buff **pp = NULL;
3839 struct packet_offload *ptype; 3824 struct packet_offload *ptype;
3840 __be16 type = skb->protocol; 3825 __be16 type = skb->protocol;
3841 struct list_head *head = &offload_base; 3826 struct list_head *head = &offload_base;
3842 int same_flow; 3827 int same_flow;
3843 enum gro_result ret; 3828 enum gro_result ret;
3844 3829
3845 if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) 3830 if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
3846 goto normal; 3831 goto normal;
3847 3832
3848 if (skb_is_gso(skb) || skb_has_frag_list(skb)) 3833 if (skb_is_gso(skb) || skb_has_frag_list(skb))
3849 goto normal; 3834 goto normal;
3850 3835
3851 gro_list_prepare(napi, skb); 3836 gro_list_prepare(napi, skb);
3852 3837
3853 rcu_read_lock(); 3838 rcu_read_lock();
3854 list_for_each_entry_rcu(ptype, head, list) { 3839 list_for_each_entry_rcu(ptype, head, list) {
3855 if (ptype->type != type || !ptype->callbacks.gro_receive) 3840 if (ptype->type != type || !ptype->callbacks.gro_receive)
3856 continue; 3841 continue;
3857 3842
3858 skb_set_network_header(skb, skb_gro_offset(skb)); 3843 skb_set_network_header(skb, skb_gro_offset(skb));
3859 skb_reset_mac_len(skb); 3844 skb_reset_mac_len(skb);
3860 NAPI_GRO_CB(skb)->same_flow = 0; 3845 NAPI_GRO_CB(skb)->same_flow = 0;
3861 NAPI_GRO_CB(skb)->flush = 0; 3846 NAPI_GRO_CB(skb)->flush = 0;
3862 NAPI_GRO_CB(skb)->free = 0; 3847 NAPI_GRO_CB(skb)->free = 0;
3863 3848
3864 pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); 3849 pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
3865 break; 3850 break;
3866 } 3851 }
3867 rcu_read_unlock(); 3852 rcu_read_unlock();
3868 3853
3869 if (&ptype->list == head) 3854 if (&ptype->list == head)
3870 goto normal; 3855 goto normal;
3871 3856
3872 same_flow = NAPI_GRO_CB(skb)->same_flow; 3857 same_flow = NAPI_GRO_CB(skb)->same_flow;
3873 ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED; 3858 ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
3874 3859
3875 if (pp) { 3860 if (pp) {
3876 struct sk_buff *nskb = *pp; 3861 struct sk_buff *nskb = *pp;
3877 3862
3878 *pp = nskb->next; 3863 *pp = nskb->next;
3879 nskb->next = NULL; 3864 nskb->next = NULL;
3880 napi_gro_complete(nskb); 3865 napi_gro_complete(nskb);
3881 napi->gro_count--; 3866 napi->gro_count--;
3882 } 3867 }
3883 3868
3884 if (same_flow) 3869 if (same_flow)
3885 goto ok; 3870 goto ok;
3886 3871
3887 if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS) 3872 if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
3888 goto normal; 3873 goto normal;
3889 3874
3890 napi->gro_count++; 3875 napi->gro_count++;
3891 NAPI_GRO_CB(skb)->count = 1; 3876 NAPI_GRO_CB(skb)->count = 1;
3892 NAPI_GRO_CB(skb)->age = jiffies; 3877 NAPI_GRO_CB(skb)->age = jiffies;
3893 skb_shinfo(skb)->gso_size = skb_gro_len(skb); 3878 skb_shinfo(skb)->gso_size = skb_gro_len(skb);
3894 skb->next = napi->gro_list; 3879 skb->next = napi->gro_list;
3895 napi->gro_list = skb; 3880 napi->gro_list = skb;
3896 ret = GRO_HELD; 3881 ret = GRO_HELD;
3897 3882
3898 pull: 3883 pull:
3899 if (skb_headlen(skb) < skb_gro_offset(skb)) { 3884 if (skb_headlen(skb) < skb_gro_offset(skb)) {
3900 int grow = skb_gro_offset(skb) - skb_headlen(skb); 3885 int grow = skb_gro_offset(skb) - skb_headlen(skb);
3901 3886
3902 BUG_ON(skb->end - skb->tail < grow); 3887 BUG_ON(skb->end - skb->tail < grow);
3903 3888
3904 memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow); 3889 memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
3905 3890
3906 skb->tail += grow; 3891 skb->tail += grow;
3907 skb->data_len -= grow; 3892 skb->data_len -= grow;
3908 3893
3909 skb_shinfo(skb)->frags[0].page_offset += grow; 3894 skb_shinfo(skb)->frags[0].page_offset += grow;
3910 skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow); 3895 skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow);
3911 3896
3912 if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) { 3897 if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) {
3913 skb_frag_unref(skb, 0); 3898 skb_frag_unref(skb, 0);
3914 memmove(skb_shinfo(skb)->frags, 3899 memmove(skb_shinfo(skb)->frags,
3915 skb_shinfo(skb)->frags + 1, 3900 skb_shinfo(skb)->frags + 1,
3916 --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); 3901 --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
3917 } 3902 }
3918 } 3903 }
3919 3904
3920 ok: 3905 ok:
3921 return ret; 3906 return ret;
3922 3907
3923 normal: 3908 normal:
3924 ret = GRO_NORMAL; 3909 ret = GRO_NORMAL;
3925 goto pull; 3910 goto pull;
3926 } 3911 }
3927 3912
3928 3913
3929 static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) 3914 static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
3930 { 3915 {
3931 switch (ret) { 3916 switch (ret) {
3932 case GRO_NORMAL: 3917 case GRO_NORMAL:
3933 if (netif_receive_skb(skb)) 3918 if (netif_receive_skb(skb))
3934 ret = GRO_DROP; 3919 ret = GRO_DROP;
3935 break; 3920 break;
3936 3921
3937 case GRO_DROP: 3922 case GRO_DROP:
3938 kfree_skb(skb); 3923 kfree_skb(skb);
3939 break; 3924 break;
3940 3925
3941 case GRO_MERGED_FREE: 3926 case GRO_MERGED_FREE:
3942 if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) 3927 if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
3943 kmem_cache_free(skbuff_head_cache, skb); 3928 kmem_cache_free(skbuff_head_cache, skb);
3944 else 3929 else
3945 __kfree_skb(skb); 3930 __kfree_skb(skb);
3946 break; 3931 break;
3947 3932
3948 case GRO_HELD: 3933 case GRO_HELD:
3949 case GRO_MERGED: 3934 case GRO_MERGED:
3950 break; 3935 break;
3951 } 3936 }
3952 3937
3953 return ret; 3938 return ret;
3954 } 3939 }
3955 3940
3956 static void skb_gro_reset_offset(struct sk_buff *skb) 3941 static void skb_gro_reset_offset(struct sk_buff *skb)
3957 { 3942 {
3958 const struct skb_shared_info *pinfo = skb_shinfo(skb); 3943 const struct skb_shared_info *pinfo = skb_shinfo(skb);
3959 const skb_frag_t *frag0 = &pinfo->frags[0]; 3944 const skb_frag_t *frag0 = &pinfo->frags[0];
3960 3945
3961 NAPI_GRO_CB(skb)->data_offset = 0; 3946 NAPI_GRO_CB(skb)->data_offset = 0;
3962 NAPI_GRO_CB(skb)->frag0 = NULL; 3947 NAPI_GRO_CB(skb)->frag0 = NULL;
3963 NAPI_GRO_CB(skb)->frag0_len = 0; 3948 NAPI_GRO_CB(skb)->frag0_len = 0;
3964 3949
3965 if (skb_mac_header(skb) == skb_tail_pointer(skb) && 3950 if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
3966 pinfo->nr_frags && 3951 pinfo->nr_frags &&
3967 !PageHighMem(skb_frag_page(frag0))) { 3952 !PageHighMem(skb_frag_page(frag0))) {
3968 NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); 3953 NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
3969 NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0); 3954 NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
3970 } 3955 }
3971 } 3956 }
3972 3957
3973 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 3958 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3974 { 3959 {
3975 skb_gro_reset_offset(skb); 3960 skb_gro_reset_offset(skb);
3976 3961
3977 return napi_skb_finish(dev_gro_receive(napi, skb), skb); 3962 return napi_skb_finish(dev_gro_receive(napi, skb), skb);
3978 } 3963 }
3979 EXPORT_SYMBOL(napi_gro_receive); 3964 EXPORT_SYMBOL(napi_gro_receive);
3980 3965
3981 static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) 3966 static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
3982 { 3967 {
3983 __skb_pull(skb, skb_headlen(skb)); 3968 __skb_pull(skb, skb_headlen(skb));
3984 /* restore the reserve we had after netdev_alloc_skb_ip_align() */ 3969 /* restore the reserve we had after netdev_alloc_skb_ip_align() */
3985 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb)); 3970 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
3986 skb->vlan_tci = 0; 3971 skb->vlan_tci = 0;
3987 skb->dev = napi->dev; 3972 skb->dev = napi->dev;
3988 skb->skb_iif = 0; 3973 skb->skb_iif = 0;
3989 3974
3990 napi->skb = skb; 3975 napi->skb = skb;
3991 } 3976 }
3992 3977
3993 struct sk_buff *napi_get_frags(struct napi_struct *napi) 3978 struct sk_buff *napi_get_frags(struct napi_struct *napi)
3994 { 3979 {
3995 struct sk_buff *skb = napi->skb; 3980 struct sk_buff *skb = napi->skb;
3996 3981
3997 if (!skb) { 3982 if (!skb) {
3998 skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD); 3983 skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
3999 napi->skb = skb; 3984 napi->skb = skb;
4000 } 3985 }
4001 return skb; 3986 return skb;
4002 } 3987 }
4003 EXPORT_SYMBOL(napi_get_frags); 3988 EXPORT_SYMBOL(napi_get_frags);
4004 3989
4005 static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, 3990 static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
4006 gro_result_t ret) 3991 gro_result_t ret)
4007 { 3992 {
4008 switch (ret) { 3993 switch (ret) {
4009 case GRO_NORMAL: 3994 case GRO_NORMAL:
4010 case GRO_HELD: 3995 case GRO_HELD:
4011 skb->protocol = eth_type_trans(skb, skb->dev); 3996 skb->protocol = eth_type_trans(skb, skb->dev);
4012 3997
4013 if (ret == GRO_HELD) 3998 if (ret == GRO_HELD)
4014 skb_gro_pull(skb, -ETH_HLEN); 3999 skb_gro_pull(skb, -ETH_HLEN);
4015 else if (netif_receive_skb(skb)) 4000 else if (netif_receive_skb(skb))
4016 ret = GRO_DROP; 4001 ret = GRO_DROP;
4017 break; 4002 break;
4018 4003
4019 case GRO_DROP: 4004 case GRO_DROP:
4020 case GRO_MERGED_FREE: 4005 case GRO_MERGED_FREE:
4021 napi_reuse_skb(napi, skb); 4006 napi_reuse_skb(napi, skb);
4022 break; 4007 break;
4023 4008
4024 case GRO_MERGED: 4009 case GRO_MERGED:
4025 break; 4010 break;
4026 } 4011 }
4027 4012
4028 return ret; 4013 return ret;
4029 } 4014 }
4030 4015
4031 static struct sk_buff *napi_frags_skb(struct napi_struct *napi) 4016 static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
4032 { 4017 {
4033 struct sk_buff *skb = napi->skb; 4018 struct sk_buff *skb = napi->skb;
4034 struct ethhdr *eth; 4019 struct ethhdr *eth;
4035 unsigned int hlen; 4020 unsigned int hlen;
4036 unsigned int off; 4021 unsigned int off;
4037 4022
4038 napi->skb = NULL; 4023 napi->skb = NULL;
4039 4024
4040 skb_reset_mac_header(skb); 4025 skb_reset_mac_header(skb);
4041 skb_gro_reset_offset(skb); 4026 skb_gro_reset_offset(skb);
4042 4027
4043 off = skb_gro_offset(skb); 4028 off = skb_gro_offset(skb);
4044 hlen = off + sizeof(*eth); 4029 hlen = off + sizeof(*eth);
4045 eth = skb_gro_header_fast(skb, off); 4030 eth = skb_gro_header_fast(skb, off);
4046 if (skb_gro_header_hard(skb, hlen)) { 4031 if (skb_gro_header_hard(skb, hlen)) {
4047 eth = skb_gro_header_slow(skb, hlen, off); 4032 eth = skb_gro_header_slow(skb, hlen, off);
4048 if (unlikely(!eth)) { 4033 if (unlikely(!eth)) {
4049 napi_reuse_skb(napi, skb); 4034 napi_reuse_skb(napi, skb);
4050 skb = NULL; 4035 skb = NULL;
4051 goto out; 4036 goto out;
4052 } 4037 }
4053 } 4038 }
4054 4039
4055 skb_gro_pull(skb, sizeof(*eth)); 4040 skb_gro_pull(skb, sizeof(*eth));
4056 4041
4057 /* 4042 /*
4058 * This works because the only protocols we care about don't require 4043 * This works because the only protocols we care about don't require
4059 * special handling. We'll fix it up properly at the end. 4044 * special handling. We'll fix it up properly at the end.
4060 */ 4045 */
4061 skb->protocol = eth->h_proto; 4046 skb->protocol = eth->h_proto;
4062 4047
4063 out: 4048 out:
4064 return skb; 4049 return skb;
4065 } 4050 }
4066 4051
4067 gro_result_t napi_gro_frags(struct napi_struct *napi) 4052 gro_result_t napi_gro_frags(struct napi_struct *napi)
4068 { 4053 {
4069 struct sk_buff *skb = napi_frags_skb(napi); 4054 struct sk_buff *skb = napi_frags_skb(napi);
4070 4055
4071 if (!skb) 4056 if (!skb)
4072 return GRO_DROP; 4057 return GRO_DROP;
4073 4058
4074 return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb)); 4059 return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
4075 } 4060 }
4076 EXPORT_SYMBOL(napi_gro_frags); 4061 EXPORT_SYMBOL(napi_gro_frags);
4077 4062
4078 /* 4063 /*
4079 * net_rps_action sends any pending IPIs for RPS. 4064 * net_rps_action sends any pending IPIs for RPS.
4080 * Note: called with local irq disabled, but exits with local irq enabled. 4065 * Note: called with local irq disabled, but exits with local irq enabled.
4081 */ 4066 */
4082 static void net_rps_action_and_irq_enable(struct softnet_data *sd) 4067 static void net_rps_action_and_irq_enable(struct softnet_data *sd)
4083 { 4068 {
4084 #ifdef CONFIG_RPS 4069 #ifdef CONFIG_RPS
4085 struct softnet_data *remsd = sd->rps_ipi_list; 4070 struct softnet_data *remsd = sd->rps_ipi_list;
4086 4071
4087 if (remsd) { 4072 if (remsd) {
4088 sd->rps_ipi_list = NULL; 4073 sd->rps_ipi_list = NULL;
4089 4074
4090 local_irq_enable(); 4075 local_irq_enable();
4091 4076
4092 /* Send pending IPIs to kick RPS processing on remote CPUs. */ 4077 /* Send pending IPIs to kick RPS processing on remote CPUs. */
4093 while (remsd) { 4078 while (remsd) {
4094 struct softnet_data *next = remsd->rps_ipi_next; 4079 struct softnet_data *next = remsd->rps_ipi_next;
4095 4080
4096 if (cpu_online(remsd->cpu)) 4081 if (cpu_online(remsd->cpu))
4097 __smp_call_function_single(remsd->cpu, 4082 __smp_call_function_single(remsd->cpu,
4098 &remsd->csd, 0); 4083 &remsd->csd, 0);
4099 remsd = next; 4084 remsd = next;
4100 } 4085 }
4101 } else 4086 } else
4102 #endif 4087 #endif
4103 local_irq_enable(); 4088 local_irq_enable();
4104 } 4089 }
4105 4090
4106 static int process_backlog(struct napi_struct *napi, int quota) 4091 static int process_backlog(struct napi_struct *napi, int quota)
4107 { 4092 {
4108 int work = 0; 4093 int work = 0;
4109 struct softnet_data *sd = container_of(napi, struct softnet_data, backlog); 4094 struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
4110 4095
4111 #ifdef CONFIG_RPS 4096 #ifdef CONFIG_RPS
4112 /* Check if we have pending IPIs; it's better to send them now 4097 /* Check if we have pending IPIs; it's better to send them now
4113 * than to wait for net_rx_action() to end. 4098 * than to wait for net_rx_action() to end.
4114 */ 4099 */
4115 if (sd->rps_ipi_list) { 4100 if (sd->rps_ipi_list) {
4116 local_irq_disable(); 4101 local_irq_disable();
4117 net_rps_action_and_irq_enable(sd); 4102 net_rps_action_and_irq_enable(sd);
4118 } 4103 }
4119 #endif 4104 #endif
4120 napi->weight = weight_p; 4105 napi->weight = weight_p;
4121 local_irq_disable(); 4106 local_irq_disable();
4122 while (work < quota) { 4107 while (work < quota) {
4123 struct sk_buff *skb; 4108 struct sk_buff *skb;
4124 unsigned int qlen; 4109 unsigned int qlen;
4125 4110
4126 while ((skb = __skb_dequeue(&sd->process_queue))) { 4111 while ((skb = __skb_dequeue(&sd->process_queue))) {
4127 local_irq_enable(); 4112 local_irq_enable();
4128 __netif_receive_skb(skb); 4113 __netif_receive_skb(skb);
4129 local_irq_disable(); 4114 local_irq_disable();
4130 input_queue_head_incr(sd); 4115 input_queue_head_incr(sd);
4131 if (++work >= quota) { 4116 if (++work >= quota) {
4132 local_irq_enable(); 4117 local_irq_enable();
4133 return work; 4118 return work;
4134 } 4119 }
4135 } 4120 }
4136 4121
4137 rps_lock(sd); 4122 rps_lock(sd);
4138 qlen = skb_queue_len(&sd->input_pkt_queue); 4123 qlen = skb_queue_len(&sd->input_pkt_queue);
4139 if (qlen) 4124 if (qlen)
4140 skb_queue_splice_tail_init(&sd->input_pkt_queue, 4125 skb_queue_splice_tail_init(&sd->input_pkt_queue,
4141 &sd->process_queue); 4126 &sd->process_queue);
4142 4127
4143 if (qlen < quota - work) { 4128 if (qlen < quota - work) {
4144 /* 4129 /*
4145 * Inline a custom version of __napi_complete(). 4130 * Inline a custom version of __napi_complete().
4146 * Only the current CPU owns and manipulates this napi, 4131 * Only the current CPU owns and manipulates this napi,
4147 * and NAPI_STATE_SCHED is the only possible flag set on backlog. 4132 * and NAPI_STATE_SCHED is the only possible flag set on backlog.
4148 * We can therefore use a plain write instead of clear_bit(), 4133 * We can therefore use a plain write instead of clear_bit(),
4149 * and we don't need an smp_mb() memory barrier. 4134 * and we don't need an smp_mb() memory barrier.
4150 */ 4135 */
4151 list_del(&napi->poll_list); 4136 list_del(&napi->poll_list);
4152 napi->state = 0; 4137 napi->state = 0;
4153 4138
4154 quota = work + qlen; 4139 quota = work + qlen;
4155 } 4140 }
4156 rps_unlock(sd); 4141 rps_unlock(sd);
4157 } 4142 }
4158 local_irq_enable(); 4143 local_irq_enable();
4159 4144
4160 return work; 4145 return work;
4161 } 4146 }
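
process_backlog() keeps the rps_lock() hold time short by splicing the whole input_pkt_queue onto the private process_queue in one step and then draining that private queue with the lock dropped. Here is the same splice-then-drain pattern in a small userspace sketch, with a pthread mutex standing in for rps_lock(); the item type and queue names are invented for illustration.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct item {
	int id;
	struct item *next;
};

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static struct item *input_queue;	/* filled by producers, guarded by queue_lock */

/* Producer side: push one item under the lock. */
static void enqueue(int id)
{
	struct item *it = malloc(sizeof(*it));

	if (!it)
		return;
	it->id = id;
	pthread_mutex_lock(&queue_lock);
	it->next = input_queue;
	input_queue = it;
	pthread_mutex_unlock(&queue_lock);
}

/* Consumer side: splice everything out under the lock, process it unlocked. */
static void drain(void)
{
	struct item *batch, *it;

	pthread_mutex_lock(&queue_lock);
	batch = input_queue;		/* take the whole queue in O(1) */
	input_queue = NULL;
	pthread_mutex_unlock(&queue_lock);

	while ((it = batch)) {		/* no lock held while the items are processed */
		batch = it->next;
		printf("processing item %d\n", it->id);
		free(it);
	}
}

int main(void)
{
	enqueue(1);
	enqueue(2);
	enqueue(3);
	drain();			/* prints 3, 2, 1 (items were pushed LIFO) */
	return 0;
}
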
4162 4147
4163 /** 4148 /**
4164 * __napi_schedule - schedule for receive 4149 * __napi_schedule - schedule for receive
4165 * @n: entry to schedule 4150 * @n: entry to schedule
4166 * 4151 *
4167 * The entry's receive function will be scheduled to run 4152 * The entry's receive function will be scheduled to run
4168 */ 4153 */
4169 void __napi_schedule(struct napi_struct *n) 4154 void __napi_schedule(struct napi_struct *n)
4170 { 4155 {
4171 unsigned long flags; 4156 unsigned long flags;
4172 4157
4173 local_irq_save(flags); 4158 local_irq_save(flags);
4174 ____napi_schedule(&__get_cpu_var(softnet_data), n); 4159 ____napi_schedule(&__get_cpu_var(softnet_data), n);
4175 local_irq_restore(flags); 4160 local_irq_restore(flags);
4176 } 4161 }
4177 EXPORT_SYMBOL(__napi_schedule); 4162 EXPORT_SYMBOL(__napi_schedule);
4178 4163
4179 void __napi_complete(struct napi_struct *n) 4164 void __napi_complete(struct napi_struct *n)
4180 { 4165 {
4181 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); 4166 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
4182 BUG_ON(n->gro_list); 4167 BUG_ON(n->gro_list);
4183 4168
4184 list_del(&n->poll_list); 4169 list_del(&n->poll_list);
4185 smp_mb__before_clear_bit(); 4170 smp_mb__before_clear_bit();
4186 clear_bit(NAPI_STATE_SCHED, &n->state); 4171 clear_bit(NAPI_STATE_SCHED, &n->state);
4187 } 4172 }
4188 EXPORT_SYMBOL(__napi_complete); 4173 EXPORT_SYMBOL(__napi_complete);
4189 4174
4190 void napi_complete(struct napi_struct *n) 4175 void napi_complete(struct napi_struct *n)
4191 { 4176 {
4192 unsigned long flags; 4177 unsigned long flags;
4193 4178
4194 /* 4179 /*
4195 * don't let napi dequeue from the CPU poll list 4180 * don't let napi dequeue from the CPU poll list
4196 * just in case it's running on a different CPU 4181 * just in case it's running on a different CPU
4197 */ 4182 */
4198 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state))) 4183 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
4199 return; 4184 return;
4200 4185
4201 napi_gro_flush(n, false); 4186 napi_gro_flush(n, false);
4202 local_irq_save(flags); 4187 local_irq_save(flags);
4203 __napi_complete(n); 4188 __napi_complete(n);
4204 local_irq_restore(flags); 4189 local_irq_restore(flags);
4205 } 4190 }
4206 EXPORT_SYMBOL(napi_complete); 4191 EXPORT_SYMBOL(napi_complete);
4207 4192
4208 /* must be called under rcu_read_lock(), as we don't take a reference */ 4193 /* must be called under rcu_read_lock(), as we don't take a reference */
4209 struct napi_struct *napi_by_id(unsigned int napi_id) 4194 struct napi_struct *napi_by_id(unsigned int napi_id)
4210 { 4195 {
4211 unsigned int hash = napi_id % HASH_SIZE(napi_hash); 4196 unsigned int hash = napi_id % HASH_SIZE(napi_hash);
4212 struct napi_struct *napi; 4197 struct napi_struct *napi;
4213 4198
4214 hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node) 4199 hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node)
4215 if (napi->napi_id == napi_id) 4200 if (napi->napi_id == napi_id)
4216 return napi; 4201 return napi;
4217 4202
4218 return NULL; 4203 return NULL;
4219 } 4204 }
4220 EXPORT_SYMBOL_GPL(napi_by_id); 4205 EXPORT_SYMBOL_GPL(napi_by_id);
4221 4206
4222 void napi_hash_add(struct napi_struct *napi) 4207 void napi_hash_add(struct napi_struct *napi)
4223 { 4208 {
4224 if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) { 4209 if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) {
4225 4210
4226 spin_lock(&napi_hash_lock); 4211 spin_lock(&napi_hash_lock);
4227 4212
4228 /* 0 is not a valid id; we also skip an id that is already taken. 4213 /* 0 is not a valid id; we also skip an id that is already taken.
4229 * We expect both events to be extremely rare. 4214 * We expect both events to be extremely rare.
4230 */ 4215 */
4231 napi->napi_id = 0; 4216 napi->napi_id = 0;
4232 while (!napi->napi_id) { 4217 while (!napi->napi_id) {
4233 napi->napi_id = ++napi_gen_id; 4218 napi->napi_id = ++napi_gen_id;
4234 if (napi_by_id(napi->napi_id)) 4219 if (napi_by_id(napi->napi_id))
4235 napi->napi_id = 0; 4220 napi->napi_id = 0;
4236 } 4221 }
4237 4222
4238 hlist_add_head_rcu(&napi->napi_hash_node, 4223 hlist_add_head_rcu(&napi->napi_hash_node,
4239 &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); 4224 &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
4240 4225
4241 spin_unlock(&napi_hash_lock); 4226 spin_unlock(&napi_hash_lock);
4242 } 4227 }
4243 } 4228 }
4244 EXPORT_SYMBOL_GPL(napi_hash_add); 4229 EXPORT_SYMBOL_GPL(napi_hash_add);
4245 4230
4246 /* Warning: the caller is responsible for making sure an RCU grace period 4231 /* Warning: the caller is responsible for making sure an RCU grace period
4247 * is respected before freeing the memory containing @napi 4232 * is respected before freeing the memory containing @napi
4248 */ 4233 */
4249 void napi_hash_del(struct napi_struct *napi) 4234 void napi_hash_del(struct napi_struct *napi)
4250 { 4235 {
4251 spin_lock(&napi_hash_lock); 4236 spin_lock(&napi_hash_lock);
4252 4237
4253 if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) 4238 if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state))
4254 hlist_del_rcu(&napi->napi_hash_node); 4239 hlist_del_rcu(&napi->napi_hash_node);
4255 4240
4256 spin_unlock(&napi_hash_lock); 4241 spin_unlock(&napi_hash_lock);
4257 } 4242 }
4258 EXPORT_SYMBOL_GPL(napi_hash_del); 4243 EXPORT_SYMBOL_GPL(napi_hash_del);
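
The id allocation loop in napi_hash_add() is a generation counter: bump napi_gen_id, treat 0 as "no id" so wraparound can never hand it out, and retry while the candidate already matches an existing entry. A compact sketch of that loop against a deliberately tiny table follows; the array, its size and alloc_id() are illustrative simplifications, not kernel API.

#include <stdbool.h>
#include <stdio.h>

#define NR_IDS 64	/* deliberately tiny; the real hash is larger and keyed per id */

static bool id_taken[NR_IDS];
static unsigned int gen_id;	/* monotonically increasing generator, cf. napi_gen_id */

/* Mirror of the loop in napi_hash_add(): skip 0 and skip ids already in use. */
static unsigned int alloc_id(void)
{
	unsigned int id = 0;

	while (!id) {
		id = ++gen_id;
		if (id_taken[id % NR_IDS])
			id = 0;			/* collision: try the next candidate */
	}
	id_taken[id % NR_IDS] = true;
	return id;
}

int main(void)
{
	unsigned int a = alloc_id();
	unsigned int b = alloc_id();

	printf("%u %u\n", a, b);		/* 1 2 */
	return 0;
}
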
4259 4244
4260 void netif_napi_add(struct net_device *dev, struct napi_struct *napi, 4245 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
4261 int (*poll)(struct napi_struct *, int), int weight) 4246 int (*poll)(struct napi_struct *, int), int weight)
4262 { 4247 {
4263 INIT_LIST_HEAD(&napi->poll_list); 4248 INIT_LIST_HEAD(&napi->poll_list);
4264 napi->gro_count = 0; 4249 napi->gro_count = 0;
4265 napi->gro_list = NULL; 4250 napi->gro_list = NULL;
4266 napi->skb = NULL; 4251 napi->skb = NULL;
4267 napi->poll = poll; 4252 napi->poll = poll;
4268 if (weight > NAPI_POLL_WEIGHT) 4253 if (weight > NAPI_POLL_WEIGHT)
4269 pr_err_once("netif_napi_add() called with weight %d on device %s\n", 4254 pr_err_once("netif_napi_add() called with weight %d on device %s\n",
4270 weight, dev->name); 4255 weight, dev->name);
4271 napi->weight = weight; 4256 napi->weight = weight;
4272 list_add(&napi->dev_list, &dev->napi_list); 4257 list_add(&napi->dev_list, &dev->napi_list);
4273 napi->dev = dev; 4258 napi->dev = dev;
4274 #ifdef CONFIG_NETPOLL 4259 #ifdef CONFIG_NETPOLL
4275 spin_lock_init(&napi->poll_lock); 4260 spin_lock_init(&napi->poll_lock);
4276 napi->poll_owner = -1; 4261 napi->poll_owner = -1;
4277 #endif 4262 #endif
4278 set_bit(NAPI_STATE_SCHED, &napi->state); 4263 set_bit(NAPI_STATE_SCHED, &napi->state);
4279 } 4264 }
4280 EXPORT_SYMBOL(netif_napi_add); 4265 EXPORT_SYMBOL(netif_napi_add);
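
For context, a driver typically uses the functions above in a fixed shape: netif_napi_add() at setup time, napi_schedule() from the RX interrupt, and a poll callback that feeds frames to napi_gro_receive() and calls napi_complete() once it finishes under budget. The skeleton below sketches that pattern for a hypothetical adapter; struct my_adapter, my_rx_one() and the my_irq_* helpers are placeholders for driver-specific code, and their stubs exist only to keep the sketch self-contained.

#include <linux/netdevice.h>
#include <linux/interrupt.h>
#include <linux/skbuff.h>

/* Hypothetical adapter; only the pieces relevant to NAPI are shown. */
struct my_adapter {
	struct net_device *netdev;
	struct napi_struct napi;
};

/* Placeholder stubs standing in for real ring and IRQ handling. */
static struct sk_buff *my_rx_one(struct my_adapter *adap) { return NULL; }
static void my_irq_enable(struct my_adapter *adap) { }
static void my_irq_disable(struct my_adapter *adap) { }

static int my_poll(struct napi_struct *napi, int budget)
{
	struct my_adapter *adap = container_of(napi, struct my_adapter, napi);
	int work = 0;

	while (work < budget) {
		struct sk_buff *skb = my_rx_one(adap);	/* next frame, if any */

		if (!skb)
			break;
		napi_gro_receive(napi, skb);	/* GRO, then netif_receive_skb() */
		work++;
	}

	if (work < budget) {
		napi_complete(napi);		/* ring drained: leave polled mode */
		my_irq_enable(adap);		/* next packet raises an interrupt again */
	}
	return work;
}

static irqreturn_t my_isr(int irq, void *data)
{
	struct my_adapter *adap = data;

	my_irq_disable(adap);			/* mask RX interrupts ... */
	napi_schedule(&adap->napi);		/* ... and switch to polling */
	return IRQ_HANDLED;
}

static void my_napi_setup(struct my_adapter *adap)
{
	netif_napi_add(adap->netdev, &adap->napi, my_poll, NAPI_POLL_WEIGHT);
	napi_enable(&adap->napi);
}
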
4281 4266
4282 void netif_napi_del(struct napi_struct *napi) 4267 void netif_napi_del(struct napi_struct *napi)
4283 { 4268 {
4284 struct sk_buff *skb, *next; 4269 struct sk_buff *skb, *next;
4285 4270
4286 list_del_init(&napi->dev_list); 4271 list_del_init(&napi->dev_list);
4287 napi_free_frags(napi); 4272 napi_free_frags(napi);
4288 4273
4289 for (skb = napi->gro_list; skb; skb = next) { 4274 for (skb = napi->gro_list; skb; skb = next) {
4290 next = skb->next; 4275 next = skb->next;
4291 skb->next = NULL; 4276 skb->next = NULL;
4292 kfree_skb(skb); 4277 kfree_skb(skb);
4293 } 4278 }
4294 4279
4295 napi->gro_list = NULL; 4280 napi->gro_list = NULL;
4296 napi->gro_count = 0; 4281 napi->gro_count = 0;
4297 } 4282 }
4298 EXPORT_SYMBOL(netif_napi_del); 4283 EXPORT_SYMBOL(netif_napi_del);
4299 4284
4300 static void net_rx_action(struct softirq_action *h) 4285 static void net_rx_action(struct softirq_action *h)
4301 { 4286 {
4302 struct softnet_data *sd = &__get_cpu_var(softnet_data); 4287 struct softnet_data *sd = &__get_cpu_var(softnet_data);
4303 unsigned long time_limit = jiffies + 2; 4288 unsigned long time_limit = jiffies + 2;
4304 int budget = netdev_budget; 4289 int budget = netdev_budget;
4305 void *have; 4290 void *have;
4306 4291
4307 local_irq_disable(); 4292 local_irq_disable();
4308 4293
4309 while (!list_empty(&sd->poll_list)) { 4294 while (!list_empty(&sd->poll_list)) {
4310 struct napi_struct *n; 4295 struct napi_struct *n;
4311 int work, weight; 4296 int work, weight;
4312 4297
4313 /* If the softirq window is exhausted then punt. 4298 /* If the softirq window is exhausted then punt.
4314 * Allow this to run for 2 jiffies, which will allow 4299 * Allow this to run for 2 jiffies, which will allow
4315 * an average latency of 1.5/HZ. 4300 * an average latency of 1.5/HZ.
4316 */ 4301 */
4317 if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit))) 4302 if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit)))
4318 goto softnet_break; 4303 goto softnet_break;
4319 4304
4320 local_irq_enable(); 4305 local_irq_enable();
4321 4306
4322 /* Even though interrupts have been re-enabled, this 4307 /* Even though interrupts have been re-enabled, this
4323 * access is safe because interrupts can only add new 4308 * access is safe because interrupts can only add new
4324 * entries to the tail of this list, and only ->poll() 4309 * entries to the tail of this list, and only ->poll()
4325 * calls can remove this head entry from the list. 4310 * calls can remove this head entry from the list.
4326 */ 4311 */
4327 n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list); 4312 n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
4328 4313
4329 have = netpoll_poll_lock(n); 4314 have = netpoll_poll_lock(n);
4330 4315
4331 weight = n->weight; 4316 weight = n->weight;
4332 4317
4333 /* This NAPI_STATE_SCHED test is for avoiding a race 4318 /* This NAPI_STATE_SCHED test is for avoiding a race
4334 * with netpoll's poll_napi(). Only the entity which 4319 * with netpoll's poll_napi(). Only the entity which
4335 * obtains the lock and sees NAPI_STATE_SCHED set will 4320 * obtains the lock and sees NAPI_STATE_SCHED set will
4336 * actually make the ->poll() call. Therefore we avoid 4321 * actually make the ->poll() call. Therefore we avoid
4337 * accidentally calling ->poll() when NAPI is not scheduled. 4322 * accidentally calling ->poll() when NAPI is not scheduled.
4338 */ 4323 */
4339 work = 0; 4324 work = 0;
4340 if (test_bit(NAPI_STATE_SCHED, &n->state)) { 4325 if (test_bit(NAPI_STATE_SCHED, &n->state)) {
4341 work = n->poll(n, weight); 4326 work = n->poll(n, weight);
4342 trace_napi_poll(n); 4327 trace_napi_poll(n);
4343 } 4328 }
4344 4329
4345 WARN_ON_ONCE(work > weight); 4330 WARN_ON_ONCE(work > weight);
4346 4331
4347 budget -= work; 4332 budget -= work;
4348 4333
4349 local_irq_disable(); 4334 local_irq_disable();
4350 4335
4351 /* Drivers must not modify the NAPI state if they 4336 /* Drivers must not modify the NAPI state if they
4352 * consume the entire weight. In such cases this code 4337 * consume the entire weight. In such cases this code
4353 * still "owns" the NAPI instance and therefore can 4338 * still "owns" the NAPI instance and therefore can
4354 * move the instance around on the list at-will. 4339 * move the instance around on the list at-will.
4355 */ 4340 */
4356 if (unlikely(work == weight)) { 4341 if (unlikely(work == weight)) {
4357 if (unlikely(napi_disable_pending(n))) { 4342 if (unlikely(napi_disable_pending(n))) {
4358 local_irq_enable(); 4343 local_irq_enable();
4359 napi_complete(n); 4344 napi_complete(n);
4360 local_irq_disable(); 4345 local_irq_disable();
4361 } else { 4346 } else {
4362 if (n->gro_list) { 4347 if (n->gro_list) {
4363 /* flush too old packets 4348 /* flush too old packets
4364 * If HZ < 1000, flush all packets. 4349 * If HZ < 1000, flush all packets.
4365 */ 4350 */
4366 local_irq_enable(); 4351 local_irq_enable();
4367 napi_gro_flush(n, HZ >= 1000); 4352 napi_gro_flush(n, HZ >= 1000);
4368 local_irq_disable(); 4353 local_irq_disable();
4369 } 4354 }
4370 list_move_tail(&n->poll_list, &sd->poll_list); 4355 list_move_tail(&n->poll_list, &sd->poll_list);
4371 } 4356 }
4372 } 4357 }
4373 4358
4374 netpoll_poll_unlock(have); 4359 netpoll_poll_unlock(have);
4375 } 4360 }
4376 out: 4361 out:
4377 net_rps_action_and_irq_enable(sd); 4362 net_rps_action_and_irq_enable(sd);
4378 4363
4379 #ifdef CONFIG_NET_DMA 4364 #ifdef CONFIG_NET_DMA
4380 /* 4365 /*
4381 * There may not be any more sk_buffs coming right now, so push 4366 * There may not be any more sk_buffs coming right now, so push
4382 * any pending DMA copies to hardware 4367 * any pending DMA copies to hardware
4383 */ 4368 */
4384 dma_issue_pending_all(); 4369 dma_issue_pending_all();
4385 #endif 4370 #endif
4386 4371
4387 return; 4372 return;
4388 4373
4389 softnet_break: 4374 softnet_break:
4390 sd->time_squeeze++; 4375 sd->time_squeeze++;
4391 __raise_softirq_irqoff(NET_RX_SOFTIRQ); 4376 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
4392 goto out; 4377 goto out;
4393 } 4378 }
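
net_rx_action() bounds its work in two independent ways: a global packet budget (netdev_budget) shared by every NAPI instance on the CPU, and a wall-clock limit of two jiffies, whichever trips first; a poller that uses its full weight is rotated to the tail and revisited, while one that finishes early completes and drops off the list. A userspace sketch of that budgeted round-robin structure follows; the pollers, weights and backlog numbers are invented for the example.

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define NR_POLLERS 3

static int backlog[NR_POLLERS] = { 9, 2, 5 };	/* pretend pending work per poller */

/* Do up to 'weight' units of work for poller 'idx'; return how much was done. */
static int poll_one(int idx, int weight)
{
	int done = backlog[idx] < weight ? backlog[idx] : weight;

	backlog[idx] -= done;
	printf("poller %d did %d units, %d left\n", idx, done, backlog[idx]);
	return done;
}

int main(void)
{
	int budget = 10;			/* cf. netdev_budget */
	time_t deadline = time(NULL) + 2;	/* cf. the two-jiffy limit */
	int weight = 4, idx = 0, nr_active = NR_POLLERS;
	bool active[NR_POLLERS] = { true, true, true };

	while (nr_active && budget > 0 && time(NULL) <= deadline) {
		int work;

		if (!active[idx]) {			/* skip pollers that already completed */
			idx = (idx + 1) % NR_POLLERS;
			continue;
		}
		work = poll_one(idx, weight);
		budget -= work;
		if (work < weight) {			/* drained: the real code calls napi_complete() */
			active[idx] = false;
			nr_active--;
		}
		idx = (idx + 1) % NR_POLLERS;		/* a busy poller goes to the back of the line */
	}
	return 0;
}
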
4394 4379
4395 struct netdev_adjacent { 4380 struct netdev_adjacent {
4396 struct net_device *dev; 4381 struct net_device *dev;
4397 4382
4398 /* upper master flag; there can only be one master device per list */ 4383 /* upper master flag; there can only be one master device per list */
4399 bool master; 4384 bool master;
4400 4385
4401 /* counter for the number of times this device was added to us */ 4386 /* counter for the number of times this device was added to us */
4402 u16 ref_nr; 4387 u16 ref_nr;
4403 4388
4404 /* private field for the users */ 4389 /* private field for the users */
4405 void *private; 4390 void *private;
4406 4391
4407 struct list_head list; 4392 struct list_head list;
4408 struct rcu_head rcu; 4393 struct rcu_head rcu;
4409 }; 4394 };
4410 4395
4411 static struct netdev_adjacent *__netdev_find_adj_rcu(struct net_device *dev, 4396 static struct netdev_adjacent *__netdev_find_adj_rcu(struct net_device *dev,
4412 struct net_device *adj_dev, 4397 struct net_device *adj_dev,
4413 struct list_head *adj_list) 4398 struct list_head *adj_list)
4414 { 4399 {
4415 struct netdev_adjacent *adj; 4400 struct netdev_adjacent *adj;
4416 4401
4417 list_for_each_entry_rcu(adj, adj_list, list) { 4402 list_for_each_entry_rcu(adj, adj_list, list) {
4418 if (adj->dev == adj_dev) 4403 if (adj->dev == adj_dev)
4419 return adj; 4404 return adj;
4420 } 4405 }
4421 return NULL; 4406 return NULL;
4422 } 4407 }
4423 4408
4424 static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev, 4409 static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
4425 struct net_device *adj_dev, 4410 struct net_device *adj_dev,
4426 struct list_head *adj_list) 4411 struct list_head *adj_list)
4427 { 4412 {
4428 struct netdev_adjacent *adj; 4413 struct netdev_adjacent *adj;
4429 4414
4430 list_for_each_entry(adj, adj_list, list) { 4415 list_for_each_entry(adj, adj_list, list) {
4431 if (adj->dev == adj_dev) 4416 if (adj->dev == adj_dev)
4432 return adj; 4417 return adj;
4433 } 4418 }
4434 return NULL; 4419 return NULL;
4435 } 4420 }
4436 4421
4437 /** 4422 /**
4438 * netdev_has_upper_dev - Check if device is linked to an upper device 4423 * netdev_has_upper_dev - Check if device is linked to an upper device
4439 * @dev: device 4424 * @dev: device
4440 * @upper_dev: upper device to check 4425 * @upper_dev: upper device to check
4441 * 4426 *
4442 * Find out if a device is linked to the specified upper device and return true 4427 * Find out if a device is linked to the specified upper device and return true
4443 * in case it is. Note that this checks only the immediate upper device, 4428 * in case it is. Note that this checks only the immediate upper device,
4444 * not the complete stack of devices. The caller must hold the RTNL lock. 4429 * not the complete stack of devices. The caller must hold the RTNL lock.
4445 */ 4430 */
4446 bool netdev_has_upper_dev(struct net_device *dev, 4431 bool netdev_has_upper_dev(struct net_device *dev,
4447 struct net_device *upper_dev) 4432 struct net_device *upper_dev)
4448 { 4433 {
4449 ASSERT_RTNL(); 4434 ASSERT_RTNL();
4450 4435
4451 return __netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper); 4436 return __netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper);
4452 } 4437 }
4453 EXPORT_SYMBOL(netdev_has_upper_dev); 4438 EXPORT_SYMBOL(netdev_has_upper_dev);
4454 4439
4455 /** 4440 /**
4456 * netdev_has_any_upper_dev - Check if device is linked to some device 4441 * netdev_has_any_upper_dev - Check if device is linked to some device
4457 * @dev: device 4442 * @dev: device
4458 * 4443 *
4459 * Find out if a device is linked to an upper device and return true in case 4444 * Find out if a device is linked to an upper device and return true in case
4460 * it is. The caller must hold the RTNL lock. 4445 * it is. The caller must hold the RTNL lock.
4461 */ 4446 */
4462 bool netdev_has_any_upper_dev(struct net_device *dev) 4447 bool netdev_has_any_upper_dev(struct net_device *dev)
4463 { 4448 {
4464 ASSERT_RTNL(); 4449 ASSERT_RTNL();
4465 4450
4466 return !list_empty(&dev->all_adj_list.upper); 4451 return !list_empty(&dev->all_adj_list.upper);
4467 } 4452 }
4468 EXPORT_SYMBOL(netdev_has_any_upper_dev); 4453 EXPORT_SYMBOL(netdev_has_any_upper_dev);
4469 4454
4470 /** 4455 /**
4471 * netdev_master_upper_dev_get - Get master upper device 4456 * netdev_master_upper_dev_get - Get master upper device
4472 * @dev: device 4457 * @dev: device
4473 * 4458 *
4474 * Find a master upper device and return a pointer to it, or NULL in case 4459 * Find a master upper device and return a pointer to it, or NULL in case
4475 * it's not there. The caller must hold the RTNL lock. 4460 * it's not there. The caller must hold the RTNL lock.
4476 */ 4461 */
4477 struct net_device *netdev_master_upper_dev_get(struct net_device *dev) 4462 struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
4478 { 4463 {
4479 struct netdev_adjacent *upper; 4464 struct netdev_adjacent *upper;
4480 4465
4481 ASSERT_RTNL(); 4466 ASSERT_RTNL();
4482 4467
4483 if (list_empty(&dev->adj_list.upper)) 4468 if (list_empty(&dev->adj_list.upper))
4484 return NULL; 4469 return NULL;
4485 4470
4486 upper = list_first_entry(&dev->adj_list.upper, 4471 upper = list_first_entry(&dev->adj_list.upper,
4487 struct netdev_adjacent, list); 4472 struct netdev_adjacent, list);
4488 if (likely(upper->master)) 4473 if (likely(upper->master))
4489 return upper->dev; 4474 return upper->dev;
4490 return NULL; 4475 return NULL;
4491 } 4476 }
4492 EXPORT_SYMBOL(netdev_master_upper_dev_get); 4477 EXPORT_SYMBOL(netdev_master_upper_dev_get);
4493 4478
4494 void *netdev_adjacent_get_private(struct list_head *adj_list) 4479 void *netdev_adjacent_get_private(struct list_head *adj_list)
4495 { 4480 {
4496 struct netdev_adjacent *adj; 4481 struct netdev_adjacent *adj;
4497 4482
4498 adj = list_entry(adj_list, struct netdev_adjacent, list); 4483 adj = list_entry(adj_list, struct netdev_adjacent, list);
4499 4484
4500 return adj->private; 4485 return adj->private;
4501 } 4486 }
4502 EXPORT_SYMBOL(netdev_adjacent_get_private); 4487 EXPORT_SYMBOL(netdev_adjacent_get_private);
4503 4488
4504 /** 4489 /**
4505 * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list 4490 * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list
4506 * @dev: device 4491 * @dev: device
4507 * @iter: list_head ** of the current position 4492 * @iter: list_head ** of the current position
4508 * 4493 *
4509 * Gets the next device from the dev's upper list, starting from iter 4494 * Gets the next device from the dev's upper list, starting from iter
4510 * position. The caller must hold RCU read lock. 4495 * position. The caller must hold RCU read lock.
4511 */ 4496 */
4512 struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, 4497 struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
4513 struct list_head **iter) 4498 struct list_head **iter)
4514 { 4499 {
4515 struct netdev_adjacent *upper; 4500 struct netdev_adjacent *upper;
4516 4501
4517 WARN_ON_ONCE(!rcu_read_lock_held()); 4502 WARN_ON_ONCE(!rcu_read_lock_held());
4518 4503
4519 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); 4504 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
4520 4505
4521 if (&upper->list == &dev->all_adj_list.upper) 4506 if (&upper->list == &dev->all_adj_list.upper)
4522 return NULL; 4507 return NULL;
4523 4508
4524 *iter = &upper->list; 4509 *iter = &upper->list;
4525 4510
4526 return upper->dev; 4511 return upper->dev;
4527 } 4512 }
4528 EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu); 4513 EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu);
4529 4514
4530 /** 4515 /**
4531 * netdev_lower_get_next_private - Get the next ->private from the 4516 * netdev_lower_get_next_private - Get the next ->private from the
4532 * lower neighbour list 4517 * lower neighbour list
4533 * @dev: device 4518 * @dev: device
4534 * @iter: list_head ** of the current position 4519 * @iter: list_head ** of the current position
4535 * 4520 *
4536 * Gets the next netdev_adjacent->private from the dev's lower neighbour 4521 * Gets the next netdev_adjacent->private from the dev's lower neighbour
4537 * list, starting from the iter position. The caller must either hold the 4522 * list, starting from the iter position. The caller must either hold the
4538 * RTNL lock or its own locking that guarantees that the neighbour lower 4523 * RTNL lock or its own locking that guarantees that the neighbour lower
4539 * list will remain unchanged. 4524 * list will remain unchanged.
4540 */ 4525 */
4541 void *netdev_lower_get_next_private(struct net_device *dev, 4526 void *netdev_lower_get_next_private(struct net_device *dev,
4542 struct list_head **iter) 4527 struct list_head **iter)
4543 { 4528 {
4544 struct netdev_adjacent *lower; 4529 struct netdev_adjacent *lower;
4545 4530
4546 lower = list_entry(*iter, struct netdev_adjacent, list); 4531 lower = list_entry(*iter, struct netdev_adjacent, list);
4547 4532
4548 if (&lower->list == &dev->adj_list.lower) 4533 if (&lower->list == &dev->adj_list.lower)
4549 return NULL; 4534 return NULL;
4550 4535
4551 if (iter) 4536 if (iter)
4552 *iter = lower->list.next; 4537 *iter = lower->list.next;
4553 4538
4554 return lower->private; 4539 return lower->private;
4555 } 4540 }
4556 EXPORT_SYMBOL(netdev_lower_get_next_private); 4541 EXPORT_SYMBOL(netdev_lower_get_next_private);
4557 4542
4558 /** 4543 /**
4559 * netdev_lower_get_next_private_rcu - Get the next ->private from the 4544 * netdev_lower_get_next_private_rcu - Get the next ->private from the
4560 * lower neighbour list, RCU 4545 * lower neighbour list, RCU
4561 * variant 4546 * variant
4562 * @dev: device 4547 * @dev: device
4563 * @iter: list_head ** of the current position 4548 * @iter: list_head ** of the current position
4564 * 4549 *
4565 * Gets the next netdev_adjacent->private from the dev's lower neighbour 4550 * Gets the next netdev_adjacent->private from the dev's lower neighbour
4566 * list, starting from iter position. The caller must hold RCU read lock. 4551 * list, starting from iter position. The caller must hold RCU read lock.
4567 */ 4552 */
4568 void *netdev_lower_get_next_private_rcu(struct net_device *dev, 4553 void *netdev_lower_get_next_private_rcu(struct net_device *dev,
4569 struct list_head **iter) 4554 struct list_head **iter)
4570 { 4555 {
4571 struct netdev_adjacent *lower; 4556 struct netdev_adjacent *lower;
4572 4557
4573 WARN_ON_ONCE(!rcu_read_lock_held()); 4558 WARN_ON_ONCE(!rcu_read_lock_held());
4574 4559
4575 lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); 4560 lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
4576 4561
4577 if (&lower->list == &dev->adj_list.lower) 4562 if (&lower->list == &dev->adj_list.lower)
4578 return NULL; 4563 return NULL;
4579 4564
4580 if (iter) 4565 if (iter)
4581 *iter = &lower->list; 4566 *iter = &lower->list;
4582 4567
4583 return lower->private; 4568 return lower->private;
4584 } 4569 }
4585 EXPORT_SYMBOL(netdev_lower_get_next_private_rcu); 4570 EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
4586 4571
4587 /** 4572 /**
4588 * netdev_master_upper_dev_get_rcu - Get master upper device 4573 * netdev_master_upper_dev_get_rcu - Get master upper device
4589 * @dev: device 4574 * @dev: device
4590 * 4575 *
4591 * Find a master upper device and return a pointer to it, or NULL in case 4576 * Find a master upper device and return a pointer to it, or NULL in case
4592 * it's not there. The caller must hold the RCU read lock. 4577 * it's not there. The caller must hold the RCU read lock.
4593 */ 4578 */
4594 struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev) 4579 struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
4595 { 4580 {
4596 struct netdev_adjacent *upper; 4581 struct netdev_adjacent *upper;
4597 4582
4598 upper = list_first_or_null_rcu(&dev->adj_list.upper, 4583 upper = list_first_or_null_rcu(&dev->adj_list.upper,
4599 struct netdev_adjacent, list); 4584 struct netdev_adjacent, list);
4600 if (upper && likely(upper->master)) 4585 if (upper && likely(upper->master))
4601 return upper->dev; 4586 return upper->dev;
4602 return NULL; 4587 return NULL;
4603 } 4588 }
4604 EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu); 4589 EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
4605 4590
4606 static int __netdev_adjacent_dev_insert(struct net_device *dev, 4591 static int __netdev_adjacent_dev_insert(struct net_device *dev,
4607 struct net_device *adj_dev, 4592 struct net_device *adj_dev,
4608 struct list_head *dev_list, 4593 struct list_head *dev_list,
4609 void *private, bool master) 4594 void *private, bool master)
4610 { 4595 {
4611 struct netdev_adjacent *adj; 4596 struct netdev_adjacent *adj;
4612 char linkname[IFNAMSIZ+7]; 4597 char linkname[IFNAMSIZ+7];
4613 int ret; 4598 int ret;
4614 4599
4615 adj = __netdev_find_adj(dev, adj_dev, dev_list); 4600 adj = __netdev_find_adj(dev, adj_dev, dev_list);
4616 4601
4617 if (adj) { 4602 if (adj) {
4618 adj->ref_nr++; 4603 adj->ref_nr++;
4619 return 0; 4604 return 0;
4620 } 4605 }
4621 4606
4622 adj = kmalloc(sizeof(*adj), GFP_KERNEL); 4607 adj = kmalloc(sizeof(*adj), GFP_KERNEL);
4623 if (!adj) 4608 if (!adj)
4624 return -ENOMEM; 4609 return -ENOMEM;
4625 4610
4626 adj->dev = adj_dev; 4611 adj->dev = adj_dev;
4627 adj->master = master; 4612 adj->master = master;
4628 adj->ref_nr = 1; 4613 adj->ref_nr = 1;
4629 adj->private = private; 4614 adj->private = private;
4630 dev_hold(adj_dev); 4615 dev_hold(adj_dev);
4631 4616
4632 pr_debug("dev_hold for %s, because of link added from %s to %s\n", 4617 pr_debug("dev_hold for %s, because of link added from %s to %s\n",
4633 adj_dev->name, dev->name, adj_dev->name); 4618 adj_dev->name, dev->name, adj_dev->name);
4634 4619
4635 if (dev_list == &dev->adj_list.lower) { 4620 if (dev_list == &dev->adj_list.lower) {
4636 sprintf(linkname, "lower_%s", adj_dev->name); 4621 sprintf(linkname, "lower_%s", adj_dev->name);
4637 ret = sysfs_create_link(&(dev->dev.kobj), 4622 ret = sysfs_create_link(&(dev->dev.kobj),
4638 &(adj_dev->dev.kobj), linkname); 4623 &(adj_dev->dev.kobj), linkname);
4639 if (ret) 4624 if (ret)
4640 goto free_adj; 4625 goto free_adj;
4641 } else if (dev_list == &dev->adj_list.upper) { 4626 } else if (dev_list == &dev->adj_list.upper) {
4642 sprintf(linkname, "upper_%s", adj_dev->name); 4627 sprintf(linkname, "upper_%s", adj_dev->name);
4643 ret = sysfs_create_link(&(dev->dev.kobj), 4628 ret = sysfs_create_link(&(dev->dev.kobj),
4644 &(adj_dev->dev.kobj), linkname); 4629 &(adj_dev->dev.kobj), linkname);
4645 if (ret) 4630 if (ret)
4646 goto free_adj; 4631 goto free_adj;
4647 } 4632 }
4648 4633
4649 /* Ensure that master link is always the first item in list. */ 4634 /* Ensure that master link is always the first item in list. */
4650 if (master) { 4635 if (master) {
4651 ret = sysfs_create_link(&(dev->dev.kobj), 4636 ret = sysfs_create_link(&(dev->dev.kobj),
4652 &(adj_dev->dev.kobj), "master"); 4637 &(adj_dev->dev.kobj), "master");
4653 if (ret) 4638 if (ret)
4654 goto remove_symlinks; 4639 goto remove_symlinks;
4655 4640
4656 list_add_rcu(&adj->list, dev_list); 4641 list_add_rcu(&adj->list, dev_list);
4657 } else { 4642 } else {
4658 list_add_tail_rcu(&adj->list, dev_list); 4643 list_add_tail_rcu(&adj->list, dev_list);
4659 } 4644 }
4660 4645
4661 return 0; 4646 return 0;
4662 4647
4663 remove_symlinks: 4648 remove_symlinks:
4664 if (dev_list == &dev->adj_list.lower) { 4649 if (dev_list == &dev->adj_list.lower) {
4665 sprintf(linkname, "lower_%s", adj_dev->name); 4650 sprintf(linkname, "lower_%s", adj_dev->name);
4666 sysfs_remove_link(&(dev->dev.kobj), linkname); 4651 sysfs_remove_link(&(dev->dev.kobj), linkname);
4667 } else if (dev_list == &dev->adj_list.upper) { 4652 } else if (dev_list == &dev->adj_list.upper) {
4668 sprintf(linkname, "upper_%s", adj_dev->name); 4653 sprintf(linkname, "upper_%s", adj_dev->name);
4669 sysfs_remove_link(&(dev->dev.kobj), linkname); 4654 sysfs_remove_link(&(dev->dev.kobj), linkname);
4670 } 4655 }
4671 4656
4672 free_adj: 4657 free_adj:
4673 kfree(adj); 4658 kfree(adj);
4674 dev_put(adj_dev); 4659 dev_put(adj_dev);
4675 4660
4676 return ret; 4661 return ret;
4677 } 4662 }
4678 4663
4679 void __netdev_adjacent_dev_remove(struct net_device *dev, 4664 void __netdev_adjacent_dev_remove(struct net_device *dev,
4680 struct net_device *adj_dev, 4665 struct net_device *adj_dev,
4681 struct list_head *dev_list) 4666 struct list_head *dev_list)
4682 { 4667 {
4683 struct netdev_adjacent *adj; 4668 struct netdev_adjacent *adj;
4684 char linkname[IFNAMSIZ+7]; 4669 char linkname[IFNAMSIZ+7];
4685 4670
4686 adj = __netdev_find_adj(dev, adj_dev, dev_list); 4671 adj = __netdev_find_adj(dev, adj_dev, dev_list);
4687 4672
4688 if (!adj) { 4673 if (!adj) {
4689 pr_err("tried to remove device %s from %s\n", 4674 pr_err("tried to remove device %s from %s\n",
4690 dev->name, adj_dev->name); 4675 dev->name, adj_dev->name);
4691 BUG(); 4676 BUG();
4692 } 4677 }
4693 4678
4694 if (adj->ref_nr > 1) { 4679 if (adj->ref_nr > 1) {
4695 pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name, 4680 pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name,
4696 adj->ref_nr-1); 4681 adj->ref_nr-1);
4697 adj->ref_nr--; 4682 adj->ref_nr--;
4698 return; 4683 return;
4699 } 4684 }
4700 4685
4701 if (adj->master) 4686 if (adj->master)
4702 sysfs_remove_link(&(dev->dev.kobj), "master"); 4687 sysfs_remove_link(&(dev->dev.kobj), "master");
4703 4688
4704 if (dev_list == &dev->adj_list.lower) { 4689 if (dev_list == &dev->adj_list.lower) {
4705 sprintf(linkname, "lower_%s", adj_dev->name); 4690 sprintf(linkname, "lower_%s", adj_dev->name);
4706 sysfs_remove_link(&(dev->dev.kobj), linkname); 4691 sysfs_remove_link(&(dev->dev.kobj), linkname);
4707 } else if (dev_list == &dev->adj_list.upper) { 4692 } else if (dev_list == &dev->adj_list.upper) {
4708 sprintf(linkname, "upper_%s", adj_dev->name); 4693 sprintf(linkname, "upper_%s", adj_dev->name);
4709 sysfs_remove_link(&(dev->dev.kobj), linkname); 4694 sysfs_remove_link(&(dev->dev.kobj), linkname);
4710 } 4695 }
4711 4696
4712 list_del_rcu(&adj->list); 4697 list_del_rcu(&adj->list);
4713 pr_debug("dev_put for %s, because link removed from %s to %s\n", 4698 pr_debug("dev_put for %s, because link removed from %s to %s\n",
4714 adj_dev->name, dev->name, adj_dev->name); 4699 adj_dev->name, dev->name, adj_dev->name);
4715 dev_put(adj_dev); 4700 dev_put(adj_dev);
4716 kfree_rcu(adj, rcu); 4701 kfree_rcu(adj, rcu);
4717 } 4702 }
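
__netdev_adjacent_dev_insert() and __netdev_adjacent_dev_remove() above implement a reference-counted adjacency entry: linking the same pair of devices again only bumps ref_nr, and only the final remove actually unlinks and frees the entry. The same bookkeeping in a freestanding sketch; the list, the string identities and the adj_* helpers are invented for illustration and carry none of the sysfs or RCU details.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct adjacent {
	const char *peer;	/* identity of the linked device */
	int ref_nr;		/* how many times this link was requested */
	struct adjacent *next;
};

static struct adjacent *adj_list;

static struct adjacent *find_adj(const char *peer)
{
	struct adjacent *adj;

	for (adj = adj_list; adj; adj = adj->next)
		if (!strcmp(adj->peer, peer))
			return adj;
	return NULL;
}

/* Link: reuse an existing entry if present, otherwise allocate a new one. */
static int adj_insert(const char *peer)
{
	struct adjacent *adj = find_adj(peer);

	if (adj) {
		adj->ref_nr++;
		return 0;
	}
	adj = malloc(sizeof(*adj));
	if (!adj)
		return -1;
	adj->peer = peer;
	adj->ref_nr = 1;
	adj->next = adj_list;
	adj_list = adj;
	return 0;
}

/* Unlink: only drop the entry once the last reference goes away. */
static void adj_remove(const char *peer)
{
	struct adjacent **pp, *adj;

	for (pp = &adj_list; (adj = *pp); pp = &adj->next) {
		if (strcmp(adj->peer, peer))
			continue;
		if (--adj->ref_nr)
			return;		/* still referenced elsewhere */
		*pp = adj->next;
		free(adj);
		return;
	}
}

int main(void)
{
	adj_insert("eth0");
	adj_insert("eth0");		/* second link: ref_nr becomes 2 */
	adj_remove("eth0");		/* entry survives, ref_nr back to 1 */
	printf("%s\n", find_adj("eth0") ? "still linked" : "gone");
	adj_remove("eth0");		/* last reference: entry is freed */
	printf("%s\n", find_adj("eth0") ? "still linked" : "gone");
	return 0;
}
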
4718 4703
4719 int __netdev_adjacent_dev_link_lists(struct net_device *dev, 4704 int __netdev_adjacent_dev_link_lists(struct net_device *dev,
4720 struct net_device *upper_dev, 4705 struct net_device *upper_dev,
4721 struct list_head *up_list, 4706 struct list_head *up_list,
4722 struct list_head *down_list, 4707 struct list_head *down_list,
4723 void *private, bool master) 4708 void *private, bool master)
4724 { 4709 {
4725 int ret; 4710 int ret;
4726 4711
4727 ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private, 4712 ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private,
4728 master); 4713 master);
4729 if (ret) 4714 if (ret)
4730 return ret; 4715 return ret;
4731 4716
4732 ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private, 4717 ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private,
4733 false); 4718 false);
4734 if (ret) { 4719 if (ret) {
4735 __netdev_adjacent_dev_remove(dev, upper_dev, up_list); 4720 __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
4736 return ret; 4721 return ret;
4737 } 4722 }
4738 4723
4739 return 0; 4724 return 0;
4740 } 4725 }
4741 4726
4742 int __netdev_adjacent_dev_link(struct net_device *dev, 4727 int __netdev_adjacent_dev_link(struct net_device *dev,
4743 struct net_device *upper_dev) 4728 struct net_device *upper_dev)
4744 { 4729 {
4745 return __netdev_adjacent_dev_link_lists(dev, upper_dev, 4730 return __netdev_adjacent_dev_link_lists(dev, upper_dev,
4746 &dev->all_adj_list.upper, 4731 &dev->all_adj_list.upper,
4747 &upper_dev->all_adj_list.lower, 4732 &upper_dev->all_adj_list.lower,
4748 NULL, false); 4733 NULL, false);
4749 } 4734 }
4750 4735
4751 void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, 4736 void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
4752 struct net_device *upper_dev, 4737 struct net_device *upper_dev,
4753 struct list_head *up_list, 4738 struct list_head *up_list,
4754 struct list_head *down_list) 4739 struct list_head *down_list)
4755 { 4740 {
4756 __netdev_adjacent_dev_remove(dev, upper_dev, up_list); 4741 __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
4757 __netdev_adjacent_dev_remove(upper_dev, dev, down_list); 4742 __netdev_adjacent_dev_remove(upper_dev, dev, down_list);
4758 } 4743 }
4759 4744
4760 void __netdev_adjacent_dev_unlink(struct net_device *dev, 4745 void __netdev_adjacent_dev_unlink(struct net_device *dev,
4761 struct net_device *upper_dev) 4746 struct net_device *upper_dev)
4762 { 4747 {
4763 __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 4748 __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
4764 &dev->all_adj_list.upper, 4749 &dev->all_adj_list.upper,
4765 &upper_dev->all_adj_list.lower); 4750 &upper_dev->all_adj_list.lower);
4766 } 4751 }
4767 4752
4768 int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, 4753 int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
4769 struct net_device *upper_dev, 4754 struct net_device *upper_dev,
4770 void *private, bool master) 4755 void *private, bool master)
4771 { 4756 {
4772 int ret = __netdev_adjacent_dev_link(dev, upper_dev); 4757 int ret = __netdev_adjacent_dev_link(dev, upper_dev);
4773 4758
4774 if (ret) 4759 if (ret)
4775 return ret; 4760 return ret;
4776 4761
4777 ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, 4762 ret = __netdev_adjacent_dev_link_lists(dev, upper_dev,
4778 &dev->adj_list.upper, 4763 &dev->adj_list.upper,
4779 &upper_dev->adj_list.lower, 4764 &upper_dev->adj_list.lower,
4780 private, master); 4765 private, master);
4781 if (ret) { 4766 if (ret) {
4782 __netdev_adjacent_dev_unlink(dev, upper_dev); 4767 __netdev_adjacent_dev_unlink(dev, upper_dev);
4783 return ret; 4768 return ret;
4784 } 4769 }
4785 4770
4786 return 0; 4771 return 0;
4787 } 4772 }
4788 4773
4789 void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, 4774 void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
4790 struct net_device *upper_dev) 4775 struct net_device *upper_dev)
4791 { 4776 {
4792 __netdev_adjacent_dev_unlink(dev, upper_dev); 4777 __netdev_adjacent_dev_unlink(dev, upper_dev);
4793 __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 4778 __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
4794 &dev->adj_list.upper, 4779 &dev->adj_list.upper,
4795 &upper_dev->adj_list.lower); 4780 &upper_dev->adj_list.lower);
4796 } 4781 }
4797 4782
4798 static int __netdev_upper_dev_link(struct net_device *dev, 4783 static int __netdev_upper_dev_link(struct net_device *dev,
4799 struct net_device *upper_dev, bool master, 4784 struct net_device *upper_dev, bool master,
4800 void *private) 4785 void *private)
4801 { 4786 {
4802 struct netdev_adjacent *i, *j, *to_i, *to_j; 4787 struct netdev_adjacent *i, *j, *to_i, *to_j;
4803 int ret = 0; 4788 int ret = 0;
4804 4789
4805 ASSERT_RTNL(); 4790 ASSERT_RTNL();
4806 4791
4807 if (dev == upper_dev) 4792 if (dev == upper_dev)
4808 return -EBUSY; 4793 return -EBUSY;
4809 4794
4810 /* To prevent loops, check if dev is not upper device to upper_dev. */ 4795 /* To prevent loops, check if dev is not upper device to upper_dev. */
4811 if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper)) 4796 if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper))
4812 return -EBUSY; 4797 return -EBUSY;
4813 4798
4814 if (__netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper)) 4799 if (__netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper))
4815 return -EEXIST; 4800 return -EEXIST;
4816 4801
4817 if (master && netdev_master_upper_dev_get(dev)) 4802 if (master && netdev_master_upper_dev_get(dev))
4818 return -EBUSY; 4803 return -EBUSY;
4819 4804
4820 ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private, 4805 ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private,
4821 master); 4806 master);
4822 if (ret) 4807 if (ret)
4823 return ret; 4808 return ret;
4824 4809
4825 /* Now that we linked these devs, make all the upper_dev's 4810 /* Now that we linked these devs, make all the upper_dev's
4826 * all_adj_list.upper visible to every dev's all_adj_list.lower and 4811 * all_adj_list.upper visible to every dev's all_adj_list.lower and
4827 * vice versa, and don't forget the device itself. All of these 4812 * vice versa, and don't forget the device itself. All of these
4828 * links are non-neighbours. 4813 * links are non-neighbours.
4829 */ 4814 */
4830 list_for_each_entry(i, &dev->all_adj_list.lower, list) { 4815 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
4831 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { 4816 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
4832 pr_debug("Interlinking %s with %s, non-neighbour\n", 4817 pr_debug("Interlinking %s with %s, non-neighbour\n",
4833 i->dev->name, j->dev->name); 4818 i->dev->name, j->dev->name);
4834 ret = __netdev_adjacent_dev_link(i->dev, j->dev); 4819 ret = __netdev_adjacent_dev_link(i->dev, j->dev);
4835 if (ret) 4820 if (ret)
4836 goto rollback_mesh; 4821 goto rollback_mesh;
4837 } 4822 }
4838 } 4823 }
4839 4824
4840 /* add dev to every upper_dev's upper device */ 4825 /* add dev to every upper_dev's upper device */
4841 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { 4826 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
4842 pr_debug("linking %s's upper device %s with %s\n", 4827 pr_debug("linking %s's upper device %s with %s\n",
4843 upper_dev->name, i->dev->name, dev->name); 4828 upper_dev->name, i->dev->name, dev->name);
4844 ret = __netdev_adjacent_dev_link(dev, i->dev); 4829 ret = __netdev_adjacent_dev_link(dev, i->dev);
4845 if (ret) 4830 if (ret)
4846 goto rollback_upper_mesh; 4831 goto rollback_upper_mesh;
4847 } 4832 }
4848 4833
4849 /* add upper_dev to every dev's lower device */ 4834 /* add upper_dev to every dev's lower device */
4850 list_for_each_entry(i, &dev->all_adj_list.lower, list) { 4835 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
4851 pr_debug("linking %s's lower device %s with %s\n", dev->name, 4836 pr_debug("linking %s's lower device %s with %s\n", dev->name,
4852 i->dev->name, upper_dev->name); 4837 i->dev->name, upper_dev->name);
4853 ret = __netdev_adjacent_dev_link(i->dev, upper_dev); 4838 ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
4854 if (ret) 4839 if (ret)
4855 goto rollback_lower_mesh; 4840 goto rollback_lower_mesh;
4856 } 4841 }
4857 4842
4858 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); 4843 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
4859 return 0; 4844 return 0;
4860 4845
4861 rollback_lower_mesh: 4846 rollback_lower_mesh:
4862 to_i = i; 4847 to_i = i;
4863 list_for_each_entry(i, &dev->all_adj_list.lower, list) { 4848 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
4864 if (i == to_i) 4849 if (i == to_i)
4865 break; 4850 break;
4866 __netdev_adjacent_dev_unlink(i->dev, upper_dev); 4851 __netdev_adjacent_dev_unlink(i->dev, upper_dev);
4867 } 4852 }
4868 4853
4869 i = NULL; 4854 i = NULL;
4870 4855
4871 rollback_upper_mesh: 4856 rollback_upper_mesh:
4872 to_i = i; 4857 to_i = i;
4873 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { 4858 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
4874 if (i == to_i) 4859 if (i == to_i)
4875 break; 4860 break;
4876 __netdev_adjacent_dev_unlink(dev, i->dev); 4861 __netdev_adjacent_dev_unlink(dev, i->dev);
4877 } 4862 }
4878 4863
4879 i = j = NULL; 4864 i = j = NULL;
4880 4865
4881 rollback_mesh: 4866 rollback_mesh:
4882 to_i = i; 4867 to_i = i;
4883 to_j = j; 4868 to_j = j;
4884 list_for_each_entry(i, &dev->all_adj_list.lower, list) { 4869 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
4885 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { 4870 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
4886 if (i == to_i && j == to_j) 4871 if (i == to_i && j == to_j)
4887 break; 4872 break;
4888 __netdev_adjacent_dev_unlink(i->dev, j->dev); 4873 __netdev_adjacent_dev_unlink(i->dev, j->dev);
4889 } 4874 }
4890 if (i == to_i) 4875 if (i == to_i)
4891 break; 4876 break;
4892 } 4877 }
4893 4878
4894 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); 4879 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
4895 4880
4896 return ret; 4881 return ret;
4897 } 4882 }
4898 4883
4899 /** 4884 /**
4900 * netdev_upper_dev_link - Add a link to the upper device 4885 * netdev_upper_dev_link - Add a link to the upper device
4901 * @dev: device 4886 * @dev: device
4902 * @upper_dev: new upper device 4887 * @upper_dev: new upper device
4903 * 4888 *
4904 * Adds a link to device which is upper to this one. The caller must hold 4889 * Adds a link to device which is upper to this one. The caller must hold
4905 * the RTNL lock. On a failure a negative errno code is returned. 4890 * the RTNL lock. On a failure a negative errno code is returned.
4906 * On success the reference counts are adjusted and the function 4891 * On success the reference counts are adjusted and the function
4907 * returns zero. 4892 * returns zero.
4908 */ 4893 */
4909 int netdev_upper_dev_link(struct net_device *dev, 4894 int netdev_upper_dev_link(struct net_device *dev,
4910 struct net_device *upper_dev) 4895 struct net_device *upper_dev)
4911 { 4896 {
4912 return __netdev_upper_dev_link(dev, upper_dev, false, NULL); 4897 return __netdev_upper_dev_link(dev, upper_dev, false, NULL);
4913 } 4898 }
4914 EXPORT_SYMBOL(netdev_upper_dev_link); 4899 EXPORT_SYMBOL(netdev_upper_dev_link);
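For context outside the diff, a minimal sketch of a caller: a hypothetical stacked driver linking its lower device to the upper device it just created. The my_*() name is illustrative; only netdev_upper_dev_link() and the RTNL requirement come from the code above (the usual <linux/netdevice.h> and <linux/rtnetlink.h> includes are assumed).

static int my_stack_on(struct net_device *lower, struct net_device *upper)
{
	int err;

	ASSERT_RTNL();				/* caller must hold the RTNL lock */
	err = netdev_upper_dev_link(lower, upper);
	if (err)				/* -EBUSY, -EEXIST or -ENOMEM */
		return err;

	netdev_info(upper, "now stacked on %s\n", lower->name);
	return 0;
}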
4915 4900
4916 /** 4901 /**
4917 * netdev_master_upper_dev_link - Add a master link to the upper device 4902 * netdev_master_upper_dev_link - Add a master link to the upper device
4918 * @dev: device 4903 * @dev: device
4919 * @upper_dev: new upper device 4904 * @upper_dev: new upper device
4920 * 4905 *
4921 * Adds a link to device which is upper to this one. In this case, only 4906 * Adds a link to device which is upper to this one. In this case, only
4922 * one master upper device can be linked, although other non-master devices 4907 * one master upper device can be linked, although other non-master devices
4923 * might be linked as well. The caller must hold the RTNL lock. 4908 * might be linked as well. The caller must hold the RTNL lock.
4924 * On a failure a negative errno code is returned. On success the reference 4909 * On a failure a negative errno code is returned. On success the reference
4925 * counts are adjusted and the function returns zero. 4910 * counts are adjusted and the function returns zero.
4926 */ 4911 */
4927 int netdev_master_upper_dev_link(struct net_device *dev, 4912 int netdev_master_upper_dev_link(struct net_device *dev,
4928 struct net_device *upper_dev) 4913 struct net_device *upper_dev)
4929 { 4914 {
4930 return __netdev_upper_dev_link(dev, upper_dev, true, NULL); 4915 return __netdev_upper_dev_link(dev, upper_dev, true, NULL);
4931 } 4916 }
4932 EXPORT_SYMBOL(netdev_master_upper_dev_link); 4917 EXPORT_SYMBOL(netdev_master_upper_dev_link);
4933 4918
4934 int netdev_master_upper_dev_link_private(struct net_device *dev, 4919 int netdev_master_upper_dev_link_private(struct net_device *dev,
4935 struct net_device *upper_dev, 4920 struct net_device *upper_dev,
4936 void *private) 4921 void *private)
4937 { 4922 {
4938 return __netdev_upper_dev_link(dev, upper_dev, true, private); 4923 return __netdev_upper_dev_link(dev, upper_dev, true, private);
4939 } 4924 }
4940 EXPORT_SYMBOL(netdev_master_upper_dev_link_private); 4925 EXPORT_SYMBOL(netdev_master_upper_dev_link_private);
4941 4926
4942 /** 4927 /**
4943 * netdev_upper_dev_unlink - Removes a link to upper device 4928 * netdev_upper_dev_unlink - Removes a link to upper device
4944 * @dev: device 4929 * @dev: device
4945 * @upper_dev: upper device to unlink 4930 * @upper_dev: upper device to unlink
4946 * 4931 *
4947 * Removes a link to device which is upper to this one. The caller must hold 4932 * Removes a link to device which is upper to this one. The caller must hold
4948 * the RTNL lock. 4933 * the RTNL lock.
4949 */ 4934 */
4950 void netdev_upper_dev_unlink(struct net_device *dev, 4935 void netdev_upper_dev_unlink(struct net_device *dev,
4951 struct net_device *upper_dev) 4936 struct net_device *upper_dev)
4952 { 4937 {
4953 struct netdev_adjacent *i, *j; 4938 struct netdev_adjacent *i, *j;
4954 ASSERT_RTNL(); 4939 ASSERT_RTNL();
4955 4940
4956 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); 4941 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
4957 4942
4958 /* Here is the tricky part. We must remove all dev's lower 4943 /* Here is the tricky part. We must remove all dev's lower
4959 * devices from all upper_dev's upper devices and vice 4944 * devices from all upper_dev's upper devices and vice
4960 * versa, to maintain the graph relationship. 4945 * versa, to maintain the graph relationship.
4961 */ 4946 */
4962 list_for_each_entry(i, &dev->all_adj_list.lower, list) 4947 list_for_each_entry(i, &dev->all_adj_list.lower, list)
4963 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) 4948 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list)
4964 __netdev_adjacent_dev_unlink(i->dev, j->dev); 4949 __netdev_adjacent_dev_unlink(i->dev, j->dev);
4965 4950
4966 /* also remove the device itself from the lower/upper device 4951 /* also remove the device itself from the lower/upper device
4967 * list 4952 * list
4968 */ 4953 */
4969 list_for_each_entry(i, &dev->all_adj_list.lower, list) 4954 list_for_each_entry(i, &dev->all_adj_list.lower, list)
4970 __netdev_adjacent_dev_unlink(i->dev, upper_dev); 4955 __netdev_adjacent_dev_unlink(i->dev, upper_dev);
4971 4956
4972 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) 4957 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
4973 __netdev_adjacent_dev_unlink(dev, i->dev); 4958 __netdev_adjacent_dev_unlink(dev, i->dev);
4974 4959
4975 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); 4960 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
4976 } 4961 }
4977 EXPORT_SYMBOL(netdev_upper_dev_unlink); 4962 EXPORT_SYMBOL(netdev_upper_dev_unlink);
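And the matching teardown for the sketch above, again hypothetical: netdev_upper_dev_unlink() must also run under RTNL and fires the same NETDEV_CHANGEUPPER notification.

static void my_unstack(struct net_device *lower, struct net_device *upper)
{
	ASSERT_RTNL();
	netdev_upper_dev_unlink(lower, upper);
}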
4978 4963
4979 void *netdev_lower_dev_get_private_rcu(struct net_device *dev, 4964 void *netdev_lower_dev_get_private_rcu(struct net_device *dev,
4980 struct net_device *lower_dev) 4965 struct net_device *lower_dev)
4981 { 4966 {
4982 struct netdev_adjacent *lower; 4967 struct netdev_adjacent *lower;
4983 4968
4984 if (!lower_dev) 4969 if (!lower_dev)
4985 return NULL; 4970 return NULL;
4986 lower = __netdev_find_adj_rcu(dev, lower_dev, &dev->adj_list.lower); 4971 lower = __netdev_find_adj_rcu(dev, lower_dev, &dev->adj_list.lower);
4987 if (!lower) 4972 if (!lower)
4988 return NULL; 4973 return NULL;
4989 4974
4990 return lower->private; 4975 return lower->private;
4991 } 4976 }
4992 EXPORT_SYMBOL(netdev_lower_dev_get_private_rcu); 4977 EXPORT_SYMBOL(netdev_lower_dev_get_private_rcu);
4993 4978
4994 void *netdev_lower_dev_get_private(struct net_device *dev, 4979 void *netdev_lower_dev_get_private(struct net_device *dev,
4995 struct net_device *lower_dev) 4980 struct net_device *lower_dev)
4996 { 4981 {
4997 struct netdev_adjacent *lower; 4982 struct netdev_adjacent *lower;
4998 4983
4999 if (!lower_dev) 4984 if (!lower_dev)
5000 return NULL; 4985 return NULL;
5001 lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower); 4986 lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower);
5002 if (!lower) 4987 if (!lower)
5003 return NULL; 4988 return NULL;
5004 4989
5005 return lower->private; 4990 return lower->private;
5006 } 4991 }
5007 EXPORT_SYMBOL(netdev_lower_dev_get_private); 4992 EXPORT_SYMBOL(netdev_lower_dev_get_private);
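A sketch of how the private pointer is meant to be used: a hypothetical master driver attaches per-slave state when it links a slave and looks it up later. struct my_slave_priv and the my_*() helpers are made up; netdev_master_upper_dev_link_private() and netdev_lower_dev_get_private() are the functions above, both expected to be called with RTNL held.

struct my_slave_priv {
	u32 port_id;			/* whatever the master tracks per slave */
};

static int my_master_add_slave(struct net_device *master,
			       struct net_device *slave,
			       struct my_slave_priv *priv)
{
	/* stores 'priv' in the adjacency entry created for this pair */
	return netdev_master_upper_dev_link_private(slave, master, priv);
}

static u32 my_master_slave_port(struct net_device *master,
				struct net_device *slave)
{
	struct my_slave_priv *priv;

	priv = netdev_lower_dev_get_private(master, slave);
	return priv ? priv->port_id : 0;
}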
5008 4993
5009 static void dev_change_rx_flags(struct net_device *dev, int flags) 4994 static void dev_change_rx_flags(struct net_device *dev, int flags)
5010 { 4995 {
5011 const struct net_device_ops *ops = dev->netdev_ops; 4996 const struct net_device_ops *ops = dev->netdev_ops;
5012 4997
5013 if (ops->ndo_change_rx_flags) 4998 if (ops->ndo_change_rx_flags)
5014 ops->ndo_change_rx_flags(dev, flags); 4999 ops->ndo_change_rx_flags(dev, flags);
5015 } 5000 }
5016 5001
5017 static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) 5002 static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
5018 { 5003 {
5019 unsigned int old_flags = dev->flags; 5004 unsigned int old_flags = dev->flags;
5020 kuid_t uid; 5005 kuid_t uid;
5021 kgid_t gid; 5006 kgid_t gid;
5022 5007
5023 ASSERT_RTNL(); 5008 ASSERT_RTNL();
5024 5009
5025 dev->flags |= IFF_PROMISC; 5010 dev->flags |= IFF_PROMISC;
5026 dev->promiscuity += inc; 5011 dev->promiscuity += inc;
5027 if (dev->promiscuity == 0) { 5012 if (dev->promiscuity == 0) {
5028 /* 5013 /*
5029 * Avoid overflow. 5014 * Avoid overflow.
5030 * If inc causes overflow, untouch promisc and return error. 5015 * If inc causes overflow, untouch promisc and return error.
5031 */ 5016 */
5032 if (inc < 0) 5017 if (inc < 0)
5033 dev->flags &= ~IFF_PROMISC; 5018 dev->flags &= ~IFF_PROMISC;
5034 else { 5019 else {
5035 dev->promiscuity -= inc; 5020 dev->promiscuity -= inc;
5036 pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n", 5021 pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n",
5037 dev->name); 5022 dev->name);
5038 return -EOVERFLOW; 5023 return -EOVERFLOW;
5039 } 5024 }
5040 } 5025 }
5041 if (dev->flags != old_flags) { 5026 if (dev->flags != old_flags) {
5042 pr_info("device %s %s promiscuous mode\n", 5027 pr_info("device %s %s promiscuous mode\n",
5043 dev->name, 5028 dev->name,
5044 dev->flags & IFF_PROMISC ? "entered" : "left"); 5029 dev->flags & IFF_PROMISC ? "entered" : "left");
5045 if (audit_enabled) { 5030 if (audit_enabled) {
5046 current_uid_gid(&uid, &gid); 5031 current_uid_gid(&uid, &gid);
5047 audit_log(current->audit_context, GFP_ATOMIC, 5032 audit_log(current->audit_context, GFP_ATOMIC,
5048 AUDIT_ANOM_PROMISCUOUS, 5033 AUDIT_ANOM_PROMISCUOUS,
5049 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u", 5034 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
5050 dev->name, (dev->flags & IFF_PROMISC), 5035 dev->name, (dev->flags & IFF_PROMISC),
5051 (old_flags & IFF_PROMISC), 5036 (old_flags & IFF_PROMISC),
5052 from_kuid(&init_user_ns, audit_get_loginuid(current)), 5037 from_kuid(&init_user_ns, audit_get_loginuid(current)),
5053 from_kuid(&init_user_ns, uid), 5038 from_kuid(&init_user_ns, uid),
5054 from_kgid(&init_user_ns, gid), 5039 from_kgid(&init_user_ns, gid),
5055 audit_get_sessionid(current)); 5040 audit_get_sessionid(current));
5056 } 5041 }
5057 5042
5058 dev_change_rx_flags(dev, IFF_PROMISC); 5043 dev_change_rx_flags(dev, IFF_PROMISC);
5059 } 5044 }
5060 if (notify) 5045 if (notify)
5061 __dev_notify_flags(dev, old_flags, IFF_PROMISC); 5046 __dev_notify_flags(dev, old_flags, IFF_PROMISC);
5062 return 0; 5047 return 0;
5063 } 5048 }
5064 5049
5065 /** 5050 /**
5066 * dev_set_promiscuity - update promiscuity count on a device 5051 * dev_set_promiscuity - update promiscuity count on a device
5067 * @dev: device 5052 * @dev: device
5068 * @inc: modifier 5053 * @inc: modifier
5069 * 5054 *
5070 * Add or remove promiscuity from a device. While the count in the device 5055 * Add or remove promiscuity from a device. While the count in the device
5071 * remains above zero the interface remains promiscuous. Once it hits zero 5056 * remains above zero the interface remains promiscuous. Once it hits zero
5072 * the device reverts back to normal filtering operation. A negative inc 5057 * the device reverts back to normal filtering operation. A negative inc
5073 * value is used to drop promiscuity on the device. 5058 * value is used to drop promiscuity on the device.
5074 * Return 0 if successful or a negative errno code on error. 5059 * Return 0 if successful or a negative errno code on error.
5075 */ 5060 */
5076 int dev_set_promiscuity(struct net_device *dev, int inc) 5061 int dev_set_promiscuity(struct net_device *dev, int inc)
5077 { 5062 {
5078 unsigned int old_flags = dev->flags; 5063 unsigned int old_flags = dev->flags;
5079 int err; 5064 int err;
5080 5065
5081 err = __dev_set_promiscuity(dev, inc, true); 5066 err = __dev_set_promiscuity(dev, inc, true);
5082 if (err < 0) 5067 if (err < 0)
5083 return err; 5068 return err;
5084 if (dev->flags != old_flags) 5069 if (dev->flags != old_flags)
5085 dev_set_rx_mode(dev); 5070 dev_set_rx_mode(dev);
5086 return err; 5071 return err;
5087 } 5072 }
5088 EXPORT_SYMBOL(dev_set_promiscuity); 5073 EXPORT_SYMBOL(dev_set_promiscuity);
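A usage sketch (hypothetical packet-tap code, not from this patch): the +1/-1 pairing keeps the per-device promiscuity count balanced, and the RTNL lock is what __dev_set_promiscuity() asserts above.

static int my_tap_start(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_set_promiscuity(dev, 1);	/* enter promiscuous mode */
	rtnl_unlock();
	return err;
}

static void my_tap_stop(struct net_device *dev)
{
	rtnl_lock();
	dev_set_promiscuity(dev, -1);		/* drop our reference again */
	rtnl_unlock();
}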
5089 5074
5090 static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify) 5075 static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify)
5091 { 5076 {
5092 unsigned int old_flags = dev->flags, old_gflags = dev->gflags; 5077 unsigned int old_flags = dev->flags, old_gflags = dev->gflags;
5093 5078
5094 ASSERT_RTNL(); 5079 ASSERT_RTNL();
5095 5080
5096 dev->flags |= IFF_ALLMULTI; 5081 dev->flags |= IFF_ALLMULTI;
5097 dev->allmulti += inc; 5082 dev->allmulti += inc;
5098 if (dev->allmulti == 0) { 5083 if (dev->allmulti == 0) {
5099 /* 5084 /*
5100 * Avoid overflow. 5085 * Avoid overflow.
5101 * If inc causes overflow, untouch allmulti and return error. 5086 * If inc causes overflow, untouch allmulti and return error.
5102 */ 5087 */
5103 if (inc < 0) 5088 if (inc < 0)
5104 dev->flags &= ~IFF_ALLMULTI; 5089 dev->flags &= ~IFF_ALLMULTI;
5105 else { 5090 else {
5106 dev->allmulti -= inc; 5091 dev->allmulti -= inc;
5107 pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n", 5092 pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n",
5108 dev->name); 5093 dev->name);
5109 return -EOVERFLOW; 5094 return -EOVERFLOW;
5110 } 5095 }
5111 } 5096 }
5112 if (dev->flags ^ old_flags) { 5097 if (dev->flags ^ old_flags) {
5113 dev_change_rx_flags(dev, IFF_ALLMULTI); 5098 dev_change_rx_flags(dev, IFF_ALLMULTI);
5114 dev_set_rx_mode(dev); 5099 dev_set_rx_mode(dev);
5115 if (notify) 5100 if (notify)
5116 __dev_notify_flags(dev, old_flags, 5101 __dev_notify_flags(dev, old_flags,
5117 dev->gflags ^ old_gflags); 5102 dev->gflags ^ old_gflags);
5118 } 5103 }
5119 return 0; 5104 return 0;
5120 } 5105 }
5121 5106
5122 /** 5107 /**
5123 * dev_set_allmulti - update allmulti count on a device 5108 * dev_set_allmulti - update allmulti count on a device
5124 * @dev: device 5109 * @dev: device
5125 * @inc: modifier 5110 * @inc: modifier
5126 * 5111 *
5127 * Add or remove reception of all multicast frames to a device. While the 5112 * Add or remove reception of all multicast frames to a device. While the
5128 * count in the device remains above zero the interface remains listening 5113 * count in the device remains above zero the interface remains listening
5129 * to all interfaces. Once it hits zero the device reverts back to normal 5114 * to all interfaces. Once it hits zero the device reverts back to normal
5130 * filtering operation. A negative @inc value is used to drop the counter 5115 * filtering operation. A negative @inc value is used to drop the counter
5131 * when releasing a resource needing all multicasts. 5116 * when releasing a resource needing all multicasts.
5132 * Return 0 if successful or a negative errno code on error. 5117 * Return 0 if successful or a negative errno code on error.
5133 */ 5118 */
5134 5119
5135 int dev_set_allmulti(struct net_device *dev, int inc) 5120 int dev_set_allmulti(struct net_device *dev, int inc)
5136 { 5121 {
5137 return __dev_set_allmulti(dev, inc, true); 5122 return __dev_set_allmulti(dev, inc, true);
5138 } 5123 }
5139 EXPORT_SYMBOL(dev_set_allmulti); 5124 EXPORT_SYMBOL(dev_set_allmulti);
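The same counting pattern applies to allmulti, sketched here for a hypothetical multicast-routing user that needs every multicast frame while it is active:

static int my_mroute_start(struct net_device *dev)
{
	ASSERT_RTNL();			/* __dev_set_allmulti() asserts this */
	/* paired with dev_set_allmulti(dev, -1) on the stop path */
	return dev_set_allmulti(dev, 1);
}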
5140 5125
5141 /* 5126 /*
5142 * Upload unicast and multicast address lists to device and 5127 * Upload unicast and multicast address lists to device and
5143 * configure RX filtering. When the device doesn't support unicast 5128 * configure RX filtering. When the device doesn't support unicast
5144 * filtering it is put in promiscuous mode while unicast addresses 5129 * filtering it is put in promiscuous mode while unicast addresses
5145 * are present. 5130 * are present.
5146 */ 5131 */
5147 void __dev_set_rx_mode(struct net_device *dev) 5132 void __dev_set_rx_mode(struct net_device *dev)
5148 { 5133 {
5149 const struct net_device_ops *ops = dev->netdev_ops; 5134 const struct net_device_ops *ops = dev->netdev_ops;
5150 5135
5151 /* dev_open will call this function so the list will stay sane. */ 5136 /* dev_open will call this function so the list will stay sane. */
5152 if (!(dev->flags&IFF_UP)) 5137 if (!(dev->flags&IFF_UP))
5153 return; 5138 return;
5154 5139
5155 if (!netif_device_present(dev)) 5140 if (!netif_device_present(dev))
5156 return; 5141 return;
5157 5142
5158 if (!(dev->priv_flags & IFF_UNICAST_FLT)) { 5143 if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
5159 /* Unicast addresses changes may only happen under the rtnl, 5144 /* Unicast addresses changes may only happen under the rtnl,
5160 * therefore calling __dev_set_promiscuity here is safe. 5145 * therefore calling __dev_set_promiscuity here is safe.
5161 */ 5146 */
5162 if (!netdev_uc_empty(dev) && !dev->uc_promisc) { 5147 if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
5163 __dev_set_promiscuity(dev, 1, false); 5148 __dev_set_promiscuity(dev, 1, false);
5164 dev->uc_promisc = true; 5149 dev->uc_promisc = true;
5165 } else if (netdev_uc_empty(dev) && dev->uc_promisc) { 5150 } else if (netdev_uc_empty(dev) && dev->uc_promisc) {
5166 __dev_set_promiscuity(dev, -1, false); 5151 __dev_set_promiscuity(dev, -1, false);
5167 dev->uc_promisc = false; 5152 dev->uc_promisc = false;
5168 } 5153 }
5169 } 5154 }
5170 5155
5171 if (ops->ndo_set_rx_mode) 5156 if (ops->ndo_set_rx_mode)
5172 ops->ndo_set_rx_mode(dev); 5157 ops->ndo_set_rx_mode(dev);
5173 } 5158 }
5174 5159
5175 void dev_set_rx_mode(struct net_device *dev) 5160 void dev_set_rx_mode(struct net_device *dev)
5176 { 5161 {
5177 netif_addr_lock_bh(dev); 5162 netif_addr_lock_bh(dev);
5178 __dev_set_rx_mode(dev); 5163 __dev_set_rx_mode(dev);
5179 netif_addr_unlock_bh(dev); 5164 netif_addr_unlock_bh(dev);
5180 } 5165 }
5181 5166
5182 /** 5167 /**
5183 * dev_get_flags - get flags reported to userspace 5168 * dev_get_flags - get flags reported to userspace
5184 * @dev: device 5169 * @dev: device
5185 * 5170 *
5186 * Get the combination of flag bits exported through APIs to userspace. 5171 * Get the combination of flag bits exported through APIs to userspace.
5187 */ 5172 */
5188 unsigned int dev_get_flags(const struct net_device *dev) 5173 unsigned int dev_get_flags(const struct net_device *dev)
5189 { 5174 {
5190 unsigned int flags; 5175 unsigned int flags;
5191 5176
5192 flags = (dev->flags & ~(IFF_PROMISC | 5177 flags = (dev->flags & ~(IFF_PROMISC |
5193 IFF_ALLMULTI | 5178 IFF_ALLMULTI |
5194 IFF_RUNNING | 5179 IFF_RUNNING |
5195 IFF_LOWER_UP | 5180 IFF_LOWER_UP |
5196 IFF_DORMANT)) | 5181 IFF_DORMANT)) |
5197 (dev->gflags & (IFF_PROMISC | 5182 (dev->gflags & (IFF_PROMISC |
5198 IFF_ALLMULTI)); 5183 IFF_ALLMULTI));
5199 5184
5200 if (netif_running(dev)) { 5185 if (netif_running(dev)) {
5201 if (netif_oper_up(dev)) 5186 if (netif_oper_up(dev))
5202 flags |= IFF_RUNNING; 5187 flags |= IFF_RUNNING;
5203 if (netif_carrier_ok(dev)) 5188 if (netif_carrier_ok(dev))
5204 flags |= IFF_LOWER_UP; 5189 flags |= IFF_LOWER_UP;
5205 if (netif_dormant(dev)) 5190 if (netif_dormant(dev))
5206 flags |= IFF_DORMANT; 5191 flags |= IFF_DORMANT;
5207 } 5192 }
5208 5193
5209 return flags; 5194 return flags;
5210 } 5195 }
5211 EXPORT_SYMBOL(dev_get_flags); 5196 EXPORT_SYMBOL(dev_get_flags);
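A small sketch of reading the userspace view of the flags, for example from a hypothetical debug helper; only dev_get_flags() and the IFF_* bits are real here.

static void my_report_flags(const struct net_device *dev)
{
	unsigned int flags = dev_get_flags(dev);

	pr_info("%s: flags 0x%x%s%s\n", dev->name, flags,
		flags & IFF_UP ? " up" : "",
		flags & IFF_RUNNING ? " running" : "");
}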
5212 5197
5213 int __dev_change_flags(struct net_device *dev, unsigned int flags) 5198 int __dev_change_flags(struct net_device *dev, unsigned int flags)
5214 { 5199 {
5215 unsigned int old_flags = dev->flags; 5200 unsigned int old_flags = dev->flags;
5216 int ret; 5201 int ret;
5217 5202
5218 ASSERT_RTNL(); 5203 ASSERT_RTNL();
5219 5204
5220 /* 5205 /*
5221 * Set the flags on our device. 5206 * Set the flags on our device.
5222 */ 5207 */
5223 5208
5224 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP | 5209 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
5225 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL | 5210 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
5226 IFF_AUTOMEDIA)) | 5211 IFF_AUTOMEDIA)) |
5227 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC | 5212 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
5228 IFF_ALLMULTI)); 5213 IFF_ALLMULTI));
5229 5214
5230 /* 5215 /*
5231 * Load in the correct multicast list now the flags have changed. 5216 * Load in the correct multicast list now the flags have changed.
5232 */ 5217 */
5233 5218
5234 if ((old_flags ^ flags) & IFF_MULTICAST) 5219 if ((old_flags ^ flags) & IFF_MULTICAST)
5235 dev_change_rx_flags(dev, IFF_MULTICAST); 5220 dev_change_rx_flags(dev, IFF_MULTICAST);
5236 5221
5237 dev_set_rx_mode(dev); 5222 dev_set_rx_mode(dev);
5238 5223
5239 /* 5224 /*
5240 * Have we downed the interface. We handle IFF_UP ourselves 5225 * Have we downed the interface. We handle IFF_UP ourselves
5241 * according to user attempts to set it, rather than blindly 5226 * according to user attempts to set it, rather than blindly
5242 * setting it. 5227 * setting it.
5243 */ 5228 */
5244 5229
5245 ret = 0; 5230 ret = 0;
5246 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */ 5231 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
5247 ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev); 5232 ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
5248 5233
5249 if (!ret) 5234 if (!ret)
5250 dev_set_rx_mode(dev); 5235 dev_set_rx_mode(dev);
5251 } 5236 }
5252 5237
5253 if ((flags ^ dev->gflags) & IFF_PROMISC) { 5238 if ((flags ^ dev->gflags) & IFF_PROMISC) {
5254 int inc = (flags & IFF_PROMISC) ? 1 : -1; 5239 int inc = (flags & IFF_PROMISC) ? 1 : -1;
5255 unsigned int old_flags = dev->flags; 5240 unsigned int old_flags = dev->flags;
5256 5241
5257 dev->gflags ^= IFF_PROMISC; 5242 dev->gflags ^= IFF_PROMISC;
5258 5243
5259 if (__dev_set_promiscuity(dev, inc, false) >= 0) 5244 if (__dev_set_promiscuity(dev, inc, false) >= 0)
5260 if (dev->flags != old_flags) 5245 if (dev->flags != old_flags)
5261 dev_set_rx_mode(dev); 5246 dev_set_rx_mode(dev);
5262 } 5247 }
5263 5248
5264 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI 5249 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
5265 is important. Some (broken) drivers set IFF_PROMISC, when 5250 is important. Some (broken) drivers set IFF_PROMISC, when
5266 IFF_ALLMULTI is requested not asking us and not reporting. 5251 IFF_ALLMULTI is requested not asking us and not reporting.
5267 */ 5252 */
5268 if ((flags ^ dev->gflags) & IFF_ALLMULTI) { 5253 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
5269 int inc = (flags & IFF_ALLMULTI) ? 1 : -1; 5254 int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
5270 5255
5271 dev->gflags ^= IFF_ALLMULTI; 5256 dev->gflags ^= IFF_ALLMULTI;
5272 __dev_set_allmulti(dev, inc, false); 5257 __dev_set_allmulti(dev, inc, false);
5273 } 5258 }
5274 5259
5275 return ret; 5260 return ret;
5276 } 5261 }
5277 5262
5278 void __dev_notify_flags(struct net_device *dev, unsigned int old_flags, 5263 void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
5279 unsigned int gchanges) 5264 unsigned int gchanges)
5280 { 5265 {
5281 unsigned int changes = dev->flags ^ old_flags; 5266 unsigned int changes = dev->flags ^ old_flags;
5282 5267
5283 if (gchanges) 5268 if (gchanges)
5284 rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC); 5269 rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC);
5285 5270
5286 if (changes & IFF_UP) { 5271 if (changes & IFF_UP) {
5287 if (dev->flags & IFF_UP) 5272 if (dev->flags & IFF_UP)
5288 call_netdevice_notifiers(NETDEV_UP, dev); 5273 call_netdevice_notifiers(NETDEV_UP, dev);
5289 else 5274 else
5290 call_netdevice_notifiers(NETDEV_DOWN, dev); 5275 call_netdevice_notifiers(NETDEV_DOWN, dev);
5291 } 5276 }
5292 5277
5293 if (dev->flags & IFF_UP && 5278 if (dev->flags & IFF_UP &&
5294 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) { 5279 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
5295 struct netdev_notifier_change_info change_info; 5280 struct netdev_notifier_change_info change_info;
5296 5281
5297 change_info.flags_changed = changes; 5282 change_info.flags_changed = changes;
5298 call_netdevice_notifiers_info(NETDEV_CHANGE, dev, 5283 call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
5299 &change_info.info); 5284 &change_info.info);
5300 } 5285 }
5301 } 5286 }
5302 5287
5303 /** 5288 /**
5304 * dev_change_flags - change device settings 5289 * dev_change_flags - change device settings
5305 * @dev: device 5290 * @dev: device
5306 * @flags: device state flags 5291 * @flags: device state flags
5307 * 5292 *
5308 * Change settings on device based state flags. The flags are 5293 * Change settings on device based state flags. The flags are
5309 * in the userspace exported format. 5294 * in the userspace exported format.
5310 */ 5295 */
5311 int dev_change_flags(struct net_device *dev, unsigned int flags) 5296 int dev_change_flags(struct net_device *dev, unsigned int flags)
5312 { 5297 {
5313 int ret; 5298 int ret;
5314 unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags; 5299 unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags;
5315 5300
5316 ret = __dev_change_flags(dev, flags); 5301 ret = __dev_change_flags(dev, flags);
5317 if (ret < 0) 5302 if (ret < 0)
5318 return ret; 5303 return ret;
5319 5304
5320 changes = (old_flags ^ dev->flags) | (old_gflags ^ dev->gflags); 5305 changes = (old_flags ^ dev->flags) | (old_gflags ^ dev->gflags);
5321 __dev_notify_flags(dev, old_flags, changes); 5306 __dev_notify_flags(dev, old_flags, changes);
5322 return ret; 5307 return ret;
5323 } 5308 }
5324 EXPORT_SYMBOL(dev_change_flags); 5309 EXPORT_SYMBOL(dev_change_flags);
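Putting dev_get_flags() and dev_change_flags() together, a hypothetical in-kernel equivalent of "ip link set <name> up"; the lookup and error handling are illustrative, the RTNL locking is required.

static int my_bring_up(struct net *net, const char *name)
{
	struct net_device *dev;
	int err = -ENODEV;

	rtnl_lock();
	dev = __dev_get_by_name(net, name);	/* valid while RTNL is held */
	if (dev)
		err = dev_change_flags(dev, dev_get_flags(dev) | IFF_UP);
	rtnl_unlock();
	return err;
}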
5325 5310
5326 /** 5311 /**
5327 * dev_set_mtu - Change maximum transfer unit 5312 * dev_set_mtu - Change maximum transfer unit
5328 * @dev: device 5313 * @dev: device
5329 * @new_mtu: new transfer unit 5314 * @new_mtu: new transfer unit
5330 * 5315 *
5331 * Change the maximum transfer size of the network device. 5316 * Change the maximum transfer size of the network device.
5332 */ 5317 */
5333 int dev_set_mtu(struct net_device *dev, int new_mtu) 5318 int dev_set_mtu(struct net_device *dev, int new_mtu)
5334 { 5319 {
5335 const struct net_device_ops *ops = dev->netdev_ops; 5320 const struct net_device_ops *ops = dev->netdev_ops;
5336 int err; 5321 int err;
5337 5322
5338 if (new_mtu == dev->mtu) 5323 if (new_mtu == dev->mtu)
5339 return 0; 5324 return 0;
5340 5325
5341 /* MTU must be positive. */ 5326 /* MTU must be positive. */
5342 if (new_mtu < 0) 5327 if (new_mtu < 0)
5343 return -EINVAL; 5328 return -EINVAL;
5344 5329
5345 if (!netif_device_present(dev)) 5330 if (!netif_device_present(dev))
5346 return -ENODEV; 5331 return -ENODEV;
5347 5332
5348 err = 0; 5333 err = 0;
5349 if (ops->ndo_change_mtu) 5334 if (ops->ndo_change_mtu)
5350 err = ops->ndo_change_mtu(dev, new_mtu); 5335 err = ops->ndo_change_mtu(dev, new_mtu);
5351 else 5336 else
5352 dev->mtu = new_mtu; 5337 dev->mtu = new_mtu;
5353 5338
5354 if (!err) 5339 if (!err)
5355 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); 5340 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
5356 return err; 5341 return err;
5357 } 5342 }
5358 EXPORT_SYMBOL(dev_set_mtu); 5343 EXPORT_SYMBOL(dev_set_mtu);
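A sketch of a typical caller, say a hypothetical tunnel setup path reserving room for its encapsulation header; callers are expected to hold RTNL, and a successful change raises NETDEV_CHANGEMTU as above.

static int my_shrink_mtu(struct net_device *dev, int encap_overhead)
{
	ASSERT_RTNL();
	return dev_set_mtu(dev, dev->mtu - encap_overhead);
}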
5359 5344
5360 /** 5345 /**
5361 * dev_set_group - Change group this device belongs to 5346 * dev_set_group - Change group this device belongs to
5362 * @dev: device 5347 * @dev: device
5363 * @new_group: group this device should belong to 5348 * @new_group: group this device should belong to
5364 */ 5349 */
5365 void dev_set_group(struct net_device *dev, int new_group) 5350 void dev_set_group(struct net_device *dev, int new_group)
5366 { 5351 {
5367 dev->group = new_group; 5352 dev->group = new_group;
5368 } 5353 }
5369 EXPORT_SYMBOL(dev_set_group); 5354 EXPORT_SYMBOL(dev_set_group);
5370 5355
5371 /** 5356 /**
5372 * dev_set_mac_address - Change Media Access Control Address 5357 * dev_set_mac_address - Change Media Access Control Address
5373 * @dev: device 5358 * @dev: device
5374 * @sa: new address 5359 * @sa: new address
5375 * 5360 *
5376 * Change the hardware (MAC) address of the device 5361 * Change the hardware (MAC) address of the device
5377 */ 5362 */
5378 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) 5363 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
5379 { 5364 {
5380 const struct net_device_ops *ops = dev->netdev_ops; 5365 const struct net_device_ops *ops = dev->netdev_ops;
5381 int err; 5366 int err;
5382 5367
5383 if (!ops->ndo_set_mac_address) 5368 if (!ops->ndo_set_mac_address)
5384 return -EOPNOTSUPP; 5369 return -EOPNOTSUPP;
5385 if (sa->sa_family != dev->type) 5370 if (sa->sa_family != dev->type)
5386 return -EINVAL; 5371 return -EINVAL;
5387 if (!netif_device_present(dev)) 5372 if (!netif_device_present(dev))
5388 return -ENODEV; 5373 return -ENODEV;
5389 err = ops->ndo_set_mac_address(dev, sa); 5374 err = ops->ndo_set_mac_address(dev, sa);
5390 if (err) 5375 if (err)
5391 return err; 5376 return err;
5392 dev->addr_assign_type = NET_ADDR_SET; 5377 dev->addr_assign_type = NET_ADDR_SET;
5393 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); 5378 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
5394 add_device_randomness(dev->dev_addr, dev->addr_len); 5379 add_device_randomness(dev->dev_addr, dev->addr_len);
5395 return 0; 5380 return 0;
5396 } 5381 }
5397 EXPORT_SYMBOL(dev_set_mac_address); 5382 EXPORT_SYMBOL(dev_set_mac_address);
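A sketch of building the struct sockaddr that dev_set_mac_address() expects; the helper name and address source are illustrative, and sa_family must match dev->type exactly as checked above.

static int my_set_mac(struct net_device *dev, const u8 *addr)
{
	struct sockaddr sa;

	sa.sa_family = dev->type;	/* e.g. ARPHRD_ETHER */
	/* assumes addr_len fits in sa.sa_data, which holds for Ethernet */
	memcpy(sa.sa_data, addr, dev->addr_len);

	ASSERT_RTNL();
	return dev_set_mac_address(dev, &sa);
}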
5398 5383
5399 /** 5384 /**
5400 * dev_change_carrier - Change device carrier 5385 * dev_change_carrier - Change device carrier
5401 * @dev: device 5386 * @dev: device
5402 * @new_carrier: new value 5387 * @new_carrier: new value
5403 * 5388 *
5404 * Change device carrier 5389 * Change device carrier
5405 */ 5390 */
5406 int dev_change_carrier(struct net_device *dev, bool new_carrier) 5391 int dev_change_carrier(struct net_device *dev, bool new_carrier)
5407 { 5392 {
5408 const struct net_device_ops *ops = dev->netdev_ops; 5393 const struct net_device_ops *ops = dev->netdev_ops;
5409 5394
5410 if (!ops->ndo_change_carrier) 5395 if (!ops->ndo_change_carrier)
5411 return -EOPNOTSUPP; 5396 return -EOPNOTSUPP;
5412 if (!netif_device_present(dev)) 5397 if (!netif_device_present(dev))
5413 return -ENODEV; 5398 return -ENODEV;
5414 return ops->ndo_change_carrier(dev, new_carrier); 5399 return ops->ndo_change_carrier(dev, new_carrier);
5415 } 5400 }
5416 EXPORT_SYMBOL(dev_change_carrier); 5401 EXPORT_SYMBOL(dev_change_carrier);
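A brief sketch: forcing carrier state from a hypothetical management hook, mirroring how the rtnetlink path invokes this under RTNL; drivers without ndo_change_carrier make it return -EOPNOTSUPP.

static int my_force_carrier(struct net_device *dev, bool on)
{
	ASSERT_RTNL();
	return dev_change_carrier(dev, on);
}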
5417 5402
5418 /** 5403 /**
5419 * dev_get_phys_port_id - Get device physical port ID 5404 * dev_get_phys_port_id - Get device physical port ID
5420 * @dev: device 5405 * @dev: device
5421 * @ppid: port ID 5406 * @ppid: port ID
5422 * 5407 *
5423 * Get device physical port ID 5408 * Get device physical port ID
5424 */ 5409 */
5425 int dev_get_phys_port_id(struct net_device *dev, 5410 int dev_get_phys_port_id(struct net_device *dev,
5426 struct netdev_phys_port_id *ppid) 5411 struct netdev_phys_port_id *ppid)
5427 { 5412 {
5428 const struct net_device_ops *ops = dev->netdev_ops; 5413 const struct net_device_ops *ops = dev->netdev_ops;
5429 5414
5430 if (!ops->ndo_get_phys_port_id) 5415 if (!ops->ndo_get_phys_port_id)
5431 return -EOPNOTSUPP; 5416 return -EOPNOTSUPP;
5432 return ops->ndo_get_phys_port_id(dev, ppid); 5417 return ops->ndo_get_phys_port_id(dev, ppid);
5433 } 5418 }
5434 EXPORT_SYMBOL(dev_get_phys_port_id); 5419 EXPORT_SYMBOL(dev_get_phys_port_id);
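A sketch of querying and logging the physical port ID; struct netdev_phys_port_id and the %*phN hex format are existing kernel facilities, while the helper name is made up.

static void my_show_phys_port(struct net_device *dev)
{
	struct netdev_phys_port_id ppid;

	if (!dev_get_phys_port_id(dev, &ppid))
		netdev_info(dev, "phys port id %*phN\n",
			    ppid.id_len, ppid.id);
}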
5435 5420
5436 /** 5421 /**
5437 * dev_new_index - allocate an ifindex 5422 * dev_new_index - allocate an ifindex
5438 * @net: the applicable net namespace 5423 * @net: the applicable net namespace
5439 * 5424 *
5440 * Returns a suitable unique value for a new device interface 5425 * Returns a suitable unique value for a new device interface
5441 * number. The caller must hold the rtnl semaphore or the 5426 * number. The caller must hold the rtnl semaphore or the
5442 * dev_base_lock to be sure it remains unique. 5427 * dev_base_lock to be sure it remains unique.
5443 */ 5428 */
5444 static int dev_new_index(struct net *net) 5429 static int dev_new_index(struct net *net)
5445 { 5430 {
5446 int ifindex = net->ifindex; 5431 int ifindex = net->ifindex;
5447 for (;;) { 5432 for (;;) {
5448 if (++ifindex <= 0) 5433 if (++ifindex <= 0)
5449 ifindex = 1; 5434 ifindex = 1;
5450 if (!__dev_get_by_index(net, ifindex)) 5435 if (!__dev_get_by_index(net, ifindex))
5451 return net->ifindex = ifindex; 5436 return net->ifindex = ifindex;
5452 } 5437 }
5453 } 5438 }
5454 5439
5455 /* Delayed registration/unregisteration */ 5440 /* Delayed registration/unregisteration */
5456 static LIST_HEAD(net_todo_list); 5441 static LIST_HEAD(net_todo_list);
5457 static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq); 5442 static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
5458 5443
5459 static void net_set_todo(struct net_device *dev) 5444 static void net_set_todo(struct net_device *dev)
5460 { 5445 {
5461 list_add_tail(&dev->todo_list, &net_todo_list); 5446 list_add_tail(&dev->todo_list, &net_todo_list);
5462 dev_net(dev)->dev_unreg_count++; 5447 dev_net(dev)->dev_unreg_count++;
5463 } 5448 }
5464 5449
5465 static void rollback_registered_many(struct list_head *head) 5450 static void rollback_registered_many(struct list_head *head)
5466 { 5451 {
5467 struct net_device *dev, *tmp; 5452 struct net_device *dev, *tmp;
5468 LIST_HEAD(close_head); 5453 LIST_HEAD(close_head);
5469 5454
5470 BUG_ON(dev_boot_phase); 5455 BUG_ON(dev_boot_phase);
5471 ASSERT_RTNL(); 5456 ASSERT_RTNL();
5472 5457
5473 list_for_each_entry_safe(dev, tmp, head, unreg_list) { 5458 list_for_each_entry_safe(dev, tmp, head, unreg_list) {
5474 /* Some devices call without registering 5459 /* Some devices call without registering
5475 * for initialization unwind. Remove those 5460 * for initialization unwind. Remove those
5476 * devices and proceed with the remaining. 5461 * devices and proceed with the remaining.
5477 */ 5462 */
5478 if (dev->reg_state == NETREG_UNINITIALIZED) { 5463 if (dev->reg_state == NETREG_UNINITIALIZED) {
5479 pr_debug("unregister_netdevice: device %s/%p never was registered\n", 5464 pr_debug("unregister_netdevice: device %s/%p never was registered\n",
5480 dev->name, dev); 5465 dev->name, dev);
5481 5466
5482 WARN_ON(1); 5467 WARN_ON(1);
5483 list_del(&dev->unreg_list); 5468 list_del(&dev->unreg_list);
5484 continue; 5469 continue;
5485 } 5470 }
5486 dev->dismantle = true; 5471 dev->dismantle = true;
5487 BUG_ON(dev->reg_state != NETREG_REGISTERED); 5472 BUG_ON(dev->reg_state != NETREG_REGISTERED);
5488 } 5473 }
5489 5474
5490 /* If device is running, close it first. */ 5475 /* If device is running, close it first. */
5491 list_for_each_entry(dev, head, unreg_list) 5476 list_for_each_entry(dev, head, unreg_list)
5492 list_add_tail(&dev->close_list, &close_head); 5477 list_add_tail(&dev->close_list, &close_head);
5493 dev_close_many(&close_head); 5478 dev_close_many(&close_head);
5494 5479
5495 list_for_each_entry(dev, head, unreg_list) { 5480 list_for_each_entry(dev, head, unreg_list) {
5496 /* And unlink it from device chain. */ 5481 /* And unlink it from device chain. */
5497 unlist_netdevice(dev); 5482 unlist_netdevice(dev);
5498 5483
5499 dev->reg_state = NETREG_UNREGISTERING; 5484 dev->reg_state = NETREG_UNREGISTERING;
5500 } 5485 }
5501 5486
5502 synchronize_net(); 5487 synchronize_net();
5503 5488
5504 list_for_each_entry(dev, head, unreg_list) { 5489 list_for_each_entry(dev, head, unreg_list) {
5505 /* Shutdown queueing discipline. */ 5490 /* Shutdown queueing discipline. */
5506 dev_shutdown(dev); 5491 dev_shutdown(dev);
5507 5492
5508 5493
5509 /* Notify protocols, that we are about to destroy 5494 /* Notify protocols, that we are about to destroy
5510 this device. They should clean all the things. 5495 this device. They should clean all the things.
5511 */ 5496 */
5512 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 5497 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5513 5498
5514 if (!dev->rtnl_link_ops || 5499 if (!dev->rtnl_link_ops ||
5515 dev->rtnl_link_state == RTNL_LINK_INITIALIZED) 5500 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5516 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL); 5501 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
5517 5502
5518 /* 5503 /*
5519 * Flush the unicast and multicast chains 5504 * Flush the unicast and multicast chains
5520 */ 5505 */
5521 dev_uc_flush(dev); 5506 dev_uc_flush(dev);
5522 dev_mc_flush(dev); 5507 dev_mc_flush(dev);
5523 5508
5524 if (dev->netdev_ops->ndo_uninit) 5509 if (dev->netdev_ops->ndo_uninit)
5525 dev->netdev_ops->ndo_uninit(dev); 5510 dev->netdev_ops->ndo_uninit(dev);
5526 5511
5527 /* Notifier chain MUST detach us all upper devices. */ 5512 /* Notifier chain MUST detach us all upper devices. */
5528 WARN_ON(netdev_has_any_upper_dev(dev)); 5513 WARN_ON(netdev_has_any_upper_dev(dev));
5529 5514
5530 /* Remove entries from kobject tree */ 5515 /* Remove entries from kobject tree */
5531 netdev_unregister_kobject(dev); 5516 netdev_unregister_kobject(dev);
5532 #ifdef CONFIG_XPS 5517 #ifdef CONFIG_XPS
5533 /* Remove XPS queueing entries */ 5518 /* Remove XPS queueing entries */
5534 netif_reset_xps_queues_gt(dev, 0); 5519 netif_reset_xps_queues_gt(dev, 0);
5535 #endif 5520 #endif
5536 } 5521 }
5537 5522
5538 synchronize_net(); 5523 synchronize_net();
5539 5524
5540 list_for_each_entry(dev, head, unreg_list) 5525 list_for_each_entry(dev, head, unreg_list)
5541 dev_put(dev); 5526 dev_put(dev);
5542 } 5527 }
5543 5528
5544 static void rollback_registered(struct net_device *dev) 5529 static void rollback_registered(struct net_device *dev)
5545 { 5530 {
5546 LIST_HEAD(single); 5531 LIST_HEAD(single);
5547 5532
5548 list_add(&dev->unreg_list, &single); 5533 list_add(&dev->unreg_list, &single);
5549 rollback_registered_many(&single); 5534 rollback_registered_many(&single);
5550 list_del(&single); 5535 list_del(&single);
5551 } 5536 }
5552 5537
5553 static netdev_features_t netdev_fix_features(struct net_device *dev, 5538 static netdev_features_t netdev_fix_features(struct net_device *dev,
5554 netdev_features_t features) 5539 netdev_features_t features)
5555 { 5540 {
5556 /* Fix illegal checksum combinations */ 5541 /* Fix illegal checksum combinations */
5557 if ((features & NETIF_F_HW_CSUM) && 5542 if ((features & NETIF_F_HW_CSUM) &&
5558 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { 5543 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5559 netdev_warn(dev, "mixed HW and IP checksum settings.\n"); 5544 netdev_warn(dev, "mixed HW and IP checksum settings.\n");
5560 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); 5545 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
5561 } 5546 }
5562 5547
5563 /* TSO requires that SG is present as well. */ 5548 /* TSO requires that SG is present as well. */
5564 if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) { 5549 if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
5565 netdev_dbg(dev, "Dropping TSO features since no SG feature.\n"); 5550 netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
5566 features &= ~NETIF_F_ALL_TSO; 5551 features &= ~NETIF_F_ALL_TSO;
5567 } 5552 }
5568 5553
5569 if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) && 5554 if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) &&
5570 !(features & NETIF_F_IP_CSUM)) { 5555 !(features & NETIF_F_IP_CSUM)) {
5571 netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n"); 5556 netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n");
5572 features &= ~NETIF_F_TSO; 5557 features &= ~NETIF_F_TSO;
5573 features &= ~NETIF_F_TSO_ECN; 5558 features &= ~NETIF_F_TSO_ECN;
5574 } 5559 }
5575 5560
5576 if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) && 5561 if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) &&
5577 !(features & NETIF_F_IPV6_CSUM)) { 5562 !(features & NETIF_F_IPV6_CSUM)) {
5578 netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n"); 5563 netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n");
5579 features &= ~NETIF_F_TSO6; 5564 features &= ~NETIF_F_TSO6;
5580 } 5565 }
5581 5566
5582 /* TSO ECN requires that TSO is present as well. */ 5567 /* TSO ECN requires that TSO is present as well. */
5583 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN) 5568 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
5584 features &= ~NETIF_F_TSO_ECN; 5569 features &= ~NETIF_F_TSO_ECN;
5585 5570
5586 /* Software GSO depends on SG. */ 5571 /* Software GSO depends on SG. */
5587 if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) { 5572 if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
5588 netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n"); 5573 netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
5589 features &= ~NETIF_F_GSO; 5574 features &= ~NETIF_F_GSO;
5590 } 5575 }
5591 5576
5592 /* UFO needs SG and checksumming */ 5577 /* UFO needs SG and checksumming */
5593 if (features & NETIF_F_UFO) { 5578 if (features & NETIF_F_UFO) {
5594 /* maybe split UFO into V4 and V6? */ 5579 /* maybe split UFO into V4 and V6? */
5595 if (!((features & NETIF_F_GEN_CSUM) || 5580 if (!((features & NETIF_F_GEN_CSUM) ||
5596 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) 5581 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
5597 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { 5582 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5598 netdev_dbg(dev, 5583 netdev_dbg(dev,
5599 "Dropping NETIF_F_UFO since no checksum offload features.\n"); 5584 "Dropping NETIF_F_UFO since no checksum offload features.\n");
5600 features &= ~NETIF_F_UFO; 5585 features &= ~NETIF_F_UFO;
5601 } 5586 }
5602 5587
5603 if (!(features & NETIF_F_SG)) { 5588 if (!(features & NETIF_F_SG)) {
5604 netdev_dbg(dev, 5589 netdev_dbg(dev,
5605 "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n"); 5590 "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
5606 features &= ~NETIF_F_UFO; 5591 features &= ~NETIF_F_UFO;
5607 } 5592 }
5608 } 5593 }
5609 5594
5610 return features; 5595 return features;
5611 } 5596 }
5612 5597
5613 int __netdev_update_features(struct net_device *dev) 5598 int __netdev_update_features(struct net_device *dev)
5614 { 5599 {
5615 netdev_features_t features; 5600 netdev_features_t features;
5616 int err = 0; 5601 int err = 0;
5617 5602
5618 ASSERT_RTNL(); 5603 ASSERT_RTNL();
5619 5604
5620 features = netdev_get_wanted_features(dev); 5605 features = netdev_get_wanted_features(dev);
5621 5606
5622 if (dev->netdev_ops->ndo_fix_features) 5607 if (dev->netdev_ops->ndo_fix_features)
5623 features = dev->netdev_ops->ndo_fix_features(dev, features); 5608 features = dev->netdev_ops->ndo_fix_features(dev, features);
5624 5609
5625 /* driver might be less strict about feature dependencies */ 5610 /* driver might be less strict about feature dependencies */
5626 features = netdev_fix_features(dev, features); 5611 features = netdev_fix_features(dev, features);
5627 5612
5628 if (dev->features == features) 5613 if (dev->features == features)
5629 return 0; 5614 return 0;
5630 5615
5631 netdev_dbg(dev, "Features changed: %pNF -> %pNF\n", 5616 netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
5632 &dev->features, &features); 5617 &dev->features, &features);
5633 5618
5634 if (dev->netdev_ops->ndo_set_features) 5619 if (dev->netdev_ops->ndo_set_features)
5635 err = dev->netdev_ops->ndo_set_features(dev, features); 5620 err = dev->netdev_ops->ndo_set_features(dev, features);
5636 5621
5637 if (unlikely(err < 0)) { 5622 if (unlikely(err < 0)) {
5638 netdev_err(dev, 5623 netdev_err(dev,
5639 "set_features() failed (%d); wanted %pNF, left %pNF\n", 5624 "set_features() failed (%d); wanted %pNF, left %pNF\n",
5640 err, &features, &dev->features); 5625 err, &features, &dev->features);
5641 return -1; 5626 return -1;
5642 } 5627 }
5643 5628
5644 if (!err) 5629 if (!err)
5645 dev->features = features; 5630 dev->features = features;
5646 5631
5647 return 1; 5632 return 1;
5648 } 5633 }
5649 5634
5650 /** 5635 /**
5651 * netdev_update_features - recalculate device features 5636 * netdev_update_features - recalculate device features
5652 * @dev: the device to check 5637 * @dev: the device to check
5653 * 5638 *
5654 * Recalculate dev->features set and send notifications if it 5639 * Recalculate dev->features set and send notifications if it
5655 * has changed. Should be called after driver or hardware dependent 5640 * has changed. Should be called after driver or hardware dependent
5656 * conditions might have changed that influence the features. 5641 * conditions might have changed that influence the features.
5657 */ 5642 */
5658 void netdev_update_features(struct net_device *dev) 5643 void netdev_update_features(struct net_device *dev)
5659 { 5644 {
5660 if (__netdev_update_features(dev)) 5645 if (__netdev_update_features(dev))
5661 netdev_features_change(dev); 5646 netdev_features_change(dev);
5662 } 5647 }
5663 EXPORT_SYMBOL(netdev_update_features); 5648 EXPORT_SYMBOL(netdev_update_features);
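A sketch of the intended call site: a hypothetical driver that just learned it can no longer offload TSO re-runs the feature negotiation; __netdev_update_features() asserts RTNL, so the caller must hold it.

static void my_disable_tso(struct net_device *dev)
{
	ASSERT_RTNL();
	dev->hw_features &= ~NETIF_F_ALL_TSO;	/* hardware lost the capability */
	netdev_update_features(dev);		/* recompute and notify if changed */
}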
5664 5649
5665 /** 5650 /**
5666 * netdev_change_features - recalculate device features 5651 * netdev_change_features - recalculate device features
5667 * @dev: the device to check 5652 * @dev: the device to check
5668 * 5653 *
5669 * Recalculate dev->features set and send notifications even 5654 * Recalculate dev->features set and send notifications even
5670 * if they have not changed. Should be called instead of 5655 * if they have not changed. Should be called instead of
5671 * netdev_update_features() if also dev->vlan_features might 5656 * netdev_update_features() if also dev->vlan_features might
5672 * have changed to allow the changes to be propagated to stacked 5657 * have changed to allow the changes to be propagated to stacked
5673 * VLAN devices. 5658 * VLAN devices.
5674 */ 5659 */
5675 void netdev_change_features(struct net_device *dev) 5660 void netdev_change_features(struct net_device *dev)
5676 { 5661 {
5677 __netdev_update_features(dev); 5662 __netdev_update_features(dev);
5678 netdev_features_change(dev); 5663 netdev_features_change(dev);
5679 } 5664 }
5680 EXPORT_SYMBOL(netdev_change_features); 5665 EXPORT_SYMBOL(netdev_change_features);
5681 5666
5682 /** 5667 /**
5683 * netif_stacked_transfer_operstate - transfer operstate 5668 * netif_stacked_transfer_operstate - transfer operstate
5684 * @rootdev: the root or lower level device to transfer state from 5669 * @rootdev: the root or lower level device to transfer state from
5685 * @dev: the device to transfer operstate to 5670 * @dev: the device to transfer operstate to
5686 * 5671 *
5687 * Transfer operational state from root to device. This is normally 5672 * Transfer operational state from root to device. This is normally
5688 * called when a stacking relationship exists between the root 5673 * called when a stacking relationship exists between the root
5689 * device and the device (a leaf device). 5674 * device and the device (a leaf device).
5690 */ 5675 */
5691 void netif_stacked_transfer_operstate(const struct net_device *rootdev, 5676 void netif_stacked_transfer_operstate(const struct net_device *rootdev,
5692 struct net_device *dev) 5677 struct net_device *dev)
5693 { 5678 {
5694 if (rootdev->operstate == IF_OPER_DORMANT) 5679 if (rootdev->operstate == IF_OPER_DORMANT)
5695 netif_dormant_on(dev); 5680 netif_dormant_on(dev);
5696 else 5681 else
5697 netif_dormant_off(dev); 5682 netif_dormant_off(dev);
5698 5683
5699 if (netif_carrier_ok(rootdev)) { 5684 if (netif_carrier_ok(rootdev)) {
5700 if (!netif_carrier_ok(dev)) 5685 if (!netif_carrier_ok(dev))
5701 netif_carrier_on(dev); 5686 netif_carrier_on(dev);
5702 } else { 5687 } else {
5703 if (netif_carrier_ok(dev)) 5688 if (netif_carrier_ok(dev))
5704 netif_carrier_off(dev); 5689 netif_carrier_off(dev);
5705 } 5690 }
5706 } 5691 }
5707 EXPORT_SYMBOL(netif_stacked_transfer_operstate); 5692 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
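A sketch of where this is typically called: a hypothetical stacked driver mirroring its lower device's state from a netdevice notifier. Notifiers run under RTNL, so looking up the master upper device with netdev_master_upper_dev_get() is safe here; the notifier wiring itself is illustrative.

static int my_stacked_event(struct notifier_block *nb,
			    unsigned long event, void *ptr)
{
	struct net_device *lower = netdev_notifier_info_to_dev(ptr);
	struct net_device *upper = netdev_master_upper_dev_get(lower);

	if (upper && event == NETDEV_CHANGE)
		netif_stacked_transfer_operstate(lower, upper);
	return NOTIFY_DONE;
}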
5708 5693
5709 #ifdef CONFIG_RPS 5694 #ifdef CONFIG_RPS
5710 static int netif_alloc_rx_queues(struct net_device *dev) 5695 static int netif_alloc_rx_queues(struct net_device *dev)
5711 { 5696 {
5712 unsigned int i, count = dev->num_rx_queues; 5697 unsigned int i, count = dev->num_rx_queues;
5713 struct netdev_rx_queue *rx; 5698 struct netdev_rx_queue *rx;
5714 5699
5715 BUG_ON(count < 1); 5700 BUG_ON(count < 1);
5716 5701
5717 rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL); 5702 rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
5718 if (!rx) 5703 if (!rx)
5719 return -ENOMEM; 5704 return -ENOMEM;
5720 5705
5721 dev->_rx = rx; 5706 dev->_rx = rx;
5722 5707
5723 for (i = 0; i < count; i++) 5708 for (i = 0; i < count; i++)
5724 rx[i].dev = dev; 5709 rx[i].dev = dev;
5725 return 0; 5710 return 0;
5726 } 5711 }
5727 #endif 5712 #endif
5728 5713
5729 static void netdev_init_one_queue(struct net_device *dev, 5714 static void netdev_init_one_queue(struct net_device *dev,
5730 struct netdev_queue *queue, void *_unused) 5715 struct netdev_queue *queue, void *_unused)
5731 { 5716 {
5732 /* Initialize queue lock */ 5717 /* Initialize queue lock */
5733 spin_lock_init(&queue->_xmit_lock); 5718 spin_lock_init(&queue->_xmit_lock);
5734 netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); 5719 netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
5735 queue->xmit_lock_owner = -1; 5720 queue->xmit_lock_owner = -1;
5736 netdev_queue_numa_node_write(queue, NUMA_NO_NODE); 5721 netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
5737 queue->dev = dev; 5722 queue->dev = dev;
5738 #ifdef CONFIG_BQL 5723 #ifdef CONFIG_BQL
5739 dql_init(&queue->dql, HZ); 5724 dql_init(&queue->dql, HZ);
5740 #endif 5725 #endif
5741 } 5726 }
5742 5727
5743 static void netif_free_tx_queues(struct net_device *dev) 5728 static void netif_free_tx_queues(struct net_device *dev)
5744 { 5729 {
5745 if (is_vmalloc_addr(dev->_tx)) 5730 if (is_vmalloc_addr(dev->_tx))
5746 vfree(dev->_tx); 5731 vfree(dev->_tx);
5747 else 5732 else
5748 kfree(dev->_tx); 5733 kfree(dev->_tx);
5749 } 5734 }
5750 5735
5751 static int netif_alloc_netdev_queues(struct net_device *dev) 5736 static int netif_alloc_netdev_queues(struct net_device *dev)
5752 { 5737 {
5753 unsigned int count = dev->num_tx_queues; 5738 unsigned int count = dev->num_tx_queues;
5754 struct netdev_queue *tx; 5739 struct netdev_queue *tx;
5755 size_t sz = count * sizeof(*tx); 5740 size_t sz = count * sizeof(*tx);
5756 5741
5757 BUG_ON(count < 1 || count > 0xffff); 5742 BUG_ON(count < 1 || count > 0xffff);
5758 5743
5759 tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); 5744 tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
5760 if (!tx) { 5745 if (!tx) {
5761 tx = vzalloc(sz); 5746 tx = vzalloc(sz);
5762 if (!tx) 5747 if (!tx)
5763 return -ENOMEM; 5748 return -ENOMEM;
5764 } 5749 }
5765 dev->_tx = tx; 5750 dev->_tx = tx;
5766 5751
5767 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); 5752 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
5768 spin_lock_init(&dev->tx_global_lock); 5753 spin_lock_init(&dev->tx_global_lock);
5769 5754
5770 return 0; 5755 return 0;
5771 } 5756 }
5772 5757
5773 /** 5758 /**
5774 * register_netdevice - register a network device 5759 * register_netdevice - register a network device
5775 * @dev: device to register 5760 * @dev: device to register
5776 * 5761 *
5777 * Take a completed network device structure and add it to the kernel 5762 * Take a completed network device structure and add it to the kernel
5778 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier 5763 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
5779 * chain. 0 is returned on success. A negative errno code is returned 5764 * chain. 0 is returned on success. A negative errno code is returned
5780 * on a failure to set up the device, or if the name is a duplicate. 5765 * on a failure to set up the device, or if the name is a duplicate.
5781 * 5766 *
5782 * Callers must hold the rtnl semaphore. You may want 5767 * Callers must hold the rtnl semaphore. You may want
5783 * register_netdev() instead of this. 5768 * register_netdev() instead of this.
5784 * 5769 *
5785 * BUGS: 5770 * BUGS:
5786 * The locking appears insufficient to guarantee two parallel registers 5771 * The locking appears insufficient to guarantee two parallel registers
5787 * will not get the same name. 5772 * will not get the same name.
5788 */ 5773 */
5789 5774
5790 int register_netdevice(struct net_device *dev) 5775 int register_netdevice(struct net_device *dev)
5791 { 5776 {
5792 int ret; 5777 int ret;
5793 struct net *net = dev_net(dev); 5778 struct net *net = dev_net(dev);
5794 5779
5795 BUG_ON(dev_boot_phase); 5780 BUG_ON(dev_boot_phase);
5796 ASSERT_RTNL(); 5781 ASSERT_RTNL();
5797 5782
5798 might_sleep(); 5783 might_sleep();
5799 5784
5800 /* When net_device's are persistent, this will be fatal. */ 5785 /* When net_device's are persistent, this will be fatal. */
5801 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); 5786 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
5802 BUG_ON(!net); 5787 BUG_ON(!net);
5803 5788
5804 spin_lock_init(&dev->addr_list_lock); 5789 spin_lock_init(&dev->addr_list_lock);
5805 netdev_set_addr_lockdep_class(dev); 5790 netdev_set_addr_lockdep_class(dev);
5806 5791
5807 dev->iflink = -1; 5792 dev->iflink = -1;
5808 5793
5809 ret = dev_get_valid_name(net, dev, dev->name); 5794 ret = dev_get_valid_name(net, dev, dev->name);
5810 if (ret < 0) 5795 if (ret < 0)
5811 goto out; 5796 goto out;
5812 5797
5813 /* Init, if this function is available */ 5798 /* Init, if this function is available */
5814 if (dev->netdev_ops->ndo_init) { 5799 if (dev->netdev_ops->ndo_init) {
5815 ret = dev->netdev_ops->ndo_init(dev); 5800 ret = dev->netdev_ops->ndo_init(dev);
5816 if (ret) { 5801 if (ret) {
5817 if (ret > 0) 5802 if (ret > 0)
5818 ret = -EIO; 5803 ret = -EIO;
5819 goto out; 5804 goto out;
5820 } 5805 }
5821 } 5806 }
5822 5807
5823 if (((dev->hw_features | dev->features) & 5808 if (((dev->hw_features | dev->features) &
5824 NETIF_F_HW_VLAN_CTAG_FILTER) && 5809 NETIF_F_HW_VLAN_CTAG_FILTER) &&
5825 (!dev->netdev_ops->ndo_vlan_rx_add_vid || 5810 (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
5826 !dev->netdev_ops->ndo_vlan_rx_kill_vid)) { 5811 !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
5827 netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n"); 5812 netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
5828 ret = -EINVAL; 5813 ret = -EINVAL;
5829 goto err_uninit; 5814 goto err_uninit;
5830 } 5815 }
5831 5816
5832 ret = -EBUSY; 5817 ret = -EBUSY;
5833 if (!dev->ifindex) 5818 if (!dev->ifindex)
5834 dev->ifindex = dev_new_index(net); 5819 dev->ifindex = dev_new_index(net);
5835 else if (__dev_get_by_index(net, dev->ifindex)) 5820 else if (__dev_get_by_index(net, dev->ifindex))
5836 goto err_uninit; 5821 goto err_uninit;
5837 5822
5838 if (dev->iflink == -1) 5823 if (dev->iflink == -1)
5839 dev->iflink = dev->ifindex; 5824 dev->iflink = dev->ifindex;
5840 5825
5841 /* Transfer changeable features to wanted_features and enable 5826 /* Transfer changeable features to wanted_features and enable
5842 * software offloads (GSO and GRO). 5827 * software offloads (GSO and GRO).
5843 */ 5828 */
5844 dev->hw_features |= NETIF_F_SOFT_FEATURES; 5829 dev->hw_features |= NETIF_F_SOFT_FEATURES;
5845 dev->features |= NETIF_F_SOFT_FEATURES; 5830 dev->features |= NETIF_F_SOFT_FEATURES;
5846 dev->wanted_features = dev->features & dev->hw_features; 5831 dev->wanted_features = dev->features & dev->hw_features;
5847 5832
5848 /* Turn on no cache copy if HW is doing checksum */ 5833 /* Turn on no cache copy if HW is doing checksum */
5849 if (!(dev->flags & IFF_LOOPBACK)) { 5834 if (!(dev->flags & IFF_LOOPBACK)) {
5850 dev->hw_features |= NETIF_F_NOCACHE_COPY; 5835 dev->hw_features |= NETIF_F_NOCACHE_COPY;
5851 if (dev->features & NETIF_F_ALL_CSUM) { 5836 if (dev->features & NETIF_F_ALL_CSUM) {
5852 dev->wanted_features |= NETIF_F_NOCACHE_COPY; 5837 dev->wanted_features |= NETIF_F_NOCACHE_COPY;
5853 dev->features |= NETIF_F_NOCACHE_COPY; 5838 dev->features |= NETIF_F_NOCACHE_COPY;
5854 } 5839 }
5855 } 5840 }
5856 5841
5857 /* Make NETIF_F_HIGHDMA inheritable to VLAN devices. 5842 /* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
5858 */ 5843 */
5859 dev->vlan_features |= NETIF_F_HIGHDMA; 5844 dev->vlan_features |= NETIF_F_HIGHDMA;
5860 5845
5861 /* Make NETIF_F_SG inheritable to tunnel devices. 5846 /* Make NETIF_F_SG inheritable to tunnel devices.
5862 */ 5847 */
5863 dev->hw_enc_features |= NETIF_F_SG; 5848 dev->hw_enc_features |= NETIF_F_SG;
5864 5849
5865 /* Make NETIF_F_SG inheritable to MPLS. 5850 /* Make NETIF_F_SG inheritable to MPLS.
5866 */ 5851 */
5867 dev->mpls_features |= NETIF_F_SG; 5852 dev->mpls_features |= NETIF_F_SG;
5868 5853
5869 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); 5854 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5870 ret = notifier_to_errno(ret); 5855 ret = notifier_to_errno(ret);
5871 if (ret) 5856 if (ret)
5872 goto err_uninit; 5857 goto err_uninit;
5873 5858
5874 ret = netdev_register_kobject(dev); 5859 ret = netdev_register_kobject(dev);
5875 if (ret) 5860 if (ret)
5876 goto err_uninit; 5861 goto err_uninit;
5877 dev->reg_state = NETREG_REGISTERED; 5862 dev->reg_state = NETREG_REGISTERED;
5878 5863
5879 __netdev_update_features(dev); 5864 __netdev_update_features(dev);
5880 5865
5881 /* 5866 /*
5882 * Default initial state at registry is that the 5867 * Default initial state at registry is that the
5883 * device is present. 5868 * device is present.
5884 */ 5869 */
5885 5870
5886 set_bit(__LINK_STATE_PRESENT, &dev->state); 5871 set_bit(__LINK_STATE_PRESENT, &dev->state);
5887 5872
5888 linkwatch_init_dev(dev); 5873 linkwatch_init_dev(dev);
5889 5874
5890 dev_init_scheduler(dev); 5875 dev_init_scheduler(dev);
5891 dev_hold(dev); 5876 dev_hold(dev);
5892 list_netdevice(dev); 5877 list_netdevice(dev);
5893 add_device_randomness(dev->dev_addr, dev->addr_len); 5878 add_device_randomness(dev->dev_addr, dev->addr_len);
5894 5879
5895 /* If the device has a permanent device address, the driver should 5880 /* If the device has a permanent device address, the driver should
5896 * set dev_addr and also addr_assign_type should be set to 5881 * set dev_addr and also addr_assign_type should be set to
5897 * NET_ADDR_PERM (default value). 5882 * NET_ADDR_PERM (default value).
5898 */ 5883 */
5899 if (dev->addr_assign_type == NET_ADDR_PERM) 5884 if (dev->addr_assign_type == NET_ADDR_PERM)
5900 memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); 5885 memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
5901 5886
5902 /* Notify protocols that a new device appeared. */ 5887 /* Notify protocols that a new device appeared. */
5903 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); 5888 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
5904 ret = notifier_to_errno(ret); 5889 ret = notifier_to_errno(ret);
5905 if (ret) { 5890 if (ret) {
5906 rollback_registered(dev); 5891 rollback_registered(dev);
5907 dev->reg_state = NETREG_UNREGISTERED; 5892 dev->reg_state = NETREG_UNREGISTERED;
5908 } 5893 }
5909 /* 5894 /*
5910 * Prevent userspace races by waiting until the network 5895 * Prevent userspace races by waiting until the network
5911 * device is fully setup before sending notifications. 5896 * device is fully setup before sending notifications.
5912 */ 5897 */
5913 if (!dev->rtnl_link_ops || 5898 if (!dev->rtnl_link_ops ||
5914 dev->rtnl_link_state == RTNL_LINK_INITIALIZED) 5899 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5915 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL); 5900 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
5916 5901
5917 out: 5902 out:
5918 return ret; 5903 return ret;
5919 5904
5920 err_uninit: 5905 err_uninit:
5921 if (dev->netdev_ops->ndo_uninit) 5906 if (dev->netdev_ops->ndo_uninit)
5922 dev->netdev_ops->ndo_uninit(dev); 5907 dev->netdev_ops->ndo_uninit(dev);
5923 goto out; 5908 goto out;
5924 } 5909 }
5925 EXPORT_SYMBOL(register_netdevice); 5910 EXPORT_SYMBOL(register_netdevice);
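/* Illustrative sketch, assuming a hypothetical caller my_create_dev():
 * register_netdevice() expects the caller to already hold RTNL, unlike the
 * register_netdev() wrapper further below.
 */
static int my_create_dev(struct net_device *dev)
{
	int err;

	ASSERT_RTNL();			/* caller must hold the rtnl semaphore */

	err = register_netdevice(dev);
	if (err)
		return err;		/* negative errno, dev stays unregistered */

	netif_carrier_off(dev);		/* typical follow-up while still under RTNL */
	return 0;
}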
5926 5911
5927 /** 5912 /**
5928 * init_dummy_netdev - init a dummy network device for NAPI 5913 * init_dummy_netdev - init a dummy network device for NAPI
5929 * @dev: device to init 5914 * @dev: device to init
5930 * 5915 *
5931 * This takes a network device structure and initializes the minimum 5916 * This takes a network device structure and initializes the minimum
5932 * number of fields so it can be used to schedule NAPI polls without 5917 * number of fields so it can be used to schedule NAPI polls without
5933 * registering a full-blown interface. This is to be used by drivers 5918 * registering a full-blown interface. This is to be used by drivers
5934 * that need to tie several hardware interfaces to a single NAPI 5919 * that need to tie several hardware interfaces to a single NAPI
5935 * poll scheduler due to HW limitations. 5920 * poll scheduler due to HW limitations.
5936 */ 5921 */
5937 int init_dummy_netdev(struct net_device *dev) 5922 int init_dummy_netdev(struct net_device *dev)
5938 { 5923 {
5939 /* Clear everything. Note we don't initialize spinlocks 5924 /* Clear everything. Note we don't initialize spinlocks
5940 * as they aren't supposed to be taken by any of the 5925 * as they aren't supposed to be taken by any of the
5941 * NAPI code and this dummy netdev is supposed to be 5926 * NAPI code and this dummy netdev is supposed to be
5942 * only ever used for NAPI polls 5927 * only ever used for NAPI polls
5943 */ 5928 */
5944 memset(dev, 0, sizeof(struct net_device)); 5929 memset(dev, 0, sizeof(struct net_device));
5945 5930
5946 /* make sure we BUG if trying to hit standard 5931 /* make sure we BUG if trying to hit standard
5947 * register/unregister code path 5932 * register/unregister code path
5948 */ 5933 */
5949 dev->reg_state = NETREG_DUMMY; 5934 dev->reg_state = NETREG_DUMMY;
5950 5935
5951 /* NAPI wants this */ 5936 /* NAPI wants this */
5952 INIT_LIST_HEAD(&dev->napi_list); 5937 INIT_LIST_HEAD(&dev->napi_list);
5953 5938
5954 /* a dummy interface is started by default */ 5939 /* a dummy interface is started by default */
5955 set_bit(__LINK_STATE_PRESENT, &dev->state); 5940 set_bit(__LINK_STATE_PRESENT, &dev->state);
5956 set_bit(__LINK_STATE_START, &dev->state); 5941 set_bit(__LINK_STATE_START, &dev->state);
5957 5942
5958 /* Note: We don't allocate pcpu_refcnt for dummy devices, 5943 /* Note: We don't allocate pcpu_refcnt for dummy devices,
5959 * because users of this 'device' don't need to change 5944 * because users of this 'device' don't need to change
5960 * its refcount. 5945 * its refcount.
5961 */ 5946 */
5962 5947
5963 return 0; 5948 return 0;
5964 } 5949 }
5965 EXPORT_SYMBOL_GPL(init_dummy_netdev); 5950 EXPORT_SYMBOL_GPL(init_dummy_netdev);
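/* Illustrative sketch of the use case described in the kernel-doc above:
 * a hypothetical struct my_adapter ties one NAPI context to a dummy,
 * never-registered netdev so several hardware queues can share a poller.
 */
struct my_adapter {
	struct net_device napi_dev;	/* dummy device, never registered */
	struct napi_struct napi;
};

static void my_adapter_setup_napi(struct my_adapter *ad,
				  int (*poll)(struct napi_struct *, int))
{
	init_dummy_netdev(&ad->napi_dev);
	netif_napi_add(&ad->napi_dev, &ad->napi, poll, 64);
	napi_enable(&ad->napi);
}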
5966 5951
5967 5952
5968 /** 5953 /**
5969 * register_netdev - register a network device 5954 * register_netdev - register a network device
5970 * @dev: device to register 5955 * @dev: device to register
5971 * 5956 *
5972 * Take a completed network device structure and add it to the kernel 5957 * Take a completed network device structure and add it to the kernel
5973 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier 5958 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
5974 * chain. 0 is returned on success. A negative errno code is returned 5959 * chain. 0 is returned on success. A negative errno code is returned
5975 * on a failure to set up the device, or if the name is a duplicate. 5960 * on a failure to set up the device, or if the name is a duplicate.
5976 * 5961 *
5977 * This is a wrapper around register_netdevice that takes the rtnl semaphore 5962 * This is a wrapper around register_netdevice that takes the rtnl semaphore
5978 * and expands the device name if you passed a format string to 5963 * and expands the device name if you passed a format string to
5979 * alloc_netdev. 5964 * alloc_netdev.
5980 */ 5965 */
5981 int register_netdev(struct net_device *dev) 5966 int register_netdev(struct net_device *dev)
5982 { 5967 {
5983 int err; 5968 int err;
5984 5969
5985 rtnl_lock(); 5970 rtnl_lock();
5986 err = register_netdevice(dev); 5971 err = register_netdevice(dev);
5987 rtnl_unlock(); 5972 rtnl_unlock();
5988 return err; 5973 return err;
5989 } 5974 }
5990 EXPORT_SYMBOL(register_netdev); 5975 EXPORT_SYMBOL(register_netdev);
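/* Illustrative probe-time sketch (my_probe() and struct my_priv are
 * hypothetical): register_netdev() takes RTNL itself and expands a format
 * string such as "eth%d" left in dev->name by alloc_etherdev().
 */
struct my_priv {
	int dummy;
};

static int my_probe(struct device *parent)
{
	struct net_device *dev;
	int err;

	dev = alloc_etherdev(sizeof(struct my_priv));
	if (!dev)
		return -ENOMEM;

	SET_NETDEV_DEV(dev, parent);
	/* ... fill in dev->netdev_ops, dev->features, dev->dev_addr ... */

	err = register_netdev(dev);	/* takes and releases rtnl_lock() */
	if (err)
		free_netdev(dev);
	return err;
}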
5991 5976
5992 int netdev_refcnt_read(const struct net_device *dev) 5977 int netdev_refcnt_read(const struct net_device *dev)
5993 { 5978 {
5994 int i, refcnt = 0; 5979 int i, refcnt = 0;
5995 5980
5996 for_each_possible_cpu(i) 5981 for_each_possible_cpu(i)
5997 refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i); 5982 refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
5998 return refcnt; 5983 return refcnt;
5999 } 5984 }
6000 EXPORT_SYMBOL(netdev_refcnt_read); 5985 EXPORT_SYMBOL(netdev_refcnt_read);
6001 5986
6002 /** 5987 /**
6003 * netdev_wait_allrefs - wait until all references are gone. 5988 * netdev_wait_allrefs - wait until all references are gone.
6004 * @dev: target net_device 5989 * @dev: target net_device
6005 * 5990 *
6006 * This is called when unregistering network devices. 5991 * This is called when unregistering network devices.
6007 * 5992 *
6008 * Any protocol or device that holds a reference should register 5993 * Any protocol or device that holds a reference should register
6009 * for netdevice notification, and cleanup and put back the 5994 * for netdevice notification, and cleanup and put back the
6010 * reference if they receive an UNREGISTER event. 5995 * reference if they receive an UNREGISTER event.
6011 * We can get stuck here if buggy protocols don't correctly 5996 * We can get stuck here if buggy protocols don't correctly
6012 * call dev_put. 5997 * call dev_put.
6013 */ 5998 */
6014 static void netdev_wait_allrefs(struct net_device *dev) 5999 static void netdev_wait_allrefs(struct net_device *dev)
6015 { 6000 {
6016 unsigned long rebroadcast_time, warning_time; 6001 unsigned long rebroadcast_time, warning_time;
6017 int refcnt; 6002 int refcnt;
6018 6003
6019 linkwatch_forget_dev(dev); 6004 linkwatch_forget_dev(dev);
6020 6005
6021 rebroadcast_time = warning_time = jiffies; 6006 rebroadcast_time = warning_time = jiffies;
6022 refcnt = netdev_refcnt_read(dev); 6007 refcnt = netdev_refcnt_read(dev);
6023 6008
6024 while (refcnt != 0) { 6009 while (refcnt != 0) {
6025 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { 6010 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
6026 rtnl_lock(); 6011 rtnl_lock();
6027 6012
6028 /* Rebroadcast unregister notification */ 6013 /* Rebroadcast unregister notification */
6029 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 6014 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
6030 6015
6031 __rtnl_unlock(); 6016 __rtnl_unlock();
6032 rcu_barrier(); 6017 rcu_barrier();
6033 rtnl_lock(); 6018 rtnl_lock();
6034 6019
6035 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); 6020 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
6036 if (test_bit(__LINK_STATE_LINKWATCH_PENDING, 6021 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
6037 &dev->state)) { 6022 &dev->state)) {
6038 /* We must not have linkwatch events 6023 /* We must not have linkwatch events
6039 * pending on unregister. If this 6024 * pending on unregister. If this
6040 * happens, we simply run the queue 6025 * happens, we simply run the queue
6041 * unscheduled, resulting in a noop 6026 * unscheduled, resulting in a noop
6042 * for this device. 6027 * for this device.
6043 */ 6028 */
6044 linkwatch_run_queue(); 6029 linkwatch_run_queue();
6045 } 6030 }
6046 6031
6047 __rtnl_unlock(); 6032 __rtnl_unlock();
6048 6033
6049 rebroadcast_time = jiffies; 6034 rebroadcast_time = jiffies;
6050 } 6035 }
6051 6036
6052 msleep(250); 6037 msleep(250);
6053 6038
6054 refcnt = netdev_refcnt_read(dev); 6039 refcnt = netdev_refcnt_read(dev);
6055 6040
6056 if (time_after(jiffies, warning_time + 10 * HZ)) { 6041 if (time_after(jiffies, warning_time + 10 * HZ)) {
6057 pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n", 6042 pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
6058 dev->name, refcnt); 6043 dev->name, refcnt);
6059 warning_time = jiffies; 6044 warning_time = jiffies;
6060 } 6045 }
6061 } 6046 }
6062 } 6047 }
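/* Illustrative sketch of the rule stated in the kernel-doc above: whoever
 * took a reference with dev_hold() must drop it on NETDEV_UNREGISTER, or
 * netdev_wait_allrefs() keeps printing the "waiting for %s" warning.
 * my_tracked_dev and my_netdev_event() are hypothetical.
 */
static struct net_device *my_tracked_dev;	/* held via an earlier dev_hold() */

static int my_netdev_event(struct notifier_block *nb,
			   unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	if (event == NETDEV_UNREGISTER && dev == my_tracked_dev) {
		my_tracked_dev = NULL;
		dev_put(dev);		/* balance the earlier dev_hold() */
	}
	return NOTIFY_DONE;
}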
6063 6048
6064 /* The sequence is: 6049 /* The sequence is:
6065 * 6050 *
6066 * rtnl_lock(); 6051 * rtnl_lock();
6067 * ... 6052 * ...
6068 * register_netdevice(x1); 6053 * register_netdevice(x1);
6069 * register_netdevice(x2); 6054 * register_netdevice(x2);
6070 * ... 6055 * ...
6071 * unregister_netdevice(y1); 6056 * unregister_netdevice(y1);
6072 * unregister_netdevice(y2); 6057 * unregister_netdevice(y2);
6073 * ... 6058 * ...
6074 * rtnl_unlock(); 6059 * rtnl_unlock();
6075 * free_netdev(y1); 6060 * free_netdev(y1);
6076 * free_netdev(y2); 6061 * free_netdev(y2);
6077 * 6062 *
6078 * We are invoked by rtnl_unlock(). 6063 * We are invoked by rtnl_unlock().
6079 * This allows us to deal with problems: 6064 * This allows us to deal with problems:
6080 * 1) We can delete sysfs objects which invoke hotplug 6065 * 1) We can delete sysfs objects which invoke hotplug
6081 * without deadlocking with linkwatch via keventd. 6066 * without deadlocking with linkwatch via keventd.
6082 * 2) Since we run with the RTNL semaphore not held, we can sleep 6067 * 2) Since we run with the RTNL semaphore not held, we can sleep
6083 * safely in order to wait for the netdev refcnt to drop to zero. 6068 * safely in order to wait for the netdev refcnt to drop to zero.
6084 * 6069 *
6085 * We must not return until all unregister events added during 6070 * We must not return until all unregister events added during
6086 * the interval the lock was held have been completed. 6071 * the interval the lock was held have been completed.
6087 */ 6072 */
6088 void netdev_run_todo(void) 6073 void netdev_run_todo(void)
6089 { 6074 {
6090 struct list_head list; 6075 struct list_head list;
6091 6076
6092 /* Snapshot list, allow later requests */ 6077 /* Snapshot list, allow later requests */
6093 list_replace_init(&net_todo_list, &list); 6078 list_replace_init(&net_todo_list, &list);
6094 6079
6095 __rtnl_unlock(); 6080 __rtnl_unlock();
6096 6081
6097 6082
6098 /* Wait for rcu callbacks to finish before next phase */ 6083 /* Wait for rcu callbacks to finish before next phase */
6099 if (!list_empty(&list)) 6084 if (!list_empty(&list))
6100 rcu_barrier(); 6085 rcu_barrier();
6101 6086
6102 while (!list_empty(&list)) { 6087 while (!list_empty(&list)) {
6103 struct net_device *dev 6088 struct net_device *dev
6104 = list_first_entry(&list, struct net_device, todo_list); 6089 = list_first_entry(&list, struct net_device, todo_list);
6105 list_del(&dev->todo_list); 6090 list_del(&dev->todo_list);
6106 6091
6107 rtnl_lock(); 6092 rtnl_lock();
6108 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); 6093 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
6109 __rtnl_unlock(); 6094 __rtnl_unlock();
6110 6095
6111 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { 6096 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
6112 pr_err("network todo '%s' but state %d\n", 6097 pr_err("network todo '%s' but state %d\n",
6113 dev->name, dev->reg_state); 6098 dev->name, dev->reg_state);
6114 dump_stack(); 6099 dump_stack();
6115 continue; 6100 continue;
6116 } 6101 }
6117 6102
6118 dev->reg_state = NETREG_UNREGISTERED; 6103 dev->reg_state = NETREG_UNREGISTERED;
6119 6104
6120 on_each_cpu(flush_backlog, dev, 1); 6105 on_each_cpu(flush_backlog, dev, 1);
6121 6106
6122 netdev_wait_allrefs(dev); 6107 netdev_wait_allrefs(dev);
6123 6108
6124 /* paranoia */ 6109 /* paranoia */
6125 BUG_ON(netdev_refcnt_read(dev)); 6110 BUG_ON(netdev_refcnt_read(dev));
6126 WARN_ON(rcu_access_pointer(dev->ip_ptr)); 6111 WARN_ON(rcu_access_pointer(dev->ip_ptr));
6127 WARN_ON(rcu_access_pointer(dev->ip6_ptr)); 6112 WARN_ON(rcu_access_pointer(dev->ip6_ptr));
6128 WARN_ON(dev->dn_ptr); 6113 WARN_ON(dev->dn_ptr);
6129 6114
6130 if (dev->destructor) 6115 if (dev->destructor)
6131 dev->destructor(dev); 6116 dev->destructor(dev);
6132 6117
6133 /* Report a network device has been unregistered */ 6118 /* Report a network device has been unregistered */
6134 rtnl_lock(); 6119 rtnl_lock();
6135 dev_net(dev)->dev_unreg_count--; 6120 dev_net(dev)->dev_unreg_count--;
6136 __rtnl_unlock(); 6121 __rtnl_unlock();
6137 wake_up(&netdev_unregistering_wq); 6122 wake_up(&netdev_unregistering_wq);
6138 6123
6139 /* Free network device */ 6124 /* Free network device */
6140 kobject_put(&dev->dev.kobj); 6125 kobject_put(&dev->dev.kobj);
6141 } 6126 }
6142 } 6127 }
6143 6128
6144 /* Convert net_device_stats to rtnl_link_stats64. They have the same 6129 /* Convert net_device_stats to rtnl_link_stats64. They have the same
6145 * fields in the same order, with only the type differing. 6130 * fields in the same order, with only the type differing.
6146 */ 6131 */
6147 void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, 6132 void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
6148 const struct net_device_stats *netdev_stats) 6133 const struct net_device_stats *netdev_stats)
6149 { 6134 {
6150 #if BITS_PER_LONG == 64 6135 #if BITS_PER_LONG == 64
6151 BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats)); 6136 BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
6152 memcpy(stats64, netdev_stats, sizeof(*stats64)); 6137 memcpy(stats64, netdev_stats, sizeof(*stats64));
6153 #else 6138 #else
6154 size_t i, n = sizeof(*stats64) / sizeof(u64); 6139 size_t i, n = sizeof(*stats64) / sizeof(u64);
6155 const unsigned long *src = (const unsigned long *)netdev_stats; 6140 const unsigned long *src = (const unsigned long *)netdev_stats;
6156 u64 *dst = (u64 *)stats64; 6141 u64 *dst = (u64 *)stats64;
6157 6142
6158 BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) != 6143 BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
6159 sizeof(*stats64) / sizeof(u64)); 6144 sizeof(*stats64) / sizeof(u64));
6160 for (i = 0; i < n; i++) 6145 for (i = 0; i < n; i++)
6161 dst[i] = src[i]; 6146 dst[i] = src[i];
6162 #endif 6147 #endif
6163 } 6148 }
6164 EXPORT_SYMBOL(netdev_stats_to_stats64); 6149 EXPORT_SYMBOL(netdev_stats_to_stats64);
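/* Illustrative sketch (hypothetical my_get_stats64()): a driver that keeps
 * its software counters in dev->stats can convert them in one call and then
 * layer its own 64-bit hardware counters on top.
 */
static struct rtnl_link_stats64 *my_get_stats64(struct net_device *dev,
						struct rtnl_link_stats64 *storage)
{
	netdev_stats_to_stats64(storage, &dev->stats);
	/* storage->rx_bytes += ...read 64-bit HW counters here (hypothetical)... */
	return storage;
}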
6165 6150
6166 /** 6151 /**
6167 * dev_get_stats - get network device statistics 6152 * dev_get_stats - get network device statistics
6168 * @dev: device to get statistics from 6153 * @dev: device to get statistics from
6169 * @storage: place to store stats 6154 * @storage: place to store stats
6170 * 6155 *
6171 * Get network statistics from device. Return @storage. 6156 * Get network statistics from device. Return @storage.
6172 * The device driver may provide its own method by setting 6157 * The device driver may provide its own method by setting
6173 * dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats; 6158 * dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats;
6174 * otherwise the internal statistics structure is used. 6159 * otherwise the internal statistics structure is used.
6175 */ 6160 */
6176 struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, 6161 struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
6177 struct rtnl_link_stats64 *storage) 6162 struct rtnl_link_stats64 *storage)
6178 { 6163 {
6179 const struct net_device_ops *ops = dev->netdev_ops; 6164 const struct net_device_ops *ops = dev->netdev_ops;
6180 6165
6181 if (ops->ndo_get_stats64) { 6166 if (ops->ndo_get_stats64) {
6182 memset(storage, 0, sizeof(*storage)); 6167 memset(storage, 0, sizeof(*storage));
6183 ops->ndo_get_stats64(dev, storage); 6168 ops->ndo_get_stats64(dev, storage);
6184 } else if (ops->ndo_get_stats) { 6169 } else if (ops->ndo_get_stats) {
6185 netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev)); 6170 netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
6186 } else { 6171 } else {
6187 netdev_stats_to_stats64(storage, &dev->stats); 6172 netdev_stats_to_stats64(storage, &dev->stats);
6188 } 6173 }
6189 storage->rx_dropped += atomic_long_read(&dev->rx_dropped); 6174 storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
6190 return storage; 6175 return storage;
6191 } 6176 }
6192 EXPORT_SYMBOL(dev_get_stats); 6177 EXPORT_SYMBOL(dev_get_stats);
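/* Illustrative sketch: how a reader of statistics (procfs-style dump code,
 * for example) would call dev_get_stats(); my_dump_stats() is hypothetical
 * and temp is only a local snapshot buffer.
 */
static void my_dump_stats(struct net_device *dev)
{
	struct rtnl_link_stats64 temp;
	const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);

	pr_info("%s: rx %llu tx %llu rx_dropped %llu\n", dev->name,
		stats->rx_packets, stats->tx_packets, stats->rx_dropped);
}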
6193 6178
6194 struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) 6179 struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
6195 { 6180 {
6196 struct netdev_queue *queue = dev_ingress_queue(dev); 6181 struct netdev_queue *queue = dev_ingress_queue(dev);
6197 6182
6198 #ifdef CONFIG_NET_CLS_ACT 6183 #ifdef CONFIG_NET_CLS_ACT
6199 if (queue) 6184 if (queue)
6200 return queue; 6185 return queue;
6201 queue = kzalloc(sizeof(*queue), GFP_KERNEL); 6186 queue = kzalloc(sizeof(*queue), GFP_KERNEL);
6202 if (!queue) 6187 if (!queue)
6203 return NULL; 6188 return NULL;
6204 netdev_init_one_queue(dev, queue, NULL); 6189 netdev_init_one_queue(dev, queue, NULL);
6205 queue->qdisc = &noop_qdisc; 6190 queue->qdisc = &noop_qdisc;
6206 queue->qdisc_sleeping = &noop_qdisc; 6191 queue->qdisc_sleeping = &noop_qdisc;
6207 rcu_assign_pointer(dev->ingress_queue, queue); 6192 rcu_assign_pointer(dev->ingress_queue, queue);
6208 #endif 6193 #endif
6209 return queue; 6194 return queue;
6210 } 6195 }
6211 6196
6212 static const struct ethtool_ops default_ethtool_ops; 6197 static const struct ethtool_ops default_ethtool_ops;
6213 6198
6214 void netdev_set_default_ethtool_ops(struct net_device *dev, 6199 void netdev_set_default_ethtool_ops(struct net_device *dev,
6215 const struct ethtool_ops *ops) 6200 const struct ethtool_ops *ops)
6216 { 6201 {
6217 if (dev->ethtool_ops == &default_ethtool_ops) 6202 if (dev->ethtool_ops == &default_ethtool_ops)
6218 dev->ethtool_ops = ops; 6203 dev->ethtool_ops = ops;
6219 } 6204 }
6220 EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops); 6205 EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
6221 6206
6222 void netdev_freemem(struct net_device *dev) 6207 void netdev_freemem(struct net_device *dev)
6223 { 6208 {
6224 char *addr = (char *)dev - dev->padded; 6209 char *addr = (char *)dev - dev->padded;
6225 6210
6226 if (is_vmalloc_addr(addr)) 6211 if (is_vmalloc_addr(addr))
6227 vfree(addr); 6212 vfree(addr);
6228 else 6213 else
6229 kfree(addr); 6214 kfree(addr);
6230 } 6215 }
6231 6216
6232 /** 6217 /**
6233 * alloc_netdev_mqs - allocate network device 6218 * alloc_netdev_mqs - allocate network device
6234 * @sizeof_priv: size of private data to allocate space for 6219 * @sizeof_priv: size of private data to allocate space for
6235 * @name: device name format string 6220 * @name: device name format string
6236 * @setup: callback to initialize device 6221 * @setup: callback to initialize device
6237 * @txqs: the number of TX subqueues to allocate 6222 * @txqs: the number of TX subqueues to allocate
6238 * @rxqs: the number of RX subqueues to allocate 6223 * @rxqs: the number of RX subqueues to allocate
6239 * 6224 *
6240 * Allocates a struct net_device with a private data area for driver use 6225 * Allocates a struct net_device with a private data area for driver use
6241 * and performs basic initialization. Also allocates subqueue structs 6226 * and performs basic initialization. Also allocates subqueue structs
6242 * for each queue on the device. 6227 * for each queue on the device.
6243 */ 6228 */
6244 struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, 6229 struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
6245 void (*setup)(struct net_device *), 6230 void (*setup)(struct net_device *),
6246 unsigned int txqs, unsigned int rxqs) 6231 unsigned int txqs, unsigned int rxqs)
6247 { 6232 {
6248 struct net_device *dev; 6233 struct net_device *dev;
6249 size_t alloc_size; 6234 size_t alloc_size;
6250 struct net_device *p; 6235 struct net_device *p;
6251 6236
6252 BUG_ON(strlen(name) >= sizeof(dev->name)); 6237 BUG_ON(strlen(name) >= sizeof(dev->name));
6253 6238
6254 if (txqs < 1) { 6239 if (txqs < 1) {
6255 pr_err("alloc_netdev: Unable to allocate device with zero queues\n"); 6240 pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
6256 return NULL; 6241 return NULL;
6257 } 6242 }
6258 6243
6259 #ifdef CONFIG_RPS 6244 #ifdef CONFIG_RPS
6260 if (rxqs < 1) { 6245 if (rxqs < 1) {
6261 pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n"); 6246 pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
6262 return NULL; 6247 return NULL;
6263 } 6248 }
6264 #endif 6249 #endif
6265 6250
6266 alloc_size = sizeof(struct net_device); 6251 alloc_size = sizeof(struct net_device);
6267 if (sizeof_priv) { 6252 if (sizeof_priv) {
6268 /* ensure 32-byte alignment of private area */ 6253 /* ensure 32-byte alignment of private area */
6269 alloc_size = ALIGN(alloc_size, NETDEV_ALIGN); 6254 alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
6270 alloc_size += sizeof_priv; 6255 alloc_size += sizeof_priv;
6271 } 6256 }
6272 /* ensure 32-byte alignment of whole construct */ 6257 /* ensure 32-byte alignment of whole construct */
6273 alloc_size += NETDEV_ALIGN - 1; 6258 alloc_size += NETDEV_ALIGN - 1;
6274 6259
6275 p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); 6260 p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
6276 if (!p) 6261 if (!p)
6277 p = vzalloc(alloc_size); 6262 p = vzalloc(alloc_size);
6278 if (!p) 6263 if (!p)
6279 return NULL; 6264 return NULL;
6280 6265
6281 dev = PTR_ALIGN(p, NETDEV_ALIGN); 6266 dev = PTR_ALIGN(p, NETDEV_ALIGN);
6282 dev->padded = (char *)dev - (char *)p; 6267 dev->padded = (char *)dev - (char *)p;
6283 6268
6284 dev->pcpu_refcnt = alloc_percpu(int); 6269 dev->pcpu_refcnt = alloc_percpu(int);
6285 if (!dev->pcpu_refcnt) 6270 if (!dev->pcpu_refcnt)
6286 goto free_dev; 6271 goto free_dev;
6287 6272
6288 if (dev_addr_init(dev)) 6273 if (dev_addr_init(dev))
6289 goto free_pcpu; 6274 goto free_pcpu;
6290 6275
6291 dev_mc_init(dev); 6276 dev_mc_init(dev);
6292 dev_uc_init(dev); 6277 dev_uc_init(dev);
6293 6278
6294 dev_net_set(dev, &init_net); 6279 dev_net_set(dev, &init_net);
6295 6280
6296 dev->gso_max_size = GSO_MAX_SIZE; 6281 dev->gso_max_size = GSO_MAX_SIZE;
6297 dev->gso_max_segs = GSO_MAX_SEGS; 6282 dev->gso_max_segs = GSO_MAX_SEGS;
6298 6283
6299 INIT_LIST_HEAD(&dev->napi_list); 6284 INIT_LIST_HEAD(&dev->napi_list);
6300 INIT_LIST_HEAD(&dev->unreg_list); 6285 INIT_LIST_HEAD(&dev->unreg_list);
6301 INIT_LIST_HEAD(&dev->close_list); 6286 INIT_LIST_HEAD(&dev->close_list);
6302 INIT_LIST_HEAD(&dev->link_watch_list); 6287 INIT_LIST_HEAD(&dev->link_watch_list);
6303 INIT_LIST_HEAD(&dev->adj_list.upper); 6288 INIT_LIST_HEAD(&dev->adj_list.upper);
6304 INIT_LIST_HEAD(&dev->adj_list.lower); 6289 INIT_LIST_HEAD(&dev->adj_list.lower);
6305 INIT_LIST_HEAD(&dev->all_adj_list.upper); 6290 INIT_LIST_HEAD(&dev->all_adj_list.upper);
6306 INIT_LIST_HEAD(&dev->all_adj_list.lower); 6291 INIT_LIST_HEAD(&dev->all_adj_list.lower);
6307 dev->priv_flags = IFF_XMIT_DST_RELEASE; 6292 dev->priv_flags = IFF_XMIT_DST_RELEASE;
6308 setup(dev); 6293 setup(dev);
6309 6294
6310 dev->num_tx_queues = txqs; 6295 dev->num_tx_queues = txqs;
6311 dev->real_num_tx_queues = txqs; 6296 dev->real_num_tx_queues = txqs;
6312 if (netif_alloc_netdev_queues(dev)) 6297 if (netif_alloc_netdev_queues(dev))
6313 goto free_all; 6298 goto free_all;
6314 6299
6315 #ifdef CONFIG_RPS 6300 #ifdef CONFIG_RPS
6316 dev->num_rx_queues = rxqs; 6301 dev->num_rx_queues = rxqs;
6317 dev->real_num_rx_queues = rxqs; 6302 dev->real_num_rx_queues = rxqs;
6318 if (netif_alloc_rx_queues(dev)) 6303 if (netif_alloc_rx_queues(dev))
6319 goto free_all; 6304 goto free_all;
6320 #endif 6305 #endif
6321 6306
6322 strcpy(dev->name, name); 6307 strcpy(dev->name, name);
6323 dev->group = INIT_NETDEV_GROUP; 6308 dev->group = INIT_NETDEV_GROUP;
6324 if (!dev->ethtool_ops) 6309 if (!dev->ethtool_ops)
6325 dev->ethtool_ops = &default_ethtool_ops; 6310 dev->ethtool_ops = &default_ethtool_ops;
6326 return dev; 6311 return dev;
6327 6312
6328 free_all: 6313 free_all:
6329 free_netdev(dev); 6314 free_netdev(dev);
6330 return NULL; 6315 return NULL;
6331 6316
6332 free_pcpu: 6317 free_pcpu:
6333 free_percpu(dev->pcpu_refcnt); 6318 free_percpu(dev->pcpu_refcnt);
6334 netif_free_tx_queues(dev); 6319 netif_free_tx_queues(dev);
6335 #ifdef CONFIG_RPS 6320 #ifdef CONFIG_RPS
6336 kfree(dev->_rx); 6321 kfree(dev->_rx);
6337 #endif 6322 #endif
6338 6323
6339 free_dev: 6324 free_dev:
6340 netdev_freemem(dev); 6325 netdev_freemem(dev);
6341 return NULL; 6326 return NULL;
6342 } 6327 }
6343 EXPORT_SYMBOL(alloc_netdev_mqs); 6328 EXPORT_SYMBOL(alloc_netdev_mqs);
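/* Illustrative sketch: allocating an Ethernet-style device with four TX and
 * four RX queues. my_setup(), my_alloc() and struct my_mq_priv are
 * hypothetical; most drivers reach this function through wrappers such as
 * alloc_etherdev_mqs().
 */
struct my_mq_priv {
	int dummy;
};

static void my_setup(struct net_device *dev)
{
	ether_setup(dev);
	/* ... driver defaults: dev->netdev_ops, dev->features, ... */
}

static struct net_device *my_alloc(void)
{
	return alloc_netdev_mqs(sizeof(struct my_mq_priv), "myeth%d",
				my_setup, 4, 4);
}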
6344 6329
6345 /** 6330 /**
6346 * free_netdev - free network device 6331 * free_netdev - free network device
6347 * @dev: device 6332 * @dev: device
6348 * 6333 *
6349 * This function does the last stage of destroying an allocated device 6334 * This function does the last stage of destroying an allocated device
6350 * interface. The reference to the device object is released. 6335 * interface. The reference to the device object is released.
6351 * If this is the last reference then it will be freed. 6336 * If this is the last reference then it will be freed.
6352 */ 6337 */
6353 void free_netdev(struct net_device *dev) 6338 void free_netdev(struct net_device *dev)
6354 { 6339 {
6355 struct napi_struct *p, *n; 6340 struct napi_struct *p, *n;
6356 6341
6357 release_net(dev_net(dev)); 6342 release_net(dev_net(dev));
6358 6343
6359 netif_free_tx_queues(dev); 6344 netif_free_tx_queues(dev);
6360 #ifdef CONFIG_RPS 6345 #ifdef CONFIG_RPS
6361 kfree(dev->_rx); 6346 kfree(dev->_rx);
6362 #endif 6347 #endif
6363 6348
6364 kfree(rcu_dereference_protected(dev->ingress_queue, 1)); 6349 kfree(rcu_dereference_protected(dev->ingress_queue, 1));
6365 6350
6366 /* Flush device addresses */ 6351 /* Flush device addresses */
6367 dev_addr_flush(dev); 6352 dev_addr_flush(dev);
6368 6353
6369 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) 6354 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
6370 netif_napi_del(p); 6355 netif_napi_del(p);
6371 6356
6372 free_percpu(dev->pcpu_refcnt); 6357 free_percpu(dev->pcpu_refcnt);
6373 dev->pcpu_refcnt = NULL; 6358 dev->pcpu_refcnt = NULL;
6374 6359
6375 /* Compatibility with error handling in drivers */ 6360 /* Compatibility with error handling in drivers */
6376 if (dev->reg_state == NETREG_UNINITIALIZED) { 6361 if (dev->reg_state == NETREG_UNINITIALIZED) {
6377 netdev_freemem(dev); 6362 netdev_freemem(dev);
6378 return; 6363 return;
6379 } 6364 }
6380 6365
6381 BUG_ON(dev->reg_state != NETREG_UNREGISTERED); 6366 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
6382 dev->reg_state = NETREG_RELEASED; 6367 dev->reg_state = NETREG_RELEASED;
6383 6368
6384 /* will free via device release */ 6369 /* will free via device release */
6385 put_device(&dev->dev); 6370 put_device(&dev->dev);
6386 } 6371 }
6387 EXPORT_SYMBOL(free_netdev); 6372 EXPORT_SYMBOL(free_netdev);
6388 6373
6389 /** 6374 /**
6390 * synchronize_net - Synchronize with packet receive processing 6375 * synchronize_net - Synchronize with packet receive processing
6391 * 6376 *
6392 * Wait for packets currently being received to be done. 6377 * Wait for packets currently being received to be done.
6393 * Does not block later packets from starting. 6378 * Does not block later packets from starting.
6394 */ 6379 */
6395 void synchronize_net(void) 6380 void synchronize_net(void)
6396 { 6381 {
6397 might_sleep(); 6382 might_sleep();
6398 if (rtnl_is_locked()) 6383 if (rtnl_is_locked())
6399 synchronize_rcu_expedited(); 6384 synchronize_rcu_expedited();
6400 else 6385 else
6401 synchronize_rcu(); 6386 synchronize_rcu();
6402 } 6387 }
6403 EXPORT_SYMBOL(synchronize_net); 6388 EXPORT_SYMBOL(synchronize_net);
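/* Illustrative sketch of the usual pattern around synchronize_net():
 * unpublish an RCU-protected pointer, wait for in-flight receive paths to
 * finish, then free. struct my_cfg, my_cfg_ptr and my_remove_cfg() are
 * hypothetical.
 */
struct my_cfg {
	int id;
};

static struct my_cfg __rcu *my_cfg_ptr;

static void my_remove_cfg(void)
{
	struct my_cfg *old = rcu_dereference_protected(my_cfg_ptr, 1);

	RCU_INIT_POINTER(my_cfg_ptr, NULL);
	synchronize_net();		/* packet-processing RCU readers are done */
	kfree(old);
}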
6404 6389
6405 /** 6390 /**
6406 * unregister_netdevice_queue - remove device from the kernel 6391 * unregister_netdevice_queue - remove device from the kernel
6407 * @dev: device 6392 * @dev: device
6408 * @head: list 6393 * @head: list
6409 * 6394 *
6410 * This function shuts down a device interface and removes it 6395 * This function shuts down a device interface and removes it
6411 * from the kernel tables. 6396 * from the kernel tables.
6412 * If head is not NULL, the device is queued to be unregistered later. 6397 * If head is not NULL, the device is queued to be unregistered later.
6413 * 6398 *
6414 * Callers must hold the rtnl semaphore. You may want 6399 * Callers must hold the rtnl semaphore. You may want
6415 * unregister_netdev() instead of this. 6400 * unregister_netdev() instead of this.
6416 */ 6401 */
6417 6402
6418 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head) 6403 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
6419 { 6404 {
6420 ASSERT_RTNL(); 6405 ASSERT_RTNL();
6421 6406
6422 if (head) { 6407 if (head) {
6423 list_move_tail(&dev->unreg_list, head); 6408 list_move_tail(&dev->unreg_list, head);
6424 } else { 6409 } else {
6425 rollback_registered(dev); 6410 rollback_registered(dev);
6426 /* Finish processing unregister after unlock */ 6411 /* Finish processing unregister after unlock */
6427 net_set_todo(dev); 6412 net_set_todo(dev);
6428 } 6413 }
6429 } 6414 }
6430 EXPORT_SYMBOL(unregister_netdevice_queue); 6415 EXPORT_SYMBOL(unregister_netdevice_queue);
6431 6416
6432 /** 6417 /**
6433 * unregister_netdevice_many - unregister many devices 6418 * unregister_netdevice_many - unregister many devices
6434 * @head: list of devices 6419 * @head: list of devices
6435 */ 6420 */
6436 void unregister_netdevice_many(struct list_head *head) 6421 void unregister_netdevice_many(struct list_head *head)
6437 { 6422 {
6438 struct net_device *dev; 6423 struct net_device *dev;
6439 6424
6440 if (!list_empty(head)) { 6425 if (!list_empty(head)) {
6441 rollback_registered_many(head); 6426 rollback_registered_many(head);
6442 list_for_each_entry(dev, head, unreg_list) 6427 list_for_each_entry(dev, head, unreg_list)
6443 net_set_todo(dev); 6428 net_set_todo(dev);
6444 } 6429 }
6445 } 6430 }
6446 EXPORT_SYMBOL(unregister_netdevice_many); 6431 EXPORT_SYMBOL(unregister_netdevice_many);
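/* Illustrative sketch: tearing several devices down in one RTNL/RCU cycle by
 * queueing each with unregister_netdevice_queue() and flushing the list with
 * unregister_netdevice_many(), the batching these two helpers provide.
 * my_destroy_all() is hypothetical.
 */
static void my_destroy_all(struct net_device *devs[], int n)
{
	LIST_HEAD(kill_list);
	int i;

	rtnl_lock();
	for (i = 0; i < n; i++)
		unregister_netdevice_queue(devs[i], &kill_list);
	unregister_netdevice_many(&kill_list);
	rtnl_unlock();
}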
6447 6432
6448 /** 6433 /**
6449 * unregister_netdev - remove device from the kernel 6434 * unregister_netdev - remove device from the kernel
6450 * @dev: device 6435 * @dev: device
6451 * 6436 *
6452 * This function shuts down a device interface and removes it 6437 * This function shuts down a device interface and removes it
6453 * from the kernel tables. 6438 * from the kernel tables.
6454 * 6439 *
6455 * This is just a wrapper for unregister_netdevice that takes 6440 * This is just a wrapper for unregister_netdevice that takes
6456 * the rtnl semaphore. In general you want to use this and not 6441 * the rtnl semaphore. In general you want to use this and not
6457 * unregister_netdevice. 6442 * unregister_netdevice.
6458 */ 6443 */
6459 void unregister_netdev(struct net_device *dev) 6444 void unregister_netdev(struct net_device *dev)
6460 { 6445 {
6461 rtnl_lock(); 6446 rtnl_lock();
6462 unregister_netdevice(dev); 6447 unregister_netdevice(dev);
6463 rtnl_unlock(); 6448 rtnl_unlock();
6464 } 6449 }
6465 EXPORT_SYMBOL(unregister_netdev); 6450 EXPORT_SYMBOL(unregister_netdev);
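/* Illustrative teardown sketch (hypothetical my_remove()): unregister first,
 * then free_netdev() once nothing can reach the device any more.
 */
static void my_remove(struct net_device *dev)
{
	unregister_netdev(dev);		/* takes and releases RTNL itself */
	free_netdev(dev);
}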
6466 6451
6467 /** 6452 /**
6468 * dev_change_net_namespace - move device to a different network namespace 6453 * dev_change_net_namespace - move device to a different network namespace
6469 * @dev: device 6454 * @dev: device
6470 * @net: network namespace 6455 * @net: network namespace
6471 * @pat: If not NULL, name pattern to try if the current device name 6456 * @pat: If not NULL, name pattern to try if the current device name
6472 * is already taken in the destination network namespace. 6457 * is already taken in the destination network namespace.
6473 * 6458 *
6474 * This function shuts down a device interface and moves it 6459 * This function shuts down a device interface and moves it
6475 * to a new network namespace. On success 0 is returned, on 6460 * to a new network namespace. On success 0 is returned, on
6476 * a failure a negative errno code is returned. 6461 * a failure a negative errno code is returned.
6477 * 6462 *
6478 * Callers must hold the rtnl semaphore. 6463 * Callers must hold the rtnl semaphore.
6479 */ 6464 */
6480 6465
6481 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) 6466 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
6482 { 6467 {
6483 int err; 6468 int err;
6484 6469
6485 ASSERT_RTNL(); 6470 ASSERT_RTNL();
6486 6471
6487 /* Don't allow namespace local devices to be moved. */ 6472 /* Don't allow namespace local devices to be moved. */
6488 err = -EINVAL; 6473 err = -EINVAL;
6489 if (dev->features & NETIF_F_NETNS_LOCAL) 6474 if (dev->features & NETIF_F_NETNS_LOCAL)
6490 goto out; 6475 goto out;
6491 6476
6492 /* Ensure the device has been registered */ 6477 /* Ensure the device has been registered */
6493 if (dev->reg_state != NETREG_REGISTERED) 6478 if (dev->reg_state != NETREG_REGISTERED)
6494 goto out; 6479 goto out;
6495 6480
6496 /* Get out if there is nothing to do */ 6481 /* Get out if there is nothing to do */
6497 err = 0; 6482 err = 0;
6498 if (net_eq(dev_net(dev), net)) 6483 if (net_eq(dev_net(dev), net))
6499 goto out; 6484 goto out;
6500 6485
6501 /* Pick the destination device name, and ensure 6486 /* Pick the destination device name, and ensure
6502 * we can use it in the destination network namespace. 6487 * we can use it in the destination network namespace.
6503 */ 6488 */
6504 err = -EEXIST; 6489 err = -EEXIST;
6505 if (__dev_get_by_name(net, dev->name)) { 6490 if (__dev_get_by_name(net, dev->name)) {
6506 /* We get here if we can't use the current device name */ 6491 /* We get here if we can't use the current device name */
6507 if (!pat) 6492 if (!pat)
6508 goto out; 6493 goto out;
6509 if (dev_get_valid_name(net, dev, pat) < 0) 6494 if (dev_get_valid_name(net, dev, pat) < 0)
6510 goto out; 6495 goto out;
6511 } 6496 }
6512 6497
6513 /* 6498 /*
6514 * And now a mini version of register_netdevice and unregister_netdevice. 6499 * And now a mini version of register_netdevice and unregister_netdevice.
6515 */ 6500 */
6516 6501
6517 /* If device is running close it first. */ 6502 /* If device is running close it first. */
6518 dev_close(dev); 6503 dev_close(dev);
6519 6504
6520 /* And unlink it from device chain */ 6505 /* And unlink it from device chain */
6521 err = -ENODEV; 6506 err = -ENODEV;
6522 unlist_netdevice(dev); 6507 unlist_netdevice(dev);
6523 6508
6524 synchronize_net(); 6509 synchronize_net();
6525 6510
6526 /* Shutdown queueing discipline. */ 6511 /* Shutdown queueing discipline. */
6527 dev_shutdown(dev); 6512 dev_shutdown(dev);
6528 6513
6529 /* Notify protocols that we are about to destroy 6514 /* Notify protocols that we are about to destroy
6530 this device. They should clean all the things. 6515 this device. They should clean all the things.
6531 6516
6532 Note that dev->reg_state stays at NETREG_REGISTERED. 6517 Note that dev->reg_state stays at NETREG_REGISTERED.
6533 This is wanted because this way 8021q and macvlan know 6518 This is wanted because this way 8021q and macvlan know
6534 the device is just moving and can keep their slaves up. 6519 the device is just moving and can keep their slaves up.
6535 */ 6520 */
6536 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 6521 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
6537 rcu_barrier(); 6522 rcu_barrier();
6538 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); 6523 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
6539 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL); 6524 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
6540 6525
6541 /* 6526 /*
6542 * Flush the unicast and multicast chains 6527 * Flush the unicast and multicast chains
6543 */ 6528 */
6544 dev_uc_flush(dev); 6529 dev_uc_flush(dev);
6545 dev_mc_flush(dev); 6530 dev_mc_flush(dev);
6546 6531
6547 /* Send a netdev-removed uevent to the old namespace */ 6532 /* Send a netdev-removed uevent to the old namespace */
6548 kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE); 6533 kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE);
6549 6534
6550 /* Actually switch the network namespace */ 6535 /* Actually switch the network namespace */
6551 dev_net_set(dev, net); 6536 dev_net_set(dev, net);
6552 6537
6553 /* If there is an ifindex conflict assign a new one */ 6538 /* If there is an ifindex conflict assign a new one */
6554 if (__dev_get_by_index(net, dev->ifindex)) { 6539 if (__dev_get_by_index(net, dev->ifindex)) {
6555 int iflink = (dev->iflink == dev->ifindex); 6540 int iflink = (dev->iflink == dev->ifindex);
6556 dev->ifindex = dev_new_index(net); 6541 dev->ifindex = dev_new_index(net);
6557 if (iflink) 6542 if (iflink)
6558 dev->iflink = dev->ifindex; 6543 dev->iflink = dev->ifindex;
6559 } 6544 }
6560 6545
6561 /* Send a netdev-add uevent to the new namespace */ 6546 /* Send a netdev-add uevent to the new namespace */
6562 kobject_uevent(&dev->dev.kobj, KOBJ_ADD); 6547 kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
6563 6548
6564 /* Fixup kobjects */ 6549 /* Fixup kobjects */
6565 err = device_rename(&dev->dev, dev->name); 6550 err = device_rename(&dev->dev, dev->name);
6566 WARN_ON(err); 6551 WARN_ON(err);
6567 6552
6568 /* Add the device back in the hashes */ 6553 /* Add the device back in the hashes */
6569 list_netdevice(dev); 6554 list_netdevice(dev);
6570 6555
6571 /* Notify protocols that a new device appeared. */ 6556 /* Notify protocols that a new device appeared. */
6572 call_netdevice_notifiers(NETDEV_REGISTER, dev); 6557 call_netdevice_notifiers(NETDEV_REGISTER, dev);
6573 6558
6574 /* 6559 /*
6575 * Prevent userspace races by waiting until the network 6560 * Prevent userspace races by waiting until the network
6576 * device is fully setup before sending notifications. 6561 * device is fully setup before sending notifications.
6577 */ 6562 */
6578 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL); 6563 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
6579 6564
6580 synchronize_net(); 6565 synchronize_net();
6581 err = 0; 6566 err = 0;
6582 out: 6567 out:
6583 return err; 6568 return err;
6584 } 6569 }
6585 EXPORT_SYMBOL_GPL(dev_change_net_namespace); 6570 EXPORT_SYMBOL_GPL(dev_change_net_namespace);
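/* Illustrative sketch (hypothetical my_move_dev()): moving a device into
 * another namespace with "dev%d" as the fallback name pattern, the same
 * pattern default_device_exit() below uses when pushing devices back to
 * init_net.
 */
static int my_move_dev(struct net_device *dev, struct net *target)
{
	int err;

	rtnl_lock();
	err = dev_change_net_namespace(dev, target, "dev%d");
	rtnl_unlock();
	return err;
}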
6586 6571
6587 static int dev_cpu_callback(struct notifier_block *nfb, 6572 static int dev_cpu_callback(struct notifier_block *nfb,
6588 unsigned long action, 6573 unsigned long action,
6589 void *ocpu) 6574 void *ocpu)
6590 { 6575 {
6591 struct sk_buff **list_skb; 6576 struct sk_buff **list_skb;
6592 struct sk_buff *skb; 6577 struct sk_buff *skb;
6593 unsigned int cpu, oldcpu = (unsigned long)ocpu; 6578 unsigned int cpu, oldcpu = (unsigned long)ocpu;
6594 struct softnet_data *sd, *oldsd; 6579 struct softnet_data *sd, *oldsd;
6595 6580
6596 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) 6581 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
6597 return NOTIFY_OK; 6582 return NOTIFY_OK;
6598 6583
6599 local_irq_disable(); 6584 local_irq_disable();
6600 cpu = smp_processor_id(); 6585 cpu = smp_processor_id();
6601 sd = &per_cpu(softnet_data, cpu); 6586 sd = &per_cpu(softnet_data, cpu);
6602 oldsd = &per_cpu(softnet_data, oldcpu); 6587 oldsd = &per_cpu(softnet_data, oldcpu);
6603 6588
6604 /* Find end of our completion_queue. */ 6589 /* Find end of our completion_queue. */
6605 list_skb = &sd->completion_queue; 6590 list_skb = &sd->completion_queue;
6606 while (*list_skb) 6591 while (*list_skb)
6607 list_skb = &(*list_skb)->next; 6592 list_skb = &(*list_skb)->next;
6608 /* Append completion queue from offline CPU. */ 6593 /* Append completion queue from offline CPU. */
6609 *list_skb = oldsd->completion_queue; 6594 *list_skb = oldsd->completion_queue;
6610 oldsd->completion_queue = NULL; 6595 oldsd->completion_queue = NULL;
6611 6596
6612 /* Append output queue from offline CPU. */ 6597 /* Append output queue from offline CPU. */
6613 if (oldsd->output_queue) { 6598 if (oldsd->output_queue) {
6614 *sd->output_queue_tailp = oldsd->output_queue; 6599 *sd->output_queue_tailp = oldsd->output_queue;
6615 sd->output_queue_tailp = oldsd->output_queue_tailp; 6600 sd->output_queue_tailp = oldsd->output_queue_tailp;
6616 oldsd->output_queue = NULL; 6601 oldsd->output_queue = NULL;
6617 oldsd->output_queue_tailp = &oldsd->output_queue; 6602 oldsd->output_queue_tailp = &oldsd->output_queue;
6618 } 6603 }
6619 /* Append NAPI poll list from offline CPU. */ 6604 /* Append NAPI poll list from offline CPU. */
6620 if (!list_empty(&oldsd->poll_list)) { 6605 if (!list_empty(&oldsd->poll_list)) {
6621 list_splice_init(&oldsd->poll_list, &sd->poll_list); 6606 list_splice_init(&oldsd->poll_list, &sd->poll_list);
6622 raise_softirq_irqoff(NET_RX_SOFTIRQ); 6607 raise_softirq_irqoff(NET_RX_SOFTIRQ);
6623 } 6608 }
6624 6609
6625 raise_softirq_irqoff(NET_TX_SOFTIRQ); 6610 raise_softirq_irqoff(NET_TX_SOFTIRQ);
6626 local_irq_enable(); 6611 local_irq_enable();
6627 6612
6628 /* Process offline CPU's input_pkt_queue */ 6613 /* Process offline CPU's input_pkt_queue */
6629 while ((skb = __skb_dequeue(&oldsd->process_queue))) { 6614 while ((skb = __skb_dequeue(&oldsd->process_queue))) {
6630 netif_rx(skb); 6615 netif_rx(skb);
6631 input_queue_head_incr(oldsd); 6616 input_queue_head_incr(oldsd);
6632 } 6617 }
6633 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { 6618 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
6634 netif_rx(skb); 6619 netif_rx(skb);
6635 input_queue_head_incr(oldsd); 6620 input_queue_head_incr(oldsd);
6636 } 6621 }
6637 6622
6638 return NOTIFY_OK; 6623 return NOTIFY_OK;
6639 } 6624 }
6640 6625
6641 6626
6642 /** 6627 /**
6643 * netdev_increment_features - increment feature set by one 6628 * netdev_increment_features - increment feature set by one
6644 * @all: current feature set 6629 * @all: current feature set
6645 * @one: new feature set 6630 * @one: new feature set
6646 * @mask: mask feature set 6631 * @mask: mask feature set
6647 * 6632 *
6648 * Computes a new feature set after adding a device with feature set 6633 * Computes a new feature set after adding a device with feature set
6649 * @one to the master device with current feature set @all. Will not 6634 * @one to the master device with current feature set @all. Will not
6650 * enable anything that is off in @mask. Returns the new feature set. 6635 * enable anything that is off in @mask. Returns the new feature set.
6651 */ 6636 */
6652 netdev_features_t netdev_increment_features(netdev_features_t all, 6637 netdev_features_t netdev_increment_features(netdev_features_t all,
6653 netdev_features_t one, netdev_features_t mask) 6638 netdev_features_t one, netdev_features_t mask)
6654 { 6639 {
6655 if (mask & NETIF_F_GEN_CSUM) 6640 if (mask & NETIF_F_GEN_CSUM)
6656 mask |= NETIF_F_ALL_CSUM; 6641 mask |= NETIF_F_ALL_CSUM;
6657 mask |= NETIF_F_VLAN_CHALLENGED; 6642 mask |= NETIF_F_VLAN_CHALLENGED;
6658 6643
6659 all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask; 6644 all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
6660 all &= one | ~NETIF_F_ALL_FOR_ALL; 6645 all &= one | ~NETIF_F_ALL_FOR_ALL;
6661 6646
6662 /* If one device supports hw checksumming, set for all. */ 6647 /* If one device supports hw checksumming, set for all. */
6663 if (all & NETIF_F_GEN_CSUM) 6648 if (all & NETIF_F_GEN_CSUM)
6664 all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); 6649 all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
6665 6650
6666 return all; 6651 return all;
6667 } 6652 }
6668 EXPORT_SYMBOL(netdev_increment_features); 6653 EXPORT_SYMBOL(netdev_increment_features);
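/* Illustrative sketch of the aggregation loop the kernel-doc above has in
 * mind (bonding/team-like masters): fold each slave's feature set into the
 * master's. struct my_slave, MY_MASTER_FEATURES and my_compute_features()
 * are hypothetical.
 */
#define MY_MASTER_FEATURES (NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_ALL_CSUM)

struct my_slave {
	struct list_head list;
	struct net_device *dev;
};

static netdev_features_t my_compute_features(struct list_head *slave_list)
{
	netdev_features_t features = MY_MASTER_FEATURES & NETIF_F_ALL_FOR_ALL;
	struct my_slave *s;

	list_for_each_entry(s, slave_list, list)
		features = netdev_increment_features(features,
						     s->dev->features,
						     MY_MASTER_FEATURES);
	return features;
}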
6669 6654
6670 static struct hlist_head * __net_init netdev_create_hash(void) 6655 static struct hlist_head * __net_init netdev_create_hash(void)
6671 { 6656 {
6672 int i; 6657 int i;
6673 struct hlist_head *hash; 6658 struct hlist_head *hash;
6674 6659
6675 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL); 6660 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
6676 if (hash != NULL) 6661 if (hash != NULL)
6677 for (i = 0; i < NETDEV_HASHENTRIES; i++) 6662 for (i = 0; i < NETDEV_HASHENTRIES; i++)
6678 INIT_HLIST_HEAD(&hash[i]); 6663 INIT_HLIST_HEAD(&hash[i]);
6679 6664
6680 return hash; 6665 return hash;
6681 } 6666 }
6682 6667
6683 /* Initialize per network namespace state */ 6668 /* Initialize per network namespace state */
6684 static int __net_init netdev_init(struct net *net) 6669 static int __net_init netdev_init(struct net *net)
6685 { 6670 {
6686 if (net != &init_net) 6671 if (net != &init_net)
6687 INIT_LIST_HEAD(&net->dev_base_head); 6672 INIT_LIST_HEAD(&net->dev_base_head);
6688 6673
6689 net->dev_name_head = netdev_create_hash(); 6674 net->dev_name_head = netdev_create_hash();
6690 if (net->dev_name_head == NULL) 6675 if (net->dev_name_head == NULL)
6691 goto err_name; 6676 goto err_name;
6692 6677
6693 net->dev_index_head = netdev_create_hash(); 6678 net->dev_index_head = netdev_create_hash();
6694 if (net->dev_index_head == NULL) 6679 if (net->dev_index_head == NULL)
6695 goto err_idx; 6680 goto err_idx;
6696 6681
6697 return 0; 6682 return 0;
6698 6683
6699 err_idx: 6684 err_idx:
6700 kfree(net->dev_name_head); 6685 kfree(net->dev_name_head);
6701 err_name: 6686 err_name:
6702 return -ENOMEM; 6687 return -ENOMEM;
6703 } 6688 }
6704 6689
6705 /** 6690 /**
6706 * netdev_drivername - network driver for the device 6691 * netdev_drivername - network driver for the device
6707 * @dev: network device 6692 * @dev: network device
6708 * 6693 *
6709 * Determine network driver for device. 6694 * Determine network driver for device.
6710 */ 6695 */
6711 const char *netdev_drivername(const struct net_device *dev) 6696 const char *netdev_drivername(const struct net_device *dev)
6712 { 6697 {
6713 const struct device_driver *driver; 6698 const struct device_driver *driver;
6714 const struct device *parent; 6699 const struct device *parent;
6715 const char *empty = ""; 6700 const char *empty = "";
6716 6701
6717 parent = dev->dev.parent; 6702 parent = dev->dev.parent;
6718 if (!parent) 6703 if (!parent)
6719 return empty; 6704 return empty;
6720 6705
6721 driver = parent->driver; 6706 driver = parent->driver;
6722 if (driver && driver->name) 6707 if (driver && driver->name)
6723 return driver->name; 6708 return driver->name;
6724 return empty; 6709 return empty;
6725 } 6710 }
6726 6711
6727 static int __netdev_printk(const char *level, const struct net_device *dev, 6712 static int __netdev_printk(const char *level, const struct net_device *dev,
6728 struct va_format *vaf) 6713 struct va_format *vaf)
6729 { 6714 {
6730 int r; 6715 int r;
6731 6716
6732 if (dev && dev->dev.parent) { 6717 if (dev && dev->dev.parent) {
6733 r = dev_printk_emit(level[1] - '0', 6718 r = dev_printk_emit(level[1] - '0',
6734 dev->dev.parent, 6719 dev->dev.parent,
6735 "%s %s %s: %pV", 6720 "%s %s %s: %pV",
6736 dev_driver_string(dev->dev.parent), 6721 dev_driver_string(dev->dev.parent),
6737 dev_name(dev->dev.parent), 6722 dev_name(dev->dev.parent),
6738 netdev_name(dev), vaf); 6723 netdev_name(dev), vaf);
6739 } else if (dev) { 6724 } else if (dev) {
6740 r = printk("%s%s: %pV", level, netdev_name(dev), vaf); 6725 r = printk("%s%s: %pV", level, netdev_name(dev), vaf);
6741 } else { 6726 } else {
6742 r = printk("%s(NULL net_device): %pV", level, vaf); 6727 r = printk("%s(NULL net_device): %pV", level, vaf);
6743 } 6728 }
6744 6729
6745 return r; 6730 return r;
6746 } 6731 }
6747 6732
6748 int netdev_printk(const char *level, const struct net_device *dev, 6733 int netdev_printk(const char *level, const struct net_device *dev,
6749 const char *format, ...) 6734 const char *format, ...)
6750 { 6735 {
6751 struct va_format vaf; 6736 struct va_format vaf;
6752 va_list args; 6737 va_list args;
6753 int r; 6738 int r;
6754 6739
6755 va_start(args, format); 6740 va_start(args, format);
6756 6741
6757 vaf.fmt = format; 6742 vaf.fmt = format;
6758 vaf.va = &args; 6743 vaf.va = &args;
6759 6744
6760 r = __netdev_printk(level, dev, &vaf); 6745 r = __netdev_printk(level, dev, &vaf);
6761 6746
6762 va_end(args); 6747 va_end(args);
6763 6748
6764 return r; 6749 return r;
6765 } 6750 }
6766 EXPORT_SYMBOL(netdev_printk); 6751 EXPORT_SYMBOL(netdev_printk);
6767 6752
6768 #define define_netdev_printk_level(func, level) \ 6753 #define define_netdev_printk_level(func, level) \
6769 int func(const struct net_device *dev, const char *fmt, ...) \ 6754 int func(const struct net_device *dev, const char *fmt, ...) \
6770 { \ 6755 { \
6771 int r; \ 6756 int r; \
6772 struct va_format vaf; \ 6757 struct va_format vaf; \
6773 va_list args; \ 6758 va_list args; \
6774 \ 6759 \
6775 va_start(args, fmt); \ 6760 va_start(args, fmt); \
6776 \ 6761 \
6777 vaf.fmt = fmt; \ 6762 vaf.fmt = fmt; \
6778 vaf.va = &args; \ 6763 vaf.va = &args; \
6779 \ 6764 \
6780 r = __netdev_printk(level, dev, &vaf); \ 6765 r = __netdev_printk(level, dev, &vaf); \
6781 \ 6766 \
6782 va_end(args); \ 6767 va_end(args); \
6783 \ 6768 \
6784 return r; \ 6769 return r; \
6785 } \ 6770 } \
6786 EXPORT_SYMBOL(func); 6771 EXPORT_SYMBOL(func);
6787 6772
6788 define_netdev_printk_level(netdev_emerg, KERN_EMERG); 6773 define_netdev_printk_level(netdev_emerg, KERN_EMERG);
6789 define_netdev_printk_level(netdev_alert, KERN_ALERT); 6774 define_netdev_printk_level(netdev_alert, KERN_ALERT);
6790 define_netdev_printk_level(netdev_crit, KERN_CRIT); 6775 define_netdev_printk_level(netdev_crit, KERN_CRIT);
6791 define_netdev_printk_level(netdev_err, KERN_ERR); 6776 define_netdev_printk_level(netdev_err, KERN_ERR);
6792 define_netdev_printk_level(netdev_warn, KERN_WARNING); 6777 define_netdev_printk_level(netdev_warn, KERN_WARNING);
6793 define_netdev_printk_level(netdev_notice, KERN_NOTICE); 6778 define_netdev_printk_level(netdev_notice, KERN_NOTICE);
6794 define_netdev_printk_level(netdev_info, KERN_INFO); 6779 define_netdev_printk_level(netdev_info, KERN_INFO);
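
The seven helpers generated above are what drivers actually call. A minimal usage sketch (hypothetical foo_open handler, shown for illustration only, not part of this commit):

	static int foo_open(struct net_device *dev)
	{
		if (!netif_carrier_ok(dev))
			netdev_warn(dev, "no carrier yet\n");

		/* with a parent device this prints
		 * "<driver> <bus id> <ifname>: interface opened"
		 */
		netdev_info(dev, "interface opened\n");
		return 0;
	}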
6795 6780
6796 static void __net_exit netdev_exit(struct net *net) 6781 static void __net_exit netdev_exit(struct net *net)
6797 { 6782 {
6798 kfree(net->dev_name_head); 6783 kfree(net->dev_name_head);
6799 kfree(net->dev_index_head); 6784 kfree(net->dev_index_head);
6800 } 6785 }
6801 6786
6802 static struct pernet_operations __net_initdata netdev_net_ops = { 6787 static struct pernet_operations __net_initdata netdev_net_ops = {
6803 .init = netdev_init, 6788 .init = netdev_init,
6804 .exit = netdev_exit, 6789 .exit = netdev_exit,
6805 }; 6790 };
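
For reference, any other subsystem plugs into the same per-namespace init/exit machinery in the same way as netdev_net_ops above; a minimal, self-contained sketch (hypothetical foo_net_ops module, not part of this file):

	#include <linux/module.h>
	#include <net/net_namespace.h>

	static int __net_init foo_net_init(struct net *net)
	{
		/* allocate per-namespace state here */
		return 0;
	}

	static void __net_exit foo_net_exit(struct net *net)
	{
		/* release per-namespace state here */
	}

	static struct pernet_operations foo_net_ops = {
		.init = foo_net_init,
		.exit = foo_net_exit,
	};

	static int __init foo_init(void)
	{
		return register_pernet_subsys(&foo_net_ops);
	}

	static void __exit foo_exit(void)
	{
		unregister_pernet_subsys(&foo_net_ops);
	}

	module_init(foo_init);
	module_exit(foo_exit);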
6806 6791
6807 static void __net_exit default_device_exit(struct net *net) 6792 static void __net_exit default_device_exit(struct net *net)
6808 { 6793 {
6809 struct net_device *dev, *aux; 6794 struct net_device *dev, *aux;
6810 /* 6795 /*
6811 * Push all migratable network devices back to the 6796 * Push all migratable network devices back to the
6812 * initial network namespace 6797 * initial network namespace
6813 */ 6798 */
6814 rtnl_lock(); 6799 rtnl_lock();
6815 for_each_netdev_safe(net, dev, aux) { 6800 for_each_netdev_safe(net, dev, aux) {
6816 int err; 6801 int err;
6817 char fb_name[IFNAMSIZ]; 6802 char fb_name[IFNAMSIZ];
6818 6803
6819 /* Ignore unmoveable devices (i.e. loopback) */ 6804 /* Ignore unmoveable devices (i.e. loopback) */
6820 if (dev->features & NETIF_F_NETNS_LOCAL) 6805 if (dev->features & NETIF_F_NETNS_LOCAL)
6821 continue; 6806 continue;
6822 6807
6823 /* Leave virtual devices for the generic cleanup */ 6808 /* Leave virtual devices for the generic cleanup */
6824 if (dev->rtnl_link_ops) 6809 if (dev->rtnl_link_ops)
6825 continue; 6810 continue;
6826 6811
6827 /* Push remaining network devices to init_net */ 6812 /* Push remaining network devices to init_net */
6828 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); 6813 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
6829 err = dev_change_net_namespace(dev, &init_net, fb_name); 6814 err = dev_change_net_namespace(dev, &init_net, fb_name);
6830 if (err) { 6815 if (err) {
6831 pr_emerg("%s: failed to move %s to init_net: %d\n", 6816 pr_emerg("%s: failed to move %s to init_net: %d\n",
6832 __func__, dev->name, err); 6817 __func__, dev->name, err);
6833 BUG(); 6818 BUG();
6834 } 6819 }
6835 } 6820 }
6836 rtnl_unlock(); 6821 rtnl_unlock();
6837 } 6822 }
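
The NETIF_F_NETNS_LOCAL test above is what keeps loopback-like devices out of this migration. A device opts in from its setup routine, roughly like this (hypothetical foo_setup, illustrative only):

	#include <linux/etherdevice.h>

	static void foo_setup(struct net_device *dev)
	{
		ether_setup(dev);
		/* never allow this device to change network namespace */
		dev->features |= NETIF_F_NETNS_LOCAL;
	}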
6838 6823
6839 static void __net_exit rtnl_lock_unregistering(struct list_head *net_list) 6824 static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
6840 { 6825 {
6841 /* Return with the rtnl_lock held when there are no network 6826 /* Return with the rtnl_lock held when there are no network
6842 * devices unregistering in any network namespace in net_list. 6827 * devices unregistering in any network namespace in net_list.
6843 */ 6828 */
6844 struct net *net; 6829 struct net *net;
6845 bool unregistering; 6830 bool unregistering;
6846 DEFINE_WAIT(wait); 6831 DEFINE_WAIT(wait);
6847 6832
6848 for (;;) { 6833 for (;;) {
6849 prepare_to_wait(&netdev_unregistering_wq, &wait, 6834 prepare_to_wait(&netdev_unregistering_wq, &wait,
6850 TASK_UNINTERRUPTIBLE); 6835 TASK_UNINTERRUPTIBLE);
6851 unregistering = false; 6836 unregistering = false;
6852 rtnl_lock(); 6837 rtnl_lock();
6853 list_for_each_entry(net, net_list, exit_list) { 6838 list_for_each_entry(net, net_list, exit_list) {
6854 if (net->dev_unreg_count > 0) { 6839 if (net->dev_unreg_count > 0) {
6855 unregistering = true; 6840 unregistering = true;
6856 break; 6841 break;
6857 } 6842 }
6858 } 6843 }
6859 if (!unregistering) 6844 if (!unregistering)
6860 break; 6845 break;
6861 __rtnl_unlock(); 6846 __rtnl_unlock();
6862 schedule(); 6847 schedule();
6863 } 6848 }
6864 finish_wait(&netdev_unregistering_wq, &wait); 6849 finish_wait(&netdev_unregistering_wq, &wait);
6865 } 6850 }
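
rtnl_lock_unregistering() is the standard prepare_to_wait()/schedule()/finish_wait() sleep loop, with the extra twist that it drops the rtnl lock before sleeping. The bare idiom, stripped of the rtnl handling, looks roughly like this (hypothetical wait_for_zero helper, for illustration only):

	static void wait_for_zero(wait_queue_head_t *wq, atomic_t *pending)
	{
		DEFINE_WAIT(wait);

		for (;;) {
			prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
			if (!atomic_read(pending))
				break;
			schedule();
		}
		finish_wait(wq, &wait);
	}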
6866 6851
6867 static void __net_exit default_device_exit_batch(struct list_head *net_list) 6852 static void __net_exit default_device_exit_batch(struct list_head *net_list)
6868 { 6853 {
6869 /* At exit all network devices must be removed from a network 6854 /* At exit all network devices must be removed from a network
6870 * namespace. Do this in the reverse order of registration. 6855 * namespace. Do this in the reverse order of registration.
6871 * Do this across as many network namespaces as possible to 6856 * Do this across as many network namespaces as possible to
6872 * improve batching efficiency. 6857 * improve batching efficiency.
6873 */ 6858 */
6874 struct net_device *dev; 6859 struct net_device *dev;
6875 struct net *net; 6860 struct net *net;
6876 LIST_HEAD(dev_kill_list); 6861 LIST_HEAD(dev_kill_list);
6877 6862
6878 /* To prevent network device cleanup code from dereferencing 6863 /* To prevent network device cleanup code from dereferencing
6879 * loopback devices or network devices that have been freed, 6864 * loopback devices or network devices that have been freed,
6880 * wait here for all pending unregistrations to complete, 6865 * wait here for all pending unregistrations to complete,
6881 * before unregistering the loopback device and allowing the 6866 * before unregistering the loopback device and allowing the
6882 * network namespace to be freed. 6867 * network namespace to be freed.
6883 * 6868 *
6884 * The netdev todo list containing all network devices 6869 * The netdev todo list containing all network devices
6885 * unregistrations that happen in default_device_exit_batch 6870 * unregistrations that happen in default_device_exit_batch
6886 * will run in the rtnl_unlock() at the end of 6871 * will run in the rtnl_unlock() at the end of
6887 * default_device_exit_batch. 6872 * default_device_exit_batch.
6888 */ 6873 */
6889 rtnl_lock_unregistering(net_list); 6874 rtnl_lock_unregistering(net_list);
6890 list_for_each_entry(net, net_list, exit_list) { 6875 list_for_each_entry(net, net_list, exit_list) {
6891 for_each_netdev_reverse(net, dev) { 6876 for_each_netdev_reverse(net, dev) {
6892 if (dev->rtnl_link_ops) 6877 if (dev->rtnl_link_ops)
6893 dev->rtnl_link_ops->dellink(dev, &dev_kill_list); 6878 dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
6894 else 6879 else
6895 unregister_netdevice_queue(dev, &dev_kill_list); 6880 unregister_netdevice_queue(dev, &dev_kill_list);
6896 } 6881 }
6897 } 6882 }
6898 unregister_netdevice_many(&dev_kill_list); 6883 unregister_netdevice_many(&dev_kill_list);
6899 list_del(&dev_kill_list); 6884 list_del(&dev_kill_list);
6900 rtnl_unlock(); 6885 rtnl_unlock();
6901 } 6886 }
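
The queue-then-flush batching used above is available to any caller that tears down several devices at once; a minimal sketch (hypothetical foo_destroy_all helper, not taken from this file):

	static void foo_destroy_all(struct net *net)
	{
		struct net_device *dev;
		LIST_HEAD(kill_list);

		rtnl_lock();
		for_each_netdev(net, dev)
			unregister_netdevice_queue(dev, &kill_list);
		unregister_netdevice_many(&kill_list);
		rtnl_unlock();
	}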
6902 6887
6903 static struct pernet_operations __net_initdata default_device_ops = { 6888 static struct pernet_operations __net_initdata default_device_ops = {
6904 .exit = default_device_exit, 6889 .exit = default_device_exit,
6905 .exit_batch = default_device_exit_batch, 6890 .exit_batch = default_device_exit_batch,
6906 }; 6891 };
6907 6892
6908 /* 6893 /*
6909 * Initialize the DEV module. At boot time this walks the device list and 6894 * Initialize the DEV module. At boot time this walks the device list and
6910 * unhooks any devices that fail to initialise (normally hardware not 6895 * unhooks any devices that fail to initialise (normally hardware not
6911 * present) and leaves us with a valid list of present and active devices. 6896 * present) and leaves us with a valid list of present and active devices.
6912 * 6897 *
6913 */ 6898 */
6914 6899
6915 /* 6900 /*
6916 * This is called single threaded during boot, so no need 6901 * This is called single threaded during boot, so no need
6917 * to take the rtnl semaphore. 6902 * to take the rtnl semaphore.
6918 */ 6903 */
6919 static int __init net_dev_init(void) 6904 static int __init net_dev_init(void)
6920 { 6905 {
6921 int i, rc = -ENOMEM; 6906 int i, rc = -ENOMEM;
6922 6907
6923 BUG_ON(!dev_boot_phase); 6908 BUG_ON(!dev_boot_phase);
6924 6909
6925 if (dev_proc_init()) 6910 if (dev_proc_init())
6926 goto out; 6911 goto out;
6927 6912
6928 if (netdev_kobject_init()) 6913 if (netdev_kobject_init())
6929 goto out; 6914 goto out;
6930 6915
6931 INIT_LIST_HEAD(&ptype_all); 6916 INIT_LIST_HEAD(&ptype_all);
6932 for (i = 0; i < PTYPE_HASH_SIZE; i++) 6917 for (i = 0; i < PTYPE_HASH_SIZE; i++)
6933 INIT_LIST_HEAD(&ptype_base[i]); 6918 INIT_LIST_HEAD(&ptype_base[i]);
6934 6919
6935 INIT_LIST_HEAD(&offload_base); 6920 INIT_LIST_HEAD(&offload_base);
6936 6921
6937 if (register_pernet_subsys(&netdev_net_ops)) 6922 if (register_pernet_subsys(&netdev_net_ops))
6938 goto out; 6923 goto out;
6939 6924
6940 /* 6925 /*
6941 * Initialise the packet receive queues. 6926 * Initialise the packet receive queues.
6942 */ 6927 */
6943 6928
6944 for_each_possible_cpu(i) { 6929 for_each_possible_cpu(i) {
6945 struct softnet_data *sd = &per_cpu(softnet_data, i); 6930 struct softnet_data *sd = &per_cpu(softnet_data, i);
6946 6931
6947 memset(sd, 0, sizeof(*sd)); 6932 memset(sd, 0, sizeof(*sd));
6948 skb_queue_head_init(&sd->input_pkt_queue); 6933 skb_queue_head_init(&sd->input_pkt_queue);
6949 skb_queue_head_init(&sd->process_queue); 6934 skb_queue_head_init(&sd->process_queue);
6950 sd->completion_queue = NULL; 6935 sd->completion_queue = NULL;
6951 INIT_LIST_HEAD(&sd->poll_list); 6936 INIT_LIST_HEAD(&sd->poll_list);
6952 sd->output_queue = NULL; 6937 sd->output_queue = NULL;
6953 sd->output_queue_tailp = &sd->output_queue; 6938 sd->output_queue_tailp = &sd->output_queue;
6954 #ifdef CONFIG_RPS 6939 #ifdef CONFIG_RPS
6955 sd->csd.func = rps_trigger_softirq; 6940 sd->csd.func = rps_trigger_softirq;
6956 sd->csd.info = sd; 6941 sd->csd.info = sd;
6957 sd->csd.flags = 0; 6942 sd->csd.flags = 0;
6958 sd->cpu = i; 6943 sd->cpu = i;
6959 #endif 6944 #endif
6960 6945
6961 sd->backlog.poll = process_backlog; 6946 sd->backlog.poll = process_backlog;
6962 sd->backlog.weight = weight_p; 6947 sd->backlog.weight = weight_p;
6963 sd->backlog.gro_list = NULL; 6948 sd->backlog.gro_list = NULL;
6964 sd->backlog.gro_count = 0; 6949 sd->backlog.gro_count = 0;
6965 6950
6966 #ifdef CONFIG_NET_FLOW_LIMIT 6951 #ifdef CONFIG_NET_FLOW_LIMIT
6967 sd->flow_limit = NULL; 6952 sd->flow_limit = NULL;
6968 #endif 6953 #endif
6969 } 6954 }
6970 6955
6971 dev_boot_phase = 0; 6956 dev_boot_phase = 0;
6972 6957
6973 /* The loopback device is special: if any other network device 6958 /* The loopback device is special: if any other network device
6974 * is present in a network namespace, the loopback device must 6959 * is present in a network namespace, the loopback device must
6975 * be present. Since we now dynamically allocate and free the 6960 * be present. Since we now dynamically allocate and free the
6976 * loopback device, ensure this invariant is maintained by 6961 * loopback device, ensure this invariant is maintained by
6977 * keeping the loopback device as the first device on the 6962 * keeping the loopback device as the first device on the
6978 * list of network devices: the loopback device is the first 6963 * list of network devices: the loopback device is the first
6979 * device that appears and the last network device that 6964 * device that appears and the last network device that
6980 * disappears. 6965 * disappears.
6981 */ 6966 */
6982 if (register_pernet_device(&loopback_net_ops)) 6967 if (register_pernet_device(&loopback_net_ops))
6983 goto out; 6968 goto out;
6984 6969
6985 if (register_pernet_device(&default_device_ops)) 6970 if (register_pernet_device(&default_device_ops))
6986 goto out; 6971 goto out;
6987 6972
6988 open_softirq(NET_TX_SOFTIRQ, net_tx_action); 6973 open_softirq(NET_TX_SOFTIRQ, net_tx_action);
6989 open_softirq(NET_RX_SOFTIRQ, net_rx_action); 6974 open_softirq(NET_RX_SOFTIRQ, net_rx_action);
6990 6975
6991 hotcpu_notifier(dev_cpu_callback, 0); 6976 hotcpu_notifier(dev_cpu_callback, 0);
6992 dst_init(); 6977 dst_init();
6993 rc = 0; 6978 rc = 0;
6994 out: 6979 out:
6995 return rc; 6980 return rc;
6996 } 6981 }
6997 6982
6998 subsys_initcall(net_dev_init); 6983 subsys_initcall(net_dev_init);
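
The per-CPU initialisation loop in net_dev_init() above follows the usual for_each_possible_cpu()/per_cpu() pattern; in generic form it looks roughly like this (hypothetical foo_count variable, illustrative only):

	DEFINE_PER_CPU(unsigned long, foo_count);

	static void foo_reset_counts(void)
	{
		int cpu;

		for_each_possible_cpu(cpu)
			per_cpu(foo_count, cpu) = 0;
	}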
6999 6984