Commit 12b0004d1d1e2a9aa667412d479041e403bcafae

Authored by Cong Wang
Committed by David S. Miller
1 parent 25060d8f3f

net: adjust skb_gso_segment() for calling in rx path

skb_gso_segment() is almost always called in the tx path,
except by openvswitch, which calls it on packets it has
received and is about to queue to user space. In this
special case, the ->ip_summed check inside skb_gso_segment()
no longer holds, because ->ip_summed has a different meaning
on the rx path.

This patch adjusts skb_gso_segment() so that we can at least
avoid such checksum warnings (a sketch of the approach
follows the sign-offs below).

Cc: Jesse Gross <jesse@nicira.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
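
A minimal sketch of the approach described above (the __skb_gso_segment()
name and the tx_path flag are assumptions drawn from the description; the
authoritative hunks are in the diff below): the existing entry point becomes
a thin wrapper around a helper that knows whether it is called on the tx or
rx path, so the ->ip_summed sanity check can be limited to tx-path callers.

	/* Sketch only -- kernel-style fragment, not the verbatim patch. */
	extern struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
						 netdev_features_t features,
						 bool tx_path);

	static inline struct sk_buff *skb_gso_segment(struct sk_buff *skb,
						      netdev_features_t features)
	{
		/* Existing tx-path callers keep today's behaviour. */
		return __skb_gso_segment(skb, features, true);
	}

An rx-path caller such as openvswitch would then pass tx_path = false, and
the core implementation would warn about an unexpected ->ip_summed value
only when tx_path is true.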

Showing 3 changed files with 26 additions and 8 deletions

include/linux/netdevice.h
1 /* 1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX 2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket 3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level. 4 * interface as the means of communication with the user level.
5 * 5 *
6 * Definitions for the Interfaces handler. 6 * Definitions for the Interfaces handler.
7 * 7 *
8 * Version: @(#)dev.h 1.0.10 08/12/93 8 * Version: @(#)dev.h 1.0.10 08/12/93
9 * 9 *
10 * Authors: Ross Biro 10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Corey Minyard <wf-rch!minyard@relay.EU.net> 12 * Corey Minyard <wf-rch!minyard@relay.EU.net>
13 * Donald J. Becker, <becker@cesdis.gsfc.nasa.gov> 13 * Donald J. Becker, <becker@cesdis.gsfc.nasa.gov>
14 * Alan Cox, <alan@lxorguk.ukuu.org.uk> 14 * Alan Cox, <alan@lxorguk.ukuu.org.uk>
15 * Bjorn Ekwall. <bj0rn@blox.se> 15 * Bjorn Ekwall. <bj0rn@blox.se>
16 * Pekka Riikonen <priikone@poseidon.pspt.fi> 16 * Pekka Riikonen <priikone@poseidon.pspt.fi>
17 * 17 *
18 * This program is free software; you can redistribute it and/or 18 * This program is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU General Public License 19 * modify it under the terms of the GNU General Public License
20 * as published by the Free Software Foundation; either version 20 * as published by the Free Software Foundation; either version
21 * 2 of the License, or (at your option) any later version. 21 * 2 of the License, or (at your option) any later version.
22 * 22 *
23 * Moved to /usr/include/linux for NET3 23 * Moved to /usr/include/linux for NET3
24 */ 24 */
25 #ifndef _LINUX_NETDEVICE_H 25 #ifndef _LINUX_NETDEVICE_H
26 #define _LINUX_NETDEVICE_H 26 #define _LINUX_NETDEVICE_H
27 27
28 #include <linux/pm_qos.h> 28 #include <linux/pm_qos.h>
29 #include <linux/timer.h> 29 #include <linux/timer.h>
30 #include <linux/bug.h> 30 #include <linux/bug.h>
31 #include <linux/delay.h> 31 #include <linux/delay.h>
32 #include <linux/atomic.h> 32 #include <linux/atomic.h>
33 #include <asm/cache.h> 33 #include <asm/cache.h>
34 #include <asm/byteorder.h> 34 #include <asm/byteorder.h>
35 35
36 #include <linux/percpu.h> 36 #include <linux/percpu.h>
37 #include <linux/rculist.h> 37 #include <linux/rculist.h>
38 #include <linux/dmaengine.h> 38 #include <linux/dmaengine.h>
39 #include <linux/workqueue.h> 39 #include <linux/workqueue.h>
40 #include <linux/dynamic_queue_limits.h> 40 #include <linux/dynamic_queue_limits.h>
41 41
42 #include <linux/ethtool.h> 42 #include <linux/ethtool.h>
43 #include <net/net_namespace.h> 43 #include <net/net_namespace.h>
44 #include <net/dsa.h> 44 #include <net/dsa.h>
45 #ifdef CONFIG_DCB 45 #ifdef CONFIG_DCB
46 #include <net/dcbnl.h> 46 #include <net/dcbnl.h>
47 #endif 47 #endif
48 #include <net/netprio_cgroup.h> 48 #include <net/netprio_cgroup.h>
49 49
50 #include <linux/netdev_features.h> 50 #include <linux/netdev_features.h>
51 #include <linux/neighbour.h> 51 #include <linux/neighbour.h>
52 #include <uapi/linux/netdevice.h> 52 #include <uapi/linux/netdevice.h>
53 53
54 struct netpoll_info; 54 struct netpoll_info;
55 struct device; 55 struct device;
56 struct phy_device; 56 struct phy_device;
57 /* 802.11 specific */ 57 /* 802.11 specific */
58 struct wireless_dev; 58 struct wireless_dev;
59 /* source back-compat hooks */ 59 /* source back-compat hooks */
60 #define SET_ETHTOOL_OPS(netdev,ops) \ 60 #define SET_ETHTOOL_OPS(netdev,ops) \
61 ( (netdev)->ethtool_ops = (ops) ) 61 ( (netdev)->ethtool_ops = (ops) )
62 62
63 extern void netdev_set_default_ethtool_ops(struct net_device *dev, 63 extern void netdev_set_default_ethtool_ops(struct net_device *dev,
64 const struct ethtool_ops *ops); 64 const struct ethtool_ops *ops);
65 65
66 /* hardware address assignment types */ 66 /* hardware address assignment types */
67 #define NET_ADDR_PERM 0 /* address is permanent (default) */ 67 #define NET_ADDR_PERM 0 /* address is permanent (default) */
68 #define NET_ADDR_RANDOM 1 /* address is generated randomly */ 68 #define NET_ADDR_RANDOM 1 /* address is generated randomly */
69 #define NET_ADDR_STOLEN 2 /* address is stolen from other device */ 69 #define NET_ADDR_STOLEN 2 /* address is stolen from other device */
70 #define NET_ADDR_SET 3 /* address is set using 70 #define NET_ADDR_SET 3 /* address is set using
71 * dev_set_mac_address() */ 71 * dev_set_mac_address() */
72 72
73 /* Backlog congestion levels */ 73 /* Backlog congestion levels */
74 #define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ 74 #define NET_RX_SUCCESS 0 /* keep 'em coming, baby */
75 #define NET_RX_DROP 1 /* packet dropped */ 75 #define NET_RX_DROP 1 /* packet dropped */
76 76
77 /* 77 /*
78 * Transmit return codes: transmit return codes originate from three different 78 * Transmit return codes: transmit return codes originate from three different
79 * namespaces: 79 * namespaces:
80 * 80 *
81 * - qdisc return codes 81 * - qdisc return codes
82 * - driver transmit return codes 82 * - driver transmit return codes
83 * - errno values 83 * - errno values
84 * 84 *
85 * Drivers are allowed to return any one of those in their hard_start_xmit() 85 * Drivers are allowed to return any one of those in their hard_start_xmit()
86 * function. Real network devices commonly used with qdiscs should only return 86 * function. Real network devices commonly used with qdiscs should only return
87 * the driver transmit return codes though - when qdiscs are used, the actual 87 * the driver transmit return codes though - when qdiscs are used, the actual
88 * transmission happens asynchronously, so the value is not propagated to 88 * transmission happens asynchronously, so the value is not propagated to
89 * higher layers. Virtual network devices transmit synchronously, in this case 89 * higher layers. Virtual network devices transmit synchronously, in this case
90 * the driver transmit return codes are consumed by dev_queue_xmit(), all 90 * the driver transmit return codes are consumed by dev_queue_xmit(), all
91 * others are propagated to higher layers. 91 * others are propagated to higher layers.
92 */ 92 */
93 93
94 /* qdisc ->enqueue() return codes. */ 94 /* qdisc ->enqueue() return codes. */
95 #define NET_XMIT_SUCCESS 0x00 95 #define NET_XMIT_SUCCESS 0x00
96 #define NET_XMIT_DROP 0x01 /* skb dropped */ 96 #define NET_XMIT_DROP 0x01 /* skb dropped */
97 #define NET_XMIT_CN 0x02 /* congestion notification */ 97 #define NET_XMIT_CN 0x02 /* congestion notification */
98 #define NET_XMIT_POLICED 0x03 /* skb is shot by police */ 98 #define NET_XMIT_POLICED 0x03 /* skb is shot by police */
99 #define NET_XMIT_MASK 0x0f /* qdisc flags in net/sch_generic.h */ 99 #define NET_XMIT_MASK 0x0f /* qdisc flags in net/sch_generic.h */
100 100
101 /* NET_XMIT_CN is special. It does not guarantee that this packet is lost. It 101 /* NET_XMIT_CN is special. It does not guarantee that this packet is lost. It
102 * indicates that the device will soon be dropping packets, or already drops 102 * indicates that the device will soon be dropping packets, or already drops
103 * some packets of the same priority; prompting us to send less aggressively. */ 103 * some packets of the same priority; prompting us to send less aggressively. */
104 #define net_xmit_eval(e) ((e) == NET_XMIT_CN ? 0 : (e)) 104 #define net_xmit_eval(e) ((e) == NET_XMIT_CN ? 0 : (e))
105 #define net_xmit_errno(e) ((e) != NET_XMIT_CN ? -ENOBUFS : 0) 105 #define net_xmit_errno(e) ((e) != NET_XMIT_CN ? -ENOBUFS : 0)
106 106
107 /* Driver transmit return codes */ 107 /* Driver transmit return codes */
108 #define NETDEV_TX_MASK 0xf0 108 #define NETDEV_TX_MASK 0xf0
109 109
110 enum netdev_tx { 110 enum netdev_tx {
111 __NETDEV_TX_MIN = INT_MIN, /* make sure enum is signed */ 111 __NETDEV_TX_MIN = INT_MIN, /* make sure enum is signed */
112 NETDEV_TX_OK = 0x00, /* driver took care of packet */ 112 NETDEV_TX_OK = 0x00, /* driver took care of packet */
113 NETDEV_TX_BUSY = 0x10, /* driver tx path was busy*/ 113 NETDEV_TX_BUSY = 0x10, /* driver tx path was busy*/
114 NETDEV_TX_LOCKED = 0x20, /* driver tx lock was already taken */ 114 NETDEV_TX_LOCKED = 0x20, /* driver tx lock was already taken */
115 }; 115 };
116 typedef enum netdev_tx netdev_tx_t; 116 typedef enum netdev_tx netdev_tx_t;
117 117
118 /* 118 /*
119 * Current order: NETDEV_TX_MASK > NET_XMIT_MASK >= 0 is significant; 119 * Current order: NETDEV_TX_MASK > NET_XMIT_MASK >= 0 is significant;
120 * hard_start_xmit() return < NET_XMIT_MASK means skb was consumed. 120 * hard_start_xmit() return < NET_XMIT_MASK means skb was consumed.
121 */ 121 */
122 static inline bool dev_xmit_complete(int rc) 122 static inline bool dev_xmit_complete(int rc)
123 { 123 {
124 /* 124 /*
125 * Positive cases with an skb consumed by a driver: 125 * Positive cases with an skb consumed by a driver:
126 * - successful transmission (rc == NETDEV_TX_OK) 126 * - successful transmission (rc == NETDEV_TX_OK)
127 * - error while transmitting (rc < 0) 127 * - error while transmitting (rc < 0)
128 * - error while queueing to a different device (rc & NET_XMIT_MASK) 128 * - error while queueing to a different device (rc & NET_XMIT_MASK)
129 */ 129 */
130 if (likely(rc < NET_XMIT_MASK)) 130 if (likely(rc < NET_XMIT_MASK))
131 return true; 131 return true;
132 132
133 return false; 133 return false;
134 } 134 }
135 135
136 /* 136 /*
137 * Compute the worst case header length according to the protocols 137 * Compute the worst case header length according to the protocols
138 * used. 138 * used.
139 */ 139 */
140 140
141 #if defined(CONFIG_WLAN) || IS_ENABLED(CONFIG_AX25) 141 #if defined(CONFIG_WLAN) || IS_ENABLED(CONFIG_AX25)
142 # if defined(CONFIG_MAC80211_MESH) 142 # if defined(CONFIG_MAC80211_MESH)
143 # define LL_MAX_HEADER 128 143 # define LL_MAX_HEADER 128
144 # else 144 # else
145 # define LL_MAX_HEADER 96 145 # define LL_MAX_HEADER 96
146 # endif 146 # endif
147 #elif IS_ENABLED(CONFIG_TR) 147 #elif IS_ENABLED(CONFIG_TR)
148 # define LL_MAX_HEADER 48 148 # define LL_MAX_HEADER 48
149 #else 149 #else
150 # define LL_MAX_HEADER 32 150 # define LL_MAX_HEADER 32
151 #endif 151 #endif
152 152
153 #if !IS_ENABLED(CONFIG_NET_IPIP) && !IS_ENABLED(CONFIG_NET_IPGRE) && \ 153 #if !IS_ENABLED(CONFIG_NET_IPIP) && !IS_ENABLED(CONFIG_NET_IPGRE) && \
154 !IS_ENABLED(CONFIG_IPV6_SIT) && !IS_ENABLED(CONFIG_IPV6_TUNNEL) 154 !IS_ENABLED(CONFIG_IPV6_SIT) && !IS_ENABLED(CONFIG_IPV6_TUNNEL)
155 #define MAX_HEADER LL_MAX_HEADER 155 #define MAX_HEADER LL_MAX_HEADER
156 #else 156 #else
157 #define MAX_HEADER (LL_MAX_HEADER + 48) 157 #define MAX_HEADER (LL_MAX_HEADER + 48)
158 #endif 158 #endif
159 159
160 /* 160 /*
161 * Old network device statistics. Fields are native words 161 * Old network device statistics. Fields are native words
162 * (unsigned long) so they can be read and written atomically. 162 * (unsigned long) so they can be read and written atomically.
163 */ 163 */
164 164
165 struct net_device_stats { 165 struct net_device_stats {
166 unsigned long rx_packets; 166 unsigned long rx_packets;
167 unsigned long tx_packets; 167 unsigned long tx_packets;
168 unsigned long rx_bytes; 168 unsigned long rx_bytes;
169 unsigned long tx_bytes; 169 unsigned long tx_bytes;
170 unsigned long rx_errors; 170 unsigned long rx_errors;
171 unsigned long tx_errors; 171 unsigned long tx_errors;
172 unsigned long rx_dropped; 172 unsigned long rx_dropped;
173 unsigned long tx_dropped; 173 unsigned long tx_dropped;
174 unsigned long multicast; 174 unsigned long multicast;
175 unsigned long collisions; 175 unsigned long collisions;
176 unsigned long rx_length_errors; 176 unsigned long rx_length_errors;
177 unsigned long rx_over_errors; 177 unsigned long rx_over_errors;
178 unsigned long rx_crc_errors; 178 unsigned long rx_crc_errors;
179 unsigned long rx_frame_errors; 179 unsigned long rx_frame_errors;
180 unsigned long rx_fifo_errors; 180 unsigned long rx_fifo_errors;
181 unsigned long rx_missed_errors; 181 unsigned long rx_missed_errors;
182 unsigned long tx_aborted_errors; 182 unsigned long tx_aborted_errors;
183 unsigned long tx_carrier_errors; 183 unsigned long tx_carrier_errors;
184 unsigned long tx_fifo_errors; 184 unsigned long tx_fifo_errors;
185 unsigned long tx_heartbeat_errors; 185 unsigned long tx_heartbeat_errors;
186 unsigned long tx_window_errors; 186 unsigned long tx_window_errors;
187 unsigned long rx_compressed; 187 unsigned long rx_compressed;
188 unsigned long tx_compressed; 188 unsigned long tx_compressed;
189 }; 189 };
190 190
191 191
192 #include <linux/cache.h> 192 #include <linux/cache.h>
193 #include <linux/skbuff.h> 193 #include <linux/skbuff.h>
194 194
195 #ifdef CONFIG_RPS 195 #ifdef CONFIG_RPS
196 #include <linux/static_key.h> 196 #include <linux/static_key.h>
197 extern struct static_key rps_needed; 197 extern struct static_key rps_needed;
198 #endif 198 #endif
199 199
200 struct neighbour; 200 struct neighbour;
201 struct neigh_parms; 201 struct neigh_parms;
202 struct sk_buff; 202 struct sk_buff;
203 203
204 struct netdev_hw_addr { 204 struct netdev_hw_addr {
205 struct list_head list; 205 struct list_head list;
206 unsigned char addr[MAX_ADDR_LEN]; 206 unsigned char addr[MAX_ADDR_LEN];
207 unsigned char type; 207 unsigned char type;
208 #define NETDEV_HW_ADDR_T_LAN 1 208 #define NETDEV_HW_ADDR_T_LAN 1
209 #define NETDEV_HW_ADDR_T_SAN 2 209 #define NETDEV_HW_ADDR_T_SAN 2
210 #define NETDEV_HW_ADDR_T_SLAVE 3 210 #define NETDEV_HW_ADDR_T_SLAVE 3
211 #define NETDEV_HW_ADDR_T_UNICAST 4 211 #define NETDEV_HW_ADDR_T_UNICAST 4
212 #define NETDEV_HW_ADDR_T_MULTICAST 5 212 #define NETDEV_HW_ADDR_T_MULTICAST 5
213 bool synced; 213 bool synced;
214 bool global_use; 214 bool global_use;
215 int refcount; 215 int refcount;
216 struct rcu_head rcu_head; 216 struct rcu_head rcu_head;
217 }; 217 };
218 218
219 struct netdev_hw_addr_list { 219 struct netdev_hw_addr_list {
220 struct list_head list; 220 struct list_head list;
221 int count; 221 int count;
222 }; 222 };
223 223
224 #define netdev_hw_addr_list_count(l) ((l)->count) 224 #define netdev_hw_addr_list_count(l) ((l)->count)
225 #define netdev_hw_addr_list_empty(l) (netdev_hw_addr_list_count(l) == 0) 225 #define netdev_hw_addr_list_empty(l) (netdev_hw_addr_list_count(l) == 0)
226 #define netdev_hw_addr_list_for_each(ha, l) \ 226 #define netdev_hw_addr_list_for_each(ha, l) \
227 list_for_each_entry(ha, &(l)->list, list) 227 list_for_each_entry(ha, &(l)->list, list)
228 228
229 #define netdev_uc_count(dev) netdev_hw_addr_list_count(&(dev)->uc) 229 #define netdev_uc_count(dev) netdev_hw_addr_list_count(&(dev)->uc)
230 #define netdev_uc_empty(dev) netdev_hw_addr_list_empty(&(dev)->uc) 230 #define netdev_uc_empty(dev) netdev_hw_addr_list_empty(&(dev)->uc)
231 #define netdev_for_each_uc_addr(ha, dev) \ 231 #define netdev_for_each_uc_addr(ha, dev) \
232 netdev_hw_addr_list_for_each(ha, &(dev)->uc) 232 netdev_hw_addr_list_for_each(ha, &(dev)->uc)
233 233
234 #define netdev_mc_count(dev) netdev_hw_addr_list_count(&(dev)->mc) 234 #define netdev_mc_count(dev) netdev_hw_addr_list_count(&(dev)->mc)
235 #define netdev_mc_empty(dev) netdev_hw_addr_list_empty(&(dev)->mc) 235 #define netdev_mc_empty(dev) netdev_hw_addr_list_empty(&(dev)->mc)
236 #define netdev_for_each_mc_addr(ha, dev) \ 236 #define netdev_for_each_mc_addr(ha, dev) \
237 netdev_hw_addr_list_for_each(ha, &(dev)->mc) 237 netdev_hw_addr_list_for_each(ha, &(dev)->mc)
238 238
239 struct hh_cache { 239 struct hh_cache {
240 u16 hh_len; 240 u16 hh_len;
241 u16 __pad; 241 u16 __pad;
242 seqlock_t hh_lock; 242 seqlock_t hh_lock;
243 243
244 /* cached hardware header; allow for machine alignment needs. */ 244 /* cached hardware header; allow for machine alignment needs. */
245 #define HH_DATA_MOD 16 245 #define HH_DATA_MOD 16
246 #define HH_DATA_OFF(__len) \ 246 #define HH_DATA_OFF(__len) \
247 (HH_DATA_MOD - (((__len - 1) & (HH_DATA_MOD - 1)) + 1)) 247 (HH_DATA_MOD - (((__len - 1) & (HH_DATA_MOD - 1)) + 1))
248 #define HH_DATA_ALIGN(__len) \ 248 #define HH_DATA_ALIGN(__len) \
249 (((__len)+(HH_DATA_MOD-1))&~(HH_DATA_MOD - 1)) 249 (((__len)+(HH_DATA_MOD-1))&~(HH_DATA_MOD - 1))
250 unsigned long hh_data[HH_DATA_ALIGN(LL_MAX_HEADER) / sizeof(long)]; 250 unsigned long hh_data[HH_DATA_ALIGN(LL_MAX_HEADER) / sizeof(long)];
251 }; 251 };
252 252
253 /* Reserve HH_DATA_MOD byte aligned hard_header_len, but at least that much. 253 /* Reserve HH_DATA_MOD byte aligned hard_header_len, but at least that much.
254 * Alternative is: 254 * Alternative is:
255 * dev->hard_header_len ? (dev->hard_header_len + 255 * dev->hard_header_len ? (dev->hard_header_len +
256 * (HH_DATA_MOD - 1)) & ~(HH_DATA_MOD - 1) : 0 256 * (HH_DATA_MOD - 1)) & ~(HH_DATA_MOD - 1) : 0
257 * 257 *
258 * We could use other alignment values, but we must maintain the 258 * We could use other alignment values, but we must maintain the
259 * relationship HH alignment <= LL alignment. 259 * relationship HH alignment <= LL alignment.
260 */ 260 */
261 #define LL_RESERVED_SPACE(dev) \ 261 #define LL_RESERVED_SPACE(dev) \
262 ((((dev)->hard_header_len+(dev)->needed_headroom)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD) 262 ((((dev)->hard_header_len+(dev)->needed_headroom)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
263 #define LL_RESERVED_SPACE_EXTRA(dev,extra) \ 263 #define LL_RESERVED_SPACE_EXTRA(dev,extra) \
264 ((((dev)->hard_header_len+(dev)->needed_headroom+(extra))&~(HH_DATA_MOD - 1)) + HH_DATA_MOD) 264 ((((dev)->hard_header_len+(dev)->needed_headroom+(extra))&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
265 265
266 struct header_ops { 266 struct header_ops {
267 int (*create) (struct sk_buff *skb, struct net_device *dev, 267 int (*create) (struct sk_buff *skb, struct net_device *dev,
268 unsigned short type, const void *daddr, 268 unsigned short type, const void *daddr,
269 const void *saddr, unsigned int len); 269 const void *saddr, unsigned int len);
270 int (*parse)(const struct sk_buff *skb, unsigned char *haddr); 270 int (*parse)(const struct sk_buff *skb, unsigned char *haddr);
271 int (*rebuild)(struct sk_buff *skb); 271 int (*rebuild)(struct sk_buff *skb);
272 int (*cache)(const struct neighbour *neigh, struct hh_cache *hh, __be16 type); 272 int (*cache)(const struct neighbour *neigh, struct hh_cache *hh, __be16 type);
273 void (*cache_update)(struct hh_cache *hh, 273 void (*cache_update)(struct hh_cache *hh,
274 const struct net_device *dev, 274 const struct net_device *dev,
275 const unsigned char *haddr); 275 const unsigned char *haddr);
276 }; 276 };
277 277
278 /* These flag bits are private to the generic network queueing 278 /* These flag bits are private to the generic network queueing
279 * layer, they may not be explicitly referenced by any other 279 * layer, they may not be explicitly referenced by any other
280 * code. 280 * code.
281 */ 281 */
282 282
283 enum netdev_state_t { 283 enum netdev_state_t {
284 __LINK_STATE_START, 284 __LINK_STATE_START,
285 __LINK_STATE_PRESENT, 285 __LINK_STATE_PRESENT,
286 __LINK_STATE_NOCARRIER, 286 __LINK_STATE_NOCARRIER,
287 __LINK_STATE_LINKWATCH_PENDING, 287 __LINK_STATE_LINKWATCH_PENDING,
288 __LINK_STATE_DORMANT, 288 __LINK_STATE_DORMANT,
289 }; 289 };
290 290
291 291
292 /* 292 /*
293 * This structure holds at boot time configured netdevice settings. They 293 * This structure holds at boot time configured netdevice settings. They
294 * are then used in the device probing. 294 * are then used in the device probing.
295 */ 295 */
296 struct netdev_boot_setup { 296 struct netdev_boot_setup {
297 char name[IFNAMSIZ]; 297 char name[IFNAMSIZ];
298 struct ifmap map; 298 struct ifmap map;
299 }; 299 };
300 #define NETDEV_BOOT_SETUP_MAX 8 300 #define NETDEV_BOOT_SETUP_MAX 8
301 301
302 extern int __init netdev_boot_setup(char *str); 302 extern int __init netdev_boot_setup(char *str);
303 303
304 /* 304 /*
305 * Structure for NAPI scheduling similar to tasklet but with weighting 305 * Structure for NAPI scheduling similar to tasklet but with weighting
306 */ 306 */
307 struct napi_struct { 307 struct napi_struct {
308 /* The poll_list must only be managed by the entity which 308 /* The poll_list must only be managed by the entity which
309 * changes the state of the NAPI_STATE_SCHED bit. This means 309 * changes the state of the NAPI_STATE_SCHED bit. This means
310 * whoever atomically sets that bit can add this napi_struct 310 * whoever atomically sets that bit can add this napi_struct
311 * to the per-cpu poll_list, and whoever clears that bit 311 * to the per-cpu poll_list, and whoever clears that bit
312 * can remove from the list right before clearing the bit. 312 * can remove from the list right before clearing the bit.
313 */ 313 */
314 struct list_head poll_list; 314 struct list_head poll_list;
315 315
316 unsigned long state; 316 unsigned long state;
317 int weight; 317 int weight;
318 unsigned int gro_count; 318 unsigned int gro_count;
319 int (*poll)(struct napi_struct *, int); 319 int (*poll)(struct napi_struct *, int);
320 #ifdef CONFIG_NETPOLL 320 #ifdef CONFIG_NETPOLL
321 spinlock_t poll_lock; 321 spinlock_t poll_lock;
322 int poll_owner; 322 int poll_owner;
323 #endif 323 #endif
324 struct net_device *dev; 324 struct net_device *dev;
325 struct sk_buff *gro_list; 325 struct sk_buff *gro_list;
326 struct sk_buff *skb; 326 struct sk_buff *skb;
327 struct list_head dev_list; 327 struct list_head dev_list;
328 }; 328 };
329 329
330 enum { 330 enum {
331 NAPI_STATE_SCHED, /* Poll is scheduled */ 331 NAPI_STATE_SCHED, /* Poll is scheduled */
332 NAPI_STATE_DISABLE, /* Disable pending */ 332 NAPI_STATE_DISABLE, /* Disable pending */
333 NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ 333 NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */
334 }; 334 };
335 335
336 enum gro_result { 336 enum gro_result {
337 GRO_MERGED, 337 GRO_MERGED,
338 GRO_MERGED_FREE, 338 GRO_MERGED_FREE,
339 GRO_HELD, 339 GRO_HELD,
340 GRO_NORMAL, 340 GRO_NORMAL,
341 GRO_DROP, 341 GRO_DROP,
342 }; 342 };
343 typedef enum gro_result gro_result_t; 343 typedef enum gro_result gro_result_t;
344 344
345 /* 345 /*
346 * enum rx_handler_result - Possible return values for rx_handlers. 346 * enum rx_handler_result - Possible return values for rx_handlers.
347 * @RX_HANDLER_CONSUMED: skb was consumed by rx_handler, do not process it 347 * @RX_HANDLER_CONSUMED: skb was consumed by rx_handler, do not process it
348 * further. 348 * further.
349 * @RX_HANDLER_ANOTHER: Do another round in receive path. This is indicated in 349 * @RX_HANDLER_ANOTHER: Do another round in receive path. This is indicated in
350 * case skb->dev was changed by rx_handler. 350 * case skb->dev was changed by rx_handler.
351 * @RX_HANDLER_EXACT: Force exact delivery, no wildcard. 351 * @RX_HANDLER_EXACT: Force exact delivery, no wildcard.
352 * @RX_HANDLER_PASS: Do nothing, passe the skb as if no rx_handler was called. 352 * @RX_HANDLER_PASS: Do nothing, passe the skb as if no rx_handler was called.
353 * 353 *
354 * rx_handlers are functions called from inside __netif_receive_skb(), to do 354 * rx_handlers are functions called from inside __netif_receive_skb(), to do
355 * special processing of the skb, prior to delivery to protocol handlers. 355 * special processing of the skb, prior to delivery to protocol handlers.
356 * 356 *
357 * Currently, a net_device can only have a single rx_handler registered. Trying 357 * Currently, a net_device can only have a single rx_handler registered. Trying
358 * to register a second rx_handler will return -EBUSY. 358 * to register a second rx_handler will return -EBUSY.
359 * 359 *
360 * To register a rx_handler on a net_device, use netdev_rx_handler_register(). 360 * To register a rx_handler on a net_device, use netdev_rx_handler_register().
361 * To unregister a rx_handler on a net_device, use 361 * To unregister a rx_handler on a net_device, use
362 * netdev_rx_handler_unregister(). 362 * netdev_rx_handler_unregister().
363 * 363 *
364 * Upon return, rx_handler is expected to tell __netif_receive_skb() what to 364 * Upon return, rx_handler is expected to tell __netif_receive_skb() what to
365 * do with the skb. 365 * do with the skb.
366 * 366 *
367 * If the rx_handler consumed to skb in some way, it should return 367 * If the rx_handler consumed to skb in some way, it should return
368 * RX_HANDLER_CONSUMED. This is appropriate when the rx_handler arranged for 368 * RX_HANDLER_CONSUMED. This is appropriate when the rx_handler arranged for
369 * the skb to be delivered in some other ways. 369 * the skb to be delivered in some other ways.
370 * 370 *
371 * If the rx_handler changed skb->dev, to divert the skb to another 371 * If the rx_handler changed skb->dev, to divert the skb to another
372 * net_device, it should return RX_HANDLER_ANOTHER. The rx_handler for the 372 * net_device, it should return RX_HANDLER_ANOTHER. The rx_handler for the
373 * new device will be called if it exists. 373 * new device will be called if it exists.
374 * 374 *
375 * If the rx_handler consider the skb should be ignored, it should return 375 * If the rx_handler consider the skb should be ignored, it should return
376 * RX_HANDLER_EXACT. The skb will only be delivered to protocol handlers that 376 * RX_HANDLER_EXACT. The skb will only be delivered to protocol handlers that
377 * are registered on exact device (ptype->dev == skb->dev). 377 * are registered on exact device (ptype->dev == skb->dev).
378 * 378 *
379 * If the rx_handler didn't changed skb->dev, but want the skb to be normally 379 * If the rx_handler didn't changed skb->dev, but want the skb to be normally
380 * delivered, it should return RX_HANDLER_PASS. 380 * delivered, it should return RX_HANDLER_PASS.
381 * 381 *
382 * A device without a registered rx_handler will behave as if rx_handler 382 * A device without a registered rx_handler will behave as if rx_handler
383 * returned RX_HANDLER_PASS. 383 * returned RX_HANDLER_PASS.
384 */ 384 */
385 385
386 enum rx_handler_result { 386 enum rx_handler_result {
387 RX_HANDLER_CONSUMED, 387 RX_HANDLER_CONSUMED,
388 RX_HANDLER_ANOTHER, 388 RX_HANDLER_ANOTHER,
389 RX_HANDLER_EXACT, 389 RX_HANDLER_EXACT,
390 RX_HANDLER_PASS, 390 RX_HANDLER_PASS,
391 }; 391 };
392 typedef enum rx_handler_result rx_handler_result_t; 392 typedef enum rx_handler_result rx_handler_result_t;
393 typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb); 393 typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb);
394 394
395 extern void __napi_schedule(struct napi_struct *n); 395 extern void __napi_schedule(struct napi_struct *n);
396 396
397 static inline bool napi_disable_pending(struct napi_struct *n) 397 static inline bool napi_disable_pending(struct napi_struct *n)
398 { 398 {
399 return test_bit(NAPI_STATE_DISABLE, &n->state); 399 return test_bit(NAPI_STATE_DISABLE, &n->state);
400 } 400 }
401 401
402 /** 402 /**
403 * napi_schedule_prep - check if napi can be scheduled 403 * napi_schedule_prep - check if napi can be scheduled
404 * @n: napi context 404 * @n: napi context
405 * 405 *
406 * Test if NAPI routine is already running, and if not mark 406 * Test if NAPI routine is already running, and if not mark
407 * it as running. This is used as a condition variable 407 * it as running. This is used as a condition variable
408 * insure only one NAPI poll instance runs. We also make 408 * insure only one NAPI poll instance runs. We also make
409 * sure there is no pending NAPI disable. 409 * sure there is no pending NAPI disable.
410 */ 410 */
411 static inline bool napi_schedule_prep(struct napi_struct *n) 411 static inline bool napi_schedule_prep(struct napi_struct *n)
412 { 412 {
413 return !napi_disable_pending(n) && 413 return !napi_disable_pending(n) &&
414 !test_and_set_bit(NAPI_STATE_SCHED, &n->state); 414 !test_and_set_bit(NAPI_STATE_SCHED, &n->state);
415 } 415 }
416 416
417 /** 417 /**
418 * napi_schedule - schedule NAPI poll 418 * napi_schedule - schedule NAPI poll
419 * @n: napi context 419 * @n: napi context
420 * 420 *
421 * Schedule NAPI poll routine to be called if it is not already 421 * Schedule NAPI poll routine to be called if it is not already
422 * running. 422 * running.
423 */ 423 */
424 static inline void napi_schedule(struct napi_struct *n) 424 static inline void napi_schedule(struct napi_struct *n)
425 { 425 {
426 if (napi_schedule_prep(n)) 426 if (napi_schedule_prep(n))
427 __napi_schedule(n); 427 __napi_schedule(n);
428 } 428 }
429 429
430 /* Try to reschedule poll. Called by dev->poll() after napi_complete(). */ 430 /* Try to reschedule poll. Called by dev->poll() after napi_complete(). */
431 static inline bool napi_reschedule(struct napi_struct *napi) 431 static inline bool napi_reschedule(struct napi_struct *napi)
432 { 432 {
433 if (napi_schedule_prep(napi)) { 433 if (napi_schedule_prep(napi)) {
434 __napi_schedule(napi); 434 __napi_schedule(napi);
435 return true; 435 return true;
436 } 436 }
437 return false; 437 return false;
438 } 438 }
439 439
440 /** 440 /**
441 * napi_complete - NAPI processing complete 441 * napi_complete - NAPI processing complete
442 * @n: napi context 442 * @n: napi context
443 * 443 *
444 * Mark NAPI processing as complete. 444 * Mark NAPI processing as complete.
445 */ 445 */
446 extern void __napi_complete(struct napi_struct *n); 446 extern void __napi_complete(struct napi_struct *n);
447 extern void napi_complete(struct napi_struct *n); 447 extern void napi_complete(struct napi_struct *n);
448 448
449 /** 449 /**
450 * napi_disable - prevent NAPI from scheduling 450 * napi_disable - prevent NAPI from scheduling
451 * @n: napi context 451 * @n: napi context
452 * 452 *
453 * Stop NAPI from being scheduled on this context. 453 * Stop NAPI from being scheduled on this context.
454 * Waits till any outstanding processing completes. 454 * Waits till any outstanding processing completes.
455 */ 455 */
456 static inline void napi_disable(struct napi_struct *n) 456 static inline void napi_disable(struct napi_struct *n)
457 { 457 {
458 set_bit(NAPI_STATE_DISABLE, &n->state); 458 set_bit(NAPI_STATE_DISABLE, &n->state);
459 while (test_and_set_bit(NAPI_STATE_SCHED, &n->state)) 459 while (test_and_set_bit(NAPI_STATE_SCHED, &n->state))
460 msleep(1); 460 msleep(1);
461 clear_bit(NAPI_STATE_DISABLE, &n->state); 461 clear_bit(NAPI_STATE_DISABLE, &n->state);
462 } 462 }
463 463
464 /** 464 /**
465 * napi_enable - enable NAPI scheduling 465 * napi_enable - enable NAPI scheduling
466 * @n: napi context 466 * @n: napi context
467 * 467 *
468 * Resume NAPI from being scheduled on this context. 468 * Resume NAPI from being scheduled on this context.
469 * Must be paired with napi_disable. 469 * Must be paired with napi_disable.
470 */ 470 */
471 static inline void napi_enable(struct napi_struct *n) 471 static inline void napi_enable(struct napi_struct *n)
472 { 472 {
473 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); 473 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
474 smp_mb__before_clear_bit(); 474 smp_mb__before_clear_bit();
475 clear_bit(NAPI_STATE_SCHED, &n->state); 475 clear_bit(NAPI_STATE_SCHED, &n->state);
476 } 476 }
477 477
478 #ifdef CONFIG_SMP 478 #ifdef CONFIG_SMP
479 /** 479 /**
480 * napi_synchronize - wait until NAPI is not running 480 * napi_synchronize - wait until NAPI is not running
481 * @n: napi context 481 * @n: napi context
482 * 482 *
483 * Wait until NAPI is done being scheduled on this context. 483 * Wait until NAPI is done being scheduled on this context.
484 * Waits till any outstanding processing completes but 484 * Waits till any outstanding processing completes but
485 * does not disable future activations. 485 * does not disable future activations.
486 */ 486 */
487 static inline void napi_synchronize(const struct napi_struct *n) 487 static inline void napi_synchronize(const struct napi_struct *n)
488 { 488 {
489 while (test_bit(NAPI_STATE_SCHED, &n->state)) 489 while (test_bit(NAPI_STATE_SCHED, &n->state))
490 msleep(1); 490 msleep(1);
491 } 491 }
492 #else 492 #else
493 # define napi_synchronize(n) barrier() 493 # define napi_synchronize(n) barrier()
494 #endif 494 #endif
495 495
496 enum netdev_queue_state_t { 496 enum netdev_queue_state_t {
497 __QUEUE_STATE_DRV_XOFF, 497 __QUEUE_STATE_DRV_XOFF,
498 __QUEUE_STATE_STACK_XOFF, 498 __QUEUE_STATE_STACK_XOFF,
499 __QUEUE_STATE_FROZEN, 499 __QUEUE_STATE_FROZEN,
500 #define QUEUE_STATE_ANY_XOFF ((1 << __QUEUE_STATE_DRV_XOFF) | \ 500 #define QUEUE_STATE_ANY_XOFF ((1 << __QUEUE_STATE_DRV_XOFF) | \
501 (1 << __QUEUE_STATE_STACK_XOFF)) 501 (1 << __QUEUE_STATE_STACK_XOFF))
502 #define QUEUE_STATE_ANY_XOFF_OR_FROZEN (QUEUE_STATE_ANY_XOFF | \ 502 #define QUEUE_STATE_ANY_XOFF_OR_FROZEN (QUEUE_STATE_ANY_XOFF | \
503 (1 << __QUEUE_STATE_FROZEN)) 503 (1 << __QUEUE_STATE_FROZEN))
504 }; 504 };
505 /* 505 /*
506 * __QUEUE_STATE_DRV_XOFF is used by drivers to stop the transmit queue. The 506 * __QUEUE_STATE_DRV_XOFF is used by drivers to stop the transmit queue. The
507 * netif_tx_* functions below are used to manipulate this flag. The 507 * netif_tx_* functions below are used to manipulate this flag. The
508 * __QUEUE_STATE_STACK_XOFF flag is used by the stack to stop the transmit 508 * __QUEUE_STATE_STACK_XOFF flag is used by the stack to stop the transmit
509 * queue independently. The netif_xmit_*stopped functions below are called 509 * queue independently. The netif_xmit_*stopped functions below are called
510 * to check if the queue has been stopped by the driver or stack (either 510 * to check if the queue has been stopped by the driver or stack (either
511 * of the XOFF bits are set in the state). Drivers should not need to call 511 * of the XOFF bits are set in the state). Drivers should not need to call
512 * netif_xmit*stopped functions, they should only be using netif_tx_*. 512 * netif_xmit*stopped functions, they should only be using netif_tx_*.
513 */ 513 */
514 514
515 struct netdev_queue { 515 struct netdev_queue {
516 /* 516 /*
517 * read mostly part 517 * read mostly part
518 */ 518 */
519 struct net_device *dev; 519 struct net_device *dev;
520 struct Qdisc *qdisc; 520 struct Qdisc *qdisc;
521 struct Qdisc *qdisc_sleeping; 521 struct Qdisc *qdisc_sleeping;
522 #ifdef CONFIG_SYSFS 522 #ifdef CONFIG_SYSFS
523 struct kobject kobj; 523 struct kobject kobj;
524 #endif 524 #endif
525 #if defined(CONFIG_XPS) && defined(CONFIG_NUMA) 525 #if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
526 int numa_node; 526 int numa_node;
527 #endif 527 #endif
528 /* 528 /*
529 * write mostly part 529 * write mostly part
530 */ 530 */
531 spinlock_t _xmit_lock ____cacheline_aligned_in_smp; 531 spinlock_t _xmit_lock ____cacheline_aligned_in_smp;
532 int xmit_lock_owner; 532 int xmit_lock_owner;
533 /* 533 /*
534 * please use this field instead of dev->trans_start 534 * please use this field instead of dev->trans_start
535 */ 535 */
536 unsigned long trans_start; 536 unsigned long trans_start;
537 537
538 /* 538 /*
539 * Number of TX timeouts for this queue 539 * Number of TX timeouts for this queue
540 * (/sys/class/net/DEV/Q/trans_timeout) 540 * (/sys/class/net/DEV/Q/trans_timeout)
541 */ 541 */
542 unsigned long trans_timeout; 542 unsigned long trans_timeout;
543 543
544 unsigned long state; 544 unsigned long state;
545 545
546 #ifdef CONFIG_BQL 546 #ifdef CONFIG_BQL
547 struct dql dql; 547 struct dql dql;
548 #endif 548 #endif
549 } ____cacheline_aligned_in_smp; 549 } ____cacheline_aligned_in_smp;
550 550
551 static inline int netdev_queue_numa_node_read(const struct netdev_queue *q) 551 static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
552 { 552 {
553 #if defined(CONFIG_XPS) && defined(CONFIG_NUMA) 553 #if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
554 return q->numa_node; 554 return q->numa_node;
555 #else 555 #else
556 return NUMA_NO_NODE; 556 return NUMA_NO_NODE;
557 #endif 557 #endif
558 } 558 }
559 559
560 static inline void netdev_queue_numa_node_write(struct netdev_queue *q, int node) 560 static inline void netdev_queue_numa_node_write(struct netdev_queue *q, int node)
561 { 561 {
562 #if defined(CONFIG_XPS) && defined(CONFIG_NUMA) 562 #if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
563 q->numa_node = node; 563 q->numa_node = node;
564 #endif 564 #endif
565 } 565 }
566 566
567 #ifdef CONFIG_RPS 567 #ifdef CONFIG_RPS
568 /* 568 /*
569 * This structure holds an RPS map which can be of variable length. The 569 * This structure holds an RPS map which can be of variable length. The
570 * map is an array of CPUs. 570 * map is an array of CPUs.
571 */ 571 */
572 struct rps_map { 572 struct rps_map {
573 unsigned int len; 573 unsigned int len;
574 struct rcu_head rcu; 574 struct rcu_head rcu;
575 u16 cpus[0]; 575 u16 cpus[0];
576 }; 576 };
577 #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16))) 577 #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16)))
578 578
579 /* 579 /*
580 * The rps_dev_flow structure contains the mapping of a flow to a CPU, the 580 * The rps_dev_flow structure contains the mapping of a flow to a CPU, the
581 * tail pointer for that CPU's input queue at the time of last enqueue, and 581 * tail pointer for that CPU's input queue at the time of last enqueue, and
582 * a hardware filter index. 582 * a hardware filter index.
583 */ 583 */
584 struct rps_dev_flow { 584 struct rps_dev_flow {
585 u16 cpu; 585 u16 cpu;
586 u16 filter; 586 u16 filter;
587 unsigned int last_qtail; 587 unsigned int last_qtail;
588 }; 588 };
589 #define RPS_NO_FILTER 0xffff 589 #define RPS_NO_FILTER 0xffff
590 590
591 /* 591 /*
592 * The rps_dev_flow_table structure contains a table of flow mappings. 592 * The rps_dev_flow_table structure contains a table of flow mappings.
593 */ 593 */
594 struct rps_dev_flow_table { 594 struct rps_dev_flow_table {
595 unsigned int mask; 595 unsigned int mask;
596 struct rcu_head rcu; 596 struct rcu_head rcu;
597 struct work_struct free_work; 597 struct work_struct free_work;
598 struct rps_dev_flow flows[0]; 598 struct rps_dev_flow flows[0];
599 }; 599 };
600 #define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \ 600 #define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \
601 ((_num) * sizeof(struct rps_dev_flow))) 601 ((_num) * sizeof(struct rps_dev_flow)))
602 602
603 /* 603 /*
604 * The rps_sock_flow_table contains mappings of flows to the last CPU 604 * The rps_sock_flow_table contains mappings of flows to the last CPU
605 * on which they were processed by the application (set in recvmsg). 605 * on which they were processed by the application (set in recvmsg).
606 */ 606 */
607 struct rps_sock_flow_table { 607 struct rps_sock_flow_table {
608 unsigned int mask; 608 unsigned int mask;
609 u16 ents[0]; 609 u16 ents[0];
610 }; 610 };
611 #define RPS_SOCK_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_sock_flow_table) + \ 611 #define RPS_SOCK_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_sock_flow_table) + \
612 ((_num) * sizeof(u16))) 612 ((_num) * sizeof(u16)))
613 613
614 #define RPS_NO_CPU 0xffff 614 #define RPS_NO_CPU 0xffff
615 615
616 static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, 616 static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
617 u32 hash) 617 u32 hash)
618 { 618 {
619 if (table && hash) { 619 if (table && hash) {
620 unsigned int cpu, index = hash & table->mask; 620 unsigned int cpu, index = hash & table->mask;
621 621
622 /* We only give a hint, preemption can change cpu under us */ 622 /* We only give a hint, preemption can change cpu under us */
623 cpu = raw_smp_processor_id(); 623 cpu = raw_smp_processor_id();
624 624
625 if (table->ents[index] != cpu) 625 if (table->ents[index] != cpu)
626 table->ents[index] = cpu; 626 table->ents[index] = cpu;
627 } 627 }
628 } 628 }
629 629
630 static inline void rps_reset_sock_flow(struct rps_sock_flow_table *table, 630 static inline void rps_reset_sock_flow(struct rps_sock_flow_table *table,
631 u32 hash) 631 u32 hash)
632 { 632 {
633 if (table && hash) 633 if (table && hash)
634 table->ents[hash & table->mask] = RPS_NO_CPU; 634 table->ents[hash & table->mask] = RPS_NO_CPU;
635 } 635 }
636 636
637 extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; 637 extern struct rps_sock_flow_table __rcu *rps_sock_flow_table;
638 638
639 #ifdef CONFIG_RFS_ACCEL 639 #ifdef CONFIG_RFS_ACCEL
640 extern bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, 640 extern bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
641 u32 flow_id, u16 filter_id); 641 u32 flow_id, u16 filter_id);
642 #endif 642 #endif
643 643
644 /* This structure contains an instance of an RX queue. */ 644 /* This structure contains an instance of an RX queue. */
645 struct netdev_rx_queue { 645 struct netdev_rx_queue {
646 struct rps_map __rcu *rps_map; 646 struct rps_map __rcu *rps_map;
647 struct rps_dev_flow_table __rcu *rps_flow_table; 647 struct rps_dev_flow_table __rcu *rps_flow_table;
648 struct kobject kobj; 648 struct kobject kobj;
649 struct net_device *dev; 649 struct net_device *dev;
650 } ____cacheline_aligned_in_smp; 650 } ____cacheline_aligned_in_smp;
651 #endif /* CONFIG_RPS */ 651 #endif /* CONFIG_RPS */
652 652
653 #ifdef CONFIG_XPS 653 #ifdef CONFIG_XPS
654 /* 654 /*
655 * This structure holds an XPS map which can be of variable length. The 655 * This structure holds an XPS map which can be of variable length. The
656 * map is an array of queues. 656 * map is an array of queues.
657 */ 657 */
658 struct xps_map { 658 struct xps_map {
659 unsigned int len; 659 unsigned int len;
660 unsigned int alloc_len; 660 unsigned int alloc_len;
661 struct rcu_head rcu; 661 struct rcu_head rcu;
662 u16 queues[0]; 662 u16 queues[0];
663 }; 663 };
664 #define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + ((_num) * sizeof(u16))) 664 #define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + ((_num) * sizeof(u16)))
665 #define XPS_MIN_MAP_ALLOC ((L1_CACHE_BYTES - sizeof(struct xps_map)) \ 665 #define XPS_MIN_MAP_ALLOC ((L1_CACHE_BYTES - sizeof(struct xps_map)) \
666 / sizeof(u16)) 666 / sizeof(u16))
667 667
668 /* 668 /*
669 * This structure holds all XPS maps for device. Maps are indexed by CPU. 669 * This structure holds all XPS maps for device. Maps are indexed by CPU.
670 */ 670 */
671 struct xps_dev_maps { 671 struct xps_dev_maps {
672 struct rcu_head rcu; 672 struct rcu_head rcu;
673 struct xps_map __rcu *cpu_map[0]; 673 struct xps_map __rcu *cpu_map[0];
674 }; 674 };
675 #define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) + \ 675 #define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) + \
676 (nr_cpu_ids * sizeof(struct xps_map *))) 676 (nr_cpu_ids * sizeof(struct xps_map *)))
677 #endif /* CONFIG_XPS */ 677 #endif /* CONFIG_XPS */
678 678
679 #define TC_MAX_QUEUE 16 679 #define TC_MAX_QUEUE 16
680 #define TC_BITMASK 15 680 #define TC_BITMASK 15
681 /* HW offloaded queuing disciplines txq count and offset maps */ 681 /* HW offloaded queuing disciplines txq count and offset maps */
682 struct netdev_tc_txq { 682 struct netdev_tc_txq {
683 u16 count; 683 u16 count;
684 u16 offset; 684 u16 offset;
685 }; 685 };
686 686
687 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) 687 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
688 /* 688 /*
689 * This structure is to hold information about the device 689 * This structure is to hold information about the device
690 * configured to run FCoE protocol stack. 690 * configured to run FCoE protocol stack.
691 */ 691 */
692 struct netdev_fcoe_hbainfo { 692 struct netdev_fcoe_hbainfo {
693 char manufacturer[64]; 693 char manufacturer[64];
694 char serial_number[64]; 694 char serial_number[64];
695 char hardware_version[64]; 695 char hardware_version[64];
696 char driver_version[64]; 696 char driver_version[64];
697 char optionrom_version[64]; 697 char optionrom_version[64];
698 char firmware_version[64]; 698 char firmware_version[64];
699 char model[256]; 699 char model[256];
700 char model_description[256]; 700 char model_description[256];
701 }; 701 };
702 #endif 702 #endif
703 703
704 /* 704 /*
705 * This structure defines the management hooks for network devices. 705 * This structure defines the management hooks for network devices.
706 * The following hooks can be defined; unless noted otherwise, they are 706 * The following hooks can be defined; unless noted otherwise, they are
707 * optional and can be filled with a null pointer. 707 * optional and can be filled with a null pointer.
708 * 708 *
709 * int (*ndo_init)(struct net_device *dev); 709 * int (*ndo_init)(struct net_device *dev);
710 * This function is called once when network device is registered. 710 * This function is called once when network device is registered.
711 * The network device can use this to any late stage initializaton 711 * The network device can use this to any late stage initializaton
712 * or semantic validattion. It can fail with an error code which will 712 * or semantic validattion. It can fail with an error code which will
713 * be propogated back to register_netdev 713 * be propogated back to register_netdev
714 * 714 *
715 * void (*ndo_uninit)(struct net_device *dev); 715 * void (*ndo_uninit)(struct net_device *dev);
716 * This function is called when device is unregistered or when registration 716 * This function is called when device is unregistered or when registration
717 * fails. It is not called if init fails. 717 * fails. It is not called if init fails.
718 * 718 *
719 * int (*ndo_open)(struct net_device *dev); 719 * int (*ndo_open)(struct net_device *dev);
720 * This function is called when network device transistions to the up 720 * This function is called when network device transistions to the up
721 * state. 721 * state.
722 * 722 *
723 * int (*ndo_stop)(struct net_device *dev); 723 * int (*ndo_stop)(struct net_device *dev);
724 * This function is called when network device transistions to the down 724 * This function is called when network device transistions to the down
725 * state. 725 * state.
726 * 726 *
727 * netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb, 727 * netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb,
728 * struct net_device *dev); 728 * struct net_device *dev);
729 * Called when a packet needs to be transmitted. 729 * Called when a packet needs to be transmitted.
730 * Must return NETDEV_TX_OK , NETDEV_TX_BUSY. 730 * Must return NETDEV_TX_OK , NETDEV_TX_BUSY.
731 * (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX) 731 * (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX)
732 * Required can not be NULL. 732 * Required can not be NULL.
733 * 733 *
734 * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb); 734 * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb);
735 * Called to decide which queue to when device supports multiple 735 * Called to decide which queue to when device supports multiple
736 * transmit queues. 736 * transmit queues.
737 * 737 *
738 * void (*ndo_change_rx_flags)(struct net_device *dev, int flags); 738 * void (*ndo_change_rx_flags)(struct net_device *dev, int flags);
739 * This function is called to allow device receiver to make 739 * This function is called to allow device receiver to make
740 * changes to configuration when multicast or promiscious is enabled. 740 * changes to configuration when multicast or promiscious is enabled.
741 * 741 *
742 * void (*ndo_set_rx_mode)(struct net_device *dev); 742 * void (*ndo_set_rx_mode)(struct net_device *dev);
743 * This function is called device changes address list filtering. 743 * This function is called device changes address list filtering.
744 * If driver handles unicast address filtering, it should set 744 * If driver handles unicast address filtering, it should set
745 * IFF_UNICAST_FLT to its priv_flags. 745 * IFF_UNICAST_FLT to its priv_flags.
746 * 746 *
747 * int (*ndo_set_mac_address)(struct net_device *dev, void *addr); 747 * int (*ndo_set_mac_address)(struct net_device *dev, void *addr);
748 * This function is called when the Media Access Control address 748 * This function is called when the Media Access Control address
749 * needs to be changed. If this interface is not defined, the 749 * needs to be changed. If this interface is not defined, the
750 * mac address can not be changed. 750 * mac address can not be changed.
751 * 751 *
752 * int (*ndo_validate_addr)(struct net_device *dev); 752 * int (*ndo_validate_addr)(struct net_device *dev);
753 * Test if Media Access Control address is valid for the device. 753 * Test if Media Access Control address is valid for the device.
754 * 754 *
755 * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); 755 * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd);
756 * Called when a user request an ioctl which can't be handled by 756 * Called when a user request an ioctl which can't be handled by
757 * the generic interface code. If not defined ioctl's return 757 * the generic interface code. If not defined ioctl's return
758 * not supported error code. 758 * not supported error code.
759 * 759 *
760 * int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); 760 * int (*ndo_set_config)(struct net_device *dev, struct ifmap *map);
761 * Used to set network devices bus interface parameters. This interface 761 * Used to set network devices bus interface parameters. This interface
762 * is retained for legacy reason, new devices should use the bus 762 * is retained for legacy reason, new devices should use the bus
763 * interface (PCI) for low level management. 763 * interface (PCI) for low level management.
764 * 764 *
765 * int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); 765 * int (*ndo_change_mtu)(struct net_device *dev, int new_mtu);
766 * Called when a user wants to change the Maximum Transfer Unit 766 * Called when a user wants to change the Maximum Transfer Unit
767 * of a device. If not defined, any request to change MTU will 767 * of a device. If not defined, any request to change MTU will
768 * will return an error. 768 * will return an error.
769 * 769 *
770 * void (*ndo_tx_timeout)(struct net_device *dev); 770 * void (*ndo_tx_timeout)(struct net_device *dev);
771 * Callback uses when the transmitter has not made any progress 771 * Callback uses when the transmitter has not made any progress
772 * for dev->watchdog ticks. 772 * for dev->watchdog ticks.
773 * 773 *
774 * struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev, 774 * struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev,
775 * struct rtnl_link_stats64 *storage); 775 * struct rtnl_link_stats64 *storage);
776 * struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); 776 * struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
777 * Called when a user wants to get the network device usage 777 * Called when a user wants to get the network device usage
778 * statistics. Drivers must do one of the following: 778 * statistics. Drivers must do one of the following:
779 * 1. Define @ndo_get_stats64 to fill in a zero-initialised 779 * 1. Define @ndo_get_stats64 to fill in a zero-initialised
780 * rtnl_link_stats64 structure passed by the caller. 780 * rtnl_link_stats64 structure passed by the caller.
781 * 2. Define @ndo_get_stats to update a net_device_stats structure 781 * 2. Define @ndo_get_stats to update a net_device_stats structure
782 * (which should normally be dev->stats) and return a pointer to 782 * (which should normally be dev->stats) and return a pointer to
783 * it. The structure may be changed asynchronously only if each 783 * it. The structure may be changed asynchronously only if each
784 * field is written atomically. 784 * field is written atomically.
785 * 3. Update dev->stats asynchronously and atomically, and define 785 * 3. Update dev->stats asynchronously and atomically, and define
786 * neither operation. 786 * neither operation.
787 * 787 *
788 * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, unsigned short vid); 788 * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, unsigned short vid);
789 * If device support VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER) 789 * If device support VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER)
790 * this function is called when a VLAN id is registered. 790 * this function is called when a VLAN id is registered.
791 * 791 *
792 * int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, unsigned short vid); 792 * int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, unsigned short vid);
793 * If device support VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER) 793 * If device support VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER)
794 * this function is called when a VLAN id is unregistered. 794 * this function is called when a VLAN id is unregistered.
795 * 795 *
796 * void (*ndo_poll_controller)(struct net_device *dev); 796 * void (*ndo_poll_controller)(struct net_device *dev);
797 * 797 *
798 * SR-IOV management functions. 798 * SR-IOV management functions.
799 * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac); 799 * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac);
800 * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos); 800 * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos);
801 * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate); 801 * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate);
802 * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting); 802 * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting);
803 * int (*ndo_get_vf_config)(struct net_device *dev, 803 * int (*ndo_get_vf_config)(struct net_device *dev,
804 * int vf, struct ifla_vf_info *ivf); 804 * int vf, struct ifla_vf_info *ivf);
805 * int (*ndo_set_vf_port)(struct net_device *dev, int vf, 805 * int (*ndo_set_vf_port)(struct net_device *dev, int vf,
806 * struct nlattr *port[]); 806 * struct nlattr *port[]);
807 * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); 807 * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb);
808 * int (*ndo_setup_tc)(struct net_device *dev, u8 tc) 808 * int (*ndo_setup_tc)(struct net_device *dev, u8 tc)
809 * Called to setup 'tc' number of traffic classes in the net device. This 809 * Called to setup 'tc' number of traffic classes in the net device. This
810 * is always called from the stack with the rtnl lock held and netif tx 810 * is always called from the stack with the rtnl lock held and netif tx
811 * queues stopped. This allows the netdevice to perform queue management 811 * queues stopped. This allows the netdevice to perform queue management
812 * safely. 812 * safely.
813 * 813 *
814 * Fiber Channel over Ethernet (FCoE) offload functions. 814 * Fiber Channel over Ethernet (FCoE) offload functions.
815 * int (*ndo_fcoe_enable)(struct net_device *dev); 815 * int (*ndo_fcoe_enable)(struct net_device *dev);
816 * Called when the FCoE protocol stack wants to start using LLD for FCoE 816 * Called when the FCoE protocol stack wants to start using LLD for FCoE
817 * so the underlying device can perform whatever needed configuration or 817 * so the underlying device can perform whatever needed configuration or
818 * initialization to support acceleration of FCoE traffic. 818 * initialization to support acceleration of FCoE traffic.
819 * 819 *
820 * int (*ndo_fcoe_disable)(struct net_device *dev); 820 * int (*ndo_fcoe_disable)(struct net_device *dev);
821 * Called when the FCoE protocol stack wants to stop using LLD for FCoE 821 * Called when the FCoE protocol stack wants to stop using LLD for FCoE
822 * so the underlying device can perform whatever needed clean-ups to 822 * so the underlying device can perform whatever needed clean-ups to
823 * stop supporting acceleration of FCoE traffic. 823 * stop supporting acceleration of FCoE traffic.
824 * 824 *
825 * int (*ndo_fcoe_ddp_setup)(struct net_device *dev, u16 xid, 825 * int (*ndo_fcoe_ddp_setup)(struct net_device *dev, u16 xid,
826 * struct scatterlist *sgl, unsigned int sgc); 826 * struct scatterlist *sgl, unsigned int sgc);
827 * Called when the FCoE Initiator wants to initialize an I/O that 827 * Called when the FCoE Initiator wants to initialize an I/O that
828 * is a possible candidate for Direct Data Placement (DDP). The LLD can 828 * is a possible candidate for Direct Data Placement (DDP). The LLD can
829 * perform necessary setup and returns 1 to indicate the device is set up 829 * perform necessary setup and returns 1 to indicate the device is set up
830 * successfully to perform DDP on this I/O, otherwise this returns 0. 830 * successfully to perform DDP on this I/O, otherwise this returns 0.
831 * 831 *
832 * int (*ndo_fcoe_ddp_done)(struct net_device *dev, u16 xid); 832 * int (*ndo_fcoe_ddp_done)(struct net_device *dev, u16 xid);
833 * Called when the FCoE Initiator/Target is done with the DDPed I/O as 833 * Called when the FCoE Initiator/Target is done with the DDPed I/O as
834 * indicated by the FC exchange id 'xid', so the underlying device can 834 * indicated by the FC exchange id 'xid', so the underlying device can
835 * clean up and reuse resources for later DDP requests. 835 * clean up and reuse resources for later DDP requests.
836 * 836 *
837 * int (*ndo_fcoe_ddp_target)(struct net_device *dev, u16 xid, 837 * int (*ndo_fcoe_ddp_target)(struct net_device *dev, u16 xid,
838 * struct scatterlist *sgl, unsigned int sgc); 838 * struct scatterlist *sgl, unsigned int sgc);
839 * Called when the FCoE Target wants to initialize an I/O that 839 * Called when the FCoE Target wants to initialize an I/O that
840 * is a possible candidate for Direct Data Placement (DDP). The LLD can 840 * is a possible candidate for Direct Data Placement (DDP). The LLD can
841 * perform necessary setup and returns 1 to indicate the device is set up 841 * perform necessary setup and returns 1 to indicate the device is set up
842 * successfully to perform DDP on this I/O, otherwise this returns 0. 842 * successfully to perform DDP on this I/O, otherwise this returns 0.
843 * 843 *
844 * int (*ndo_fcoe_get_hbainfo)(struct net_device *dev, 844 * int (*ndo_fcoe_get_hbainfo)(struct net_device *dev,
845 * struct netdev_fcoe_hbainfo *hbainfo); 845 * struct netdev_fcoe_hbainfo *hbainfo);
846 * Called when the FCoE Protocol stack wants information on the underlying 846 * Called when the FCoE Protocol stack wants information on the underlying
847 * device. This information is utilized by the FCoE protocol stack to 847 * device. This information is utilized by the FCoE protocol stack to
848 * register attributes with Fiber Channel management service as per the 848 * register attributes with Fiber Channel management service as per the
849 * FC-GS Fabric Device Management Information (FDMI) specification. 849 * FC-GS Fabric Device Management Information (FDMI) specification.
850 * 850 *
851 * int (*ndo_fcoe_get_wwn)(struct net_device *dev, u64 *wwn, int type); 851 * int (*ndo_fcoe_get_wwn)(struct net_device *dev, u64 *wwn, int type);
852 * Called when the underlying device wants to override default World Wide 852 * Called when the underlying device wants to override default World Wide
853 * Name (WWN) generation mechanism in FCoE protocol stack to pass its own 853 * Name (WWN) generation mechanism in FCoE protocol stack to pass its own
854 * World Wide Port Name (WWPN) or World Wide Node Name (WWNN) to the FCoE 854 * World Wide Port Name (WWPN) or World Wide Node Name (WWNN) to the FCoE
855 * protocol stack to use. 855 * protocol stack to use.
856 * 856 *
857 * RFS acceleration. 857 * RFS acceleration.
858 * int (*ndo_rx_flow_steer)(struct net_device *dev, const struct sk_buff *skb, 858 * int (*ndo_rx_flow_steer)(struct net_device *dev, const struct sk_buff *skb,
859 * u16 rxq_index, u32 flow_id); 859 * u16 rxq_index, u32 flow_id);
860 * Set hardware filter for RFS. rxq_index is the target queue index; 860 * Set hardware filter for RFS. rxq_index is the target queue index;
861 * flow_id is a flow ID to be passed to rps_may_expire_flow() later. 861 * flow_id is a flow ID to be passed to rps_may_expire_flow() later.
862 * Return the filter ID on success, or a negative error code. 862 * Return the filter ID on success, or a negative error code.
863 * 863 *
864 * Slave management functions (for bridge, bonding, etc). 864 * Slave management functions (for bridge, bonding, etc).
865 * int (*ndo_add_slave)(struct net_device *dev, struct net_device *slave_dev); 865 * int (*ndo_add_slave)(struct net_device *dev, struct net_device *slave_dev);
866 * Called to make another netdev an underling. 866 * Called to make another netdev an underling.
867 * 867 *
868 * int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); 868 * int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev);
869 * Called to release previously enslaved netdev. 869 * Called to release previously enslaved netdev.
870 * 870 *
871 * Feature/offload setting functions. 871 * Feature/offload setting functions.
872 * netdev_features_t (*ndo_fix_features)(struct net_device *dev, 872 * netdev_features_t (*ndo_fix_features)(struct net_device *dev,
873 * netdev_features_t features); 873 * netdev_features_t features);
874 * Adjusts the requested feature flags according to device-specific 874 * Adjusts the requested feature flags according to device-specific
875 * constraints, and returns the resulting flags. Must not modify 875 * constraints, and returns the resulting flags. Must not modify
876 * the device state. 876 * the device state.
877 * 877 *
878 * int (*ndo_set_features)(struct net_device *dev, netdev_features_t features); 878 * int (*ndo_set_features)(struct net_device *dev, netdev_features_t features);
879 * Called to update device configuration to new features. Passed 879 * Called to update device configuration to new features. Passed
880 * feature set might be less than what was returned by ndo_fix_features(). 880 * feature set might be less than what was returned by ndo_fix_features().
881 * Must return >0 or -errno if it changed dev->features itself. 881 * Must return >0 or -errno if it changed dev->features itself.
882 * 882 *
883 * int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[], 883 * int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[],
884 * struct net_device *dev, 884 * struct net_device *dev,
885 * const unsigned char *addr, u16 flags) 885 * const unsigned char *addr, u16 flags)
886 * Adds an FDB entry to dev for addr. 886 * Adds an FDB entry to dev for addr.
887 * int (*ndo_fdb_del)(struct ndmsg *ndm, struct net_device *dev, 887 * int (*ndo_fdb_del)(struct ndmsg *ndm, struct net_device *dev,
888 * const unsigned char *addr) 888 * const unsigned char *addr)
889 * Deletes the FDB entry from dev corresponding to addr. 889 * Deletes the FDB entry from dev corresponding to addr.
890 * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, 890 * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb,
891 * struct net_device *dev, int idx) 891 * struct net_device *dev, int idx)
892 * Used to add FDB entries to dump requests. Implementers should add 892 * Used to add FDB entries to dump requests. Implementers should add
893 * entries to skb and update idx with the number of entries. 893 * entries to skb and update idx with the number of entries.
894 * 894 *
895 * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh) 895 * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh)
896 * int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, 896 * int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq,
897 * struct net_device *dev) 897 * struct net_device *dev)
898 * 898 *
899 * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); 899 * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier);
900 * Called to change device carrier. Soft-devices (like dummy, team, etc) 900 * Called to change device carrier. Soft-devices (like dummy, team, etc)
901 * which do not represent real hardware may define this to allow their 901 * which do not represent real hardware may define this to allow their
902 * userspace components to manage their virtual carrier state. Devices 902 * userspace components to manage their virtual carrier state. Devices
903 * that determine carrier state from physical hardware properties (eg 903 * that determine carrier state from physical hardware properties (eg
904 * network cables) or protocol-dependent mechanisms (eg 904 * network cables) or protocol-dependent mechanisms (eg
905 * USB_CDC_NOTIFY_NETWORK_CONNECTION) should NOT implement this function. 905 * USB_CDC_NOTIFY_NETWORK_CONNECTION) should NOT implement this function.
906 */ 906 */
907 struct net_device_ops { 907 struct net_device_ops {
908 int (*ndo_init)(struct net_device *dev); 908 int (*ndo_init)(struct net_device *dev);
909 void (*ndo_uninit)(struct net_device *dev); 909 void (*ndo_uninit)(struct net_device *dev);
910 int (*ndo_open)(struct net_device *dev); 910 int (*ndo_open)(struct net_device *dev);
911 int (*ndo_stop)(struct net_device *dev); 911 int (*ndo_stop)(struct net_device *dev);
912 netdev_tx_t (*ndo_start_xmit) (struct sk_buff *skb, 912 netdev_tx_t (*ndo_start_xmit) (struct sk_buff *skb,
913 struct net_device *dev); 913 struct net_device *dev);
914 u16 (*ndo_select_queue)(struct net_device *dev, 914 u16 (*ndo_select_queue)(struct net_device *dev,
915 struct sk_buff *skb); 915 struct sk_buff *skb);
916 void (*ndo_change_rx_flags)(struct net_device *dev, 916 void (*ndo_change_rx_flags)(struct net_device *dev,
917 int flags); 917 int flags);
918 void (*ndo_set_rx_mode)(struct net_device *dev); 918 void (*ndo_set_rx_mode)(struct net_device *dev);
919 int (*ndo_set_mac_address)(struct net_device *dev, 919 int (*ndo_set_mac_address)(struct net_device *dev,
920 void *addr); 920 void *addr);
921 int (*ndo_validate_addr)(struct net_device *dev); 921 int (*ndo_validate_addr)(struct net_device *dev);
922 int (*ndo_do_ioctl)(struct net_device *dev, 922 int (*ndo_do_ioctl)(struct net_device *dev,
923 struct ifreq *ifr, int cmd); 923 struct ifreq *ifr, int cmd);
924 int (*ndo_set_config)(struct net_device *dev, 924 int (*ndo_set_config)(struct net_device *dev,
925 struct ifmap *map); 925 struct ifmap *map);
926 int (*ndo_change_mtu)(struct net_device *dev, 926 int (*ndo_change_mtu)(struct net_device *dev,
927 int new_mtu); 927 int new_mtu);
928 int (*ndo_neigh_setup)(struct net_device *dev, 928 int (*ndo_neigh_setup)(struct net_device *dev,
929 struct neigh_parms *); 929 struct neigh_parms *);
930 void (*ndo_tx_timeout) (struct net_device *dev); 930 void (*ndo_tx_timeout) (struct net_device *dev);
931 931
932 struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev, 932 struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev,
933 struct rtnl_link_stats64 *storage); 933 struct rtnl_link_stats64 *storage);
934 struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); 934 struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
935 935
936 int (*ndo_vlan_rx_add_vid)(struct net_device *dev, 936 int (*ndo_vlan_rx_add_vid)(struct net_device *dev,
937 unsigned short vid); 937 unsigned short vid);
938 int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, 938 int (*ndo_vlan_rx_kill_vid)(struct net_device *dev,
939 unsigned short vid); 939 unsigned short vid);
940 #ifdef CONFIG_NET_POLL_CONTROLLER 940 #ifdef CONFIG_NET_POLL_CONTROLLER
941 void (*ndo_poll_controller)(struct net_device *dev); 941 void (*ndo_poll_controller)(struct net_device *dev);
942 int (*ndo_netpoll_setup)(struct net_device *dev, 942 int (*ndo_netpoll_setup)(struct net_device *dev,
943 struct netpoll_info *info, 943 struct netpoll_info *info,
944 gfp_t gfp); 944 gfp_t gfp);
945 void (*ndo_netpoll_cleanup)(struct net_device *dev); 945 void (*ndo_netpoll_cleanup)(struct net_device *dev);
946 #endif 946 #endif
947 int (*ndo_set_vf_mac)(struct net_device *dev, 947 int (*ndo_set_vf_mac)(struct net_device *dev,
948 int queue, u8 *mac); 948 int queue, u8 *mac);
949 int (*ndo_set_vf_vlan)(struct net_device *dev, 949 int (*ndo_set_vf_vlan)(struct net_device *dev,
950 int queue, u16 vlan, u8 qos); 950 int queue, u16 vlan, u8 qos);
951 int (*ndo_set_vf_tx_rate)(struct net_device *dev, 951 int (*ndo_set_vf_tx_rate)(struct net_device *dev,
952 int vf, int rate); 952 int vf, int rate);
953 int (*ndo_set_vf_spoofchk)(struct net_device *dev, 953 int (*ndo_set_vf_spoofchk)(struct net_device *dev,
954 int vf, bool setting); 954 int vf, bool setting);
955 int (*ndo_get_vf_config)(struct net_device *dev, 955 int (*ndo_get_vf_config)(struct net_device *dev,
956 int vf, 956 int vf,
957 struct ifla_vf_info *ivf); 957 struct ifla_vf_info *ivf);
958 int (*ndo_set_vf_port)(struct net_device *dev, 958 int (*ndo_set_vf_port)(struct net_device *dev,
959 int vf, 959 int vf,
960 struct nlattr *port[]); 960 struct nlattr *port[]);
961 int (*ndo_get_vf_port)(struct net_device *dev, 961 int (*ndo_get_vf_port)(struct net_device *dev,
962 int vf, struct sk_buff *skb); 962 int vf, struct sk_buff *skb);
963 int (*ndo_setup_tc)(struct net_device *dev, u8 tc); 963 int (*ndo_setup_tc)(struct net_device *dev, u8 tc);
964 #if IS_ENABLED(CONFIG_FCOE) 964 #if IS_ENABLED(CONFIG_FCOE)
965 int (*ndo_fcoe_enable)(struct net_device *dev); 965 int (*ndo_fcoe_enable)(struct net_device *dev);
966 int (*ndo_fcoe_disable)(struct net_device *dev); 966 int (*ndo_fcoe_disable)(struct net_device *dev);
967 int (*ndo_fcoe_ddp_setup)(struct net_device *dev, 967 int (*ndo_fcoe_ddp_setup)(struct net_device *dev,
968 u16 xid, 968 u16 xid,
969 struct scatterlist *sgl, 969 struct scatterlist *sgl,
970 unsigned int sgc); 970 unsigned int sgc);
971 int (*ndo_fcoe_ddp_done)(struct net_device *dev, 971 int (*ndo_fcoe_ddp_done)(struct net_device *dev,
972 u16 xid); 972 u16 xid);
973 int (*ndo_fcoe_ddp_target)(struct net_device *dev, 973 int (*ndo_fcoe_ddp_target)(struct net_device *dev,
974 u16 xid, 974 u16 xid,
975 struct scatterlist *sgl, 975 struct scatterlist *sgl,
976 unsigned int sgc); 976 unsigned int sgc);
977 int (*ndo_fcoe_get_hbainfo)(struct net_device *dev, 977 int (*ndo_fcoe_get_hbainfo)(struct net_device *dev,
978 struct netdev_fcoe_hbainfo *hbainfo); 978 struct netdev_fcoe_hbainfo *hbainfo);
979 #endif 979 #endif
980 980
981 #if IS_ENABLED(CONFIG_LIBFCOE) 981 #if IS_ENABLED(CONFIG_LIBFCOE)
982 #define NETDEV_FCOE_WWNN 0 982 #define NETDEV_FCOE_WWNN 0
983 #define NETDEV_FCOE_WWPN 1 983 #define NETDEV_FCOE_WWPN 1
984 int (*ndo_fcoe_get_wwn)(struct net_device *dev, 984 int (*ndo_fcoe_get_wwn)(struct net_device *dev,
985 u64 *wwn, int type); 985 u64 *wwn, int type);
986 #endif 986 #endif
987 987
988 #ifdef CONFIG_RFS_ACCEL 988 #ifdef CONFIG_RFS_ACCEL
989 int (*ndo_rx_flow_steer)(struct net_device *dev, 989 int (*ndo_rx_flow_steer)(struct net_device *dev,
990 const struct sk_buff *skb, 990 const struct sk_buff *skb,
991 u16 rxq_index, 991 u16 rxq_index,
992 u32 flow_id); 992 u32 flow_id);
993 #endif 993 #endif
994 int (*ndo_add_slave)(struct net_device *dev, 994 int (*ndo_add_slave)(struct net_device *dev,
995 struct net_device *slave_dev); 995 struct net_device *slave_dev);
996 int (*ndo_del_slave)(struct net_device *dev, 996 int (*ndo_del_slave)(struct net_device *dev,
997 struct net_device *slave_dev); 997 struct net_device *slave_dev);
998 netdev_features_t (*ndo_fix_features)(struct net_device *dev, 998 netdev_features_t (*ndo_fix_features)(struct net_device *dev,
999 netdev_features_t features); 999 netdev_features_t features);
1000 int (*ndo_set_features)(struct net_device *dev, 1000 int (*ndo_set_features)(struct net_device *dev,
1001 netdev_features_t features); 1001 netdev_features_t features);
1002 int (*ndo_neigh_construct)(struct neighbour *n); 1002 int (*ndo_neigh_construct)(struct neighbour *n);
1003 void (*ndo_neigh_destroy)(struct neighbour *n); 1003 void (*ndo_neigh_destroy)(struct neighbour *n);
1004 1004
1005 int (*ndo_fdb_add)(struct ndmsg *ndm, 1005 int (*ndo_fdb_add)(struct ndmsg *ndm,
1006 struct nlattr *tb[], 1006 struct nlattr *tb[],
1007 struct net_device *dev, 1007 struct net_device *dev,
1008 const unsigned char *addr, 1008 const unsigned char *addr,
1009 u16 flags); 1009 u16 flags);
1010 int (*ndo_fdb_del)(struct ndmsg *ndm, 1010 int (*ndo_fdb_del)(struct ndmsg *ndm,
1011 struct net_device *dev, 1011 struct net_device *dev,
1012 const unsigned char *addr); 1012 const unsigned char *addr);
1013 int (*ndo_fdb_dump)(struct sk_buff *skb, 1013 int (*ndo_fdb_dump)(struct sk_buff *skb,
1014 struct netlink_callback *cb, 1014 struct netlink_callback *cb,
1015 struct net_device *dev, 1015 struct net_device *dev,
1016 int idx); 1016 int idx);
1017 1017
1018 int (*ndo_bridge_setlink)(struct net_device *dev, 1018 int (*ndo_bridge_setlink)(struct net_device *dev,
1019 struct nlmsghdr *nlh); 1019 struct nlmsghdr *nlh);
1020 int (*ndo_bridge_getlink)(struct sk_buff *skb, 1020 int (*ndo_bridge_getlink)(struct sk_buff *skb,
1021 u32 pid, u32 seq, 1021 u32 pid, u32 seq,
1022 struct net_device *dev); 1022 struct net_device *dev);
1023 int (*ndo_change_carrier)(struct net_device *dev, 1023 int (*ndo_change_carrier)(struct net_device *dev,
1024 bool new_carrier); 1024 bool new_carrier);
1025 }; 1025 };
1026 1026
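As a point of reference, here is a minimal sketch of how an Ethernet driver of this era might populate a handful of these hooks. It is not taken from this commit; foo_open, foo_stop and foo_xmit are hypothetical driver functions, while eth_validate_addr, eth_mac_addr and eth_change_mtu are the stock Ethernet helpers.

#include <linux/netdevice.h>
#include <linux/etherdevice.h>

static int foo_open(struct net_device *dev)
{
	netif_start_queue(dev);		/* allow the stack to call ndo_start_xmit */
	return 0;
}

static int foo_stop(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

static netdev_tx_t foo_xmit(struct sk_buff *skb, struct net_device *dev)
{
	/* a real driver would queue skb to hardware; this sketch just drops it */
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

static const struct net_device_ops foo_netdev_ops = {
	.ndo_open		= foo_open,
	.ndo_stop		= foo_stop,
	.ndo_start_xmit		= foo_xmit,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_change_mtu		= eth_change_mtu,
};

Hooks left NULL simply fall back to the core defaults or are treated as unsupported, which is why most drivers fill in only a small subset of the structure.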
1027 /* 1027 /*
1028 * The DEVICE structure. 1028 * The DEVICE structure.
1029 * Actually, this whole structure is a big mistake. It mixes I/O 1029 * Actually, this whole structure is a big mistake. It mixes I/O
1030 * data with strictly "high-level" data, and it has to know about 1030 * data with strictly "high-level" data, and it has to know about
1031 * almost every data structure used in the INET module. 1031 * almost every data structure used in the INET module.
1032 * 1032 *
1033 * FIXME: cleanup struct net_device such that network protocol info 1033 * FIXME: cleanup struct net_device such that network protocol info
1034 * moves out. 1034 * moves out.
1035 */ 1035 */
1036 1036
1037 struct net_device { 1037 struct net_device {
1038 1038
1039 /* 1039 /*
1040 * This is the first field of the "visible" part of this structure 1040 * This is the first field of the "visible" part of this structure
1041 * (i.e. as seen by users in the "Space.c" file). It is the name 1041 * (i.e. as seen by users in the "Space.c" file). It is the name
1042 * of the interface. 1042 * of the interface.
1043 */ 1043 */
1044 char name[IFNAMSIZ]; 1044 char name[IFNAMSIZ];
1045 1045
1046 /* device name hash chain, please keep it close to name[] */ 1046 /* device name hash chain, please keep it close to name[] */
1047 struct hlist_node name_hlist; 1047 struct hlist_node name_hlist;
1048 1048
1049 /* snmp alias */ 1049 /* snmp alias */
1050 char *ifalias; 1050 char *ifalias;
1051 1051
1052 /* 1052 /*
1053 * I/O specific fields 1053 * I/O specific fields
1054 * FIXME: Merge these and struct ifmap into one 1054 * FIXME: Merge these and struct ifmap into one
1055 */ 1055 */
1056 unsigned long mem_end; /* shared mem end */ 1056 unsigned long mem_end; /* shared mem end */
1057 unsigned long mem_start; /* shared mem start */ 1057 unsigned long mem_start; /* shared mem start */
1058 unsigned long base_addr; /* device I/O address */ 1058 unsigned long base_addr; /* device I/O address */
1059 unsigned int irq; /* device IRQ number */ 1059 unsigned int irq; /* device IRQ number */
1060 1060
1061 /* 1061 /*
1062 * Some hardware also needs these fields, but they are not 1062 * Some hardware also needs these fields, but they are not
1063 * part of the usual set specified in Space.c. 1063 * part of the usual set specified in Space.c.
1064 */ 1064 */
1065 1065
1066 unsigned long state; 1066 unsigned long state;
1067 1067
1068 struct list_head dev_list; 1068 struct list_head dev_list;
1069 struct list_head napi_list; 1069 struct list_head napi_list;
1070 struct list_head unreg_list; 1070 struct list_head unreg_list;
1071 1071
1072 /* currently active device features */ 1072 /* currently active device features */
1073 netdev_features_t features; 1073 netdev_features_t features;
1074 /* user-changeable features */ 1074 /* user-changeable features */
1075 netdev_features_t hw_features; 1075 netdev_features_t hw_features;
1076 /* user-requested features */ 1076 /* user-requested features */
1077 netdev_features_t wanted_features; 1077 netdev_features_t wanted_features;
1078 /* mask of features inheritable by VLAN devices */ 1078 /* mask of features inheritable by VLAN devices */
1079 netdev_features_t vlan_features; 1079 netdev_features_t vlan_features;
1080 /* mask of features inherited by encapsulating devices 1080 /* mask of features inherited by encapsulating devices
1081 * This field indicates what encapsulation offloads 1081 * This field indicates what encapsulation offloads
1082 * the hardware is capable of doing, and drivers will 1082 * the hardware is capable of doing, and drivers will
1083 * need to set them appropriately. 1083 * need to set them appropriately.
1084 */ 1084 */
1085 netdev_features_t hw_enc_features; 1085 netdev_features_t hw_enc_features;
1086 1086
1087 /* Interface index. Unique device identifier */ 1087 /* Interface index. Unique device identifier */
1088 int ifindex; 1088 int ifindex;
1089 int iflink; 1089 int iflink;
1090 1090
1091 struct net_device_stats stats; 1091 struct net_device_stats stats;
1092 atomic_long_t rx_dropped; /* dropped packets by core network 1092 atomic_long_t rx_dropped; /* dropped packets by core network
1093 * Do not use this in drivers. 1093 * Do not use this in drivers.
1094 */ 1094 */
1095 1095
1096 #ifdef CONFIG_WIRELESS_EXT 1096 #ifdef CONFIG_WIRELESS_EXT
1097 /* List of functions to handle Wireless Extensions (instead of ioctl). 1097 /* List of functions to handle Wireless Extensions (instead of ioctl).
1098 * See <net/iw_handler.h> for details. Jean II */ 1098 * See <net/iw_handler.h> for details. Jean II */
1099 const struct iw_handler_def * wireless_handlers; 1099 const struct iw_handler_def * wireless_handlers;
1100 /* Instance data managed by the core of Wireless Extensions. */ 1100 /* Instance data managed by the core of Wireless Extensions. */
1101 struct iw_public_data * wireless_data; 1101 struct iw_public_data * wireless_data;
1102 #endif 1102 #endif
1103 /* Management operations */ 1103 /* Management operations */
1104 const struct net_device_ops *netdev_ops; 1104 const struct net_device_ops *netdev_ops;
1105 const struct ethtool_ops *ethtool_ops; 1105 const struct ethtool_ops *ethtool_ops;
1106 1106
1107 /* Hardware header description */ 1107 /* Hardware header description */
1108 const struct header_ops *header_ops; 1108 const struct header_ops *header_ops;
1109 1109
1110 unsigned int flags; /* interface flags (a la BSD) */ 1110 unsigned int flags; /* interface flags (a la BSD) */
1111 unsigned int priv_flags; /* Like 'flags' but invisible to userspace. 1111 unsigned int priv_flags; /* Like 'flags' but invisible to userspace.
1112 * See if.h for definitions. */ 1112 * See if.h for definitions. */
1113 unsigned short gflags; 1113 unsigned short gflags;
1114 unsigned short padded; /* How much padding added by alloc_netdev() */ 1114 unsigned short padded; /* How much padding added by alloc_netdev() */
1115 1115
1116 unsigned char operstate; /* RFC2863 operstate */ 1116 unsigned char operstate; /* RFC2863 operstate */
1117 unsigned char link_mode; /* mapping policy to operstate */ 1117 unsigned char link_mode; /* mapping policy to operstate */
1118 1118
1119 unsigned char if_port; /* Selectable AUI, TP,..*/ 1119 unsigned char if_port; /* Selectable AUI, TP,..*/
1120 unsigned char dma; /* DMA channel */ 1120 unsigned char dma; /* DMA channel */
1121 1121
1122 unsigned int mtu; /* interface MTU value */ 1122 unsigned int mtu; /* interface MTU value */
1123 unsigned short type; /* interface hardware type */ 1123 unsigned short type; /* interface hardware type */
1124 unsigned short hard_header_len; /* hardware hdr length */ 1124 unsigned short hard_header_len; /* hardware hdr length */
1125 1125
1126 /* extra head- and tailroom the hardware may need, but not in all cases 1126 /* extra head- and tailroom the hardware may need, but not in all cases
1127 * can this be guaranteed, especially tailroom. Some cases also use 1127 * can this be guaranteed, especially tailroom. Some cases also use
1128 * LL_MAX_HEADER instead to allocate the skb. 1128 * LL_MAX_HEADER instead to allocate the skb.
1129 */ 1129 */
1130 unsigned short needed_headroom; 1130 unsigned short needed_headroom;
1131 unsigned short needed_tailroom; 1131 unsigned short needed_tailroom;
1132 1132
1133 /* Interface address info. */ 1133 /* Interface address info. */
1134 unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */ 1134 unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */
1135 unsigned char addr_assign_type; /* hw address assignment type */ 1135 unsigned char addr_assign_type; /* hw address assignment type */
1136 unsigned char addr_len; /* hardware address length */ 1136 unsigned char addr_len; /* hardware address length */
1137 unsigned char neigh_priv_len; 1137 unsigned char neigh_priv_len;
1138 unsigned short dev_id; /* for shared network cards */ 1138 unsigned short dev_id; /* for shared network cards */
1139 1139
1140 spinlock_t addr_list_lock; 1140 spinlock_t addr_list_lock;
1141 struct netdev_hw_addr_list uc; /* Unicast mac addresses */ 1141 struct netdev_hw_addr_list uc; /* Unicast mac addresses */
1142 struct netdev_hw_addr_list mc; /* Multicast mac addresses */ 1142 struct netdev_hw_addr_list mc; /* Multicast mac addresses */
1143 bool uc_promisc; 1143 bool uc_promisc;
1144 unsigned int promiscuity; 1144 unsigned int promiscuity;
1145 unsigned int allmulti; 1145 unsigned int allmulti;
1146 1146
1147 1147
1148 /* Protocol specific pointers */ 1148 /* Protocol specific pointers */
1149 1149
1150 #if IS_ENABLED(CONFIG_VLAN_8021Q) 1150 #if IS_ENABLED(CONFIG_VLAN_8021Q)
1151 struct vlan_info __rcu *vlan_info; /* VLAN info */ 1151 struct vlan_info __rcu *vlan_info; /* VLAN info */
1152 #endif 1152 #endif
1153 #if IS_ENABLED(CONFIG_NET_DSA) 1153 #if IS_ENABLED(CONFIG_NET_DSA)
1154 struct dsa_switch_tree *dsa_ptr; /* dsa specific data */ 1154 struct dsa_switch_tree *dsa_ptr; /* dsa specific data */
1155 #endif 1155 #endif
1156 void *atalk_ptr; /* AppleTalk link */ 1156 void *atalk_ptr; /* AppleTalk link */
1157 struct in_device __rcu *ip_ptr; /* IPv4 specific data */ 1157 struct in_device __rcu *ip_ptr; /* IPv4 specific data */
1158 struct dn_dev __rcu *dn_ptr; /* DECnet specific data */ 1158 struct dn_dev __rcu *dn_ptr; /* DECnet specific data */
1159 struct inet6_dev __rcu *ip6_ptr; /* IPv6 specific data */ 1159 struct inet6_dev __rcu *ip6_ptr; /* IPv6 specific data */
1160 void *ax25_ptr; /* AX.25 specific data */ 1160 void *ax25_ptr; /* AX.25 specific data */
1161 struct wireless_dev *ieee80211_ptr; /* IEEE 802.11 specific data, 1161 struct wireless_dev *ieee80211_ptr; /* IEEE 802.11 specific data,
1162 assign before registering */ 1162 assign before registering */
1163 1163
1164 /* 1164 /*
1165 * Cache lines mostly used on receive path (including eth_type_trans()) 1165 * Cache lines mostly used on receive path (including eth_type_trans())
1166 */ 1166 */
1167 unsigned long last_rx; /* Time of last Rx 1167 unsigned long last_rx; /* Time of last Rx
1168 * This should not be set in 1168 * This should not be set in
1169 * drivers, unless really needed, 1169 * drivers, unless really needed,
1170 * because the network stack (bonding) 1170 * because the network stack (bonding)
1171 * uses it if/when necessary, to 1171 * uses it if/when necessary, to
1172 * avoid dirtying this cache line. 1172 * avoid dirtying this cache line.
1173 */ 1173 */
1174 1174
1175 struct list_head upper_dev_list; /* List of upper devices */ 1175 struct list_head upper_dev_list; /* List of upper devices */
1176 1176
1177 /* Interface address info used in eth_type_trans() */ 1177 /* Interface address info used in eth_type_trans() */
1178 unsigned char *dev_addr; /* hw address, (before bcast 1178 unsigned char *dev_addr; /* hw address, (before bcast
1179 because most packets are 1179 because most packets are
1180 unicast) */ 1180 unicast) */
1181 1181
1182 struct netdev_hw_addr_list dev_addrs; /* list of device 1182 struct netdev_hw_addr_list dev_addrs; /* list of device
1183 hw addresses */ 1183 hw addresses */
1184 1184
1185 unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast addr */ 1185 unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast addr */
1186 1186
1187 #ifdef CONFIG_SYSFS 1187 #ifdef CONFIG_SYSFS
1188 struct kset *queues_kset; 1188 struct kset *queues_kset;
1189 #endif 1189 #endif
1190 1190
1191 #ifdef CONFIG_RPS 1191 #ifdef CONFIG_RPS
1192 struct netdev_rx_queue *_rx; 1192 struct netdev_rx_queue *_rx;
1193 1193
1194 /* Number of RX queues allocated at register_netdev() time */ 1194 /* Number of RX queues allocated at register_netdev() time */
1195 unsigned int num_rx_queues; 1195 unsigned int num_rx_queues;
1196 1196
1197 /* Number of RX queues currently active in device */ 1197 /* Number of RX queues currently active in device */
1198 unsigned int real_num_rx_queues; 1198 unsigned int real_num_rx_queues;
1199 1199
1200 #ifdef CONFIG_RFS_ACCEL 1200 #ifdef CONFIG_RFS_ACCEL
1201 /* CPU reverse-mapping for RX completion interrupts, indexed 1201 /* CPU reverse-mapping for RX completion interrupts, indexed
1202 * by RX queue number. Assigned by driver. This must only be 1202 * by RX queue number. Assigned by driver. This must only be
1203 * set if the ndo_rx_flow_steer operation is defined. */ 1203 * set if the ndo_rx_flow_steer operation is defined. */
1204 struct cpu_rmap *rx_cpu_rmap; 1204 struct cpu_rmap *rx_cpu_rmap;
1205 #endif 1205 #endif
1206 #endif 1206 #endif
1207 1207
1208 rx_handler_func_t __rcu *rx_handler; 1208 rx_handler_func_t __rcu *rx_handler;
1209 void __rcu *rx_handler_data; 1209 void __rcu *rx_handler_data;
1210 1210
1211 struct netdev_queue __rcu *ingress_queue; 1211 struct netdev_queue __rcu *ingress_queue;
1212 1212
1213 /* 1213 /*
1214 * Cache lines mostly used on transmit path 1214 * Cache lines mostly used on transmit path
1215 */ 1215 */
1216 struct netdev_queue *_tx ____cacheline_aligned_in_smp; 1216 struct netdev_queue *_tx ____cacheline_aligned_in_smp;
1217 1217
1218 /* Number of TX queues allocated at alloc_netdev_mq() time */ 1218 /* Number of TX queues allocated at alloc_netdev_mq() time */
1219 unsigned int num_tx_queues; 1219 unsigned int num_tx_queues;
1220 1220
1221 /* Number of TX queues currently active in device */ 1221 /* Number of TX queues currently active in device */
1222 unsigned int real_num_tx_queues; 1222 unsigned int real_num_tx_queues;
1223 1223
1224 /* root qdisc from userspace point of view */ 1224 /* root qdisc from userspace point of view */
1225 struct Qdisc *qdisc; 1225 struct Qdisc *qdisc;
1226 1226
1227 unsigned long tx_queue_len; /* Max frames per queue allowed */ 1227 unsigned long tx_queue_len; /* Max frames per queue allowed */
1228 spinlock_t tx_global_lock; 1228 spinlock_t tx_global_lock;
1229 1229
1230 #ifdef CONFIG_XPS 1230 #ifdef CONFIG_XPS
1231 struct xps_dev_maps __rcu *xps_maps; 1231 struct xps_dev_maps __rcu *xps_maps;
1232 #endif 1232 #endif
1233 1233
1234 /* These may be needed for future network-power-down code. */ 1234 /* These may be needed for future network-power-down code. */
1235 1235
1236 /* 1236 /*
1237 * trans_start here is expensive for high speed devices on SMP, 1237 * trans_start here is expensive for high speed devices on SMP,
1238 * please use netdev_queue->trans_start instead. 1238 * please use netdev_queue->trans_start instead.
1239 */ 1239 */
1240 unsigned long trans_start; /* Time (in jiffies) of last Tx */ 1240 unsigned long trans_start; /* Time (in jiffies) of last Tx */
1241 1241
1242 int watchdog_timeo; /* used by dev_watchdog() */ 1242 int watchdog_timeo; /* used by dev_watchdog() */
1243 struct timer_list watchdog_timer; 1243 struct timer_list watchdog_timer;
1244 1244
1245 /* Number of references to this device */ 1245 /* Number of references to this device */
1246 int __percpu *pcpu_refcnt; 1246 int __percpu *pcpu_refcnt;
1247 1247
1248 /* delayed register/unregister */ 1248 /* delayed register/unregister */
1249 struct list_head todo_list; 1249 struct list_head todo_list;
1250 /* device index hash chain */ 1250 /* device index hash chain */
1251 struct hlist_node index_hlist; 1251 struct hlist_node index_hlist;
1252 1252
1253 struct list_head link_watch_list; 1253 struct list_head link_watch_list;
1254 1254
1255 /* register/unregister state machine */ 1255 /* register/unregister state machine */
1256 enum { NETREG_UNINITIALIZED=0, 1256 enum { NETREG_UNINITIALIZED=0,
1257 NETREG_REGISTERED, /* completed register_netdevice */ 1257 NETREG_REGISTERED, /* completed register_netdevice */
1258 NETREG_UNREGISTERING, /* called unregister_netdevice */ 1258 NETREG_UNREGISTERING, /* called unregister_netdevice */
1259 NETREG_UNREGISTERED, /* completed unregister todo */ 1259 NETREG_UNREGISTERED, /* completed unregister todo */
1260 NETREG_RELEASED, /* called free_netdev */ 1260 NETREG_RELEASED, /* called free_netdev */
1261 NETREG_DUMMY, /* dummy device for NAPI poll */ 1261 NETREG_DUMMY, /* dummy device for NAPI poll */
1262 } reg_state:8; 1262 } reg_state:8;
1263 1263
1264 bool dismantle; /* device is going to be freed */ 1264 bool dismantle; /* device is going to be freed */
1265 1265
1266 enum { 1266 enum {
1267 RTNL_LINK_INITIALIZED, 1267 RTNL_LINK_INITIALIZED,
1268 RTNL_LINK_INITIALIZING, 1268 RTNL_LINK_INITIALIZING,
1269 } rtnl_link_state:16; 1269 } rtnl_link_state:16;
1270 1270
1271 /* Called from unregister, can be used to call free_netdev */ 1271 /* Called from unregister, can be used to call free_netdev */
1272 void (*destructor)(struct net_device *dev); 1272 void (*destructor)(struct net_device *dev);
1273 1273
1274 #ifdef CONFIG_NETPOLL 1274 #ifdef CONFIG_NETPOLL
1275 struct netpoll_info __rcu *npinfo; 1275 struct netpoll_info __rcu *npinfo;
1276 #endif 1276 #endif
1277 1277
1278 #ifdef CONFIG_NET_NS 1278 #ifdef CONFIG_NET_NS
1279 /* Network namespace this network device is inside */ 1279 /* Network namespace this network device is inside */
1280 struct net *nd_net; 1280 struct net *nd_net;
1281 #endif 1281 #endif
1282 1282
1283 /* mid-layer private */ 1283 /* mid-layer private */
1284 union { 1284 union {
1285 void *ml_priv; 1285 void *ml_priv;
1286 struct pcpu_lstats __percpu *lstats; /* loopback stats */ 1286 struct pcpu_lstats __percpu *lstats; /* loopback stats */
1287 struct pcpu_tstats __percpu *tstats; /* tunnel stats */ 1287 struct pcpu_tstats __percpu *tstats; /* tunnel stats */
1288 struct pcpu_dstats __percpu *dstats; /* dummy stats */ 1288 struct pcpu_dstats __percpu *dstats; /* dummy stats */
1289 struct pcpu_vstats __percpu *vstats; /* veth stats */ 1289 struct pcpu_vstats __percpu *vstats; /* veth stats */
1290 }; 1290 };
1291 /* GARP */ 1291 /* GARP */
1292 struct garp_port __rcu *garp_port; 1292 struct garp_port __rcu *garp_port;
1293 1293
1294 /* class/net/name entry */ 1294 /* class/net/name entry */
1295 struct device dev; 1295 struct device dev;
1296 /* space for optional device, statistics, and wireless sysfs groups */ 1296 /* space for optional device, statistics, and wireless sysfs groups */
1297 const struct attribute_group *sysfs_groups[4]; 1297 const struct attribute_group *sysfs_groups[4];
1298 1298
1299 /* rtnetlink link ops */ 1299 /* rtnetlink link ops */
1300 const struct rtnl_link_ops *rtnl_link_ops; 1300 const struct rtnl_link_ops *rtnl_link_ops;
1301 1301
1302 /* for setting kernel sock attribute on TCP connection setup */ 1302 /* for setting kernel sock attribute on TCP connection setup */
1303 #define GSO_MAX_SIZE 65536 1303 #define GSO_MAX_SIZE 65536
1304 unsigned int gso_max_size; 1304 unsigned int gso_max_size;
1305 #define GSO_MAX_SEGS 65535 1305 #define GSO_MAX_SEGS 65535
1306 u16 gso_max_segs; 1306 u16 gso_max_segs;
1307 1307
1308 #ifdef CONFIG_DCB 1308 #ifdef CONFIG_DCB
1309 /* Data Center Bridging netlink ops */ 1309 /* Data Center Bridging netlink ops */
1310 const struct dcbnl_rtnl_ops *dcbnl_ops; 1310 const struct dcbnl_rtnl_ops *dcbnl_ops;
1311 #endif 1311 #endif
1312 u8 num_tc; 1312 u8 num_tc;
1313 struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; 1313 struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
1314 u8 prio_tc_map[TC_BITMASK + 1]; 1314 u8 prio_tc_map[TC_BITMASK + 1];
1315 1315
1316 #if IS_ENABLED(CONFIG_FCOE) 1316 #if IS_ENABLED(CONFIG_FCOE)
1317 /* max exchange id for FCoE LRO by ddp */ 1317 /* max exchange id for FCoE LRO by ddp */
1318 unsigned int fcoe_ddp_xid; 1318 unsigned int fcoe_ddp_xid;
1319 #endif 1319 #endif
1320 #if IS_ENABLED(CONFIG_NETPRIO_CGROUP) 1320 #if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
1321 struct netprio_map __rcu *priomap; 1321 struct netprio_map __rcu *priomap;
1322 #endif 1322 #endif
1323 /* phy device may attach itself for hardware timestamping */ 1323 /* phy device may attach itself for hardware timestamping */
1324 struct phy_device *phydev; 1324 struct phy_device *phydev;
1325 1325
1326 struct lock_class_key *qdisc_tx_busylock; 1326 struct lock_class_key *qdisc_tx_busylock;
1327 1327
1328 /* group the device belongs to */ 1328 /* group the device belongs to */
1329 int group; 1329 int group;
1330 1330
1331 struct pm_qos_request pm_qos_req; 1331 struct pm_qos_request pm_qos_req;
1332 }; 1332 };
1333 #define to_net_dev(d) container_of(d, struct net_device, dev) 1333 #define to_net_dev(d) container_of(d, struct net_device, dev)
1334 1334
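To illustrate how the structure is typically brought to life, the following hedged sketch shows the usual allocate/initialize/register sequence. It reuses the hypothetical foo_netdev_ops from the earlier sketch, allocates no driver-private area, and assumes the usual <linux/etherdevice.h> helpers.

static int foo_create(void)
{
	struct net_device *dev;
	int err;

	dev = alloc_etherdev(0);	/* no driver-private area in this sketch */
	if (!dev)
		return -ENOMEM;

	dev->netdev_ops = &foo_netdev_ops;
	eth_hw_addr_random(dev);	/* give the interface a random MAC */

	err = register_netdev(dev);
	if (err)
		free_netdev(dev);
	return err;
}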
1335 #define NETDEV_ALIGN 32 1335 #define NETDEV_ALIGN 32
1336 1336
1337 static inline 1337 static inline
1338 int netdev_get_prio_tc_map(const struct net_device *dev, u32 prio) 1338 int netdev_get_prio_tc_map(const struct net_device *dev, u32 prio)
1339 { 1339 {
1340 return dev->prio_tc_map[prio & TC_BITMASK]; 1340 return dev->prio_tc_map[prio & TC_BITMASK];
1341 } 1341 }
1342 1342
1343 static inline 1343 static inline
1344 int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc) 1344 int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc)
1345 { 1345 {
1346 if (tc >= dev->num_tc) 1346 if (tc >= dev->num_tc)
1347 return -EINVAL; 1347 return -EINVAL;
1348 1348
1349 dev->prio_tc_map[prio & TC_BITMASK] = tc & TC_BITMASK; 1349 dev->prio_tc_map[prio & TC_BITMASK] = tc & TC_BITMASK;
1350 return 0; 1350 return 0;
1351 } 1351 }
1352 1352
1353 static inline 1353 static inline
1354 void netdev_reset_tc(struct net_device *dev) 1354 void netdev_reset_tc(struct net_device *dev)
1355 { 1355 {
1356 dev->num_tc = 0; 1356 dev->num_tc = 0;
1357 memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq)); 1357 memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
1358 memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map)); 1358 memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
1359 } 1359 }
1360 1360
1361 static inline 1361 static inline
1362 int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset) 1362 int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset)
1363 { 1363 {
1364 if (tc >= dev->num_tc) 1364 if (tc >= dev->num_tc)
1365 return -EINVAL; 1365 return -EINVAL;
1366 1366
1367 dev->tc_to_txq[tc].count = count; 1367 dev->tc_to_txq[tc].count = count;
1368 dev->tc_to_txq[tc].offset = offset; 1368 dev->tc_to_txq[tc].offset = offset;
1369 return 0; 1369 return 0;
1370 } 1370 }
1371 1371
1372 static inline 1372 static inline
1373 int netdev_set_num_tc(struct net_device *dev, u8 num_tc) 1373 int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
1374 { 1374 {
1375 if (num_tc > TC_MAX_QUEUE) 1375 if (num_tc > TC_MAX_QUEUE)
1376 return -EINVAL; 1376 return -EINVAL;
1377 1377
1378 dev->num_tc = num_tc; 1378 dev->num_tc = num_tc;
1379 return 0; 1379 return 0;
1380 } 1380 }
1381 1381
1382 static inline 1382 static inline
1383 int netdev_get_num_tc(struct net_device *dev) 1383 int netdev_get_num_tc(struct net_device *dev)
1384 { 1384 {
1385 return dev->num_tc; 1385 return dev->num_tc;
1386 } 1386 }
1387 1387
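The helpers above are what a multiqueue driver calls when it carves its TX queues into traffic classes, typically from its ndo_setup_tc hook. A minimal sketch, assuming a hypothetical driver that spreads num_tc classes over two queues each:

static int foo_setup_tc(struct net_device *dev, u8 num_tc)
{
	u8 tc, prio;
	int err;

	if (!num_tc) {
		netdev_reset_tc(dev);
		return 0;
	}

	err = netdev_set_num_tc(dev, num_tc);
	if (err)
		return err;

	for (tc = 0; tc < num_tc; tc++) {
		/* two TX queues per class, laid out back to back */
		err = netdev_set_tc_queue(dev, tc, 2, tc * 2);
		if (err)
			return err;
	}

	/* map the 16 possible priorities round-robin onto the classes */
	for (prio = 0; prio <= TC_BITMASK; prio++)
		netdev_set_prio_tc_map(dev, prio, prio % num_tc);

	return 0;
}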
1388 static inline 1388 static inline
1389 struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev, 1389 struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev,
1390 unsigned int index) 1390 unsigned int index)
1391 { 1391 {
1392 return &dev->_tx[index]; 1392 return &dev->_tx[index];
1393 } 1393 }
1394 1394
1395 static inline void netdev_for_each_tx_queue(struct net_device *dev, 1395 static inline void netdev_for_each_tx_queue(struct net_device *dev,
1396 void (*f)(struct net_device *, 1396 void (*f)(struct net_device *,
1397 struct netdev_queue *, 1397 struct netdev_queue *,
1398 void *), 1398 void *),
1399 void *arg) 1399 void *arg)
1400 { 1400 {
1401 unsigned int i; 1401 unsigned int i;
1402 1402
1403 for (i = 0; i < dev->num_tx_queues; i++) 1403 for (i = 0; i < dev->num_tx_queues; i++)
1404 f(dev, &dev->_tx[i], arg); 1404 f(dev, &dev->_tx[i], arg);
1405 } 1405 }
1406 1406
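netdev_for_each_tx_queue() takes a callback with the signature shown above. A small sketch that counts how many TX queues are currently stopped (the foo_* names are hypothetical):

static void foo_count_stopped(struct net_device *dev,
			      struct netdev_queue *txq, void *arg)
{
	unsigned int *stopped = arg;

	if (netif_tx_queue_stopped(txq))
		(*stopped)++;
}

static unsigned int foo_stopped_tx_queues(struct net_device *dev)
{
	unsigned int stopped = 0;

	netdev_for_each_tx_queue(dev, foo_count_stopped, &stopped);
	return stopped;
}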
1407 extern struct netdev_queue *netdev_pick_tx(struct net_device *dev, 1407 extern struct netdev_queue *netdev_pick_tx(struct net_device *dev,
1408 struct sk_buff *skb); 1408 struct sk_buff *skb);
1409 extern u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb); 1409 extern u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb);
1410 1410
1411 /* 1411 /*
1412 * Net namespace inlines 1412 * Net namespace inlines
1413 */ 1413 */
1414 static inline 1414 static inline
1415 struct net *dev_net(const struct net_device *dev) 1415 struct net *dev_net(const struct net_device *dev)
1416 { 1416 {
1417 return read_pnet(&dev->nd_net); 1417 return read_pnet(&dev->nd_net);
1418 } 1418 }
1419 1419
1420 static inline 1420 static inline
1421 void dev_net_set(struct net_device *dev, struct net *net) 1421 void dev_net_set(struct net_device *dev, struct net *net)
1422 { 1422 {
1423 #ifdef CONFIG_NET_NS 1423 #ifdef CONFIG_NET_NS
1424 release_net(dev->nd_net); 1424 release_net(dev->nd_net);
1425 dev->nd_net = hold_net(net); 1425 dev->nd_net = hold_net(net);
1426 #endif 1426 #endif
1427 } 1427 }
1428 1428
1429 static inline bool netdev_uses_dsa_tags(struct net_device *dev) 1429 static inline bool netdev_uses_dsa_tags(struct net_device *dev)
1430 { 1430 {
1431 #ifdef CONFIG_NET_DSA_TAG_DSA 1431 #ifdef CONFIG_NET_DSA_TAG_DSA
1432 if (dev->dsa_ptr != NULL) 1432 if (dev->dsa_ptr != NULL)
1433 return dsa_uses_dsa_tags(dev->dsa_ptr); 1433 return dsa_uses_dsa_tags(dev->dsa_ptr);
1434 #endif 1434 #endif
1435 1435
1436 return 0; 1436 return 0;
1437 } 1437 }
1438 1438
1439 static inline bool netdev_uses_trailer_tags(struct net_device *dev) 1439 static inline bool netdev_uses_trailer_tags(struct net_device *dev)
1440 { 1440 {
1441 #ifdef CONFIG_NET_DSA_TAG_TRAILER 1441 #ifdef CONFIG_NET_DSA_TAG_TRAILER
1442 if (dev->dsa_ptr != NULL) 1442 if (dev->dsa_ptr != NULL)
1443 return dsa_uses_trailer_tags(dev->dsa_ptr); 1443 return dsa_uses_trailer_tags(dev->dsa_ptr);
1444 #endif 1444 #endif
1445 1445
1446 return 0; 1446 return 0;
1447 } 1447 }
1448 1448
1449 /** 1449 /**
1450 * netdev_priv - access network device private data 1450 * netdev_priv - access network device private data
1451 * @dev: network device 1451 * @dev: network device
1452 * 1452 *
1453 * Get network device private data 1453 * Get network device private data
1454 */ 1454 */
1455 static inline void *netdev_priv(const struct net_device *dev) 1455 static inline void *netdev_priv(const struct net_device *dev)
1456 { 1456 {
1457 return (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN); 1457 return (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN);
1458 } 1458 }
1459 1459
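The private area lives directly behind struct net_device (aligned to NETDEV_ALIGN) and is sized by the sizeof_priv argument passed to alloc_netdev()/alloc_etherdev(). A sketch with a hypothetical foo_priv:

struct foo_priv {
	spinlock_t	lock;
	u64		rx_bytes;
};

static void foo_account_rx(struct net_device *dev, unsigned int len)
{
	struct foo_priv *priv = netdev_priv(dev);

	spin_lock(&priv->lock);
	priv->rx_bytes += len;
	spin_unlock(&priv->lock);
}

The device would be allocated with alloc_etherdev(sizeof(struct foo_priv)) and the lock initialized with spin_lock_init() at probe time.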
1460 /* Set the sysfs physical device reference for the network logical device 1460 /* Set the sysfs physical device reference for the network logical device
1461 * if set prior to registration it will cause a symlink during initialization. 1461 * if set prior to registration it will cause a symlink during initialization.
1462 */ 1462 */
1463 #define SET_NETDEV_DEV(net, pdev) ((net)->dev.parent = (pdev)) 1463 #define SET_NETDEV_DEV(net, pdev) ((net)->dev.parent = (pdev))
1464 1464
1465 /* Set the sysfs device type for the network logical device to allow 1465 /* Set the sysfs device type for the network logical device to allow
1466 * fine grained identification of different network device types. For 1466 * fine grained identification of different network device types. For
1467 * example Ethernet, Wireless LAN, Bluetooth, WiMAX etc. 1467 * example Ethernet, Wireless LAN, Bluetooth, WiMAX etc.
1468 */ 1468 */
1469 #define SET_NETDEV_DEVTYPE(net, devtype) ((net)->dev.type = (devtype)) 1469 #define SET_NETDEV_DEVTYPE(net, devtype) ((net)->dev.type = (devtype))
1470 1470
1471 /** 1471 /**
1472 * netif_napi_add - initialize a napi context 1472 * netif_napi_add - initialize a napi context
1473 * @dev: network device 1473 * @dev: network device
1474 * @napi: napi context 1474 * @napi: napi context
1475 * @poll: polling function 1475 * @poll: polling function
1476 * @weight: default weight 1476 * @weight: default weight
1477 * 1477 *
1478 * netif_napi_add() must be used to initialize a napi context prior to calling 1478 * netif_napi_add() must be used to initialize a napi context prior to calling
1479 * *any* of the other napi related functions. 1479 * *any* of the other napi related functions.
1480 */ 1480 */
1481 void netif_napi_add(struct net_device *dev, struct napi_struct *napi, 1481 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
1482 int (*poll)(struct napi_struct *, int), int weight); 1482 int (*poll)(struct napi_struct *, int), int weight);
1483 1483
1484 /** 1484 /**
1485 * netif_napi_del - remove a napi context 1485 * netif_napi_del - remove a napi context
1486 * @napi: napi context 1486 * @napi: napi context
1487 * 1487 *
1488 * netif_napi_del() removes a napi context from the network device napi list 1488 * netif_napi_del() removes a napi context from the network device napi list
1489 */ 1489 */
1490 void netif_napi_del(struct napi_struct *napi); 1490 void netif_napi_del(struct napi_struct *napi);
1491 1491
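A hedged sketch of the usual NAPI wiring: the hypothetical foo_poll() drains up to 'budget' packets and, once the ring is empty, completes the NAPI context so the device interrupt can be re-enabled.

static int foo_poll(struct napi_struct *napi, int budget)
{
	int work_done = 0;

	/* pull up to 'budget' packets off the RX ring and hand each one
	 * to napi_gro_receive(napi, skb), bumping work_done as we go
	 */

	if (work_done < budget) {
		napi_complete(napi);
		/* re-enable the device's RX interrupt here */
	}
	return work_done;
}

static void foo_napi_init(struct net_device *dev, struct napi_struct *napi)
{
	netif_napi_add(dev, napi, foo_poll, 64);	/* 64 is the customary weight */
	napi_enable(napi);
}

netif_napi_del() is the teardown counterpart, called after napi_disable() once the device is stopped.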
1492 struct napi_gro_cb { 1492 struct napi_gro_cb {
1493 /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */ 1493 /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */
1494 void *frag0; 1494 void *frag0;
1495 1495
1496 /* Length of frag0. */ 1496 /* Length of frag0. */
1497 unsigned int frag0_len; 1497 unsigned int frag0_len;
1498 1498
1499 /* This indicates where we are processing relative to skb->data. */ 1499 /* This indicates where we are processing relative to skb->data. */
1500 int data_offset; 1500 int data_offset;
1501 1501
1502 /* This is non-zero if the packet cannot be merged with the new skb. */ 1502 /* This is non-zero if the packet cannot be merged with the new skb. */
1503 int flush; 1503 int flush;
1504 1504
1505 /* Number of segments aggregated. */ 1505 /* Number of segments aggregated. */
1506 u16 count; 1506 u16 count;
1507 1507
1508 /* This is non-zero if the packet may be of the same flow. */ 1508 /* This is non-zero if the packet may be of the same flow. */
1509 u8 same_flow; 1509 u8 same_flow;
1510 1510
1511 /* Free the skb? */ 1511 /* Free the skb? */
1512 u8 free; 1512 u8 free;
1513 #define NAPI_GRO_FREE 1 1513 #define NAPI_GRO_FREE 1
1514 #define NAPI_GRO_FREE_STOLEN_HEAD 2 1514 #define NAPI_GRO_FREE_STOLEN_HEAD 2
1515 1515
1516 /* jiffies when first packet was created/queued */ 1516 /* jiffies when first packet was created/queued */
1517 unsigned long age; 1517 unsigned long age;
1518 1518
1519 /* Used in ipv6_gro_receive() */ 1519 /* Used in ipv6_gro_receive() */
1520 int proto; 1520 int proto;
1521 1521
1522 /* used in skb_gro_receive() slow path */ 1522 /* used in skb_gro_receive() slow path */
1523 struct sk_buff *last; 1523 struct sk_buff *last;
1524 }; 1524 };
1525 1525
1526 #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) 1526 #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
1527 1527
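NAPI_GRO_CB() overlays this control block on skb->cb while a packet sits on the GRO hold list. As an illustration only, here is a deliberately trivial gro_receive-style handler that refuses to aggregate anything (foo_gro_receive is hypothetical):

static struct sk_buff **foo_gro_receive(struct sk_buff **head,
					struct sk_buff *skb)
{
	struct sk_buff *p;

	/* a real handler would compare protocol headers here; this sketch
	 * marks every held packet as belonging to a different flow
	 */
	for (p = *head; p; p = p->next)
		NAPI_GRO_CB(p)->same_flow = 0;

	NAPI_GRO_CB(skb)->flush = 1;	/* deliver skb without merging */
	return NULL;			/* no held packet needs flushing */
}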
1528 struct packet_type { 1528 struct packet_type {
1529 __be16 type; /* This is really htons(ether_type). */ 1529 __be16 type; /* This is really htons(ether_type). */
1530 struct net_device *dev; /* NULL is wildcarded here */ 1530 struct net_device *dev; /* NULL is wildcarded here */
1531 int (*func) (struct sk_buff *, 1531 int (*func) (struct sk_buff *,
1532 struct net_device *, 1532 struct net_device *,
1533 struct packet_type *, 1533 struct packet_type *,
1534 struct net_device *); 1534 struct net_device *);
1535 bool (*id_match)(struct packet_type *ptype, 1535 bool (*id_match)(struct packet_type *ptype,
1536 struct sock *sk); 1536 struct sock *sk);
1537 void *af_packet_priv; 1537 void *af_packet_priv;
1538 struct list_head list; 1538 struct list_head list;
1539 }; 1539 };
1540 1540
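A packet_type is hooked into the receive path with dev_add_pack(), declared further down. A sketch of a hypothetical tap that sees every incoming frame:

static int foo_rcv(struct sk_buff *skb, struct net_device *dev,
		   struct packet_type *pt, struct net_device *orig_dev)
{
	/* the handler owns this reference; inspect and then release it */
	kfree_skb(skb);
	return NET_RX_SUCCESS;
}

static struct packet_type foo_packet_type __read_mostly = {
	.type = cpu_to_be16(ETH_P_ALL),	/* match every protocol */
	.dev  = NULL,			/* ... on every device */
	.func = foo_rcv,
};

Calling dev_add_pack(&foo_packet_type) from module init registers the handler; dev_remove_pack() undoes it on exit.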
1541 struct offload_callbacks { 1541 struct offload_callbacks {
1542 struct sk_buff *(*gso_segment)(struct sk_buff *skb, 1542 struct sk_buff *(*gso_segment)(struct sk_buff *skb,
1543 netdev_features_t features); 1543 netdev_features_t features);
1544 int (*gso_send_check)(struct sk_buff *skb); 1544 int (*gso_send_check)(struct sk_buff *skb);
1545 struct sk_buff **(*gro_receive)(struct sk_buff **head, 1545 struct sk_buff **(*gro_receive)(struct sk_buff **head,
1546 struct sk_buff *skb); 1546 struct sk_buff *skb);
1547 int (*gro_complete)(struct sk_buff *skb); 1547 int (*gro_complete)(struct sk_buff *skb);
1548 }; 1548 };
1549 1549
1550 struct packet_offload { 1550 struct packet_offload {
1551 __be16 type; /* This is really htons(ether_type). */ 1551 __be16 type; /* This is really htons(ether_type). */
1552 struct offload_callbacks callbacks; 1552 struct offload_callbacks callbacks;
1553 struct list_head list; 1553 struct list_head list;
1554 }; 1554 };
1555 1555
1556 #include <linux/notifier.h> 1556 #include <linux/notifier.h>
1557 1557
1558 /* netdevice notifier chain. Please remember to update the rtnetlink 1558 /* netdevice notifier chain. Please remember to update the rtnetlink
1559 * notification exclusion list in rtnetlink_event() when adding new 1559 * notification exclusion list in rtnetlink_event() when adding new
1560 * types. 1560 * types.
1561 */ 1561 */
1562 #define NETDEV_UP 0x0001 /* For now you can't veto a device up/down */ 1562 #define NETDEV_UP 0x0001 /* For now you can't veto a device up/down */
1563 #define NETDEV_DOWN 0x0002 1563 #define NETDEV_DOWN 0x0002
1564 #define NETDEV_REBOOT 0x0003 /* Tell a protocol stack a network interface 1564 #define NETDEV_REBOOT 0x0003 /* Tell a protocol stack a network interface
1565 detected a hardware crash and restarted 1565 detected a hardware crash and restarted
1566 - we can use this eg to kick tcp sessions 1566 - we can use this eg to kick tcp sessions
1567 once done */ 1567 once done */
1568 #define NETDEV_CHANGE 0x0004 /* Notify device state change */ 1568 #define NETDEV_CHANGE 0x0004 /* Notify device state change */
1569 #define NETDEV_REGISTER 0x0005 1569 #define NETDEV_REGISTER 0x0005
1570 #define NETDEV_UNREGISTER 0x0006 1570 #define NETDEV_UNREGISTER 0x0006
1571 #define NETDEV_CHANGEMTU 0x0007 1571 #define NETDEV_CHANGEMTU 0x0007
1572 #define NETDEV_CHANGEADDR 0x0008 1572 #define NETDEV_CHANGEADDR 0x0008
1573 #define NETDEV_GOING_DOWN 0x0009 1573 #define NETDEV_GOING_DOWN 0x0009
1574 #define NETDEV_CHANGENAME 0x000A 1574 #define NETDEV_CHANGENAME 0x000A
1575 #define NETDEV_FEAT_CHANGE 0x000B 1575 #define NETDEV_FEAT_CHANGE 0x000B
1576 #define NETDEV_BONDING_FAILOVER 0x000C 1576 #define NETDEV_BONDING_FAILOVER 0x000C
1577 #define NETDEV_PRE_UP 0x000D 1577 #define NETDEV_PRE_UP 0x000D
1578 #define NETDEV_PRE_TYPE_CHANGE 0x000E 1578 #define NETDEV_PRE_TYPE_CHANGE 0x000E
1579 #define NETDEV_POST_TYPE_CHANGE 0x000F 1579 #define NETDEV_POST_TYPE_CHANGE 0x000F
1580 #define NETDEV_POST_INIT 0x0010 1580 #define NETDEV_POST_INIT 0x0010
1581 #define NETDEV_UNREGISTER_FINAL 0x0011 1581 #define NETDEV_UNREGISTER_FINAL 0x0011
1582 #define NETDEV_RELEASE 0x0012 1582 #define NETDEV_RELEASE 0x0012
1583 #define NETDEV_NOTIFY_PEERS 0x0013 1583 #define NETDEV_NOTIFY_PEERS 0x0013
1584 #define NETDEV_JOIN 0x0014 1584 #define NETDEV_JOIN 0x0014
1585 1585
1586 extern int register_netdevice_notifier(struct notifier_block *nb); 1586 extern int register_netdevice_notifier(struct notifier_block *nb);
1587 extern int unregister_netdevice_notifier(struct notifier_block *nb); 1587 extern int unregister_netdevice_notifier(struct notifier_block *nb);
1588 extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev); 1588 extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev);
1589 1589
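The events above are delivered through a plain notifier chain; in kernels of this vintage the callback's ptr argument is the struct net_device itself. A hypothetical listener:

static int foo_netdev_event(struct notifier_block *nb,
			    unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	switch (event) {
	case NETDEV_UP:
		pr_info("%s: came up\n", dev->name);
		break;
	case NETDEV_UNREGISTER:
		pr_info("%s: going away\n", dev->name);
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block foo_netdev_notifier = {
	.notifier_call = foo_netdev_event,
};

register_netdevice_notifier(&foo_netdev_notifier) also replays register/up events for devices that already exist, so the callback must cope with being invoked at registration time.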
1590 1590
1591 extern rwlock_t dev_base_lock; /* Device list lock */ 1591 extern rwlock_t dev_base_lock; /* Device list lock */
1592 1592
1593 extern seqcount_t devnet_rename_seq; /* Device rename seq */ 1593 extern seqcount_t devnet_rename_seq; /* Device rename seq */
1594 1594
1595 1595
1596 #define for_each_netdev(net, d) \ 1596 #define for_each_netdev(net, d) \
1597 list_for_each_entry(d, &(net)->dev_base_head, dev_list) 1597 list_for_each_entry(d, &(net)->dev_base_head, dev_list)
1598 #define for_each_netdev_reverse(net, d) \ 1598 #define for_each_netdev_reverse(net, d) \
1599 list_for_each_entry_reverse(d, &(net)->dev_base_head, dev_list) 1599 list_for_each_entry_reverse(d, &(net)->dev_base_head, dev_list)
1600 #define for_each_netdev_rcu(net, d) \ 1600 #define for_each_netdev_rcu(net, d) \
1601 list_for_each_entry_rcu(d, &(net)->dev_base_head, dev_list) 1601 list_for_each_entry_rcu(d, &(net)->dev_base_head, dev_list)
1602 #define for_each_netdev_safe(net, d, n) \ 1602 #define for_each_netdev_safe(net, d, n) \
1603 list_for_each_entry_safe(d, n, &(net)->dev_base_head, dev_list) 1603 list_for_each_entry_safe(d, n, &(net)->dev_base_head, dev_list)
1604 #define for_each_netdev_continue(net, d) \ 1604 #define for_each_netdev_continue(net, d) \
1605 list_for_each_entry_continue(d, &(net)->dev_base_head, dev_list) 1605 list_for_each_entry_continue(d, &(net)->dev_base_head, dev_list)
1606 #define for_each_netdev_continue_rcu(net, d) \ 1606 #define for_each_netdev_continue_rcu(net, d) \
1607 list_for_each_entry_continue_rcu(d, &(net)->dev_base_head, dev_list) 1607 list_for_each_entry_continue_rcu(d, &(net)->dev_base_head, dev_list)
1608 #define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) 1608 #define net_device_entry(lh) list_entry(lh, struct net_device, dev_list)
1609 1609
1610 static inline struct net_device *next_net_device(struct net_device *dev) 1610 static inline struct net_device *next_net_device(struct net_device *dev)
1611 { 1611 {
1612 struct list_head *lh; 1612 struct list_head *lh;
1613 struct net *net; 1613 struct net *net;
1614 1614
1615 net = dev_net(dev); 1615 net = dev_net(dev);
1616 lh = dev->dev_list.next; 1616 lh = dev->dev_list.next;
1617 return lh == &net->dev_base_head ? NULL : net_device_entry(lh); 1617 return lh == &net->dev_base_head ? NULL : net_device_entry(lh);
1618 } 1618 }
1619 1619
1620 static inline struct net_device *next_net_device_rcu(struct net_device *dev) 1620 static inline struct net_device *next_net_device_rcu(struct net_device *dev)
1621 { 1621 {
1622 struct list_head *lh; 1622 struct list_head *lh;
1623 struct net *net; 1623 struct net *net;
1624 1624
1625 net = dev_net(dev); 1625 net = dev_net(dev);
1626 lh = rcu_dereference(list_next_rcu(&dev->dev_list)); 1626 lh = rcu_dereference(list_next_rcu(&dev->dev_list));
1627 return lh == &net->dev_base_head ? NULL : net_device_entry(lh); 1627 return lh == &net->dev_base_head ? NULL : net_device_entry(lh);
1628 } 1628 }
1629 1629
1630 static inline struct net_device *first_net_device(struct net *net) 1630 static inline struct net_device *first_net_device(struct net *net)
1631 { 1631 {
1632 return list_empty(&net->dev_base_head) ? NULL : 1632 return list_empty(&net->dev_base_head) ? NULL :
1633 net_device_entry(net->dev_base_head.next); 1633 net_device_entry(net->dev_base_head.next);
1634 } 1634 }
1635 1635
1636 static inline struct net_device *first_net_device_rcu(struct net *net) 1636 static inline struct net_device *first_net_device_rcu(struct net *net)
1637 { 1637 {
1638 struct list_head *lh = rcu_dereference(list_next_rcu(&net->dev_base_head)); 1638 struct list_head *lh = rcu_dereference(list_next_rcu(&net->dev_base_head));
1639 1639
1640 return lh == &net->dev_base_head ? NULL : net_device_entry(lh); 1640 return lh == &net->dev_base_head ? NULL : net_device_entry(lh);
1641 } 1641 }
1642 1642
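The iterators above walk the per-namespace device list; the _rcu variants may be used under rcu_read_lock() instead of holding the RTNL. A sketch (foo_count_running is hypothetical):

static unsigned int foo_count_running(struct net *net)
{
	struct net_device *dev;
	unsigned int running = 0;

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		if (netif_running(dev))
			running++;
	}
	rcu_read_unlock();

	return running;
}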
1643 extern int netdev_boot_setup_check(struct net_device *dev); 1643 extern int netdev_boot_setup_check(struct net_device *dev);
1644 extern unsigned long netdev_boot_base(const char *prefix, int unit); 1644 extern unsigned long netdev_boot_base(const char *prefix, int unit);
1645 extern struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, 1645 extern struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
1646 const char *hwaddr); 1646 const char *hwaddr);
1647 extern struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type); 1647 extern struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type);
1648 extern struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type); 1648 extern struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type);
1649 extern void dev_add_pack(struct packet_type *pt); 1649 extern void dev_add_pack(struct packet_type *pt);
1650 extern void dev_remove_pack(struct packet_type *pt); 1650 extern void dev_remove_pack(struct packet_type *pt);
1651 extern void __dev_remove_pack(struct packet_type *pt); 1651 extern void __dev_remove_pack(struct packet_type *pt);
1652 extern void dev_add_offload(struct packet_offload *po); 1652 extern void dev_add_offload(struct packet_offload *po);
1653 extern void dev_remove_offload(struct packet_offload *po); 1653 extern void dev_remove_offload(struct packet_offload *po);
1654 extern void __dev_remove_offload(struct packet_offload *po); 1654 extern void __dev_remove_offload(struct packet_offload *po);
1655 1655
1656 extern struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short flags, 1656 extern struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short flags,
1657 unsigned short mask); 1657 unsigned short mask);
1658 extern struct net_device *dev_get_by_name(struct net *net, const char *name); 1658 extern struct net_device *dev_get_by_name(struct net *net, const char *name);
1659 extern struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); 1659 extern struct net_device *dev_get_by_name_rcu(struct net *net, const char *name);
1660 extern struct net_device *__dev_get_by_name(struct net *net, const char *name); 1660 extern struct net_device *__dev_get_by_name(struct net *net, const char *name);
1661 extern int dev_alloc_name(struct net_device *dev, const char *name); 1661 extern int dev_alloc_name(struct net_device *dev, const char *name);
1662 extern int dev_open(struct net_device *dev); 1662 extern int dev_open(struct net_device *dev);
1663 extern int dev_close(struct net_device *dev); 1663 extern int dev_close(struct net_device *dev);
1664 extern void dev_disable_lro(struct net_device *dev); 1664 extern void dev_disable_lro(struct net_device *dev);
1665 extern int dev_loopback_xmit(struct sk_buff *newskb); 1665 extern int dev_loopback_xmit(struct sk_buff *newskb);
1666 extern int dev_queue_xmit(struct sk_buff *skb); 1666 extern int dev_queue_xmit(struct sk_buff *skb);
1667 extern int register_netdevice(struct net_device *dev); 1667 extern int register_netdevice(struct net_device *dev);
1668 extern void unregister_netdevice_queue(struct net_device *dev, 1668 extern void unregister_netdevice_queue(struct net_device *dev,
1669 struct list_head *head); 1669 struct list_head *head);
1670 extern void unregister_netdevice_many(struct list_head *head); 1670 extern void unregister_netdevice_many(struct list_head *head);
1671 static inline void unregister_netdevice(struct net_device *dev) 1671 static inline void unregister_netdevice(struct net_device *dev)
1672 { 1672 {
1673 unregister_netdevice_queue(dev, NULL); 1673 unregister_netdevice_queue(dev, NULL);
1674 } 1674 }
1675 1675
1676 extern int netdev_refcnt_read(const struct net_device *dev); 1676 extern int netdev_refcnt_read(const struct net_device *dev);
1677 extern void free_netdev(struct net_device *dev); 1677 extern void free_netdev(struct net_device *dev);
1678 extern void synchronize_net(void); 1678 extern void synchronize_net(void);
1679 extern int init_dummy_netdev(struct net_device *dev); 1679 extern int init_dummy_netdev(struct net_device *dev);
1680 extern void netdev_resync_ops(struct net_device *dev); 1680 extern void netdev_resync_ops(struct net_device *dev);
1681 1681
1682 extern struct net_device *dev_get_by_index(struct net *net, int ifindex); 1682 extern struct net_device *dev_get_by_index(struct net *net, int ifindex);
1683 extern struct net_device *__dev_get_by_index(struct net *net, int ifindex); 1683 extern struct net_device *__dev_get_by_index(struct net *net, int ifindex);
1684 extern struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); 1684 extern struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
1685 extern int dev_restart(struct net_device *dev); 1685 extern int dev_restart(struct net_device *dev);
1686 #ifdef CONFIG_NETPOLL_TRAP 1686 #ifdef CONFIG_NETPOLL_TRAP
1687 extern int netpoll_trap(void); 1687 extern int netpoll_trap(void);
1688 #endif 1688 #endif
1689 extern int skb_gro_receive(struct sk_buff **head, 1689 extern int skb_gro_receive(struct sk_buff **head,
1690 struct sk_buff *skb); 1690 struct sk_buff *skb);
1691 1691
1692 static inline unsigned int skb_gro_offset(const struct sk_buff *skb) 1692 static inline unsigned int skb_gro_offset(const struct sk_buff *skb)
1693 { 1693 {
1694 return NAPI_GRO_CB(skb)->data_offset; 1694 return NAPI_GRO_CB(skb)->data_offset;
1695 } 1695 }
1696 1696
1697 static inline unsigned int skb_gro_len(const struct sk_buff *skb) 1697 static inline unsigned int skb_gro_len(const struct sk_buff *skb)
1698 { 1698 {
1699 return skb->len - NAPI_GRO_CB(skb)->data_offset; 1699 return skb->len - NAPI_GRO_CB(skb)->data_offset;
1700 } 1700 }
1701 1701
1702 static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len) 1702 static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len)
1703 { 1703 {
1704 NAPI_GRO_CB(skb)->data_offset += len; 1704 NAPI_GRO_CB(skb)->data_offset += len;
1705 } 1705 }
1706 1706
1707 static inline void *skb_gro_header_fast(struct sk_buff *skb, 1707 static inline void *skb_gro_header_fast(struct sk_buff *skb,
1708 unsigned int offset) 1708 unsigned int offset)
1709 { 1709 {
1710 return NAPI_GRO_CB(skb)->frag0 + offset; 1710 return NAPI_GRO_CB(skb)->frag0 + offset;
1711 } 1711 }
1712 1712
1713 static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen) 1713 static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen)
1714 { 1714 {
1715 return NAPI_GRO_CB(skb)->frag0_len < hlen; 1715 return NAPI_GRO_CB(skb)->frag0_len < hlen;
1716 } 1716 }
1717 1717
1718 static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, 1718 static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
1719 unsigned int offset) 1719 unsigned int offset)
1720 { 1720 {
1721 if (!pskb_may_pull(skb, hlen)) 1721 if (!pskb_may_pull(skb, hlen))
1722 return NULL; 1722 return NULL;
1723 1723
1724 NAPI_GRO_CB(skb)->frag0 = NULL; 1724 NAPI_GRO_CB(skb)->frag0 = NULL;
1725 NAPI_GRO_CB(skb)->frag0_len = 0; 1725 NAPI_GRO_CB(skb)->frag0_len = 0;
1726 return skb->data + offset; 1726 return skb->data + offset;
1727 } 1727 }
1728 1728
1729 static inline void *skb_gro_mac_header(struct sk_buff *skb) 1729 static inline void *skb_gro_mac_header(struct sk_buff *skb)
1730 { 1730 {
1731 return NAPI_GRO_CB(skb)->frag0 ?: skb_mac_header(skb); 1731 return NAPI_GRO_CB(skb)->frag0 ?: skb_mac_header(skb);
1732 } 1732 }
1733 1733
1734 static inline void *skb_gro_network_header(struct sk_buff *skb) 1734 static inline void *skb_gro_network_header(struct sk_buff *skb)
1735 { 1735 {
1736 return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) + 1736 return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) +
1737 skb_network_offset(skb); 1737 skb_network_offset(skb);
1738 } 1738 }
1739 1739
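Usage sketch (illustrative, not part of this patch): the skb_gro_* accessors above let a ->gro_receive() handler read packet headers from the frag0 fast path, falling back to pulling linear data when frag0 does not cover enough bytes. The function name and hlen below are illustrative only.

/* Return a pointer to hlen bytes of header at the current GRO offset,
 * or NULL if they cannot be made available.
 */
static void *sample_gro_get_header(struct sk_buff *skb, unsigned int hlen)
{
    unsigned int off = skb_gro_offset(skb);
    void *hdr = skb_gro_header_fast(skb, off);

    if (skb_gro_header_hard(skb, off + hlen))
        hdr = skb_gro_header_slow(skb, off + hlen, off);
    return hdr;
}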
1740 static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, 1740 static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
1741 unsigned short type, 1741 unsigned short type,
1742 const void *daddr, const void *saddr, 1742 const void *daddr, const void *saddr,
1743 unsigned int len) 1743 unsigned int len)
1744 { 1744 {
1745 if (!dev->header_ops || !dev->header_ops->create) 1745 if (!dev->header_ops || !dev->header_ops->create)
1746 return 0; 1746 return 0;
1747 1747
1748 return dev->header_ops->create(skb, dev, type, daddr, saddr, len); 1748 return dev->header_ops->create(skb, dev, type, daddr, saddr, len);
1749 } 1749 }
1750 1750
1751 static inline int dev_parse_header(const struct sk_buff *skb, 1751 static inline int dev_parse_header(const struct sk_buff *skb,
1752 unsigned char *haddr) 1752 unsigned char *haddr)
1753 { 1753 {
1754 const struct net_device *dev = skb->dev; 1754 const struct net_device *dev = skb->dev;
1755 1755
1756 if (!dev->header_ops || !dev->header_ops->parse) 1756 if (!dev->header_ops || !dev->header_ops->parse)
1757 return 0; 1757 return 0;
1758 return dev->header_ops->parse(skb, haddr); 1758 return dev->header_ops->parse(skb, haddr);
1759 } 1759 }
1760 1760
1761 typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len); 1761 typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len);
1762 extern int register_gifconf(unsigned int family, gifconf_func_t * gifconf); 1762 extern int register_gifconf(unsigned int family, gifconf_func_t * gifconf);
1763 static inline int unregister_gifconf(unsigned int family) 1763 static inline int unregister_gifconf(unsigned int family)
1764 { 1764 {
1765 return register_gifconf(family, NULL); 1765 return register_gifconf(family, NULL);
1766 } 1766 }
1767 1767
1768 /* 1768 /*
1769 * Incoming packets are placed on per-cpu queues 1769 * Incoming packets are placed on per-cpu queues
1770 */ 1770 */
1771 struct softnet_data { 1771 struct softnet_data {
1772 struct Qdisc *output_queue; 1772 struct Qdisc *output_queue;
1773 struct Qdisc **output_queue_tailp; 1773 struct Qdisc **output_queue_tailp;
1774 struct list_head poll_list; 1774 struct list_head poll_list;
1775 struct sk_buff *completion_queue; 1775 struct sk_buff *completion_queue;
1776 struct sk_buff_head process_queue; 1776 struct sk_buff_head process_queue;
1777 1777
1778 /* stats */ 1778 /* stats */
1779 unsigned int processed; 1779 unsigned int processed;
1780 unsigned int time_squeeze; 1780 unsigned int time_squeeze;
1781 unsigned int cpu_collision; 1781 unsigned int cpu_collision;
1782 unsigned int received_rps; 1782 unsigned int received_rps;
1783 1783
1784 #ifdef CONFIG_RPS 1784 #ifdef CONFIG_RPS
1785 struct softnet_data *rps_ipi_list; 1785 struct softnet_data *rps_ipi_list;
1786 1786
1787 /* Elements below can be accessed between CPUs for RPS */ 1787 /* Elements below can be accessed between CPUs for RPS */
1788 struct call_single_data csd ____cacheline_aligned_in_smp; 1788 struct call_single_data csd ____cacheline_aligned_in_smp;
1789 struct softnet_data *rps_ipi_next; 1789 struct softnet_data *rps_ipi_next;
1790 unsigned int cpu; 1790 unsigned int cpu;
1791 unsigned int input_queue_head; 1791 unsigned int input_queue_head;
1792 unsigned int input_queue_tail; 1792 unsigned int input_queue_tail;
1793 #endif 1793 #endif
1794 unsigned int dropped; 1794 unsigned int dropped;
1795 struct sk_buff_head input_pkt_queue; 1795 struct sk_buff_head input_pkt_queue;
1796 struct napi_struct backlog; 1796 struct napi_struct backlog;
1797 }; 1797 };
1798 1798
1799 static inline void input_queue_head_incr(struct softnet_data *sd) 1799 static inline void input_queue_head_incr(struct softnet_data *sd)
1800 { 1800 {
1801 #ifdef CONFIG_RPS 1801 #ifdef CONFIG_RPS
1802 sd->input_queue_head++; 1802 sd->input_queue_head++;
1803 #endif 1803 #endif
1804 } 1804 }
1805 1805
1806 static inline void input_queue_tail_incr_save(struct softnet_data *sd, 1806 static inline void input_queue_tail_incr_save(struct softnet_data *sd,
1807 unsigned int *qtail) 1807 unsigned int *qtail)
1808 { 1808 {
1809 #ifdef CONFIG_RPS 1809 #ifdef CONFIG_RPS
1810 *qtail = ++sd->input_queue_tail; 1810 *qtail = ++sd->input_queue_tail;
1811 #endif 1811 #endif
1812 } 1812 }
1813 1813
1814 DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); 1814 DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
1815 1815
1816 extern void __netif_schedule(struct Qdisc *q); 1816 extern void __netif_schedule(struct Qdisc *q);
1817 1817
1818 static inline void netif_schedule_queue(struct netdev_queue *txq) 1818 static inline void netif_schedule_queue(struct netdev_queue *txq)
1819 { 1819 {
1820 if (!(txq->state & QUEUE_STATE_ANY_XOFF)) 1820 if (!(txq->state & QUEUE_STATE_ANY_XOFF))
1821 __netif_schedule(txq->qdisc); 1821 __netif_schedule(txq->qdisc);
1822 } 1822 }
1823 1823
1824 static inline void netif_tx_schedule_all(struct net_device *dev) 1824 static inline void netif_tx_schedule_all(struct net_device *dev)
1825 { 1825 {
1826 unsigned int i; 1826 unsigned int i;
1827 1827
1828 for (i = 0; i < dev->num_tx_queues; i++) 1828 for (i = 0; i < dev->num_tx_queues; i++)
1829 netif_schedule_queue(netdev_get_tx_queue(dev, i)); 1829 netif_schedule_queue(netdev_get_tx_queue(dev, i));
1830 } 1830 }
1831 1831
1832 static inline void netif_tx_start_queue(struct netdev_queue *dev_queue) 1832 static inline void netif_tx_start_queue(struct netdev_queue *dev_queue)
1833 { 1833 {
1834 clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); 1834 clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
1835 } 1835 }
1836 1836
1837 /** 1837 /**
1838 * netif_start_queue - allow transmit 1838 * netif_start_queue - allow transmit
1839 * @dev: network device 1839 * @dev: network device
1840 * 1840 *
1841 * Allow upper layers to call the device hard_start_xmit routine. 1841 * Allow upper layers to call the device hard_start_xmit routine.
1842 */ 1842 */
1843 static inline void netif_start_queue(struct net_device *dev) 1843 static inline void netif_start_queue(struct net_device *dev)
1844 { 1844 {
1845 netif_tx_start_queue(netdev_get_tx_queue(dev, 0)); 1845 netif_tx_start_queue(netdev_get_tx_queue(dev, 0));
1846 } 1846 }
1847 1847
1848 static inline void netif_tx_start_all_queues(struct net_device *dev) 1848 static inline void netif_tx_start_all_queues(struct net_device *dev)
1849 { 1849 {
1850 unsigned int i; 1850 unsigned int i;
1851 1851
1852 for (i = 0; i < dev->num_tx_queues; i++) { 1852 for (i = 0; i < dev->num_tx_queues; i++) {
1853 struct netdev_queue *txq = netdev_get_tx_queue(dev, i); 1853 struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
1854 netif_tx_start_queue(txq); 1854 netif_tx_start_queue(txq);
1855 } 1855 }
1856 } 1856 }
1857 1857
1858 static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue) 1858 static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue)
1859 { 1859 {
1860 #ifdef CONFIG_NETPOLL_TRAP 1860 #ifdef CONFIG_NETPOLL_TRAP
1861 if (netpoll_trap()) { 1861 if (netpoll_trap()) {
1862 netif_tx_start_queue(dev_queue); 1862 netif_tx_start_queue(dev_queue);
1863 return; 1863 return;
1864 } 1864 }
1865 #endif 1865 #endif
1866 if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) 1866 if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state))
1867 __netif_schedule(dev_queue->qdisc); 1867 __netif_schedule(dev_queue->qdisc);
1868 } 1868 }
1869 1869
1870 /** 1870 /**
1871 * netif_wake_queue - restart transmit 1871 * netif_wake_queue - restart transmit
1872 * @dev: network device 1872 * @dev: network device
1873 * 1873 *
1874 * Allow upper layers to call the device hard_start_xmit routine. 1874 * Allow upper layers to call the device hard_start_xmit routine.
1875 * Used for flow control when transmit resources are available. 1875 * Used for flow control when transmit resources are available.
1876 */ 1876 */
1877 static inline void netif_wake_queue(struct net_device *dev) 1877 static inline void netif_wake_queue(struct net_device *dev)
1878 { 1878 {
1879 netif_tx_wake_queue(netdev_get_tx_queue(dev, 0)); 1879 netif_tx_wake_queue(netdev_get_tx_queue(dev, 0));
1880 } 1880 }
1881 1881
1882 static inline void netif_tx_wake_all_queues(struct net_device *dev) 1882 static inline void netif_tx_wake_all_queues(struct net_device *dev)
1883 { 1883 {
1884 unsigned int i; 1884 unsigned int i;
1885 1885
1886 for (i = 0; i < dev->num_tx_queues; i++) { 1886 for (i = 0; i < dev->num_tx_queues; i++) {
1887 struct netdev_queue *txq = netdev_get_tx_queue(dev, i); 1887 struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
1888 netif_tx_wake_queue(txq); 1888 netif_tx_wake_queue(txq);
1889 } 1889 }
1890 } 1890 }
1891 1891
1892 static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) 1892 static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue)
1893 { 1893 {
1894 if (WARN_ON(!dev_queue)) { 1894 if (WARN_ON(!dev_queue)) {
1895 pr_info("netif_stop_queue() cannot be called before register_netdev()\n"); 1895 pr_info("netif_stop_queue() cannot be called before register_netdev()\n");
1896 return; 1896 return;
1897 } 1897 }
1898 set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); 1898 set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
1899 } 1899 }
1900 1900
1901 /** 1901 /**
1902 * netif_stop_queue - stop the transmit queue 1902 * netif_stop_queue - stop the transmit queue
1903 * @dev: network device 1903 * @dev: network device
1904 * 1904 *
1905 * Stop upper layers calling the device hard_start_xmit routine. 1905 * Stop upper layers calling the device hard_start_xmit routine.
1906 * Used for flow control when transmit resources are unavailable. 1906 * Used for flow control when transmit resources are unavailable.
1907 */ 1907 */
1908 static inline void netif_stop_queue(struct net_device *dev) 1908 static inline void netif_stop_queue(struct net_device *dev)
1909 { 1909 {
1910 netif_tx_stop_queue(netdev_get_tx_queue(dev, 0)); 1910 netif_tx_stop_queue(netdev_get_tx_queue(dev, 0));
1911 } 1911 }
1912 1912
1913 static inline void netif_tx_stop_all_queues(struct net_device *dev) 1913 static inline void netif_tx_stop_all_queues(struct net_device *dev)
1914 { 1914 {
1915 unsigned int i; 1915 unsigned int i;
1916 1916
1917 for (i = 0; i < dev->num_tx_queues; i++) { 1917 for (i = 0; i < dev->num_tx_queues; i++) {
1918 struct netdev_queue *txq = netdev_get_tx_queue(dev, i); 1918 struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
1919 netif_tx_stop_queue(txq); 1919 netif_tx_stop_queue(txq);
1920 } 1920 }
1921 } 1921 }
1922 1922
1923 static inline bool netif_tx_queue_stopped(const struct netdev_queue *dev_queue) 1923 static inline bool netif_tx_queue_stopped(const struct netdev_queue *dev_queue)
1924 { 1924 {
1925 return test_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); 1925 return test_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
1926 } 1926 }
1927 1927
1928 /** 1928 /**
1929 * netif_queue_stopped - test if transmit queue is flowblocked 1929 * netif_queue_stopped - test if transmit queue is flowblocked
1930 * @dev: network device 1930 * @dev: network device
1931 * 1931 *
1932 * Test if transmit queue on device is currently unable to send. 1932 * Test if transmit queue on device is currently unable to send.
1933 */ 1933 */
1934 static inline bool netif_queue_stopped(const struct net_device *dev) 1934 static inline bool netif_queue_stopped(const struct net_device *dev)
1935 { 1935 {
1936 return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0)); 1936 return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0));
1937 } 1937 }
1938 1938
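Usage sketch (illustrative, not part of this patch): netif_stop_queue()/netif_wake_queue() form the single-queue flow-control handshake: the driver's xmit routine stops the queue when its TX ring is full, and the completion path wakes it once descriptors are reclaimed. The foo_* ring helpers below are hypothetical.

static netdev_tx_t foo_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
    foo_hw_enqueue(dev, skb);        /* hypothetical: post frame to TX ring */
    if (foo_tx_ring_full(dev))       /* hypothetical: no room for one more */
        netif_stop_queue(dev);       /* stack stops calling hard_start_xmit */
    return NETDEV_TX_OK;
}

/* Called from the TX-completion interrupt after descriptors are reclaimed. */
static void foo_tx_complete(struct net_device *dev)
{
    if (netif_queue_stopped(dev) && !foo_tx_ring_full(dev))
        netif_wake_queue(dev);
}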
1939 static inline bool netif_xmit_stopped(const struct netdev_queue *dev_queue) 1939 static inline bool netif_xmit_stopped(const struct netdev_queue *dev_queue)
1940 { 1940 {
1941 return dev_queue->state & QUEUE_STATE_ANY_XOFF; 1941 return dev_queue->state & QUEUE_STATE_ANY_XOFF;
1942 } 1942 }
1943 1943
1944 static inline bool netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_queue) 1944 static inline bool netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_queue)
1945 { 1945 {
1946 return dev_queue->state & QUEUE_STATE_ANY_XOFF_OR_FROZEN; 1946 return dev_queue->state & QUEUE_STATE_ANY_XOFF_OR_FROZEN;
1947 } 1947 }
1948 1948
1949 static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue, 1949 static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue,
1950 unsigned int bytes) 1950 unsigned int bytes)
1951 { 1951 {
1952 #ifdef CONFIG_BQL 1952 #ifdef CONFIG_BQL
1953 dql_queued(&dev_queue->dql, bytes); 1953 dql_queued(&dev_queue->dql, bytes);
1954 1954
1955 if (likely(dql_avail(&dev_queue->dql) >= 0)) 1955 if (likely(dql_avail(&dev_queue->dql) >= 0))
1956 return; 1956 return;
1957 1957
1958 set_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state); 1958 set_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state);
1959 1959
1960 /* 1960 /*
1961 * The XOFF flag must be set before checking the dql_avail below, 1961 * The XOFF flag must be set before checking the dql_avail below,
1962 * because in netdev_tx_completed_queue we update the dql_completed 1962 * because in netdev_tx_completed_queue we update the dql_completed
1963 * before checking the XOFF flag. 1963 * before checking the XOFF flag.
1964 */ 1964 */
1965 smp_mb(); 1965 smp_mb();
1966 1966
1967 /* check again in case another CPU has just made room avail */ 1967 /* check again in case another CPU has just made room avail */
1968 if (unlikely(dql_avail(&dev_queue->dql) >= 0)) 1968 if (unlikely(dql_avail(&dev_queue->dql) >= 0))
1969 clear_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state); 1969 clear_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state);
1970 #endif 1970 #endif
1971 } 1971 }
1972 1972
1973 static inline void netdev_sent_queue(struct net_device *dev, unsigned int bytes) 1973 static inline void netdev_sent_queue(struct net_device *dev, unsigned int bytes)
1974 { 1974 {
1975 netdev_tx_sent_queue(netdev_get_tx_queue(dev, 0), bytes); 1975 netdev_tx_sent_queue(netdev_get_tx_queue(dev, 0), bytes);
1976 } 1976 }
1977 1977
1978 static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue, 1978 static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue,
1979 unsigned int pkts, unsigned int bytes) 1979 unsigned int pkts, unsigned int bytes)
1980 { 1980 {
1981 #ifdef CONFIG_BQL 1981 #ifdef CONFIG_BQL
1982 if (unlikely(!bytes)) 1982 if (unlikely(!bytes))
1983 return; 1983 return;
1984 1984
1985 dql_completed(&dev_queue->dql, bytes); 1985 dql_completed(&dev_queue->dql, bytes);
1986 1986
1987 /* 1987 /*
1988 * Without the memory barrier there is a small possibility that 1988 * Without the memory barrier there is a small possibility that
1989 * netdev_tx_sent_queue will miss the update and cause the queue to 1989 * netdev_tx_sent_queue will miss the update and cause the queue to
1990 * be stopped forever 1990 * be stopped forever
1991 */ 1991 */
1992 smp_mb(); 1992 smp_mb();
1993 1993
1994 if (dql_avail(&dev_queue->dql) < 0) 1994 if (dql_avail(&dev_queue->dql) < 0)
1995 return; 1995 return;
1996 1996
1997 if (test_and_clear_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state)) 1997 if (test_and_clear_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state))
1998 netif_schedule_queue(dev_queue); 1998 netif_schedule_queue(dev_queue);
1999 #endif 1999 #endif
2000 } 2000 }
2001 2001
2002 static inline void netdev_completed_queue(struct net_device *dev, 2002 static inline void netdev_completed_queue(struct net_device *dev,
2003 unsigned int pkts, unsigned int bytes) 2003 unsigned int pkts, unsigned int bytes)
2004 { 2004 {
2005 netdev_tx_completed_queue(netdev_get_tx_queue(dev, 0), pkts, bytes); 2005 netdev_tx_completed_queue(netdev_get_tx_queue(dev, 0), pkts, bytes);
2006 } 2006 }
2007 2007
2008 static inline void netdev_tx_reset_queue(struct netdev_queue *q) 2008 static inline void netdev_tx_reset_queue(struct netdev_queue *q)
2009 { 2009 {
2010 #ifdef CONFIG_BQL 2010 #ifdef CONFIG_BQL
2011 clear_bit(__QUEUE_STATE_STACK_XOFF, &q->state); 2011 clear_bit(__QUEUE_STATE_STACK_XOFF, &q->state);
2012 dql_reset(&q->dql); 2012 dql_reset(&q->dql);
2013 #endif 2013 #endif
2014 } 2014 }
2015 2015
2016 static inline void netdev_reset_queue(struct net_device *dev_queue) 2016 static inline void netdev_reset_queue(struct net_device *dev_queue)
2017 { 2017 {
2018 netdev_tx_reset_queue(netdev_get_tx_queue(dev_queue, 0)); 2018 netdev_tx_reset_queue(netdev_get_tx_queue(dev_queue, 0));
2019 } 2019 }
2020 2020
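Usage sketch (illustrative, not part of this patch): the BQL helpers above balance bytes queued at xmit time against bytes reported complete at TX-IRQ time; the smp_mb() comments explain why the STACK_XOFF bit and the dql counters must be ordered. A hedged sketch of how a single-queue driver typically wires them up; the bar_* helpers are hypothetical.

static netdev_tx_t bar_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
    unsigned int len = skb->len;    /* read before the skb is handed off */

    bar_hw_enqueue(dev, skb);       /* hypothetical */
    netdev_sent_queue(dev, len);    /* may set __QUEUE_STATE_STACK_XOFF */
    return NETDEV_TX_OK;
}

static void bar_tx_clean(struct net_device *dev)
{
    unsigned int pkts = 0, bytes = 0;

    bar_hw_reclaim(dev, &pkts, &bytes);        /* hypothetical */
    netdev_completed_queue(dev, pkts, bytes);  /* may re-enable the queue */
}

/* On ring teardown or reset, forget the in-flight accounting. */
static void bar_ring_reset(struct net_device *dev)
{
    netdev_reset_queue(dev);
}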
2021 /** 2021 /**
2022 * netif_running - test if up 2022 * netif_running - test if up
2023 * @dev: network device 2023 * @dev: network device
2024 * 2024 *
2025 * Test if the device has been brought up. 2025 * Test if the device has been brought up.
2026 */ 2026 */
2027 static inline bool netif_running(const struct net_device *dev) 2027 static inline bool netif_running(const struct net_device *dev)
2028 { 2028 {
2029 return test_bit(__LINK_STATE_START, &dev->state); 2029 return test_bit(__LINK_STATE_START, &dev->state);
2030 } 2030 }
2031 2031
2032 /* 2032 /*
2033 * Routines to manage the subqueues on a device. We only need start, 2033 * Routines to manage the subqueues on a device. We only need start,
2034 * stop, and a check if it's stopped. All other device management is 2034 * stop, and a check if it's stopped. All other device management is
2035 * done at the overall netdevice level. 2035 * done at the overall netdevice level.
2036 * Also test the device if we're multiqueue. 2036 * Also test the device if we're multiqueue.
2037 */ 2037 */
2038 2038
2039 /** 2039 /**
2040 * netif_start_subqueue - allow sending packets on subqueue 2040 * netif_start_subqueue - allow sending packets on subqueue
2041 * @dev: network device 2041 * @dev: network device
2042 * @queue_index: sub queue index 2042 * @queue_index: sub queue index
2043 * 2043 *
2044 * Start individual transmit queue of a device with multiple transmit queues. 2044 * Start individual transmit queue of a device with multiple transmit queues.
2045 */ 2045 */
2046 static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index) 2046 static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index)
2047 { 2047 {
2048 struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); 2048 struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
2049 2049
2050 netif_tx_start_queue(txq); 2050 netif_tx_start_queue(txq);
2051 } 2051 }
2052 2052
2053 /** 2053 /**
2054 * netif_stop_subqueue - stop sending packets on subqueue 2054 * netif_stop_subqueue - stop sending packets on subqueue
2055 * @dev: network device 2055 * @dev: network device
2056 * @queue_index: sub queue index 2056 * @queue_index: sub queue index
2057 * 2057 *
2058 * Stop individual transmit queue of a device with multiple transmit queues. 2058 * Stop individual transmit queue of a device with multiple transmit queues.
2059 */ 2059 */
2060 static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index) 2060 static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index)
2061 { 2061 {
2062 struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); 2062 struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
2063 #ifdef CONFIG_NETPOLL_TRAP 2063 #ifdef CONFIG_NETPOLL_TRAP
2064 if (netpoll_trap()) 2064 if (netpoll_trap())
2065 return; 2065 return;
2066 #endif 2066 #endif
2067 netif_tx_stop_queue(txq); 2067 netif_tx_stop_queue(txq);
2068 } 2068 }
2069 2069
2070 /** 2070 /**
2071 * netif_subqueue_stopped - test status of subqueue 2071 * netif_subqueue_stopped - test status of subqueue
2072 * @dev: network device 2072 * @dev: network device
2073 * @queue_index: sub queue index 2073 * @queue_index: sub queue index
2074 * 2074 *
2075 * Check individual transmit queue of a device with multiple transmit queues. 2075 * Check individual transmit queue of a device with multiple transmit queues.
2076 */ 2076 */
2077 static inline bool __netif_subqueue_stopped(const struct net_device *dev, 2077 static inline bool __netif_subqueue_stopped(const struct net_device *dev,
2078 u16 queue_index) 2078 u16 queue_index)
2079 { 2079 {
2080 struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); 2080 struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
2081 2081
2082 return netif_tx_queue_stopped(txq); 2082 return netif_tx_queue_stopped(txq);
2083 } 2083 }
2084 2084
2085 static inline bool netif_subqueue_stopped(const struct net_device *dev, 2085 static inline bool netif_subqueue_stopped(const struct net_device *dev,
2086 struct sk_buff *skb) 2086 struct sk_buff *skb)
2087 { 2087 {
2088 return __netif_subqueue_stopped(dev, skb_get_queue_mapping(skb)); 2088 return __netif_subqueue_stopped(dev, skb_get_queue_mapping(skb));
2089 } 2089 }
2090 2090
2091 /** 2091 /**
2092 * netif_wake_subqueue - allow sending packets on subqueue 2092 * netif_wake_subqueue - allow sending packets on subqueue
2093 * @dev: network device 2093 * @dev: network device
2094 * @queue_index: sub queue index 2094 * @queue_index: sub queue index
2095 * 2095 *
2096 * Resume individual transmit queue of a device with multiple transmit queues. 2096 * Resume individual transmit queue of a device with multiple transmit queues.
2097 */ 2097 */
2098 static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) 2098 static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
2099 { 2099 {
2100 struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); 2100 struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
2101 #ifdef CONFIG_NETPOLL_TRAP 2101 #ifdef CONFIG_NETPOLL_TRAP
2102 if (netpoll_trap()) 2102 if (netpoll_trap())
2103 return; 2103 return;
2104 #endif 2104 #endif
2105 if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state)) 2105 if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state))
2106 __netif_schedule(txq->qdisc); 2106 __netif_schedule(txq->qdisc);
2107 } 2107 }
2108 2108
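Usage sketch (illustrative, not part of this patch): on multiqueue hardware the same stop/wake handshake is applied per transmit queue with the *_subqueue helpers, keyed by the skb's queue mapping. The mq_* per-ring helpers are hypothetical.

static netdev_tx_t mq_xmit(struct sk_buff *skb, struct net_device *dev)
{
    u16 q = skb_get_queue_mapping(skb);

    mq_hw_enqueue(dev, q, skb);      /* hypothetical per-ring enqueue */
    if (mq_ring_full(dev, q))
        netif_stop_subqueue(dev, q);
    return NETDEV_TX_OK;
}

static void mq_tx_done(struct net_device *dev, u16 q)
{
    mq_reclaim(dev, q);              /* hypothetical */
    if (__netif_subqueue_stopped(dev, q) && !mq_ring_full(dev, q))
        netif_wake_subqueue(dev, q);
}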
2109 #ifdef CONFIG_XPS 2109 #ifdef CONFIG_XPS
2110 extern int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, 2110 extern int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask,
2111 u16 index); 2111 u16 index);
2112 #else 2112 #else
2113 static inline int netif_set_xps_queue(struct net_device *dev, 2113 static inline int netif_set_xps_queue(struct net_device *dev,
2114 struct cpumask *mask, 2114 struct cpumask *mask,
2115 u16 index) 2115 u16 index)
2116 { 2116 {
2117 return 0; 2117 return 0;
2118 } 2118 }
2119 #endif 2119 #endif
2120 2120
2121 /* 2121 /*
2122 * Returns a Tx hash for the given packet when dev->real_num_tx_queues is used 2122 * Returns a Tx hash for the given packet when dev->real_num_tx_queues is used
2123 * as a distribution range limit for the returned value. 2123 * as a distribution range limit for the returned value.
2124 */ 2124 */
2125 static inline u16 skb_tx_hash(const struct net_device *dev, 2125 static inline u16 skb_tx_hash(const struct net_device *dev,
2126 const struct sk_buff *skb) 2126 const struct sk_buff *skb)
2127 { 2127 {
2128 return __skb_tx_hash(dev, skb, dev->real_num_tx_queues); 2128 return __skb_tx_hash(dev, skb, dev->real_num_tx_queues);
2129 } 2129 }
2130 2130
2131 /** 2131 /**
2132 * netif_is_multiqueue - test if device has multiple transmit queues 2132 * netif_is_multiqueue - test if device has multiple transmit queues
2133 * @dev: network device 2133 * @dev: network device
2134 * 2134 *
2135 * Check if device has multiple transmit queues 2135 * Check if device has multiple transmit queues
2136 */ 2136 */
2137 static inline bool netif_is_multiqueue(const struct net_device *dev) 2137 static inline bool netif_is_multiqueue(const struct net_device *dev)
2138 { 2138 {
2139 return dev->num_tx_queues > 1; 2139 return dev->num_tx_queues > 1;
2140 } 2140 }
2141 2141
2142 extern int netif_set_real_num_tx_queues(struct net_device *dev, 2142 extern int netif_set_real_num_tx_queues(struct net_device *dev,
2143 unsigned int txq); 2143 unsigned int txq);
2144 2144
2145 #ifdef CONFIG_RPS 2145 #ifdef CONFIG_RPS
2146 extern int netif_set_real_num_rx_queues(struct net_device *dev, 2146 extern int netif_set_real_num_rx_queues(struct net_device *dev,
2147 unsigned int rxq); 2147 unsigned int rxq);
2148 #else 2148 #else
2149 static inline int netif_set_real_num_rx_queues(struct net_device *dev, 2149 static inline int netif_set_real_num_rx_queues(struct net_device *dev,
2150 unsigned int rxq) 2150 unsigned int rxq)
2151 { 2151 {
2152 return 0; 2152 return 0;
2153 } 2153 }
2154 #endif 2154 #endif
2155 2155
2156 static inline int netif_copy_real_num_queues(struct net_device *to_dev, 2156 static inline int netif_copy_real_num_queues(struct net_device *to_dev,
2157 const struct net_device *from_dev) 2157 const struct net_device *from_dev)
2158 { 2158 {
2159 int err; 2159 int err;
2160 2160
2161 err = netif_set_real_num_tx_queues(to_dev, 2161 err = netif_set_real_num_tx_queues(to_dev,
2162 from_dev->real_num_tx_queues); 2162 from_dev->real_num_tx_queues);
2163 if (err) 2163 if (err)
2164 return err; 2164 return err;
2165 #ifdef CONFIG_RPS 2165 #ifdef CONFIG_RPS
2166 return netif_set_real_num_rx_queues(to_dev, 2166 return netif_set_real_num_rx_queues(to_dev,
2167 from_dev->real_num_rx_queues); 2167 from_dev->real_num_rx_queues);
2168 #else 2168 #else
2169 return 0; 2169 return 0;
2170 #endif 2170 #endif
2171 } 2171 }
2172 2172
2173 #define DEFAULT_MAX_NUM_RSS_QUEUES (8) 2173 #define DEFAULT_MAX_NUM_RSS_QUEUES (8)
2174 extern int netif_get_num_default_rss_queues(void); 2174 extern int netif_get_num_default_rss_queues(void);
2175 2175
2176 /* Use this variant when it is known for sure that it 2176 /* Use this variant when it is known for sure that it
2177 * is executing from hardware interrupt context or with hardware interrupts 2177 * is executing from hardware interrupt context or with hardware interrupts
2178 * disabled. 2178 * disabled.
2179 */ 2179 */
2180 extern void dev_kfree_skb_irq(struct sk_buff *skb); 2180 extern void dev_kfree_skb_irq(struct sk_buff *skb);
2181 2181
2182 /* Use this variant in places where it could be invoked 2182 /* Use this variant in places where it could be invoked
2183 * from either hardware interrupt or other context, with hardware interrupts 2183 * from either hardware interrupt or other context, with hardware interrupts
2184 * either disabled or enabled. 2184 * either disabled or enabled.
2185 */ 2185 */
2186 extern void dev_kfree_skb_any(struct sk_buff *skb); 2186 extern void dev_kfree_skb_any(struct sk_buff *skb);
2187 2187
2188 extern int netif_rx(struct sk_buff *skb); 2188 extern int netif_rx(struct sk_buff *skb);
2189 extern int netif_rx_ni(struct sk_buff *skb); 2189 extern int netif_rx_ni(struct sk_buff *skb);
2190 extern int netif_receive_skb(struct sk_buff *skb); 2190 extern int netif_receive_skb(struct sk_buff *skb);
2191 extern gro_result_t napi_gro_receive(struct napi_struct *napi, 2191 extern gro_result_t napi_gro_receive(struct napi_struct *napi,
2192 struct sk_buff *skb); 2192 struct sk_buff *skb);
2193 extern void napi_gro_flush(struct napi_struct *napi, bool flush_old); 2193 extern void napi_gro_flush(struct napi_struct *napi, bool flush_old);
2194 extern struct sk_buff * napi_get_frags(struct napi_struct *napi); 2194 extern struct sk_buff * napi_get_frags(struct napi_struct *napi);
2195 extern gro_result_t napi_gro_frags(struct napi_struct *napi); 2195 extern gro_result_t napi_gro_frags(struct napi_struct *napi);
2196 2196
2197 static inline void napi_free_frags(struct napi_struct *napi) 2197 static inline void napi_free_frags(struct napi_struct *napi)
2198 { 2198 {
2199 kfree_skb(napi->skb); 2199 kfree_skb(napi->skb);
2200 napi->skb = NULL; 2200 napi->skb = NULL;
2201 } 2201 }
2202 2202
2203 extern int netdev_rx_handler_register(struct net_device *dev, 2203 extern int netdev_rx_handler_register(struct net_device *dev,
2204 rx_handler_func_t *rx_handler, 2204 rx_handler_func_t *rx_handler,
2205 void *rx_handler_data); 2205 void *rx_handler_data);
2206 extern void netdev_rx_handler_unregister(struct net_device *dev); 2206 extern void netdev_rx_handler_unregister(struct net_device *dev);
2207 2207
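Usage sketch (illustrative, not part of this patch): netdev_rx_handler_register() attaches a per-device hook that the RX path invokes before normal protocol delivery; bridge, bonding and macvlan use it. A hedged sketch of a pass-through handler. RX_HANDLER_PASS, the usual "continue normal processing" result, is assumed here rather than shown in this header.

static rx_handler_result_t my_rx_handler(struct sk_buff **pskb)
{
    /* Inspect or redirect *pskb here; PASS lets delivery continue. */
    return RX_HANDLER_PASS;
}

static int my_attach(struct net_device *dev)
{
    /* Caller must hold rtnl_lock(); only one handler per device. */
    return netdev_rx_handler_register(dev, my_rx_handler, NULL);
}

static void my_detach(struct net_device *dev)
{
    netdev_rx_handler_unregister(dev);   /* also under rtnl_lock() */
}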
2208 extern bool dev_valid_name(const char *name); 2208 extern bool dev_valid_name(const char *name);
2209 extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); 2209 extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *);
2210 extern int dev_ethtool(struct net *net, struct ifreq *); 2210 extern int dev_ethtool(struct net *net, struct ifreq *);
2211 extern unsigned int dev_get_flags(const struct net_device *); 2211 extern unsigned int dev_get_flags(const struct net_device *);
2212 extern int __dev_change_flags(struct net_device *, unsigned int flags); 2212 extern int __dev_change_flags(struct net_device *, unsigned int flags);
2213 extern int dev_change_flags(struct net_device *, unsigned int); 2213 extern int dev_change_flags(struct net_device *, unsigned int);
2214 extern void __dev_notify_flags(struct net_device *, unsigned int old_flags); 2214 extern void __dev_notify_flags(struct net_device *, unsigned int old_flags);
2215 extern int dev_change_name(struct net_device *, const char *); 2215 extern int dev_change_name(struct net_device *, const char *);
2216 extern int dev_set_alias(struct net_device *, const char *, size_t); 2216 extern int dev_set_alias(struct net_device *, const char *, size_t);
2217 extern int dev_change_net_namespace(struct net_device *, 2217 extern int dev_change_net_namespace(struct net_device *,
2218 struct net *, const char *); 2218 struct net *, const char *);
2219 extern int dev_set_mtu(struct net_device *, int); 2219 extern int dev_set_mtu(struct net_device *, int);
2220 extern void dev_set_group(struct net_device *, int); 2220 extern void dev_set_group(struct net_device *, int);
2221 extern int dev_set_mac_address(struct net_device *, 2221 extern int dev_set_mac_address(struct net_device *,
2222 struct sockaddr *); 2222 struct sockaddr *);
2223 extern int dev_change_carrier(struct net_device *, 2223 extern int dev_change_carrier(struct net_device *,
2224 bool new_carrier); 2224 bool new_carrier);
2225 extern int dev_hard_start_xmit(struct sk_buff *skb, 2225 extern int dev_hard_start_xmit(struct sk_buff *skb,
2226 struct net_device *dev, 2226 struct net_device *dev,
2227 struct netdev_queue *txq); 2227 struct netdev_queue *txq);
2228 extern int dev_forward_skb(struct net_device *dev, 2228 extern int dev_forward_skb(struct net_device *dev,
2229 struct sk_buff *skb); 2229 struct sk_buff *skb);
2230 2230
2231 extern int netdev_budget; 2231 extern int netdev_budget;
2232 2232
2233 /* Called by rtnetlink.c:rtnl_unlock() */ 2233 /* Called by rtnetlink.c:rtnl_unlock() */
2234 extern void netdev_run_todo(void); 2234 extern void netdev_run_todo(void);
2235 2235
2236 /** 2236 /**
2237 * dev_put - release reference to device 2237 * dev_put - release reference to device
2238 * @dev: network device 2238 * @dev: network device
2239 * 2239 *
2240 * Release reference to device to allow it to be freed. 2240 * Release reference to device to allow it to be freed.
2241 */ 2241 */
2242 static inline void dev_put(struct net_device *dev) 2242 static inline void dev_put(struct net_device *dev)
2243 { 2243 {
2244 this_cpu_dec(*dev->pcpu_refcnt); 2244 this_cpu_dec(*dev->pcpu_refcnt);
2245 } 2245 }
2246 2246
2247 /** 2247 /**
2248 * dev_hold - get reference to device 2248 * dev_hold - get reference to device
2249 * @dev: network device 2249 * @dev: network device
2250 * 2250 *
2251 * Hold reference to device to keep it from being freed. 2251 * Hold reference to device to keep it from being freed.
2252 */ 2252 */
2253 static inline void dev_hold(struct net_device *dev) 2253 static inline void dev_hold(struct net_device *dev)
2254 { 2254 {
2255 this_cpu_inc(*dev->pcpu_refcnt); 2255 this_cpu_inc(*dev->pcpu_refcnt);
2256 } 2256 }
2257 2257
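Usage sketch (illustrative, not part of this patch): dev_hold()/dev_put() adjust the per-cpu reference count that keeps a struct net_device from being freed. Lookup helpers without the _rcu suffix, such as dev_get_by_index() declared earlier in this header, return the device already held, so the caller owns exactly one reference. The function name below is hypothetical.

static void poke_device(struct net *net, int ifindex)
{
    struct net_device *dev = dev_get_by_index(net, ifindex);

    if (!dev)
        return;
    /* ... dev cannot be freed while this reference is held ... */
    dev_put(dev);    /* drop the reference taken by dev_get_by_index() */
}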
2258 /* Carrier loss detection, dial on demand. The functions netif_carrier_on 2258 /* Carrier loss detection, dial on demand. The functions netif_carrier_on
2259 * and _off may be called from IRQ context, but it is the caller 2259 * and _off may be called from IRQ context, but it is the caller
2260 * who is responsible for serialization of these calls. 2260 * who is responsible for serialization of these calls.
2261 * 2261 *
2262 * The name carrier is inappropriate, these functions should really be 2262 * The name carrier is inappropriate, these functions should really be
2263 * called netif_lowerlayer_*() because they represent the state of any 2263 * called netif_lowerlayer_*() because they represent the state of any
2264 * kind of lower layer not just hardware media. 2264 * kind of lower layer not just hardware media.
2265 */ 2265 */
2266 2266
2267 extern void linkwatch_init_dev(struct net_device *dev); 2267 extern void linkwatch_init_dev(struct net_device *dev);
2268 extern void linkwatch_fire_event(struct net_device *dev); 2268 extern void linkwatch_fire_event(struct net_device *dev);
2269 extern void linkwatch_forget_dev(struct net_device *dev); 2269 extern void linkwatch_forget_dev(struct net_device *dev);
2270 2270
2271 /** 2271 /**
2272 * netif_carrier_ok - test if carrier present 2272 * netif_carrier_ok - test if carrier present
2273 * @dev: network device 2273 * @dev: network device
2274 * 2274 *
2275 * Check if carrier is present on device 2275 * Check if carrier is present on device
2276 */ 2276 */
2277 static inline bool netif_carrier_ok(const struct net_device *dev) 2277 static inline bool netif_carrier_ok(const struct net_device *dev)
2278 { 2278 {
2279 return !test_bit(__LINK_STATE_NOCARRIER, &dev->state); 2279 return !test_bit(__LINK_STATE_NOCARRIER, &dev->state);
2280 } 2280 }
2281 2281
2282 extern unsigned long dev_trans_start(struct net_device *dev); 2282 extern unsigned long dev_trans_start(struct net_device *dev);
2283 2283
2284 extern void __netdev_watchdog_up(struct net_device *dev); 2284 extern void __netdev_watchdog_up(struct net_device *dev);
2285 2285
2286 extern void netif_carrier_on(struct net_device *dev); 2286 extern void netif_carrier_on(struct net_device *dev);
2287 2287
2288 extern void netif_carrier_off(struct net_device *dev); 2288 extern void netif_carrier_off(struct net_device *dev);
2289 2289
2290 /** 2290 /**
2291 * netif_dormant_on - mark device as dormant. 2291 * netif_dormant_on - mark device as dormant.
2292 * @dev: network device 2292 * @dev: network device
2293 * 2293 *
2294 * Mark device as dormant (as per RFC2863). 2294 * Mark device as dormant (as per RFC2863).
2295 * 2295 *
2296 * The dormant state indicates that the relevant interface is not 2296 * The dormant state indicates that the relevant interface is not
2297 * actually in a condition to pass packets (i.e., it is not 'up') but is 2297 * actually in a condition to pass packets (i.e., it is not 'up') but is
2298 * in a "pending" state, waiting for some external event. For "on- 2298 * in a "pending" state, waiting for some external event. For "on-
2299 * demand" interfaces, this new state identifies the situation where the 2299 * demand" interfaces, this new state identifies the situation where the
2300 * interface is waiting for events to place it in the up state. 2300 * interface is waiting for events to place it in the up state.
2301 * 2301 *
2302 */ 2302 */
2303 static inline void netif_dormant_on(struct net_device *dev) 2303 static inline void netif_dormant_on(struct net_device *dev)
2304 { 2304 {
2305 if (!test_and_set_bit(__LINK_STATE_DORMANT, &dev->state)) 2305 if (!test_and_set_bit(__LINK_STATE_DORMANT, &dev->state))
2306 linkwatch_fire_event(dev); 2306 linkwatch_fire_event(dev);
2307 } 2307 }
2308 2308
2309 /** 2309 /**
2310 * netif_dormant_off - set device as not dormant. 2310 * netif_dormant_off - set device as not dormant.
2311 * @dev: network device 2311 * @dev: network device
2312 * 2312 *
2313 * Device is not in dormant state. 2313 * Device is not in dormant state.
2314 */ 2314 */
2315 static inline void netif_dormant_off(struct net_device *dev) 2315 static inline void netif_dormant_off(struct net_device *dev)
2316 { 2316 {
2317 if (test_and_clear_bit(__LINK_STATE_DORMANT, &dev->state)) 2317 if (test_and_clear_bit(__LINK_STATE_DORMANT, &dev->state))
2318 linkwatch_fire_event(dev); 2318 linkwatch_fire_event(dev);
2319 } 2319 }
2320 2320
2321 /** 2321 /**
2322 * netif_dormant - test if device is dormant 2322 * netif_dormant - test if device is dormant
2323 * @dev: network device 2323 * @dev: network device
2324 * 2324 *
2325 * Check if the device is in dormant state. 2325 * Check if the device is in dormant state.
2326 */ 2326 */
2327 static inline bool netif_dormant(const struct net_device *dev) 2327 static inline bool netif_dormant(const struct net_device *dev)
2328 { 2328 {
2329 return test_bit(__LINK_STATE_DORMANT, &dev->state); 2329 return test_bit(__LINK_STATE_DORMANT, &dev->state);
2330 } 2330 }
2331 2331
2332 2332
2333 /** 2333 /**
2334 * netif_oper_up - test if device is operational 2334 * netif_oper_up - test if device is operational
2335 * @dev: network device 2335 * @dev: network device
2336 * 2336 *
2337 * Check if carrier is operational 2337 * Check if carrier is operational
2338 */ 2338 */
2339 static inline bool netif_oper_up(const struct net_device *dev) 2339 static inline bool netif_oper_up(const struct net_device *dev)
2340 { 2340 {
2341 return (dev->operstate == IF_OPER_UP || 2341 return (dev->operstate == IF_OPER_UP ||
2342 dev->operstate == IF_OPER_UNKNOWN /* backward compat */); 2342 dev->operstate == IF_OPER_UNKNOWN /* backward compat */);
2343 } 2343 }
2344 2344
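Usage sketch (illustrative, not part of this patch): netif_carrier_on()/netif_carrier_off() report lower-layer link state to the stack and drive the operstate values tested by the dormant/oper helpers above. A typical driver pattern, with the PHY query being hypothetical.

static void foo_link_change(struct net_device *dev)
{
    if (foo_phy_link_up(dev)) {      /* hypothetical */
        netif_carrier_on(dev);
        netif_wake_queue(dev);
    } else {
        netif_carrier_off(dev);
        netif_stop_queue(dev);
    }
}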
2345 /** 2345 /**
2346 * netif_device_present - is device available or removed 2346 * netif_device_present - is device available or removed
2347 * @dev: network device 2347 * @dev: network device
2348 * 2348 *
2349 * Check if device has not been removed from system. 2349 * Check if device has not been removed from system.
2350 */ 2350 */
2351 static inline bool netif_device_present(struct net_device *dev) 2351 static inline bool netif_device_present(struct net_device *dev)
2352 { 2352 {
2353 return test_bit(__LINK_STATE_PRESENT, &dev->state); 2353 return test_bit(__LINK_STATE_PRESENT, &dev->state);
2354 } 2354 }
2355 2355
2356 extern void netif_device_detach(struct net_device *dev); 2356 extern void netif_device_detach(struct net_device *dev);
2357 2357
2358 extern void netif_device_attach(struct net_device *dev); 2358 extern void netif_device_attach(struct net_device *dev);
2359 2359
2360 /* 2360 /*
2361 * Network interface message level settings 2361 * Network interface message level settings
2362 */ 2362 */
2363 2363
2364 enum { 2364 enum {
2365 NETIF_MSG_DRV = 0x0001, 2365 NETIF_MSG_DRV = 0x0001,
2366 NETIF_MSG_PROBE = 0x0002, 2366 NETIF_MSG_PROBE = 0x0002,
2367 NETIF_MSG_LINK = 0x0004, 2367 NETIF_MSG_LINK = 0x0004,
2368 NETIF_MSG_TIMER = 0x0008, 2368 NETIF_MSG_TIMER = 0x0008,
2369 NETIF_MSG_IFDOWN = 0x0010, 2369 NETIF_MSG_IFDOWN = 0x0010,
2370 NETIF_MSG_IFUP = 0x0020, 2370 NETIF_MSG_IFUP = 0x0020,
2371 NETIF_MSG_RX_ERR = 0x0040, 2371 NETIF_MSG_RX_ERR = 0x0040,
2372 NETIF_MSG_TX_ERR = 0x0080, 2372 NETIF_MSG_TX_ERR = 0x0080,
2373 NETIF_MSG_TX_QUEUED = 0x0100, 2373 NETIF_MSG_TX_QUEUED = 0x0100,
2374 NETIF_MSG_INTR = 0x0200, 2374 NETIF_MSG_INTR = 0x0200,
2375 NETIF_MSG_TX_DONE = 0x0400, 2375 NETIF_MSG_TX_DONE = 0x0400,
2376 NETIF_MSG_RX_STATUS = 0x0800, 2376 NETIF_MSG_RX_STATUS = 0x0800,
2377 NETIF_MSG_PKTDATA = 0x1000, 2377 NETIF_MSG_PKTDATA = 0x1000,
2378 NETIF_MSG_HW = 0x2000, 2378 NETIF_MSG_HW = 0x2000,
2379 NETIF_MSG_WOL = 0x4000, 2379 NETIF_MSG_WOL = 0x4000,
2380 }; 2380 };
2381 2381
2382 #define netif_msg_drv(p) ((p)->msg_enable & NETIF_MSG_DRV) 2382 #define netif_msg_drv(p) ((p)->msg_enable & NETIF_MSG_DRV)
2383 #define netif_msg_probe(p) ((p)->msg_enable & NETIF_MSG_PROBE) 2383 #define netif_msg_probe(p) ((p)->msg_enable & NETIF_MSG_PROBE)
2384 #define netif_msg_link(p) ((p)->msg_enable & NETIF_MSG_LINK) 2384 #define netif_msg_link(p) ((p)->msg_enable & NETIF_MSG_LINK)
2385 #define netif_msg_timer(p) ((p)->msg_enable & NETIF_MSG_TIMER) 2385 #define netif_msg_timer(p) ((p)->msg_enable & NETIF_MSG_TIMER)
2386 #define netif_msg_ifdown(p) ((p)->msg_enable & NETIF_MSG_IFDOWN) 2386 #define netif_msg_ifdown(p) ((p)->msg_enable & NETIF_MSG_IFDOWN)
2387 #define netif_msg_ifup(p) ((p)->msg_enable & NETIF_MSG_IFUP) 2387 #define netif_msg_ifup(p) ((p)->msg_enable & NETIF_MSG_IFUP)
2388 #define netif_msg_rx_err(p) ((p)->msg_enable & NETIF_MSG_RX_ERR) 2388 #define netif_msg_rx_err(p) ((p)->msg_enable & NETIF_MSG_RX_ERR)
2389 #define netif_msg_tx_err(p) ((p)->msg_enable & NETIF_MSG_TX_ERR) 2389 #define netif_msg_tx_err(p) ((p)->msg_enable & NETIF_MSG_TX_ERR)
2390 #define netif_msg_tx_queued(p) ((p)->msg_enable & NETIF_MSG_TX_QUEUED) 2390 #define netif_msg_tx_queued(p) ((p)->msg_enable & NETIF_MSG_TX_QUEUED)
2391 #define netif_msg_intr(p) ((p)->msg_enable & NETIF_MSG_INTR) 2391 #define netif_msg_intr(p) ((p)->msg_enable & NETIF_MSG_INTR)
2392 #define netif_msg_tx_done(p) ((p)->msg_enable & NETIF_MSG_TX_DONE) 2392 #define netif_msg_tx_done(p) ((p)->msg_enable & NETIF_MSG_TX_DONE)
2393 #define netif_msg_rx_status(p) ((p)->msg_enable & NETIF_MSG_RX_STATUS) 2393 #define netif_msg_rx_status(p) ((p)->msg_enable & NETIF_MSG_RX_STATUS)
2394 #define netif_msg_pktdata(p) ((p)->msg_enable & NETIF_MSG_PKTDATA) 2394 #define netif_msg_pktdata(p) ((p)->msg_enable & NETIF_MSG_PKTDATA)
2395 #define netif_msg_hw(p) ((p)->msg_enable & NETIF_MSG_HW) 2395 #define netif_msg_hw(p) ((p)->msg_enable & NETIF_MSG_HW)
2396 #define netif_msg_wol(p) ((p)->msg_enable & NETIF_MSG_WOL) 2396 #define netif_msg_wol(p) ((p)->msg_enable & NETIF_MSG_WOL)
2397 2397
2398 static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) 2398 static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits)
2399 { 2399 {
2400 /* use default */ 2400 /* use default */
2401 if (debug_value < 0 || debug_value >= (sizeof(u32) * 8)) 2401 if (debug_value < 0 || debug_value >= (sizeof(u32) * 8))
2402 return default_msg_enable_bits; 2402 return default_msg_enable_bits;
2403 if (debug_value == 0) /* no output */ 2403 if (debug_value == 0) /* no output */
2404 return 0; 2404 return 0;
2405 /* set low N bits */ 2405 /* set low N bits */
2406 return (1 << debug_value) - 1; 2406 return (1 << debug_value) - 1;
2407 } 2407 }
2408 2408
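Usage sketch (illustrative, not part of this patch): netif_msg_init() turns a module's numeric debug parameter into a msg_enable bitmask: out-of-range values select the driver default, 0 silences output, and N enables the low N bits, which the netif_msg_*() macros then test. The foo_priv layout and default bits below are illustrative; the usual module headers are assumed.

static int debug = -1;                   /* -1: use the driver default */
module_param(debug, int, 0);

#define FOO_DEF_MSG (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)

struct foo_priv {
    u32 msg_enable;
    /* ... */
};

static void foo_set_msglevel(struct foo_priv *priv)
{
    priv->msg_enable = netif_msg_init(debug, FOO_DEF_MSG);

    if (netif_msg_drv(priv))
        pr_info("foo: driver messages enabled\n");
}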
2409 static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) 2409 static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
2410 { 2410 {
2411 spin_lock(&txq->_xmit_lock); 2411 spin_lock(&txq->_xmit_lock);
2412 txq->xmit_lock_owner = cpu; 2412 txq->xmit_lock_owner = cpu;
2413 } 2413 }
2414 2414
2415 static inline void __netif_tx_lock_bh(struct netdev_queue *txq) 2415 static inline void __netif_tx_lock_bh(struct netdev_queue *txq)
2416 { 2416 {
2417 spin_lock_bh(&txq->_xmit_lock); 2417 spin_lock_bh(&txq->_xmit_lock);
2418 txq->xmit_lock_owner = smp_processor_id(); 2418 txq->xmit_lock_owner = smp_processor_id();
2419 } 2419 }
2420 2420
2421 static inline bool __netif_tx_trylock(struct netdev_queue *txq) 2421 static inline bool __netif_tx_trylock(struct netdev_queue *txq)
2422 { 2422 {
2423 bool ok = spin_trylock(&txq->_xmit_lock); 2423 bool ok = spin_trylock(&txq->_xmit_lock);
2424 if (likely(ok)) 2424 if (likely(ok))
2425 txq->xmit_lock_owner = smp_processor_id(); 2425 txq->xmit_lock_owner = smp_processor_id();
2426 return ok; 2426 return ok;
2427 } 2427 }
2428 2428
2429 static inline void __netif_tx_unlock(struct netdev_queue *txq) 2429 static inline void __netif_tx_unlock(struct netdev_queue *txq)
2430 { 2430 {
2431 txq->xmit_lock_owner = -1; 2431 txq->xmit_lock_owner = -1;
2432 spin_unlock(&txq->_xmit_lock); 2432 spin_unlock(&txq->_xmit_lock);
2433 } 2433 }
2434 2434
2435 static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) 2435 static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
2436 { 2436 {
2437 txq->xmit_lock_owner = -1; 2437 txq->xmit_lock_owner = -1;
2438 spin_unlock_bh(&txq->_xmit_lock); 2438 spin_unlock_bh(&txq->_xmit_lock);
2439 } 2439 }
2440 2440
2441 static inline void txq_trans_update(struct netdev_queue *txq) 2441 static inline void txq_trans_update(struct netdev_queue *txq)
2442 { 2442 {
2443 if (txq->xmit_lock_owner != -1) 2443 if (txq->xmit_lock_owner != -1)
2444 txq->trans_start = jiffies; 2444 txq->trans_start = jiffies;
2445 } 2445 }
2446 2446
2447 /** 2447 /**
2448 * netif_tx_lock - grab network device transmit lock 2448 * netif_tx_lock - grab network device transmit lock
2449 * @dev: network device 2449 * @dev: network device
2450 * 2450 *
2451 * Get network device transmit lock 2451 * Get network device transmit lock
2452 */ 2452 */
2453 static inline void netif_tx_lock(struct net_device *dev) 2453 static inline void netif_tx_lock(struct net_device *dev)
2454 { 2454 {
2455 unsigned int i; 2455 unsigned int i;
2456 int cpu; 2456 int cpu;
2457 2457
2458 spin_lock(&dev->tx_global_lock); 2458 spin_lock(&dev->tx_global_lock);
2459 cpu = smp_processor_id(); 2459 cpu = smp_processor_id();
2460 for (i = 0; i < dev->num_tx_queues; i++) { 2460 for (i = 0; i < dev->num_tx_queues; i++) {
2461 struct netdev_queue *txq = netdev_get_tx_queue(dev, i); 2461 struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
2462 2462
2463 /* We are the only thread of execution doing a 2463 /* We are the only thread of execution doing a
2464 * freeze, but we have to grab the _xmit_lock in 2464 * freeze, but we have to grab the _xmit_lock in
2465 * order to synchronize with threads which are in 2465 * order to synchronize with threads which are in
2466 * the ->hard_start_xmit() handler and already 2466 * the ->hard_start_xmit() handler and already
2467 * checked the frozen bit. 2467 * checked the frozen bit.
2468 */ 2468 */
2469 __netif_tx_lock(txq, cpu); 2469 __netif_tx_lock(txq, cpu);
2470 set_bit(__QUEUE_STATE_FROZEN, &txq->state); 2470 set_bit(__QUEUE_STATE_FROZEN, &txq->state);
2471 __netif_tx_unlock(txq); 2471 __netif_tx_unlock(txq);
2472 } 2472 }
2473 } 2473 }
2474 2474
2475 static inline void netif_tx_lock_bh(struct net_device *dev) 2475 static inline void netif_tx_lock_bh(struct net_device *dev)
2476 { 2476 {
2477 local_bh_disable(); 2477 local_bh_disable();
2478 netif_tx_lock(dev); 2478 netif_tx_lock(dev);
2479 } 2479 }
2480 2480
2481 static inline void netif_tx_unlock(struct net_device *dev) 2481 static inline void netif_tx_unlock(struct net_device *dev)
2482 { 2482 {
2483 unsigned int i; 2483 unsigned int i;
2484 2484
2485 for (i = 0; i < dev->num_tx_queues; i++) { 2485 for (i = 0; i < dev->num_tx_queues; i++) {
2486 struct netdev_queue *txq = netdev_get_tx_queue(dev, i); 2486 struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
2487 2487
2488 /* No need to grab the _xmit_lock here. If the 2488 /* No need to grab the _xmit_lock here. If the
2489 * queue is not stopped for another reason, we 2489 * queue is not stopped for another reason, we
2490 * force a schedule. 2490 * force a schedule.
2491 */ 2491 */
2492 clear_bit(__QUEUE_STATE_FROZEN, &txq->state); 2492 clear_bit(__QUEUE_STATE_FROZEN, &txq->state);
2493 netif_schedule_queue(txq); 2493 netif_schedule_queue(txq);
2494 } 2494 }
2495 spin_unlock(&dev->tx_global_lock); 2495 spin_unlock(&dev->tx_global_lock);
2496 } 2496 }
2497 2497
2498 static inline void netif_tx_unlock_bh(struct net_device *dev) 2498 static inline void netif_tx_unlock_bh(struct net_device *dev)
2499 { 2499 {
2500 netif_tx_unlock(dev); 2500 netif_tx_unlock(dev);
2501 local_bh_enable(); 2501 local_bh_enable();
2502 } 2502 }
2503 2503
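Usage sketch (illustrative, not part of this patch): netif_tx_lock()/netif_tx_unlock() freeze every transmit queue of a device (setting __QUEUE_STATE_FROZEN under each queue's _xmit_lock) so that no ->hard_start_xmit() runs concurrently; the _bh variants also disable bottom halves. A hedged sketch of guarding a device-wide reconfiguration this way; the reconfiguration helper is hypothetical.

static void foo_reconfigure(struct net_device *dev)
{
    netif_tx_lock_bh(dev);           /* freeze all TX queues, BHs off */
    foo_rewrite_tx_rings(dev);       /* hypothetical: now safe vs. xmit */
    netif_tx_unlock_bh(dev);         /* unfreeze and reschedule queues */
}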
2504 #define HARD_TX_LOCK(dev, txq, cpu) { \ 2504 #define HARD_TX_LOCK(dev, txq, cpu) { \
2505 if ((dev->features & NETIF_F_LLTX) == 0) { \ 2505 if ((dev->features & NETIF_F_LLTX) == 0) { \
2506 __netif_tx_lock(txq, cpu); \ 2506 __netif_tx_lock(txq, cpu); \
2507 } \ 2507 } \
2508 } 2508 }
2509 2509
2510 #define HARD_TX_UNLOCK(dev, txq) { \ 2510 #define HARD_TX_UNLOCK(dev, txq) { \
2511 if ((dev->features & NETIF_F_LLTX) == 0) { \ 2511 if ((dev->features & NETIF_F_LLTX) == 0) { \
2512 __netif_tx_unlock(txq); \ 2512 __netif_tx_unlock(txq); \
2513 } \ 2513 } \
2514 } 2514 }
2515 2515
2516 static inline void netif_tx_disable(struct net_device *dev) 2516 static inline void netif_tx_disable(struct net_device *dev)
2517 { 2517 {
2518 unsigned int i; 2518 unsigned int i;
2519 int cpu; 2519 int cpu;
2520 2520
2521 local_bh_disable(); 2521 local_bh_disable();
2522 cpu = smp_processor_id(); 2522 cpu = smp_processor_id();
2523 for (i = 0; i < dev->num_tx_queues; i++) { 2523 for (i = 0; i < dev->num_tx_queues; i++) {
2524 struct netdev_queue *txq = netdev_get_tx_queue(dev, i); 2524 struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
2525 2525
2526 __netif_tx_lock(txq, cpu); 2526 __netif_tx_lock(txq, cpu);
2527 netif_tx_stop_queue(txq); 2527 netif_tx_stop_queue(txq);
2528 __netif_tx_unlock(txq); 2528 __netif_tx_unlock(txq);
2529 } 2529 }
2530 local_bh_enable(); 2530 local_bh_enable();
2531 } 2531 }
2532 2532
2533 static inline void netif_addr_lock(struct net_device *dev) 2533 static inline void netif_addr_lock(struct net_device *dev)
2534 { 2534 {
2535 spin_lock(&dev->addr_list_lock); 2535 spin_lock(&dev->addr_list_lock);
2536 } 2536 }
2537 2537
2538 static inline void netif_addr_lock_nested(struct net_device *dev) 2538 static inline void netif_addr_lock_nested(struct net_device *dev)
2539 { 2539 {
2540 spin_lock_nested(&dev->addr_list_lock, SINGLE_DEPTH_NESTING); 2540 spin_lock_nested(&dev->addr_list_lock, SINGLE_DEPTH_NESTING);
2541 } 2541 }
2542 2542
2543 static inline void netif_addr_lock_bh(struct net_device *dev) 2543 static inline void netif_addr_lock_bh(struct net_device *dev)
2544 { 2544 {
2545 spin_lock_bh(&dev->addr_list_lock); 2545 spin_lock_bh(&dev->addr_list_lock);
2546 } 2546 }
2547 2547
2548 static inline void netif_addr_unlock(struct net_device *dev) 2548 static inline void netif_addr_unlock(struct net_device *dev)
2549 { 2549 {
2550 spin_unlock(&dev->addr_list_lock); 2550 spin_unlock(&dev->addr_list_lock);
2551 } 2551 }
2552 2552
2553 static inline void netif_addr_unlock_bh(struct net_device *dev) 2553 static inline void netif_addr_unlock_bh(struct net_device *dev)
2554 { 2554 {
2555 spin_unlock_bh(&dev->addr_list_lock); 2555 spin_unlock_bh(&dev->addr_list_lock);
2556 } 2556 }
2557 2557
2558 /* 2558 /*
2559 * dev_addrs walker. Should be used only for read access. Call with 2559 * dev_addrs walker. Should be used only for read access. Call with
2560 * rcu_read_lock held. 2560 * rcu_read_lock held.
2561 */ 2561 */
2562 #define for_each_dev_addr(dev, ha) \ 2562 #define for_each_dev_addr(dev, ha) \
2563 list_for_each_entry_rcu(ha, &dev->dev_addrs.list, list) 2563 list_for_each_entry_rcu(ha, &dev->dev_addrs.list, list)
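As the comment above says, the walker is read-only and must run under rcu_read_lock(); a minimal sketch follows (the printing helper is invented for illustration):

    /* Sketch only: read-side walk of dev->dev_addrs under RCU. */
    static void demo_dump_addrs(struct net_device *dev)
    {
            struct netdev_hw_addr *ha;

            rcu_read_lock();
            for_each_dev_addr(dev, ha)
                    pr_info("%s: %pM (type %d)\n", dev->name, ha->addr, ha->type);
            rcu_read_unlock();
    }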
2564 2564
2565 /* These functions live elsewhere (drivers/net/net_init.c, but related) */ 2565 /* These functions live elsewhere (drivers/net/net_init.c, but related) */
2566 2566
2567 extern void ether_setup(struct net_device *dev); 2567 extern void ether_setup(struct net_device *dev);
2568 2568
2569 /* Support for loadable net-drivers */ 2569 /* Support for loadable net-drivers */
2570 extern struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, 2570 extern struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
2571 void (*setup)(struct net_device *), 2571 void (*setup)(struct net_device *),
2572 unsigned int txqs, unsigned int rxqs); 2572 unsigned int txqs, unsigned int rxqs);
2573 #define alloc_netdev(sizeof_priv, name, setup) \ 2573 #define alloc_netdev(sizeof_priv, name, setup) \
2574 alloc_netdev_mqs(sizeof_priv, name, setup, 1, 1) 2574 alloc_netdev_mqs(sizeof_priv, name, setup, 1, 1)
2575 2575
2576 #define alloc_netdev_mq(sizeof_priv, name, setup, count) \ 2576 #define alloc_netdev_mq(sizeof_priv, name, setup, count) \
2577 alloc_netdev_mqs(sizeof_priv, name, setup, count, count) 2577 alloc_netdev_mqs(sizeof_priv, name, setup, count, count)
2578 2578
2579 extern int register_netdev(struct net_device *dev); 2579 extern int register_netdev(struct net_device *dev);
2580 extern void unregister_netdev(struct net_device *dev); 2580 extern void unregister_netdev(struct net_device *dev);
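A minimal sketch of the usual allocate-then-register sequence using the declarations above; struct demo_priv and the "demo%d" name template are invented for illustration:

    /* Sketch only: allocate and register a single-queue Ethernet-style device. */
    struct demo_priv {
            int placeholder;
    };

    static int demo_create(void)
    {
            struct net_device *dev;
            int err;

            dev = alloc_netdev(sizeof(struct demo_priv), "demo%d", ether_setup);
            if (!dev)
                    return -ENOMEM;

            err = register_netdev(dev);
            if (err)
                    free_netdev(dev);
            return err;
    }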
2581 2581
2582 /* General hardware address lists handling functions */ 2582 /* General hardware address lists handling functions */
2583 extern int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, 2583 extern int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
2584 struct netdev_hw_addr_list *from_list, 2584 struct netdev_hw_addr_list *from_list,
2585 int addr_len, unsigned char addr_type); 2585 int addr_len, unsigned char addr_type);
2586 extern void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, 2586 extern void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
2587 struct netdev_hw_addr_list *from_list, 2587 struct netdev_hw_addr_list *from_list,
2588 int addr_len, unsigned char addr_type); 2588 int addr_len, unsigned char addr_type);
2589 extern int __hw_addr_sync(struct netdev_hw_addr_list *to_list, 2589 extern int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
2590 struct netdev_hw_addr_list *from_list, 2590 struct netdev_hw_addr_list *from_list,
2591 int addr_len); 2591 int addr_len);
2592 extern void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, 2592 extern void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
2593 struct netdev_hw_addr_list *from_list, 2593 struct netdev_hw_addr_list *from_list,
2594 int addr_len); 2594 int addr_len);
2595 extern void __hw_addr_flush(struct netdev_hw_addr_list *list); 2595 extern void __hw_addr_flush(struct netdev_hw_addr_list *list);
2596 extern void __hw_addr_init(struct netdev_hw_addr_list *list); 2596 extern void __hw_addr_init(struct netdev_hw_addr_list *list);
2597 2597
2598 /* Functions used for device addresses handling */ 2598 /* Functions used for device addresses handling */
2599 extern int dev_addr_add(struct net_device *dev, const unsigned char *addr, 2599 extern int dev_addr_add(struct net_device *dev, const unsigned char *addr,
2600 unsigned char addr_type); 2600 unsigned char addr_type);
2601 extern int dev_addr_del(struct net_device *dev, const unsigned char *addr, 2601 extern int dev_addr_del(struct net_device *dev, const unsigned char *addr,
2602 unsigned char addr_type); 2602 unsigned char addr_type);
2603 extern int dev_addr_add_multiple(struct net_device *to_dev, 2603 extern int dev_addr_add_multiple(struct net_device *to_dev,
2604 struct net_device *from_dev, 2604 struct net_device *from_dev,
2605 unsigned char addr_type); 2605 unsigned char addr_type);
2606 extern int dev_addr_del_multiple(struct net_device *to_dev, 2606 extern int dev_addr_del_multiple(struct net_device *to_dev,
2607 struct net_device *from_dev, 2607 struct net_device *from_dev,
2608 unsigned char addr_type); 2608 unsigned char addr_type);
2609 extern void dev_addr_flush(struct net_device *dev); 2609 extern void dev_addr_flush(struct net_device *dev);
2610 extern int dev_addr_init(struct net_device *dev); 2610 extern int dev_addr_init(struct net_device *dev);
2611 2611
2612 /* Functions used for unicast addresses handling */ 2612 /* Functions used for unicast addresses handling */
2613 extern int dev_uc_add(struct net_device *dev, const unsigned char *addr); 2613 extern int dev_uc_add(struct net_device *dev, const unsigned char *addr);
2614 extern int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr); 2614 extern int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr);
2615 extern int dev_uc_del(struct net_device *dev, const unsigned char *addr); 2615 extern int dev_uc_del(struct net_device *dev, const unsigned char *addr);
2616 extern int dev_uc_sync(struct net_device *to, struct net_device *from); 2616 extern int dev_uc_sync(struct net_device *to, struct net_device *from);
2617 extern void dev_uc_unsync(struct net_device *to, struct net_device *from); 2617 extern void dev_uc_unsync(struct net_device *to, struct net_device *from);
2618 extern void dev_uc_flush(struct net_device *dev); 2618 extern void dev_uc_flush(struct net_device *dev);
2619 extern void dev_uc_init(struct net_device *dev); 2619 extern void dev_uc_init(struct net_device *dev);
2620 2620
2621 /* Functions used for multicast addresses handling */ 2621 /* Functions used for multicast addresses handling */
2622 extern int dev_mc_add(struct net_device *dev, const unsigned char *addr); 2622 extern int dev_mc_add(struct net_device *dev, const unsigned char *addr);
2623 extern int dev_mc_add_global(struct net_device *dev, const unsigned char *addr); 2623 extern int dev_mc_add_global(struct net_device *dev, const unsigned char *addr);
2624 extern int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr); 2624 extern int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr);
2625 extern int dev_mc_del(struct net_device *dev, const unsigned char *addr); 2625 extern int dev_mc_del(struct net_device *dev, const unsigned char *addr);
2626 extern int dev_mc_del_global(struct net_device *dev, const unsigned char *addr); 2626 extern int dev_mc_del_global(struct net_device *dev, const unsigned char *addr);
2627 extern int dev_mc_sync(struct net_device *to, struct net_device *from); 2627 extern int dev_mc_sync(struct net_device *to, struct net_device *from);
2628 extern void dev_mc_unsync(struct net_device *to, struct net_device *from); 2628 extern void dev_mc_unsync(struct net_device *to, struct net_device *from);
2629 extern void dev_mc_flush(struct net_device *dev); 2629 extern void dev_mc_flush(struct net_device *dev);
2630 extern void dev_mc_init(struct net_device *dev); 2630 extern void dev_mc_init(struct net_device *dev);
2631 2631
2632 /* Functions used for secondary unicast and multicast support */ 2632 /* Functions used for secondary unicast and multicast support */
2633 extern void dev_set_rx_mode(struct net_device *dev); 2633 extern void dev_set_rx_mode(struct net_device *dev);
2634 extern void __dev_set_rx_mode(struct net_device *dev); 2634 extern void __dev_set_rx_mode(struct net_device *dev);
2635 extern int dev_set_promiscuity(struct net_device *dev, int inc); 2635 extern int dev_set_promiscuity(struct net_device *dev, int inc);
2636 extern int dev_set_allmulti(struct net_device *dev, int inc); 2636 extern int dev_set_allmulti(struct net_device *dev, int inc);
2637 extern void netdev_state_change(struct net_device *dev); 2637 extern void netdev_state_change(struct net_device *dev);
2638 extern void netdev_notify_peers(struct net_device *dev); 2638 extern void netdev_notify_peers(struct net_device *dev);
2639 extern void netdev_features_change(struct net_device *dev); 2639 extern void netdev_features_change(struct net_device *dev);
2640 /* Load a device via the kmod */ 2640 /* Load a device via the kmod */
2641 extern void dev_load(struct net *net, const char *name); 2641 extern void dev_load(struct net *net, const char *name);
2642 extern void dev_mcast_init(void); 2642 extern void dev_mcast_init(void);
2643 extern struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, 2643 extern struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
2644 struct rtnl_link_stats64 *storage); 2644 struct rtnl_link_stats64 *storage);
2645 extern void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, 2645 extern void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
2646 const struct net_device_stats *netdev_stats); 2646 const struct net_device_stats *netdev_stats);
2647 2647
2648 extern int netdev_max_backlog; 2648 extern int netdev_max_backlog;
2649 extern int netdev_tstamp_prequeue; 2649 extern int netdev_tstamp_prequeue;
2650 extern int weight_p; 2650 extern int weight_p;
2651 extern int bpf_jit_enable; 2651 extern int bpf_jit_enable;
2652 2652
2653 extern bool netdev_has_upper_dev(struct net_device *dev, 2653 extern bool netdev_has_upper_dev(struct net_device *dev,
2654 struct net_device *upper_dev); 2654 struct net_device *upper_dev);
2655 extern bool netdev_has_any_upper_dev(struct net_device *dev); 2655 extern bool netdev_has_any_upper_dev(struct net_device *dev);
2656 extern struct net_device *netdev_master_upper_dev_get(struct net_device *dev); 2656 extern struct net_device *netdev_master_upper_dev_get(struct net_device *dev);
2657 extern struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev); 2657 extern struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev);
2658 extern int netdev_upper_dev_link(struct net_device *dev, 2658 extern int netdev_upper_dev_link(struct net_device *dev,
2659 struct net_device *upper_dev); 2659 struct net_device *upper_dev);
2660 extern int netdev_master_upper_dev_link(struct net_device *dev, 2660 extern int netdev_master_upper_dev_link(struct net_device *dev,
2661 struct net_device *upper_dev); 2661 struct net_device *upper_dev);
2662 extern void netdev_upper_dev_unlink(struct net_device *dev, 2662 extern void netdev_upper_dev_unlink(struct net_device *dev,
2663 struct net_device *upper_dev); 2663 struct net_device *upper_dev);
2664 extern int skb_checksum_help(struct sk_buff *skb); 2664 extern int skb_checksum_help(struct sk_buff *skb);
2665 extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, 2665 extern struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
2666 netdev_features_t features); 2666 netdev_features_t features, bool tx_path);
2667
2668 static inline
2669 struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features)
2670 {
2671 return __skb_gso_segment(skb, features, true);
2672 }
2673
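This hunk is the core of the header change: skb_gso_segment() becomes a thin inline wrapper that forwards to the new __skb_gso_segment() with tx_path set to true, so every existing transmit-side caller keeps its current behaviour, while a receive-side caller can pass false. A minimal sketch of an rx-path user; the function name and the NETIF_F_SG feature mask are assumptions for illustration, since the actual rx-path call site lives in the third file of this commit and is not shown in this hunk:

    /* Sketch only: hypothetical rx-path caller of the new entry point. */
    static int queue_gso_to_userspace(struct sk_buff *skb)
    {
            struct sk_buff *segs;

            segs = __skb_gso_segment(skb, NETIF_F_SG, false);  /* tx_path = false */
            if (IS_ERR(segs))
                    return PTR_ERR(segs);

            /* ... walk segs->next and hand each segment to user space ... */
            return 0;
    }

Transmit-side code needs no change: calling skb_gso_segment(skb, features) behaves exactly as before, because the wrapper simply invokes __skb_gso_segment(skb, features, true).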
2667 #ifdef CONFIG_BUG 2674 #ifdef CONFIG_BUG
2668 extern void netdev_rx_csum_fault(struct net_device *dev); 2675 extern void netdev_rx_csum_fault(struct net_device *dev);
2669 #else 2676 #else
2670 static inline void netdev_rx_csum_fault(struct net_device *dev) 2677 static inline void netdev_rx_csum_fault(struct net_device *dev)
2671 { 2678 {
2672 } 2679 }
2673 #endif 2680 #endif
2674 /* rx skb timestamps */ 2681 /* rx skb timestamps */
2675 extern void net_enable_timestamp(void); 2682 extern void net_enable_timestamp(void);
2676 extern void net_disable_timestamp(void); 2683 extern void net_disable_timestamp(void);
2677 2684
2678 #ifdef CONFIG_PROC_FS 2685 #ifdef CONFIG_PROC_FS
2679 extern void *dev_seq_start(struct seq_file *seq, loff_t *pos); 2686 extern void *dev_seq_start(struct seq_file *seq, loff_t *pos);
2680 extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos); 2687 extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos);
2681 extern void dev_seq_stop(struct seq_file *seq, void *v); 2688 extern void dev_seq_stop(struct seq_file *seq, void *v);
2682 #endif 2689 #endif
2683 2690
2684 extern int netdev_class_create_file(struct class_attribute *class_attr); 2691 extern int netdev_class_create_file(struct class_attribute *class_attr);
2685 extern void netdev_class_remove_file(struct class_attribute *class_attr); 2692 extern void netdev_class_remove_file(struct class_attribute *class_attr);
2686 2693
2687 extern struct kobj_ns_type_operations net_ns_type_operations; 2694 extern struct kobj_ns_type_operations net_ns_type_operations;
2688 2695
2689 extern const char *netdev_drivername(const struct net_device *dev); 2696 extern const char *netdev_drivername(const struct net_device *dev);
2690 2697
2691 extern void linkwatch_run_queue(void); 2698 extern void linkwatch_run_queue(void);
2692 2699
2693 static inline netdev_features_t netdev_get_wanted_features( 2700 static inline netdev_features_t netdev_get_wanted_features(
2694 struct net_device *dev) 2701 struct net_device *dev)
2695 { 2702 {
2696 return (dev->features & ~dev->hw_features) | dev->wanted_features; 2703 return (dev->features & ~dev->hw_features) | dev->wanted_features;
2697 } 2704 }
2698 netdev_features_t netdev_increment_features(netdev_features_t all, 2705 netdev_features_t netdev_increment_features(netdev_features_t all,
2699 netdev_features_t one, netdev_features_t mask); 2706 netdev_features_t one, netdev_features_t mask);
2700 int __netdev_update_features(struct net_device *dev); 2707 int __netdev_update_features(struct net_device *dev);
2701 void netdev_update_features(struct net_device *dev); 2708 void netdev_update_features(struct net_device *dev);
2702 void netdev_change_features(struct net_device *dev); 2709 void netdev_change_features(struct net_device *dev);
2703 2710
2704 void netif_stacked_transfer_operstate(const struct net_device *rootdev, 2711 void netif_stacked_transfer_operstate(const struct net_device *rootdev,
2705 struct net_device *dev); 2712 struct net_device *dev);
2706 2713
2707 netdev_features_t netif_skb_features(struct sk_buff *skb); 2714 netdev_features_t netif_skb_features(struct sk_buff *skb);
2708 2715
2709 static inline bool net_gso_ok(netdev_features_t features, int gso_type) 2716 static inline bool net_gso_ok(netdev_features_t features, int gso_type)
2710 { 2717 {
2711 netdev_features_t feature = gso_type << NETIF_F_GSO_SHIFT; 2718 netdev_features_t feature = gso_type << NETIF_F_GSO_SHIFT;
2712 2719
2713 /* check flags correspondence */ 2720 /* check flags correspondence */
2714 BUILD_BUG_ON(SKB_GSO_TCPV4 != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT)); 2721 BUILD_BUG_ON(SKB_GSO_TCPV4 != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT));
2715 BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_UFO >> NETIF_F_GSO_SHIFT)); 2722 BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_UFO >> NETIF_F_GSO_SHIFT));
2716 BUILD_BUG_ON(SKB_GSO_DODGY != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT)); 2723 BUILD_BUG_ON(SKB_GSO_DODGY != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT));
2717 BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT)); 2724 BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT));
2718 BUILD_BUG_ON(SKB_GSO_TCPV6 != (NETIF_F_TSO6 >> NETIF_F_GSO_SHIFT)); 2725 BUILD_BUG_ON(SKB_GSO_TCPV6 != (NETIF_F_TSO6 >> NETIF_F_GSO_SHIFT));
2719 BUILD_BUG_ON(SKB_GSO_FCOE != (NETIF_F_FSO >> NETIF_F_GSO_SHIFT)); 2726 BUILD_BUG_ON(SKB_GSO_FCOE != (NETIF_F_FSO >> NETIF_F_GSO_SHIFT));
2720 2727
2721 return (features & feature) == feature; 2728 return (features & feature) == feature;
2722 } 2729 }
2723 2730
2724 static inline bool skb_gso_ok(struct sk_buff *skb, netdev_features_t features) 2731 static inline bool skb_gso_ok(struct sk_buff *skb, netdev_features_t features)
2725 { 2732 {
2726 return net_gso_ok(features, skb_shinfo(skb)->gso_type) && 2733 return net_gso_ok(features, skb_shinfo(skb)->gso_type) &&
2727 (!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST)); 2734 (!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST));
2728 } 2735 }
2729 2736
2730 static inline bool netif_needs_gso(struct sk_buff *skb, 2737 static inline bool netif_needs_gso(struct sk_buff *skb,
2731 netdev_features_t features) 2738 netdev_features_t features)
2732 { 2739 {
2733 return skb_is_gso(skb) && (!skb_gso_ok(skb, features) || 2740 return skb_is_gso(skb) && (!skb_gso_ok(skb, features) ||
2734 unlikely((skb->ip_summed != CHECKSUM_PARTIAL) && 2741 unlikely((skb->ip_summed != CHECKSUM_PARTIAL) &&
2735 (skb->ip_summed != CHECKSUM_UNNECESSARY))); 2742 (skb->ip_summed != CHECKSUM_UNNECESSARY)));
2736 } 2743 }
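Taken together, netif_skb_features(), skb_gso_ok() and netif_needs_gso() are what a transmit path consults before falling back to software segmentation. A hedged sketch of that decision (tx-path context assumed; the drop label and the queueing of the resulting segment list are not shown):

    /* Sketch only: typical tx-side use of the helpers declared above. */
    netdev_features_t features = netif_skb_features(skb);

    if (netif_needs_gso(skb, features)) {
            struct sk_buff *segs = skb_gso_segment(skb, features);

            if (IS_ERR(segs))
                    goto drop;      /* hypothetical error path */
            /* a NULL return would mean there was nothing to segment;
             * otherwise transmit each skb on the segs list instead of
             * the original skb.
             */
    }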
2737 2744
2738 static inline void netif_set_gso_max_size(struct net_device *dev, 2745 static inline void netif_set_gso_max_size(struct net_device *dev,
2739 unsigned int size) 2746 unsigned int size)
2740 { 2747 {
2741 dev->gso_max_size = size; 2748 dev->gso_max_size = size;
2742 } 2749 }
2743 2750
2744 static inline bool netif_is_bond_slave(struct net_device *dev) 2751 static inline bool netif_is_bond_slave(struct net_device *dev)
2745 { 2752 {
2746 return dev->flags & IFF_SLAVE && dev->priv_flags & IFF_BONDING; 2753 return dev->flags & IFF_SLAVE && dev->priv_flags & IFF_BONDING;
2747 } 2754 }
2748 2755
2749 static inline bool netif_supports_nofcs(struct net_device *dev) 2756 static inline bool netif_supports_nofcs(struct net_device *dev)
2750 { 2757 {
2751 return dev->priv_flags & IFF_SUPP_NOFCS; 2758 return dev->priv_flags & IFF_SUPP_NOFCS;
2752 } 2759 }
2753 2760
2754 extern struct pernet_operations __net_initdata loopback_net_ops; 2761 extern struct pernet_operations __net_initdata loopback_net_ops;
2755 2762
2756 /* Logging, debugging and troubleshooting/diagnostic helpers. */ 2763 /* Logging, debugging and troubleshooting/diagnostic helpers. */
2757 2764
2758 /* netdev_printk helpers, similar to dev_printk */ 2765 /* netdev_printk helpers, similar to dev_printk */
2759 2766
2760 static inline const char *netdev_name(const struct net_device *dev) 2767 static inline const char *netdev_name(const struct net_device *dev)
2761 { 2768 {
2762 if (dev->reg_state != NETREG_REGISTERED) 2769 if (dev->reg_state != NETREG_REGISTERED)
2763 return "(unregistered net_device)"; 2770 return "(unregistered net_device)";
2764 return dev->name; 2771 return dev->name;
2765 } 2772 }
2766 2773
2767 extern __printf(3, 4) 2774 extern __printf(3, 4)
2768 int netdev_printk(const char *level, const struct net_device *dev, 2775 int netdev_printk(const char *level, const struct net_device *dev,
2769 const char *format, ...); 2776 const char *format, ...);
2770 extern __printf(2, 3) 2777 extern __printf(2, 3)
2771 int netdev_emerg(const struct net_device *dev, const char *format, ...); 2778 int netdev_emerg(const struct net_device *dev, const char *format, ...);
2772 extern __printf(2, 3) 2779 extern __printf(2, 3)
2773 int netdev_alert(const struct net_device *dev, const char *format, ...); 2780 int netdev_alert(const struct net_device *dev, const char *format, ...);
2774 extern __printf(2, 3) 2781 extern __printf(2, 3)
2775 int netdev_crit(const struct net_device *dev, const char *format, ...); 2782 int netdev_crit(const struct net_device *dev, const char *format, ...);
2776 extern __printf(2, 3) 2783 extern __printf(2, 3)
2777 int netdev_err(const struct net_device *dev, const char *format, ...); 2784 int netdev_err(const struct net_device *dev, const char *format, ...);
2778 extern __printf(2, 3) 2785 extern __printf(2, 3)
2779 int netdev_warn(const struct net_device *dev, const char *format, ...); 2786 int netdev_warn(const struct net_device *dev, const char *format, ...);
2780 extern __printf(2, 3) 2787 extern __printf(2, 3)
2781 int netdev_notice(const struct net_device *dev, const char *format, ...); 2788 int netdev_notice(const struct net_device *dev, const char *format, ...);
2782 extern __printf(2, 3) 2789 extern __printf(2, 3)
2783 int netdev_info(const struct net_device *dev, const char *format, ...); 2790 int netdev_info(const struct net_device *dev, const char *format, ...);
2784 2791
2785 #define MODULE_ALIAS_NETDEV(device) \ 2792 #define MODULE_ALIAS_NETDEV(device) \
2786 MODULE_ALIAS("netdev-" device) 2793 MODULE_ALIAS("netdev-" device)
2787 2794
2788 #if defined(CONFIG_DYNAMIC_DEBUG) 2795 #if defined(CONFIG_DYNAMIC_DEBUG)
2789 #define netdev_dbg(__dev, format, args...) \ 2796 #define netdev_dbg(__dev, format, args...) \
2790 do { \ 2797 do { \
2791 dynamic_netdev_dbg(__dev, format, ##args); \ 2798 dynamic_netdev_dbg(__dev, format, ##args); \
2792 } while (0) 2799 } while (0)
2793 #elif defined(DEBUG) 2800 #elif defined(DEBUG)
2794 #define netdev_dbg(__dev, format, args...) \ 2801 #define netdev_dbg(__dev, format, args...) \
2795 netdev_printk(KERN_DEBUG, __dev, format, ##args) 2802 netdev_printk(KERN_DEBUG, __dev, format, ##args)
2796 #else 2803 #else
2797 #define netdev_dbg(__dev, format, args...) \ 2804 #define netdev_dbg(__dev, format, args...) \
2798 ({ \ 2805 ({ \
2799 if (0) \ 2806 if (0) \
2800 netdev_printk(KERN_DEBUG, __dev, format, ##args); \ 2807 netdev_printk(KERN_DEBUG, __dev, format, ##args); \
2801 0; \ 2808 0; \
2802 }) 2809 })
2803 #endif 2810 #endif
2804 2811
2805 #if defined(VERBOSE_DEBUG) 2812 #if defined(VERBOSE_DEBUG)
2806 #define netdev_vdbg netdev_dbg 2813 #define netdev_vdbg netdev_dbg
2807 #else 2814 #else
2808 2815
2809 #define netdev_vdbg(dev, format, args...) \ 2816 #define netdev_vdbg(dev, format, args...) \
2810 ({ \ 2817 ({ \
2811 if (0) \ 2818 if (0) \
2812 netdev_printk(KERN_DEBUG, dev, format, ##args); \ 2819 netdev_printk(KERN_DEBUG, dev, format, ##args); \
2813 0; \ 2820 0; \
2814 }) 2821 })
2815 #endif 2822 #endif
2816 2823
2817 /* 2824 /*
2818 * netdev_WARN() acts like dev_printk(), but with the key difference 2825 * netdev_WARN() acts like dev_printk(), but with the key difference
2819 * of using a WARN/WARN_ON to get the message out, including the 2826 * of using a WARN/WARN_ON to get the message out, including the
2820 * file/line information and a backtrace. 2827 * file/line information and a backtrace.
2821 */ 2828 */
2822 #define netdev_WARN(dev, format, args...) \ 2829 #define netdev_WARN(dev, format, args...) \
2823 WARN(1, "netdevice: %s\n" format, netdev_name(dev), ##args); 2830 WARN(1, "netdevice: %s\n" format, netdev_name(dev), ##args);
2824 2831
2825 /* netif printk helpers, similar to netdev_printk */ 2832 /* netif printk helpers, similar to netdev_printk */
2826 2833
2827 #define netif_printk(priv, type, level, dev, fmt, args...) \ 2834 #define netif_printk(priv, type, level, dev, fmt, args...) \
2828 do { \ 2835 do { \
2829 if (netif_msg_##type(priv)) \ 2836 if (netif_msg_##type(priv)) \
2830 netdev_printk(level, (dev), fmt, ##args); \ 2837 netdev_printk(level, (dev), fmt, ##args); \
2831 } while (0) 2838 } while (0)
2832 2839
2833 #define netif_level(level, priv, type, dev, fmt, args...) \ 2840 #define netif_level(level, priv, type, dev, fmt, args...) \
2834 do { \ 2841 do { \
2835 if (netif_msg_##type(priv)) \ 2842 if (netif_msg_##type(priv)) \
2836 netdev_##level(dev, fmt, ##args); \ 2843 netdev_##level(dev, fmt, ##args); \
2837 } while (0) 2844 } while (0)
2838 2845
2839 #define netif_emerg(priv, type, dev, fmt, args...) \ 2846 #define netif_emerg(priv, type, dev, fmt, args...) \
2840 netif_level(emerg, priv, type, dev, fmt, ##args) 2847 netif_level(emerg, priv, type, dev, fmt, ##args)
2841 #define netif_alert(priv, type, dev, fmt, args...) \ 2848 #define netif_alert(priv, type, dev, fmt, args...) \
2842 netif_level(alert, priv, type, dev, fmt, ##args) 2849 netif_level(alert, priv, type, dev, fmt, ##args)
2843 #define netif_crit(priv, type, dev, fmt, args...) \ 2850 #define netif_crit(priv, type, dev, fmt, args...) \
2844 netif_level(crit, priv, type, dev, fmt, ##args) 2851 netif_level(crit, priv, type, dev, fmt, ##args)
2845 #define netif_err(priv, type, dev, fmt, args...) \ 2852 #define netif_err(priv, type, dev, fmt, args...) \
2846 netif_level(err, priv, type, dev, fmt, ##args) 2853 netif_level(err, priv, type, dev, fmt, ##args)
2847 #define netif_warn(priv, type, dev, fmt, args...) \ 2854 #define netif_warn(priv, type, dev, fmt, args...) \
2848 netif_level(warn, priv, type, dev, fmt, ##args) 2855 netif_level(warn, priv, type, dev, fmt, ##args)
2849 #define netif_notice(priv, type, dev, fmt, args...) \ 2856 #define netif_notice(priv, type, dev, fmt, args...) \
2850 netif_level(notice, priv, type, dev, fmt, ##args) 2857 netif_level(notice, priv, type, dev, fmt, ##args)
2851 #define netif_info(priv, type, dev, fmt, args...) \ 2858 #define netif_info(priv, type, dev, fmt, args...) \
2852 netif_level(info, priv, type, dev, fmt, ##args) 2859 netif_level(info, priv, type, dev, fmt, ##args)
2853 2860
2854 #if defined(CONFIG_DYNAMIC_DEBUG) 2861 #if defined(CONFIG_DYNAMIC_DEBUG)
2855 #define netif_dbg(priv, type, netdev, format, args...) \ 2862 #define netif_dbg(priv, type, netdev, format, args...) \
2856 do { \ 2863 do { \
2857 if (netif_msg_##type(priv)) \ 2864 if (netif_msg_##type(priv)) \
2858 dynamic_netdev_dbg(netdev, format, ##args); \ 2865 dynamic_netdev_dbg(netdev, format, ##args); \
2859 } while (0) 2866 } while (0)
2860 #elif defined(DEBUG) 2867 #elif defined(DEBUG)
2861 #define netif_dbg(priv, type, dev, format, args...) \ 2868 #define netif_dbg(priv, type, dev, format, args...) \
2862 netif_printk(priv, type, KERN_DEBUG, dev, format, ##args) 2869 netif_printk(priv, type, KERN_DEBUG, dev, format, ##args)
2863 #else 2870 #else
2864 #define netif_dbg(priv, type, dev, format, args...) \ 2871 #define netif_dbg(priv, type, dev, format, args...) \
2865 ({ \ 2872 ({ \
2866 if (0) \ 2873 if (0) \
2867 netif_printk(priv, type, KERN_DEBUG, dev, format, ##args); \ 2874 netif_printk(priv, type, KERN_DEBUG, dev, format, ##args); \
2868 0; \ 2875 0; \
2869 }) 2876 })
2870 #endif 2877 #endif
2871 2878
2872 #if defined(VERBOSE_DEBUG) 2879 #if defined(VERBOSE_DEBUG)
2873 #define netif_vdbg netif_dbg 2880 #define netif_vdbg netif_dbg
2874 #else 2881 #else
2875 #define netif_vdbg(priv, type, dev, format, args...) \ 2882 #define netif_vdbg(priv, type, dev, format, args...) \
2876 ({ \ 2883 ({ \
2877 if (0) \ 2884 if (0) \
2878 netif_printk(priv, type, KERN_DEBUG, dev, format, ##args); \ 2885 netif_printk(priv, type, KERN_DEBUG, dev, format, ##args); \
2879 0; \ 2886 0; \
2880 }) 2887 })
2881 #endif 2888 #endif
2882 2889
2883 #endif /* _LINUX_NETDEVICE_H */ 2890 #endif /* _LINUX_NETDEVICE_H */
2884 2891
net/core/dev.c
1 /* 1 /*
2 * NET3 Protocol independent device support routines. 2 * NET3 Protocol independent device support routines.
3 * 3 *
4 * This program is free software; you can redistribute it and/or 4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License 5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version 6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version. 7 * 2 of the License, or (at your option) any later version.
8 * 8 *
9 * Derived from the non IP parts of dev.c 1.0.19 9 * Derived from the non IP parts of dev.c 1.0.19
10 * Authors: Ross Biro 10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk> 12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
13 * 13 *
14 * Additional Authors: 14 * Additional Authors:
15 * Florian la Roche <rzsfl@rz.uni-sb.de> 15 * Florian la Roche <rzsfl@rz.uni-sb.de>
16 * Alan Cox <gw4pts@gw4pts.ampr.org> 16 * Alan Cox <gw4pts@gw4pts.ampr.org>
17 * David Hinds <dahinds@users.sourceforge.net> 17 * David Hinds <dahinds@users.sourceforge.net>
18 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> 18 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19 * Adam Sulmicki <adam@cfar.umd.edu> 19 * Adam Sulmicki <adam@cfar.umd.edu>
20 * Pekka Riikonen <priikone@poesidon.pspt.fi> 20 * Pekka Riikonen <priikone@poesidon.pspt.fi>
21 * 21 *
22 * Changes: 22 * Changes:
23 * D.J. Barrow : Fixed bug where dev->refcnt gets set 23 * D.J. Barrow : Fixed bug where dev->refcnt gets set
24 * to 2 if register_netdev gets called 24 * to 2 if register_netdev gets called
25 * before net_dev_init & also removed a 25 * before net_dev_init & also removed a
26 * few lines of code in the process. 26 * few lines of code in the process.
27 * Alan Cox : device private ioctl copies fields back. 27 * Alan Cox : device private ioctl copies fields back.
28 * Alan Cox : Transmit queue code does relevant 28 * Alan Cox : Transmit queue code does relevant
29 * stunts to keep the queue safe. 29 * stunts to keep the queue safe.
30 * Alan Cox : Fixed double lock. 30 * Alan Cox : Fixed double lock.
31 * Alan Cox : Fixed promisc NULL pointer trap 31 * Alan Cox : Fixed promisc NULL pointer trap
32 * ???????? : Support the full private ioctl range 32 * ???????? : Support the full private ioctl range
33 * Alan Cox : Moved ioctl permission check into 33 * Alan Cox : Moved ioctl permission check into
34 * drivers 34 * drivers
35 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI 35 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
36 * Alan Cox : 100 backlog just doesn't cut it when 36 * Alan Cox : 100 backlog just doesn't cut it when
37 * you start doing multicast video 8) 37 * you start doing multicast video 8)
38 * Alan Cox : Rewrote net_bh and list manager. 38 * Alan Cox : Rewrote net_bh and list manager.
39 * Alan Cox : Fix ETH_P_ALL echoback lengths. 39 * Alan Cox : Fix ETH_P_ALL echoback lengths.
40 * Alan Cox : Took out transmit every packet pass 40 * Alan Cox : Took out transmit every packet pass
41 * Saved a few bytes in the ioctl handler 41 * Saved a few bytes in the ioctl handler
42 * Alan Cox : Network driver sets packet type before 42 * Alan Cox : Network driver sets packet type before
43 * calling netif_rx. Saves a function 43 * calling netif_rx. Saves a function
44 * call a packet. 44 * call a packet.
45 * Alan Cox : Hashed net_bh() 45 * Alan Cox : Hashed net_bh()
46 * Richard Kooijman: Timestamp fixes. 46 * Richard Kooijman: Timestamp fixes.
47 * Alan Cox : Wrong field in SIOCGIFDSTADDR 47 * Alan Cox : Wrong field in SIOCGIFDSTADDR
48 * Alan Cox : Device lock protection. 48 * Alan Cox : Device lock protection.
49 * Alan Cox : Fixed nasty side effect of device close 49 * Alan Cox : Fixed nasty side effect of device close
50 * changes. 50 * changes.
51 * Rudi Cilibrasi : Pass the right thing to 51 * Rudi Cilibrasi : Pass the right thing to
52 * set_mac_address() 52 * set_mac_address()
53 * Dave Miller : 32bit quantity for the device lock to 53 * Dave Miller : 32bit quantity for the device lock to
54 * make it work out on a Sparc. 54 * make it work out on a Sparc.
55 * Bjorn Ekwall : Added KERNELD hack. 55 * Bjorn Ekwall : Added KERNELD hack.
56 * Alan Cox : Cleaned up the backlog initialise. 56 * Alan Cox : Cleaned up the backlog initialise.
57 * Craig Metz : SIOCGIFCONF fix if space for under 57 * Craig Metz : SIOCGIFCONF fix if space for under
58 * 1 device. 58 * 1 device.
59 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there 59 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
60 * is no device open function. 60 * is no device open function.
61 * Andi Kleen : Fix error reporting for SIOCGIFCONF 61 * Andi Kleen : Fix error reporting for SIOCGIFCONF
62 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF 62 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
63 * Cyrus Durgin : Cleaned for KMOD 63 * Cyrus Durgin : Cleaned for KMOD
64 * Adam Sulmicki : Bug Fix : Network Device Unload 64 * Adam Sulmicki : Bug Fix : Network Device Unload
65 * A network device unload needs to purge 65 * A network device unload needs to purge
66 * the backlog queue. 66 * the backlog queue.
67 * Paul Rusty Russell : SIOCSIFNAME 67 * Paul Rusty Russell : SIOCSIFNAME
68 * Pekka Riikonen : Netdev boot-time settings code 68 * Pekka Riikonen : Netdev boot-time settings code
69 * Andrew Morton : Make unregister_netdevice wait 69 * Andrew Morton : Make unregister_netdevice wait
70 * indefinitely on dev->refcnt 70 * indefinitely on dev->refcnt
71 * J Hadi Salim : - Backlog queue sampling 71 * J Hadi Salim : - Backlog queue sampling
72 * - netif_rx() feedback 72 * - netif_rx() feedback
73 */ 73 */
74 74
75 #include <asm/uaccess.h> 75 #include <asm/uaccess.h>
76 #include <linux/bitops.h> 76 #include <linux/bitops.h>
77 #include <linux/capability.h> 77 #include <linux/capability.h>
78 #include <linux/cpu.h> 78 #include <linux/cpu.h>
79 #include <linux/types.h> 79 #include <linux/types.h>
80 #include <linux/kernel.h> 80 #include <linux/kernel.h>
81 #include <linux/hash.h> 81 #include <linux/hash.h>
82 #include <linux/slab.h> 82 #include <linux/slab.h>
83 #include <linux/sched.h> 83 #include <linux/sched.h>
84 #include <linux/mutex.h> 84 #include <linux/mutex.h>
85 #include <linux/string.h> 85 #include <linux/string.h>
86 #include <linux/mm.h> 86 #include <linux/mm.h>
87 #include <linux/socket.h> 87 #include <linux/socket.h>
88 #include <linux/sockios.h> 88 #include <linux/sockios.h>
89 #include <linux/errno.h> 89 #include <linux/errno.h>
90 #include <linux/interrupt.h> 90 #include <linux/interrupt.h>
91 #include <linux/if_ether.h> 91 #include <linux/if_ether.h>
92 #include <linux/netdevice.h> 92 #include <linux/netdevice.h>
93 #include <linux/etherdevice.h> 93 #include <linux/etherdevice.h>
94 #include <linux/ethtool.h> 94 #include <linux/ethtool.h>
95 #include <linux/notifier.h> 95 #include <linux/notifier.h>
96 #include <linux/skbuff.h> 96 #include <linux/skbuff.h>
97 #include <net/net_namespace.h> 97 #include <net/net_namespace.h>
98 #include <net/sock.h> 98 #include <net/sock.h>
99 #include <linux/rtnetlink.h> 99 #include <linux/rtnetlink.h>
100 #include <linux/proc_fs.h> 100 #include <linux/proc_fs.h>
101 #include <linux/seq_file.h> 101 #include <linux/seq_file.h>
102 #include <linux/stat.h> 102 #include <linux/stat.h>
103 #include <net/dst.h> 103 #include <net/dst.h>
104 #include <net/pkt_sched.h> 104 #include <net/pkt_sched.h>
105 #include <net/checksum.h> 105 #include <net/checksum.h>
106 #include <net/xfrm.h> 106 #include <net/xfrm.h>
107 #include <linux/highmem.h> 107 #include <linux/highmem.h>
108 #include <linux/init.h> 108 #include <linux/init.h>
109 #include <linux/kmod.h> 109 #include <linux/kmod.h>
110 #include <linux/module.h> 110 #include <linux/module.h>
111 #include <linux/netpoll.h> 111 #include <linux/netpoll.h>
112 #include <linux/rcupdate.h> 112 #include <linux/rcupdate.h>
113 #include <linux/delay.h> 113 #include <linux/delay.h>
114 #include <net/wext.h> 114 #include <net/wext.h>
115 #include <net/iw_handler.h> 115 #include <net/iw_handler.h>
116 #include <asm/current.h> 116 #include <asm/current.h>
117 #include <linux/audit.h> 117 #include <linux/audit.h>
118 #include <linux/dmaengine.h> 118 #include <linux/dmaengine.h>
119 #include <linux/err.h> 119 #include <linux/err.h>
120 #include <linux/ctype.h> 120 #include <linux/ctype.h>
121 #include <linux/if_arp.h> 121 #include <linux/if_arp.h>
122 #include <linux/if_vlan.h> 122 #include <linux/if_vlan.h>
123 #include <linux/ip.h> 123 #include <linux/ip.h>
124 #include <net/ip.h> 124 #include <net/ip.h>
125 #include <linux/ipv6.h> 125 #include <linux/ipv6.h>
126 #include <linux/in.h> 126 #include <linux/in.h>
127 #include <linux/jhash.h> 127 #include <linux/jhash.h>
128 #include <linux/random.h> 128 #include <linux/random.h>
129 #include <trace/events/napi.h> 129 #include <trace/events/napi.h>
130 #include <trace/events/net.h> 130 #include <trace/events/net.h>
131 #include <trace/events/skb.h> 131 #include <trace/events/skb.h>
132 #include <linux/pci.h> 132 #include <linux/pci.h>
133 #include <linux/inetdevice.h> 133 #include <linux/inetdevice.h>
134 #include <linux/cpu_rmap.h> 134 #include <linux/cpu_rmap.h>
135 #include <linux/net_tstamp.h> 135 #include <linux/net_tstamp.h>
136 #include <linux/static_key.h> 136 #include <linux/static_key.h>
137 137
138 #include "net-sysfs.h" 138 #include "net-sysfs.h"
139 139
140 /* Instead of increasing this, you should create a hash table. */ 140 /* Instead of increasing this, you should create a hash table. */
141 #define MAX_GRO_SKBS 8 141 #define MAX_GRO_SKBS 8
142 142
143 /* This should be increased if a protocol with a bigger head is added. */ 143 /* This should be increased if a protocol with a bigger head is added. */
144 #define GRO_MAX_HEAD (MAX_HEADER + 128) 144 #define GRO_MAX_HEAD (MAX_HEADER + 128)
145 145
146 /* 146 /*
147 * The list of packet types we will receive (as opposed to discard) 147 * The list of packet types we will receive (as opposed to discard)
148 * and the routines to invoke. 148 * and the routines to invoke.
149 * 149 *
150 * Why 16. Because with 16 the only overlap we get on a hash of the 150 * Why 16. Because with 16 the only overlap we get on a hash of the
151 * low nibble of the protocol value is RARP/SNAP/X.25. 151 * low nibble of the protocol value is RARP/SNAP/X.25.
152 * 152 *
153 * NOTE: That is no longer true with the addition of VLAN tags. Not 153 * NOTE: That is no longer true with the addition of VLAN tags. Not
154 * sure which should go first, but I bet it won't make much 154 * sure which should go first, but I bet it won't make much
155 * difference if we are running VLANs. The good news is that 155 * difference if we are running VLANs. The good news is that
156 * this protocol won't be in the list unless compiled in, so 156 * this protocol won't be in the list unless compiled in, so
157 * the average user (w/out VLANs) will not be adversely affected. 157 * the average user (w/out VLANs) will not be adversely affected.
158 * --BLG 158 * --BLG
159 * 159 *
160 * 0800 IP 160 * 0800 IP
161 * 8100 802.1Q VLAN 161 * 8100 802.1Q VLAN
162 * 0001 802.3 162 * 0001 802.3
163 * 0002 AX.25 163 * 0002 AX.25
164 * 0004 802.2 164 * 0004 802.2
165 * 8035 RARP 165 * 8035 RARP
166 * 0005 SNAP 166 * 0005 SNAP
167 * 0805 X.25 167 * 0805 X.25
168 * 0806 ARP 168 * 0806 ARP
169 * 8137 IPX 169 * 8137 IPX
170 * 0009 Localtalk 170 * 0009 Localtalk
171 * 86DD IPv6 171 * 86DD IPv6
172 */ 172 */
173 173
174 #define PTYPE_HASH_SIZE (16) 174 #define PTYPE_HASH_SIZE (16)
175 #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) 175 #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
176 176
177 static DEFINE_SPINLOCK(ptype_lock); 177 static DEFINE_SPINLOCK(ptype_lock);
178 static DEFINE_SPINLOCK(offload_lock); 178 static DEFINE_SPINLOCK(offload_lock);
179 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; 179 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
180 static struct list_head ptype_all __read_mostly; /* Taps */ 180 static struct list_head ptype_all __read_mostly; /* Taps */
181 static struct list_head offload_base __read_mostly; 181 static struct list_head offload_base __read_mostly;
182 182
183 /* 183 /*
184 * The @dev_base_head list is protected by @dev_base_lock and the rtnl 184 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
185 * semaphore. 185 * semaphore.
186 * 186 *
187 * Pure readers hold dev_base_lock for reading, or rcu_read_lock() 187 * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
188 * 188 *
189 * Writers must hold the rtnl semaphore while they loop through the 189 * Writers must hold the rtnl semaphore while they loop through the
190 * dev_base_head list, and hold dev_base_lock for writing when they do the 190 * dev_base_head list, and hold dev_base_lock for writing when they do the
191 * actual updates. This allows pure readers to access the list even 191 * actual updates. This allows pure readers to access the list even
192 * while a writer is preparing to update it. 192 * while a writer is preparing to update it.
193 * 193 *
194 * To put it another way, dev_base_lock is held for writing only to 194 * To put it another way, dev_base_lock is held for writing only to
195 * protect against pure readers; the rtnl semaphore provides the 195 * protect against pure readers; the rtnl semaphore provides the
196 * protection against other writers. 196 * protection against other writers.
197 * 197 *
198 * See, for example usages, register_netdevice() and 198 * See, for example usages, register_netdevice() and
199 * unregister_netdevice(), which must be called with the rtnl 199 * unregister_netdevice(), which must be called with the rtnl
200 * semaphore held. 200 * semaphore held.
201 */ 201 */
202 DEFINE_RWLOCK(dev_base_lock); 202 DEFINE_RWLOCK(dev_base_lock);
203 EXPORT_SYMBOL(dev_base_lock); 203 EXPORT_SYMBOL(dev_base_lock);
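Per the rules in the comment above, a pure reader may traverse the device list under either dev_base_lock or RCU; a minimal RCU-side sketch (the counting helper is invented for illustration):

    /* Sketch only: read-side traversal following the locking rules above. */
    static void demo_count_devices(struct net *net)
    {
            struct net_device *d;
            int n = 0;

            rcu_read_lock();
            for_each_netdev_rcu(net, d)
                    n++;
            rcu_read_unlock();

            pr_info("%d devices in this namespace\n", n);
    }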
204 204
205 seqcount_t devnet_rename_seq; 205 seqcount_t devnet_rename_seq;
206 206
207 static inline void dev_base_seq_inc(struct net *net) 207 static inline void dev_base_seq_inc(struct net *net)
208 { 208 {
209 while (++net->dev_base_seq == 0); 209 while (++net->dev_base_seq == 0);
210 } 210 }
211 211
212 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name) 212 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
213 { 213 {
214 unsigned int hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); 214 unsigned int hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
215 215
216 return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)]; 216 return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
217 } 217 }
218 218
219 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) 219 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
220 { 220 {
221 return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)]; 221 return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
222 } 222 }
223 223
224 static inline void rps_lock(struct softnet_data *sd) 224 static inline void rps_lock(struct softnet_data *sd)
225 { 225 {
226 #ifdef CONFIG_RPS 226 #ifdef CONFIG_RPS
227 spin_lock(&sd->input_pkt_queue.lock); 227 spin_lock(&sd->input_pkt_queue.lock);
228 #endif 228 #endif
229 } 229 }
230 230
231 static inline void rps_unlock(struct softnet_data *sd) 231 static inline void rps_unlock(struct softnet_data *sd)
232 { 232 {
233 #ifdef CONFIG_RPS 233 #ifdef CONFIG_RPS
234 spin_unlock(&sd->input_pkt_queue.lock); 234 spin_unlock(&sd->input_pkt_queue.lock);
235 #endif 235 #endif
236 } 236 }
237 237
238 /* Device list insertion */ 238 /* Device list insertion */
239 static int list_netdevice(struct net_device *dev) 239 static int list_netdevice(struct net_device *dev)
240 { 240 {
241 struct net *net = dev_net(dev); 241 struct net *net = dev_net(dev);
242 242
243 ASSERT_RTNL(); 243 ASSERT_RTNL();
244 244
245 write_lock_bh(&dev_base_lock); 245 write_lock_bh(&dev_base_lock);
246 list_add_tail_rcu(&dev->dev_list, &net->dev_base_head); 246 list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
247 hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name)); 247 hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
248 hlist_add_head_rcu(&dev->index_hlist, 248 hlist_add_head_rcu(&dev->index_hlist,
249 dev_index_hash(net, dev->ifindex)); 249 dev_index_hash(net, dev->ifindex));
250 write_unlock_bh(&dev_base_lock); 250 write_unlock_bh(&dev_base_lock);
251 251
252 dev_base_seq_inc(net); 252 dev_base_seq_inc(net);
253 253
254 return 0; 254 return 0;
255 } 255 }
256 256
257 /* Device list removal 257 /* Device list removal
258 * caller must respect a RCU grace period before freeing/reusing dev 258 * caller must respect a RCU grace period before freeing/reusing dev
259 */ 259 */
260 static void unlist_netdevice(struct net_device *dev) 260 static void unlist_netdevice(struct net_device *dev)
261 { 261 {
262 ASSERT_RTNL(); 262 ASSERT_RTNL();
263 263
264 /* Unlink dev from the device chain */ 264 /* Unlink dev from the device chain */
265 write_lock_bh(&dev_base_lock); 265 write_lock_bh(&dev_base_lock);
266 list_del_rcu(&dev->dev_list); 266 list_del_rcu(&dev->dev_list);
267 hlist_del_rcu(&dev->name_hlist); 267 hlist_del_rcu(&dev->name_hlist);
268 hlist_del_rcu(&dev->index_hlist); 268 hlist_del_rcu(&dev->index_hlist);
269 write_unlock_bh(&dev_base_lock); 269 write_unlock_bh(&dev_base_lock);
270 270
271 dev_base_seq_inc(dev_net(dev)); 271 dev_base_seq_inc(dev_net(dev));
272 } 272 }
273 273
274 /* 274 /*
275 * Our notifier list 275 * Our notifier list
276 */ 276 */
277 277
278 static RAW_NOTIFIER_HEAD(netdev_chain); 278 static RAW_NOTIFIER_HEAD(netdev_chain);
279 279
280 /* 280 /*
281 * Device drivers call our routines to queue packets here. We empty the 281 * Device drivers call our routines to queue packets here. We empty the
282 * queue in the local softnet handler. 282 * queue in the local softnet handler.
283 */ 283 */
284 284
285 DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); 285 DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
286 EXPORT_PER_CPU_SYMBOL(softnet_data); 286 EXPORT_PER_CPU_SYMBOL(softnet_data);
287 287
288 #ifdef CONFIG_LOCKDEP 288 #ifdef CONFIG_LOCKDEP
289 /* 289 /*
290 * register_netdevice() inits txq->_xmit_lock and sets lockdep class 290 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
291 * according to dev->type 291 * according to dev->type
292 */ 292 */
293 static const unsigned short netdev_lock_type[] = 293 static const unsigned short netdev_lock_type[] =
294 {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25, 294 {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
295 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET, 295 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
296 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM, 296 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
297 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP, 297 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
298 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD, 298 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
299 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25, 299 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
300 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP, 300 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
301 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD, 301 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
302 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI, 302 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
303 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE, 303 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
304 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET, 304 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
305 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, 305 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
306 ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM, 306 ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
307 ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE, 307 ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
308 ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE}; 308 ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};
309 309
310 static const char *const netdev_lock_name[] = 310 static const char *const netdev_lock_name[] =
311 {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", 311 {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
312 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET", 312 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
313 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM", 313 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
314 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP", 314 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
315 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD", 315 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
316 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25", 316 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
317 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP", 317 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
318 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD", 318 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
319 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI", 319 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
320 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE", 320 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
321 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET", 321 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
322 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", 322 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
323 "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM", 323 "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
324 "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE", 324 "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
325 "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"}; 325 "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
326 326
327 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; 327 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
328 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; 328 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
329 329
330 static inline unsigned short netdev_lock_pos(unsigned short dev_type) 330 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
331 { 331 {
332 int i; 332 int i;
333 333
334 for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++) 334 for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
335 if (netdev_lock_type[i] == dev_type) 335 if (netdev_lock_type[i] == dev_type)
336 return i; 336 return i;
337 /* the last key is used by default */ 337 /* the last key is used by default */
338 return ARRAY_SIZE(netdev_lock_type) - 1; 338 return ARRAY_SIZE(netdev_lock_type) - 1;
339 } 339 }
340 340
341 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, 341 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
342 unsigned short dev_type) 342 unsigned short dev_type)
343 { 343 {
344 int i; 344 int i;
345 345
346 i = netdev_lock_pos(dev_type); 346 i = netdev_lock_pos(dev_type);
347 lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i], 347 lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
348 netdev_lock_name[i]); 348 netdev_lock_name[i]);
349 } 349 }
350 350
351 static inline void netdev_set_addr_lockdep_class(struct net_device *dev) 351 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
352 { 352 {
353 int i; 353 int i;
354 354
355 i = netdev_lock_pos(dev->type); 355 i = netdev_lock_pos(dev->type);
356 lockdep_set_class_and_name(&dev->addr_list_lock, 356 lockdep_set_class_and_name(&dev->addr_list_lock,
357 &netdev_addr_lock_key[i], 357 &netdev_addr_lock_key[i],
358 netdev_lock_name[i]); 358 netdev_lock_name[i]);
359 } 359 }
360 #else 360 #else
361 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, 361 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
362 unsigned short dev_type) 362 unsigned short dev_type)
363 { 363 {
364 } 364 }
365 static inline void netdev_set_addr_lockdep_class(struct net_device *dev) 365 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
366 { 366 {
367 } 367 }
368 #endif 368 #endif
369 369
370 /******************************************************************************* 370 /*******************************************************************************
371 371
372 Protocol management and registration routines 372 Protocol management and registration routines
373 373
374 *******************************************************************************/ 374 *******************************************************************************/
375 375
376 /* 376 /*
377 * Add a protocol ID to the list. Now that the input handler is 377 * Add a protocol ID to the list. Now that the input handler is
378 * smarter we can dispense with all the messy stuff that used to be 378 * smarter we can dispense with all the messy stuff that used to be
379 * here. 379 * here.
380 * 380 *
381 * BEWARE!!! Protocol handlers, mangling input packets, 381 * BEWARE!!! Protocol handlers, mangling input packets,
382 * MUST BE last in hash buckets and checking protocol handlers 382 * MUST BE last in hash buckets and checking protocol handlers
383 * MUST start from promiscuous ptype_all chain in net_bh. 383 * MUST start from promiscuous ptype_all chain in net_bh.
384 * It is true now, do not change it. 384 * It is true now, do not change it.
385 * Explanation follows: if protocol handler, mangling packet, will 385 * Explanation follows: if protocol handler, mangling packet, will
386 * be the first on list, it is not able to sense, that packet 386 * be the first on list, it is not able to sense, that packet
387 * is cloned and should be copied-on-write, so that it will 387 * is cloned and should be copied-on-write, so that it will
388 * change it and subsequent readers will get broken packet. 388 * change it and subsequent readers will get broken packet.
389 * --ANK (980803) 389 * --ANK (980803)
390 */ 390 */
391 391
392 static inline struct list_head *ptype_head(const struct packet_type *pt) 392 static inline struct list_head *ptype_head(const struct packet_type *pt)
393 { 393 {
394 if (pt->type == htons(ETH_P_ALL)) 394 if (pt->type == htons(ETH_P_ALL))
395 return &ptype_all; 395 return &ptype_all;
396 else 396 else
397 return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; 397 return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
398 } 398 }
399 399
400 /** 400 /**
401 * dev_add_pack - add packet handler 401 * dev_add_pack - add packet handler
402 * @pt: packet type declaration 402 * @pt: packet type declaration
403 * 403 *
404 * Add a protocol handler to the networking stack. The passed &packet_type 404 * Add a protocol handler to the networking stack. The passed &packet_type
405 * is linked into kernel lists and may not be freed until it has been 405 * is linked into kernel lists and may not be freed until it has been
406 * removed from the kernel lists. 406 * removed from the kernel lists.
407 * 407 *
408 * This call does not sleep therefore it can not 408 * This call does not sleep therefore it can not
409 * guarantee all CPU's that are in middle of receiving packets 409 * guarantee all CPU's that are in middle of receiving packets
410 * will see the new packet type (until the next received packet). 410 * will see the new packet type (until the next received packet).
411 */ 411 */
412 412
413 void dev_add_pack(struct packet_type *pt) 413 void dev_add_pack(struct packet_type *pt)
414 { 414 {
415 struct list_head *head = ptype_head(pt); 415 struct list_head *head = ptype_head(pt);
416 416
417 spin_lock(&ptype_lock); 417 spin_lock(&ptype_lock);
418 list_add_rcu(&pt->list, head); 418 list_add_rcu(&pt->list, head);
419 spin_unlock(&ptype_lock); 419 spin_unlock(&ptype_lock);
420 } 420 }
421 EXPORT_SYMBOL(dev_add_pack); 421 EXPORT_SYMBOL(dev_add_pack);
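The kerneldoc above covers registration; a minimal sketch of a handler that taps every protocol follows (all names and the no-op body are invented for illustration):

    /* Sketch only: a trivial ETH_P_ALL tap registered with dev_add_pack(). */
    static int demo_rcv(struct sk_buff *skb, struct net_device *dev,
                        struct packet_type *pt, struct net_device *orig_dev)
    {
            /* a real handler would inspect the packet here */
            kfree_skb(skb);
            return 0;
    }

    static struct packet_type demo_ptype __read_mostly = {
            .type = htons(ETH_P_ALL),   /* lands on the ptype_all list */
            .func = demo_rcv,
    };

    /* dev_add_pack(&demo_ptype) at module init,
     * dev_remove_pack(&demo_ptype) at module exit.
     */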
422 422
423 /** 423 /**
424 * __dev_remove_pack - remove packet handler 424 * __dev_remove_pack - remove packet handler
425 * @pt: packet type declaration 425 * @pt: packet type declaration
426 * 426 *
427 * Remove a protocol handler that was previously added to the kernel 427 * Remove a protocol handler that was previously added to the kernel
428 * protocol handlers by dev_add_pack(). The passed &packet_type is removed 428 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
429 * from the kernel lists and can be freed or reused once this function 429 * from the kernel lists and can be freed or reused once this function
430 * returns. 430 * returns.
431 * 431 *
432 * The packet type might still be in use by receivers 432 * The packet type might still be in use by receivers
433 * and must not be freed until after all the CPU's have gone 433 * and must not be freed until after all the CPU's have gone
434 * through a quiescent state. 434 * through a quiescent state.
435 */ 435 */
436 void __dev_remove_pack(struct packet_type *pt) 436 void __dev_remove_pack(struct packet_type *pt)
437 { 437 {
438 struct list_head *head = ptype_head(pt); 438 struct list_head *head = ptype_head(pt);
439 struct packet_type *pt1; 439 struct packet_type *pt1;
440 440
441 spin_lock(&ptype_lock); 441 spin_lock(&ptype_lock);
442 442
443 list_for_each_entry(pt1, head, list) { 443 list_for_each_entry(pt1, head, list) {
444 if (pt == pt1) { 444 if (pt == pt1) {
445 list_del_rcu(&pt->list); 445 list_del_rcu(&pt->list);
446 goto out; 446 goto out;
447 } 447 }
448 } 448 }
449 449
450 pr_warn("dev_remove_pack: %p not found\n", pt); 450 pr_warn("dev_remove_pack: %p not found\n", pt);
451 out: 451 out:
452 spin_unlock(&ptype_lock); 452 spin_unlock(&ptype_lock);
453 } 453 }
454 EXPORT_SYMBOL(__dev_remove_pack); 454 EXPORT_SYMBOL(__dev_remove_pack);
455 455
456 /** 456 /**
457 * dev_remove_pack - remove packet handler 457 * dev_remove_pack - remove packet handler
458 * @pt: packet type declaration 458 * @pt: packet type declaration
459 * 459 *
460 * Remove a protocol handler that was previously added to the kernel 460 * Remove a protocol handler that was previously added to the kernel
461 * protocol handlers by dev_add_pack(). The passed &packet_type is removed 461 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
462 * from the kernel lists and can be freed or reused once this function 462 * from the kernel lists and can be freed or reused once this function
463 * returns. 463 * returns.
464 * 464 *
465 * This call sleeps to guarantee that no CPU is looking at the packet 465 * This call sleeps to guarantee that no CPU is looking at the packet
466 * type after return. 466 * type after return.
467 */ 467 */
468 void dev_remove_pack(struct packet_type *pt) 468 void dev_remove_pack(struct packet_type *pt)
469 { 469 {
470 __dev_remove_pack(pt); 470 __dev_remove_pack(pt);
471 471
472 synchronize_net(); 472 synchronize_net();
473 } 473 }
474 EXPORT_SYMBOL(dev_remove_pack); 474 EXPORT_SYMBOL(dev_remove_pack);
475 475
476 476
477 /** 477 /**
478 * dev_add_offload - register offload handlers 478 * dev_add_offload - register offload handlers
479 * @po: protocol offload declaration 479 * @po: protocol offload declaration
480 * 480 *
481 * Add protocol offload handlers to the networking stack. The passed 481 * Add protocol offload handlers to the networking stack. The passed
482 * &proto_offload is linked into kernel lists and may not be freed until 482 * &proto_offload is linked into kernel lists and may not be freed until
483 * it has been removed from the kernel lists. 483 * it has been removed from the kernel lists.
484 * 484 *
485 * This call does not sleep therefore it can not 485 * This call does not sleep therefore it can not
486 * guarantee all CPU's that are in middle of receiving packets 486 * guarantee all CPU's that are in middle of receiving packets
487 * will see the new offload handlers (until the next received packet). 487 * will see the new offload handlers (until the next received packet).
488 */ 488 */
489 void dev_add_offload(struct packet_offload *po) 489 void dev_add_offload(struct packet_offload *po)
490 { 490 {
491 struct list_head *head = &offload_base; 491 struct list_head *head = &offload_base;
492 492
493 spin_lock(&offload_lock); 493 spin_lock(&offload_lock);
494 list_add_rcu(&po->list, head); 494 list_add_rcu(&po->list, head);
495 spin_unlock(&offload_lock); 495 spin_unlock(&offload_lock);
496 } 496 }
497 EXPORT_SYMBOL(dev_add_offload); 497 EXPORT_SYMBOL(dev_add_offload);
498 498
499 /** 499 /**
500 * __dev_remove_offload - remove offload handler 500 * __dev_remove_offload - remove offload handler
501 * @po: packet offload declaration 501 * @po: packet offload declaration
502 * 502 *
503 * Remove a protocol offload handler that was previously added to the 503 * Remove a protocol offload handler that was previously added to the
504 * kernel offload handlers by dev_add_offload(). The passed &offload_type 504 * kernel offload handlers by dev_add_offload(). The passed &offload_type
505 * is removed from the kernel lists and can be freed or reused once this 505 * is removed from the kernel lists and can be freed or reused once this
506 * function returns. 506 * function returns.
507 * 507 *
508 * The packet type might still be in use by receivers 508 * The packet type might still be in use by receivers
509 * and must not be freed until after all the CPU's have gone 509 * and must not be freed until after all the CPU's have gone
510 * through a quiescent state. 510 * through a quiescent state.
511 */ 511 */
512 void __dev_remove_offload(struct packet_offload *po) 512 void __dev_remove_offload(struct packet_offload *po)
513 { 513 {
514 struct list_head *head = &offload_base; 514 struct list_head *head = &offload_base;
515 struct packet_offload *po1; 515 struct packet_offload *po1;
516 516
517 spin_lock(&offload_lock); 517 spin_lock(&offload_lock);
518 518
519 list_for_each_entry(po1, head, list) { 519 list_for_each_entry(po1, head, list) {
520 if (po == po1) { 520 if (po == po1) {
521 list_del_rcu(&po->list); 521 list_del_rcu(&po->list);
522 goto out; 522 goto out;
523 } 523 }
524 } 524 }
525 525
526 pr_warn("dev_remove_offload: %p not found\n", po); 526 pr_warn("dev_remove_offload: %p not found\n", po);
527 out: 527 out:
528 spin_unlock(&offload_lock); 528 spin_unlock(&offload_lock);
529 } 529 }
530 EXPORT_SYMBOL(__dev_remove_offload); 530 EXPORT_SYMBOL(__dev_remove_offload);
531 531
532 /** 532 /**
533 * dev_remove_offload - remove packet offload handler 533 * dev_remove_offload - remove packet offload handler
534 * @po: packet offload declaration 534 * @po: packet offload declaration
535 * 535 *
536 * Remove a packet offload handler that was previously added to the kernel 536 * Remove a packet offload handler that was previously added to the kernel
537 * offload handlers by dev_add_offload(). The passed &offload_type is 537 * offload handlers by dev_add_offload(). The passed &offload_type is
538 * removed from the kernel lists and can be freed or reused once this 538 * removed from the kernel lists and can be freed or reused once this
539 * function returns. 539 * function returns.
540 * 540 *
541 * This call sleeps to guarantee that no CPU is looking at the packet 541 * This call sleeps to guarantee that no CPU is looking at the packet
542 * type after return. 542 * type after return.
543 */ 543 */
544 void dev_remove_offload(struct packet_offload *po) 544 void dev_remove_offload(struct packet_offload *po)
545 { 545 {
546 __dev_remove_offload(po); 546 __dev_remove_offload(po);
547 547
548 synchronize_net(); 548 synchronize_net();
549 } 549 }
550 EXPORT_SYMBOL(dev_remove_offload); 550 EXPORT_SYMBOL(dev_remove_offload);
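
A sketch of the offload side, assuming the struct packet_offload / offload_callbacks layout in this tree (gso_segment and friends). The ethertype and all function names are hypothetical, and the callback is a stub; a real protocol would implement segmentation (and usually GRO) for its own ethertype.

#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/err.h>

#define ETH_P_EXAMPLE 0x88B5    /* local experimental ethertype, illustration only */

static struct sk_buff *example_gso_segment(struct sk_buff *skb,
                                           netdev_features_t features)
{
        /* A real protocol would split skb into MTU-sized segments here. */
        return ERR_PTR(-EPROTONOSUPPORT);
}

static struct packet_offload example_offload __read_mostly = {
        .type = cpu_to_be16(ETH_P_EXAMPLE),
        .callbacks = {
                .gso_segment = example_gso_segment,
        },
};

static int __init example_offload_init(void)
{
        dev_add_offload(&example_offload);
        return 0;
}

static void __exit example_offload_exit(void)
{
        dev_remove_offload(&example_offload);   /* waits for a grace period */
}

module_init(example_offload_init);
module_exit(example_offload_exit);
MODULE_LICENSE("GPL");
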
551 551
552 /****************************************************************************** 552 /******************************************************************************
553 553
554 Device Boot-time Settings Routines 554 Device Boot-time Settings Routines
555 555
556 *******************************************************************************/ 556 *******************************************************************************/
557 557
558 /* Boot time configuration table */ 558 /* Boot time configuration table */
559 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX]; 559 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
560 560
561 /** 561 /**
562 * netdev_boot_setup_add - add new setup entry 562 * netdev_boot_setup_add - add new setup entry
563 * @name: name of the device 563 * @name: name of the device
564 * @map: configured settings for the device 564 * @map: configured settings for the device
565 * 565 *
566 * Adds new setup entry to the dev_boot_setup list. The function 566 * Adds new setup entry to the dev_boot_setup list. The function
567 * returns 0 on error and 1 on success. This is a generic routine to 567 * returns 0 on error and 1 on success. This is a generic routine to
568 * all netdevices. 568 * all netdevices.
569 */ 569 */
570 static int netdev_boot_setup_add(char *name, struct ifmap *map) 570 static int netdev_boot_setup_add(char *name, struct ifmap *map)
571 { 571 {
572 struct netdev_boot_setup *s; 572 struct netdev_boot_setup *s;
573 int i; 573 int i;
574 574
575 s = dev_boot_setup; 575 s = dev_boot_setup;
576 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { 576 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
577 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { 577 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
578 memset(s[i].name, 0, sizeof(s[i].name)); 578 memset(s[i].name, 0, sizeof(s[i].name));
579 strlcpy(s[i].name, name, IFNAMSIZ); 579 strlcpy(s[i].name, name, IFNAMSIZ);
580 memcpy(&s[i].map, map, sizeof(s[i].map)); 580 memcpy(&s[i].map, map, sizeof(s[i].map));
581 break; 581 break;
582 } 582 }
583 } 583 }
584 584
585 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1; 585 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
586 } 586 }
587 587
588 /** 588 /**
589 * netdev_boot_setup_check - check boot time settings 589 * netdev_boot_setup_check - check boot time settings
590 * @dev: the netdevice 590 * @dev: the netdevice
591 * 591 *
592 * Check boot time settings for the device. 592 * Check boot time settings for the device.
593 * The found settings are set for the device to be used 593 * The found settings are set for the device to be used
594 * later in the device probing. 594 * later in the device probing.
595 * Returns 0 if no settings found, 1 if they are. 595 * Returns 0 if no settings found, 1 if they are.
596 */ 596 */
597 int netdev_boot_setup_check(struct net_device *dev) 597 int netdev_boot_setup_check(struct net_device *dev)
598 { 598 {
599 struct netdev_boot_setup *s = dev_boot_setup; 599 struct netdev_boot_setup *s = dev_boot_setup;
600 int i; 600 int i;
601 601
602 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { 602 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
603 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && 603 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
604 !strcmp(dev->name, s[i].name)) { 604 !strcmp(dev->name, s[i].name)) {
605 dev->irq = s[i].map.irq; 605 dev->irq = s[i].map.irq;
606 dev->base_addr = s[i].map.base_addr; 606 dev->base_addr = s[i].map.base_addr;
607 dev->mem_start = s[i].map.mem_start; 607 dev->mem_start = s[i].map.mem_start;
608 dev->mem_end = s[i].map.mem_end; 608 dev->mem_end = s[i].map.mem_end;
609 return 1; 609 return 1;
610 } 610 }
611 } 611 }
612 return 0; 612 return 0;
613 } 613 }
614 EXPORT_SYMBOL(netdev_boot_setup_check); 614 EXPORT_SYMBOL(netdev_boot_setup_check);
615 615
616 616
617 /** 617 /**
618 * netdev_boot_base - get address from boot time settings 618 * netdev_boot_base - get address from boot time settings
619 * @prefix: prefix for network device 619 * @prefix: prefix for network device
620 * @unit: id for network device 620 * @unit: id for network device
621 * 621 *
622 * Check boot time settings for the base address of device. 622 * Check boot time settings for the base address of device.
623 * The found settings are set for the device to be used 623 * The found settings are set for the device to be used
624 * later in the device probing. 624 * later in the device probing.
625 * Returns 0 if no settings found. 625 * Returns 0 if no settings found.
626 */ 626 */
627 unsigned long netdev_boot_base(const char *prefix, int unit) 627 unsigned long netdev_boot_base(const char *prefix, int unit)
628 { 628 {
629 const struct netdev_boot_setup *s = dev_boot_setup; 629 const struct netdev_boot_setup *s = dev_boot_setup;
630 char name[IFNAMSIZ]; 630 char name[IFNAMSIZ];
631 int i; 631 int i;
632 632
633 sprintf(name, "%s%d", prefix, unit); 633 sprintf(name, "%s%d", prefix, unit);
634 634
635 /* 635 /*
636 * If device already registered then return base of 1 636 * If device already registered then return base of 1
637 * to indicate not to probe for this interface 637 * to indicate not to probe for this interface
638 */ 638 */
639 if (__dev_get_by_name(&init_net, name)) 639 if (__dev_get_by_name(&init_net, name))
640 return 1; 640 return 1;
641 641
642 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) 642 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
643 if (!strcmp(name, s[i].name)) 643 if (!strcmp(name, s[i].name))
644 return s[i].map.base_addr; 644 return s[i].map.base_addr;
645 return 0; 645 return 0;
646 } 646 }
647 647
648 /* 648 /*
649 * Saves at boot time configured settings for any netdevice. 649 * Saves at boot time configured settings for any netdevice.
650 */ 650 */
651 int __init netdev_boot_setup(char *str) 651 int __init netdev_boot_setup(char *str)
652 { 652 {
653 int ints[5]; 653 int ints[5];
654 struct ifmap map; 654 struct ifmap map;
655 655
656 str = get_options(str, ARRAY_SIZE(ints), ints); 656 str = get_options(str, ARRAY_SIZE(ints), ints);
657 if (!str || !*str) 657 if (!str || !*str)
658 return 0; 658 return 0;
659 659
660 /* Save settings */ 660 /* Save settings */
661 memset(&map, 0, sizeof(map)); 661 memset(&map, 0, sizeof(map));
662 if (ints[0] > 0) 662 if (ints[0] > 0)
663 map.irq = ints[1]; 663 map.irq = ints[1];
664 if (ints[0] > 1) 664 if (ints[0] > 1)
665 map.base_addr = ints[2]; 665 map.base_addr = ints[2];
666 if (ints[0] > 2) 666 if (ints[0] > 2)
667 map.mem_start = ints[3]; 667 map.mem_start = ints[3];
668 if (ints[0] > 3) 668 if (ints[0] > 3)
669 map.mem_end = ints[4]; 669 map.mem_end = ints[4];
670 670
671 /* Add new entry to the list */ 671 /* Add new entry to the list */
672 return netdev_boot_setup_add(str, &map); 672 return netdev_boot_setup_add(str, &map);
673 } 673 }
674 674
675 __setup("netdev=", netdev_boot_setup); 675 __setup("netdev=", netdev_boot_setup);
676 676
677 /******************************************************************************* 677 /*******************************************************************************
678 678
679 Device Interface Subroutines 679 Device Interface Subroutines
680 680
681 *******************************************************************************/ 681 *******************************************************************************/
682 682
683 /** 683 /**
684 * __dev_get_by_name - find a device by its name 684 * __dev_get_by_name - find a device by its name
685 * @net: the applicable net namespace 685 * @net: the applicable net namespace
686 * @name: name to find 686 * @name: name to find
687 * 687 *
688 * Find an interface by name. Must be called under RTNL semaphore 688 * Find an interface by name. Must be called under RTNL semaphore
689 * or @dev_base_lock. If the name is found a pointer to the device 689 * or @dev_base_lock. If the name is found a pointer to the device
690 * is returned. If the name is not found then %NULL is returned. The 690 * is returned. If the name is not found then %NULL is returned. The
691 * reference counters are not incremented so the caller must be 691 * reference counters are not incremented so the caller must be
692 * careful with locks. 692 * careful with locks.
693 */ 693 */
694 694
695 struct net_device *__dev_get_by_name(struct net *net, const char *name) 695 struct net_device *__dev_get_by_name(struct net *net, const char *name)
696 { 696 {
697 struct hlist_node *p; 697 struct hlist_node *p;
698 struct net_device *dev; 698 struct net_device *dev;
699 struct hlist_head *head = dev_name_hash(net, name); 699 struct hlist_head *head = dev_name_hash(net, name);
700 700
701 hlist_for_each_entry(dev, p, head, name_hlist) 701 hlist_for_each_entry(dev, p, head, name_hlist)
702 if (!strncmp(dev->name, name, IFNAMSIZ)) 702 if (!strncmp(dev->name, name, IFNAMSIZ))
703 return dev; 703 return dev;
704 704
705 return NULL; 705 return NULL;
706 } 706 }
707 EXPORT_SYMBOL(__dev_get_by_name); 707 EXPORT_SYMBOL(__dev_get_by_name);
708 708
709 /** 709 /**
710 * dev_get_by_name_rcu - find a device by its name 710 * dev_get_by_name_rcu - find a device by its name
711 * @net: the applicable net namespace 711 * @net: the applicable net namespace
712 * @name: name to find 712 * @name: name to find
713 * 713 *
714 * Find an interface by name. 714 * Find an interface by name.
715 * If the name is found a pointer to the device is returned. 715 * If the name is found a pointer to the device is returned.
716 * If the name is not found then %NULL is returned. 716 * If the name is not found then %NULL is returned.
717 * The reference counters are not incremented so the caller must be 717 * The reference counters are not incremented so the caller must be
718 * careful with locks. The caller must hold RCU lock. 718 * careful with locks. The caller must hold RCU lock.
719 */ 719 */
720 720
721 struct net_device *dev_get_by_name_rcu(struct net *net, const char *name) 721 struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
722 { 722 {
723 struct hlist_node *p; 723 struct hlist_node *p;
724 struct net_device *dev; 724 struct net_device *dev;
725 struct hlist_head *head = dev_name_hash(net, name); 725 struct hlist_head *head = dev_name_hash(net, name);
726 726
727 hlist_for_each_entry_rcu(dev, p, head, name_hlist) 727 hlist_for_each_entry_rcu(dev, p, head, name_hlist)
728 if (!strncmp(dev->name, name, IFNAMSIZ)) 728 if (!strncmp(dev->name, name, IFNAMSIZ))
729 return dev; 729 return dev;
730 730
731 return NULL; 731 return NULL;
732 } 732 }
733 EXPORT_SYMBOL(dev_get_by_name_rcu); 733 EXPORT_SYMBOL(dev_get_by_name_rcu);
734 734
735 /** 735 /**
736 * dev_get_by_name - find a device by its name 736 * dev_get_by_name - find a device by its name
737 * @net: the applicable net namespace 737 * @net: the applicable net namespace
738 * @name: name to find 738 * @name: name to find
739 * 739 *
740 * Find an interface by name. This can be called from any 740 * Find an interface by name. This can be called from any
741 * context and does its own locking. The returned handle has 741 * context and does its own locking. The returned handle has
742 * the usage count incremented and the caller must use dev_put() to 742 * the usage count incremented and the caller must use dev_put() to
743 * release it when it is no longer needed. %NULL is returned if no 743 * release it when it is no longer needed. %NULL is returned if no
744 * matching device is found. 744 * matching device is found.
745 */ 745 */
746 746
747 struct net_device *dev_get_by_name(struct net *net, const char *name) 747 struct net_device *dev_get_by_name(struct net *net, const char *name)
748 { 748 {
749 struct net_device *dev; 749 struct net_device *dev;
750 750
751 rcu_read_lock(); 751 rcu_read_lock();
752 dev = dev_get_by_name_rcu(net, name); 752 dev = dev_get_by_name_rcu(net, name);
753 if (dev) 753 if (dev)
754 dev_hold(dev); 754 dev_hold(dev);
755 rcu_read_unlock(); 755 rcu_read_unlock();
756 return dev; 756 return dev;
757 } 757 }
758 EXPORT_SYMBOL(dev_get_by_name); 758 EXPORT_SYMBOL(dev_get_by_name);
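
The three name-lookup variants above differ only in locking and reference counting. A sketch, assuming init_net and a hypothetical caller for brevity:

#include <linux/netdevice.h>
#include <linux/rcupdate.h>
#include <net/net_namespace.h>

static void name_lookup_example(void)
{
        struct net_device *dev;

        /* Lock-free lookup: the pointer is only valid inside the RCU section. */
        rcu_read_lock();
        dev = dev_get_by_name_rcu(&init_net, "eth0");
        if (dev)
                pr_info("found %s, ifindex %d\n", dev->name, dev->ifindex);
        rcu_read_unlock();

        /* Refcounted lookup: usable from any context, must be balanced. */
        dev = dev_get_by_name(&init_net, "eth0");
        if (dev) {
                /* ... use dev ... */
                dev_put(dev);
        }
}
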
759 759
760 /** 760 /**
761 * __dev_get_by_index - find a device by its ifindex 761 * __dev_get_by_index - find a device by its ifindex
762 * @net: the applicable net namespace 762 * @net: the applicable net namespace
763 * @ifindex: index of device 763 * @ifindex: index of device
764 * 764 *
765 * Search for an interface by index. Returns %NULL if the device 765 * Search for an interface by index. Returns %NULL if the device
766 * is not found or a pointer to the device. The device has not 766 * is not found or a pointer to the device. The device has not
767 * had its reference counter increased so the caller must be careful 767 * had its reference counter increased so the caller must be careful
768 * about locking. The caller must hold either the RTNL semaphore 768 * about locking. The caller must hold either the RTNL semaphore
769 * or @dev_base_lock. 769 * or @dev_base_lock.
770 */ 770 */
771 771
772 struct net_device *__dev_get_by_index(struct net *net, int ifindex) 772 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
773 { 773 {
774 struct hlist_node *p; 774 struct hlist_node *p;
775 struct net_device *dev; 775 struct net_device *dev;
776 struct hlist_head *head = dev_index_hash(net, ifindex); 776 struct hlist_head *head = dev_index_hash(net, ifindex);
777 777
778 hlist_for_each_entry(dev, p, head, index_hlist) 778 hlist_for_each_entry(dev, p, head, index_hlist)
779 if (dev->ifindex == ifindex) 779 if (dev->ifindex == ifindex)
780 return dev; 780 return dev;
781 781
782 return NULL; 782 return NULL;
783 } 783 }
784 EXPORT_SYMBOL(__dev_get_by_index); 784 EXPORT_SYMBOL(__dev_get_by_index);
785 785
786 /** 786 /**
787 * dev_get_by_index_rcu - find a device by its ifindex 787 * dev_get_by_index_rcu - find a device by its ifindex
788 * @net: the applicable net namespace 788 * @net: the applicable net namespace
789 * @ifindex: index of device 789 * @ifindex: index of device
790 * 790 *
791 * Search for an interface by index. Returns %NULL if the device 791 * Search for an interface by index. Returns %NULL if the device
792 * is not found or a pointer to the device. The device has not 792 * is not found or a pointer to the device. The device has not
793 * had its reference counter increased so the caller must be careful 793 * had its reference counter increased so the caller must be careful
794 * about locking. The caller must hold RCU lock. 794 * about locking. The caller must hold RCU lock.
795 */ 795 */
796 796
797 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex) 797 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
798 { 798 {
799 struct hlist_node *p; 799 struct hlist_node *p;
800 struct net_device *dev; 800 struct net_device *dev;
801 struct hlist_head *head = dev_index_hash(net, ifindex); 801 struct hlist_head *head = dev_index_hash(net, ifindex);
802 802
803 hlist_for_each_entry_rcu(dev, p, head, index_hlist) 803 hlist_for_each_entry_rcu(dev, p, head, index_hlist)
804 if (dev->ifindex == ifindex) 804 if (dev->ifindex == ifindex)
805 return dev; 805 return dev;
806 806
807 return NULL; 807 return NULL;
808 } 808 }
809 EXPORT_SYMBOL(dev_get_by_index_rcu); 809 EXPORT_SYMBOL(dev_get_by_index_rcu);
810 810
811 811
812 /** 812 /**
813 * dev_get_by_index - find a device by its ifindex 813 * dev_get_by_index - find a device by its ifindex
814 * @net: the applicable net namespace 814 * @net: the applicable net namespace
815 * @ifindex: index of device 815 * @ifindex: index of device
816 * 816 *
817 * Search for an interface by index. Returns NULL if the device 817 * Search for an interface by index. Returns NULL if the device
818 * is not found or a pointer to the device. The device returned has 818 * is not found or a pointer to the device. The device returned has
819 * had a reference added and the pointer is safe until the user calls 819 * had a reference added and the pointer is safe until the user calls
820 * dev_put to indicate they have finished with it. 820 * dev_put to indicate they have finished with it.
821 */ 821 */
822 822
823 struct net_device *dev_get_by_index(struct net *net, int ifindex) 823 struct net_device *dev_get_by_index(struct net *net, int ifindex)
824 { 824 {
825 struct net_device *dev; 825 struct net_device *dev;
826 826
827 rcu_read_lock(); 827 rcu_read_lock();
828 dev = dev_get_by_index_rcu(net, ifindex); 828 dev = dev_get_by_index_rcu(net, ifindex);
829 if (dev) 829 if (dev)
830 dev_hold(dev); 830 dev_hold(dev);
831 rcu_read_unlock(); 831 rcu_read_unlock();
832 return dev; 832 return dev;
833 } 833 }
834 EXPORT_SYMBOL(dev_get_by_index); 834 EXPORT_SYMBOL(dev_get_by_index);
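
The ifindex lookups follow the same pattern; which variant to use depends on what the caller already holds. A brief sketch with hypothetical callers:

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

/* Caller already holds RTNL: no reference needed, RTNL pins the device. */
static struct net_device *lookup_under_rtnl(struct net *net, int ifindex)
{
        ASSERT_RTNL();
        return __dev_get_by_index(net, ifindex);
}

/* No lock held: take a reference and drop it when done. */
static void lookup_with_ref(struct net *net, int ifindex)
{
        struct net_device *dev = dev_get_by_index(net, ifindex);

        if (!dev)
                return;
        /* ... use dev ... */
        dev_put(dev);
}
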
835 835
836 /** 836 /**
837 * dev_getbyhwaddr_rcu - find a device by its hardware address 837 * dev_getbyhwaddr_rcu - find a device by its hardware address
838 * @net: the applicable net namespace 838 * @net: the applicable net namespace
839 * @type: media type of device 839 * @type: media type of device
840 * @ha: hardware address 840 * @ha: hardware address
841 * 841 *
842 * Search for an interface by MAC address. Returns NULL if the device 842 * Search for an interface by MAC address. Returns NULL if the device
843 * is not found or a pointer to the device. 843 * is not found or a pointer to the device.
844 * The caller must hold RCU or RTNL. 844 * The caller must hold RCU or RTNL.
845 * The returned device has not had its ref count increased 845 * The returned device has not had its ref count increased
846 * and the caller must therefore be careful about locking 846 * and the caller must therefore be careful about locking
847 * 847 *
848 */ 848 */
849 849
850 struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, 850 struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
851 const char *ha) 851 const char *ha)
852 { 852 {
853 struct net_device *dev; 853 struct net_device *dev;
854 854
855 for_each_netdev_rcu(net, dev) 855 for_each_netdev_rcu(net, dev)
856 if (dev->type == type && 856 if (dev->type == type &&
857 !memcmp(dev->dev_addr, ha, dev->addr_len)) 857 !memcmp(dev->dev_addr, ha, dev->addr_len))
858 return dev; 858 return dev;
859 859
860 return NULL; 860 return NULL;
861 } 861 }
862 EXPORT_SYMBOL(dev_getbyhwaddr_rcu); 862 EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
863 863
864 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) 864 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
865 { 865 {
866 struct net_device *dev; 866 struct net_device *dev;
867 867
868 ASSERT_RTNL(); 868 ASSERT_RTNL();
869 for_each_netdev(net, dev) 869 for_each_netdev(net, dev)
870 if (dev->type == type) 870 if (dev->type == type)
871 return dev; 871 return dev;
872 872
873 return NULL; 873 return NULL;
874 } 874 }
875 EXPORT_SYMBOL(__dev_getfirstbyhwtype); 875 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
876 876
877 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) 877 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
878 { 878 {
879 struct net_device *dev, *ret = NULL; 879 struct net_device *dev, *ret = NULL;
880 880
881 rcu_read_lock(); 881 rcu_read_lock();
882 for_each_netdev_rcu(net, dev) 882 for_each_netdev_rcu(net, dev)
883 if (dev->type == type) { 883 if (dev->type == type) {
884 dev_hold(dev); 884 dev_hold(dev);
885 ret = dev; 885 ret = dev;
886 break; 886 break;
887 } 887 }
888 rcu_read_unlock(); 888 rcu_read_unlock();
889 return ret; 889 return ret;
890 } 890 }
891 EXPORT_SYMBOL(dev_getfirstbyhwtype); 891 EXPORT_SYMBOL(dev_getfirstbyhwtype);
892 892
893 /** 893 /**
894 * dev_get_by_flags_rcu - find any device with given flags 894 * dev_get_by_flags_rcu - find any device with given flags
895 * @net: the applicable net namespace 895 * @net: the applicable net namespace
896 * @if_flags: IFF_* values 896 * @if_flags: IFF_* values
897 * @mask: bitmask of bits in if_flags to check 897 * @mask: bitmask of bits in if_flags to check
898 * 898 *
899 * Search for any interface with the given flags. Returns NULL if a device 899 * Search for any interface with the given flags. Returns NULL if a device
900 * is not found or a pointer to the device. Must be called inside 900 * is not found or a pointer to the device. Must be called inside
901 * rcu_read_lock(), and result refcount is unchanged. 901 * rcu_read_lock(), and result refcount is unchanged.
902 */ 902 */
903 903
904 struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags, 904 struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
905 unsigned short mask) 905 unsigned short mask)
906 { 906 {
907 struct net_device *dev, *ret; 907 struct net_device *dev, *ret;
908 908
909 ret = NULL; 909 ret = NULL;
910 for_each_netdev_rcu(net, dev) { 910 for_each_netdev_rcu(net, dev) {
911 if (((dev->flags ^ if_flags) & mask) == 0) { 911 if (((dev->flags ^ if_flags) & mask) == 0) {
912 ret = dev; 912 ret = dev;
913 break; 913 break;
914 } 914 }
915 } 915 }
916 return ret; 916 return ret;
917 } 917 }
918 EXPORT_SYMBOL(dev_get_by_flags_rcu); 918 EXPORT_SYMBOL(dev_get_by_flags_rcu);
919 919
920 /** 920 /**
921 * dev_valid_name - check if name is okay for network device 921 * dev_valid_name - check if name is okay for network device
922 * @name: name string 922 * @name: name string
923 * 923 *
924 * Network device names need to be valid file names to 924 * Network device names need to be valid file names to
925 * allow sysfs to work. We also disallow any kind of 925 * allow sysfs to work. We also disallow any kind of
926 * whitespace. 926 * whitespace.
927 */ 927 */
928 bool dev_valid_name(const char *name) 928 bool dev_valid_name(const char *name)
929 { 929 {
930 if (*name == '\0') 930 if (*name == '\0')
931 return false; 931 return false;
932 if (strlen(name) >= IFNAMSIZ) 932 if (strlen(name) >= IFNAMSIZ)
933 return false; 933 return false;
934 if (!strcmp(name, ".") || !strcmp(name, "..")) 934 if (!strcmp(name, ".") || !strcmp(name, ".."))
935 return false; 935 return false;
936 936
937 while (*name) { 937 while (*name) {
938 if (*name == '/' || isspace(*name)) 938 if (*name == '/' || isspace(*name))
939 return false; 939 return false;
940 name++; 940 name++;
941 } 941 }
942 return true; 942 return true;
943 } 943 }
944 EXPORT_SYMBOL(dev_valid_name); 944 EXPORT_SYMBOL(dev_valid_name);
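
A few concrete cases for the rules dev_valid_name() enforces (empty names, names of IFNAMSIZ or more characters, ".", "..", '/' and whitespace are all rejected); the wrapper function is purely illustrative:

#include <linux/netdevice.h>

static void __init name_rules_example(void)
{
        WARN_ON(!dev_valid_name("eth0"));                     /* accepted */
        WARN_ON(dev_valid_name(""));                          /* empty: rejected */
        WARN_ON(dev_valid_name("."));                         /* "." / "..": rejected */
        WARN_ON(dev_valid_name("uplink/0"));                  /* '/': rejected */
        WARN_ON(dev_valid_name("not a name"));                /* whitespace: rejected */
        WARN_ON(dev_valid_name("this_name_is_far_too_long")); /* >= IFNAMSIZ (16) */
}
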
945 945
946 /** 946 /**
947 * __dev_alloc_name - allocate a name for a device 947 * __dev_alloc_name - allocate a name for a device
948 * @net: network namespace to allocate the device name in 948 * @net: network namespace to allocate the device name in
949 * @name: name format string 949 * @name: name format string
950 * @buf: scratch buffer and result name string 950 * @buf: scratch buffer and result name string
951 * 951 *
952 * Passed a format string - eg "lt%d" it will try and find a suitable 952 * Passed a format string - eg "lt%d" it will try and find a suitable
953 * id. It scans list of devices to build up a free map, then chooses 953 * id. It scans list of devices to build up a free map, then chooses
954 * the first empty slot. The caller must hold the dev_base or rtnl lock 954 * the first empty slot. The caller must hold the dev_base or rtnl lock
955 * while allocating the name and adding the device in order to avoid 955 * while allocating the name and adding the device in order to avoid
956 * duplicates. 956 * duplicates.
957 * Limited to bits_per_byte * page size devices (ie 32K on most platforms). 957 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
958 * Returns the number of the unit assigned or a negative errno code. 958 * Returns the number of the unit assigned or a negative errno code.
959 */ 959 */
960 960
961 static int __dev_alloc_name(struct net *net, const char *name, char *buf) 961 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
962 { 962 {
963 int i = 0; 963 int i = 0;
964 const char *p; 964 const char *p;
965 const int max_netdevices = 8*PAGE_SIZE; 965 const int max_netdevices = 8*PAGE_SIZE;
966 unsigned long *inuse; 966 unsigned long *inuse;
967 struct net_device *d; 967 struct net_device *d;
968 968
969 p = strnchr(name, IFNAMSIZ-1, '%'); 969 p = strnchr(name, IFNAMSIZ-1, '%');
970 if (p) { 970 if (p) {
971 /* 971 /*
972 * Verify the string as this thing may have come from 972 * Verify the string as this thing may have come from
973 * the user. There must be either one "%d" and no other "%" 973 * the user. There must be either one "%d" and no other "%"
974 * characters. 974 * characters.
975 */ 975 */
976 if (p[1] != 'd' || strchr(p + 2, '%')) 976 if (p[1] != 'd' || strchr(p + 2, '%'))
977 return -EINVAL; 977 return -EINVAL;
978 978
979 /* Use one page as a bit array of possible slots */ 979 /* Use one page as a bit array of possible slots */
980 inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC); 980 inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
981 if (!inuse) 981 if (!inuse)
982 return -ENOMEM; 982 return -ENOMEM;
983 983
984 for_each_netdev(net, d) { 984 for_each_netdev(net, d) {
985 if (!sscanf(d->name, name, &i)) 985 if (!sscanf(d->name, name, &i))
986 continue; 986 continue;
987 if (i < 0 || i >= max_netdevices) 987 if (i < 0 || i >= max_netdevices)
988 continue; 988 continue;
989 989
990 /* avoid cases where sscanf is not exact inverse of printf */ 990 /* avoid cases where sscanf is not exact inverse of printf */
991 snprintf(buf, IFNAMSIZ, name, i); 991 snprintf(buf, IFNAMSIZ, name, i);
992 if (!strncmp(buf, d->name, IFNAMSIZ)) 992 if (!strncmp(buf, d->name, IFNAMSIZ))
993 set_bit(i, inuse); 993 set_bit(i, inuse);
994 } 994 }
995 995
996 i = find_first_zero_bit(inuse, max_netdevices); 996 i = find_first_zero_bit(inuse, max_netdevices);
997 free_page((unsigned long) inuse); 997 free_page((unsigned long) inuse);
998 } 998 }
999 999
1000 if (buf != name) 1000 if (buf != name)
1001 snprintf(buf, IFNAMSIZ, name, i); 1001 snprintf(buf, IFNAMSIZ, name, i);
1002 if (!__dev_get_by_name(net, buf)) 1002 if (!__dev_get_by_name(net, buf))
1003 return i; 1003 return i;
1004 1004
1005 /* It is possible to run out of possible slots 1005 /* It is possible to run out of possible slots
1006 * when the name is long and there isn't enough space left 1006 * when the name is long and there isn't enough space left
1007 * for the digits, or if all bits are used. 1007 * for the digits, or if all bits are used.
1008 */ 1008 */
1009 return -ENFILE; 1009 return -ENFILE;
1010 } 1010 }
1011 1011
1012 /** 1012 /**
1013 * dev_alloc_name - allocate a name for a device 1013 * dev_alloc_name - allocate a name for a device
1014 * @dev: device 1014 * @dev: device
1015 * @name: name format string 1015 * @name: name format string
1016 * 1016 *
1017 * Passed a format string - eg "lt%d" it will try and find a suitable 1017 * Passed a format string - eg "lt%d" it will try and find a suitable
1018 * id. It scans list of devices to build up a free map, then chooses 1018 * id. It scans list of devices to build up a free map, then chooses
1019 * the first empty slot. The caller must hold the dev_base or rtnl lock 1019 * the first empty slot. The caller must hold the dev_base or rtnl lock
1020 * while allocating the name and adding the device in order to avoid 1020 * while allocating the name and adding the device in order to avoid
1021 * duplicates. 1021 * duplicates.
1022 * Limited to bits_per_byte * page size devices (ie 32K on most platforms). 1022 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
1023 * Returns the number of the unit assigned or a negative errno code. 1023 * Returns the number of the unit assigned or a negative errno code.
1024 */ 1024 */
1025 1025
1026 int dev_alloc_name(struct net_device *dev, const char *name) 1026 int dev_alloc_name(struct net_device *dev, const char *name)
1027 { 1027 {
1028 char buf[IFNAMSIZ]; 1028 char buf[IFNAMSIZ];
1029 struct net *net; 1029 struct net *net;
1030 int ret; 1030 int ret;
1031 1031
1032 BUG_ON(!dev_net(dev)); 1032 BUG_ON(!dev_net(dev));
1033 net = dev_net(dev); 1033 net = dev_net(dev);
1034 ret = __dev_alloc_name(net, name, buf); 1034 ret = __dev_alloc_name(net, name, buf);
1035 if (ret >= 0) 1035 if (ret >= 0)
1036 strlcpy(dev->name, buf, IFNAMSIZ); 1036 strlcpy(dev->name, buf, IFNAMSIZ);
1037 return ret; 1037 return ret;
1038 } 1038 }
1039 EXPORT_SYMBOL(dev_alloc_name); 1039 EXPORT_SYMBOL(dev_alloc_name);
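
Most drivers reach this behaviour indirectly by handing a format such as "eth%d" to the registration path, but the helper can also be called directly. A sketch with hypothetical names, holding RTNL as the comment above requires:

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static int pick_name_example(struct net_device *dev)
{
        int unit;

        rtnl_lock();
        unit = dev_alloc_name(dev, "mydev%d");  /* fills dev->name, e.g. "mydev0" */
        rtnl_unlock();

        if (unit < 0)
                return unit;    /* -EINVAL or -ENFILE as documented above */

        pr_info("assigned unit %d -> %s\n", unit, dev->name);
        return 0;
}
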
1040 1040
1041 static int dev_alloc_name_ns(struct net *net, 1041 static int dev_alloc_name_ns(struct net *net,
1042 struct net_device *dev, 1042 struct net_device *dev,
1043 const char *name) 1043 const char *name)
1044 { 1044 {
1045 char buf[IFNAMSIZ]; 1045 char buf[IFNAMSIZ];
1046 int ret; 1046 int ret;
1047 1047
1048 ret = __dev_alloc_name(net, name, buf); 1048 ret = __dev_alloc_name(net, name, buf);
1049 if (ret >= 0) 1049 if (ret >= 0)
1050 strlcpy(dev->name, buf, IFNAMSIZ); 1050 strlcpy(dev->name, buf, IFNAMSIZ);
1051 return ret; 1051 return ret;
1052 } 1052 }
1053 1053
1054 static int dev_get_valid_name(struct net *net, 1054 static int dev_get_valid_name(struct net *net,
1055 struct net_device *dev, 1055 struct net_device *dev,
1056 const char *name) 1056 const char *name)
1057 { 1057 {
1058 BUG_ON(!net); 1058 BUG_ON(!net);
1059 1059
1060 if (!dev_valid_name(name)) 1060 if (!dev_valid_name(name))
1061 return -EINVAL; 1061 return -EINVAL;
1062 1062
1063 if (strchr(name, '%')) 1063 if (strchr(name, '%'))
1064 return dev_alloc_name_ns(net, dev, name); 1064 return dev_alloc_name_ns(net, dev, name);
1065 else if (__dev_get_by_name(net, name)) 1065 else if (__dev_get_by_name(net, name))
1066 return -EEXIST; 1066 return -EEXIST;
1067 else if (dev->name != name) 1067 else if (dev->name != name)
1068 strlcpy(dev->name, name, IFNAMSIZ); 1068 strlcpy(dev->name, name, IFNAMSIZ);
1069 1069
1070 return 0; 1070 return 0;
1071 } 1071 }
1072 1072
1073 /** 1073 /**
1074 * dev_change_name - change name of a device 1074 * dev_change_name - change name of a device
1075 * @dev: device 1075 * @dev: device
1076 * @newname: name (or format string) must be at least IFNAMSIZ 1076 * @newname: name (or format string) must be at least IFNAMSIZ
1077 * 1077 *
1078 * Change name of a device, can pass format strings "eth%d". 1078 * Change name of a device, can pass format strings "eth%d".
1079 * for wildcarding. 1079 * for wildcarding.
1080 */ 1080 */
1081 int dev_change_name(struct net_device *dev, const char *newname) 1081 int dev_change_name(struct net_device *dev, const char *newname)
1082 { 1082 {
1083 char oldname[IFNAMSIZ]; 1083 char oldname[IFNAMSIZ];
1084 int err = 0; 1084 int err = 0;
1085 int ret; 1085 int ret;
1086 struct net *net; 1086 struct net *net;
1087 1087
1088 ASSERT_RTNL(); 1088 ASSERT_RTNL();
1089 BUG_ON(!dev_net(dev)); 1089 BUG_ON(!dev_net(dev));
1090 1090
1091 net = dev_net(dev); 1091 net = dev_net(dev);
1092 if (dev->flags & IFF_UP) 1092 if (dev->flags & IFF_UP)
1093 return -EBUSY; 1093 return -EBUSY;
1094 1094
1095 write_seqcount_begin(&devnet_rename_seq); 1095 write_seqcount_begin(&devnet_rename_seq);
1096 1096
1097 if (strncmp(newname, dev->name, IFNAMSIZ) == 0) { 1097 if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
1098 write_seqcount_end(&devnet_rename_seq); 1098 write_seqcount_end(&devnet_rename_seq);
1099 return 0; 1099 return 0;
1100 } 1100 }
1101 1101
1102 memcpy(oldname, dev->name, IFNAMSIZ); 1102 memcpy(oldname, dev->name, IFNAMSIZ);
1103 1103
1104 err = dev_get_valid_name(net, dev, newname); 1104 err = dev_get_valid_name(net, dev, newname);
1105 if (err < 0) { 1105 if (err < 0) {
1106 write_seqcount_end(&devnet_rename_seq); 1106 write_seqcount_end(&devnet_rename_seq);
1107 return err; 1107 return err;
1108 } 1108 }
1109 1109
1110 rollback: 1110 rollback:
1111 ret = device_rename(&dev->dev, dev->name); 1111 ret = device_rename(&dev->dev, dev->name);
1112 if (ret) { 1112 if (ret) {
1113 memcpy(dev->name, oldname, IFNAMSIZ); 1113 memcpy(dev->name, oldname, IFNAMSIZ);
1114 write_seqcount_end(&devnet_rename_seq); 1114 write_seqcount_end(&devnet_rename_seq);
1115 return ret; 1115 return ret;
1116 } 1116 }
1117 1117
1118 write_seqcount_end(&devnet_rename_seq); 1118 write_seqcount_end(&devnet_rename_seq);
1119 1119
1120 write_lock_bh(&dev_base_lock); 1120 write_lock_bh(&dev_base_lock);
1121 hlist_del_rcu(&dev->name_hlist); 1121 hlist_del_rcu(&dev->name_hlist);
1122 write_unlock_bh(&dev_base_lock); 1122 write_unlock_bh(&dev_base_lock);
1123 1123
1124 synchronize_rcu(); 1124 synchronize_rcu();
1125 1125
1126 write_lock_bh(&dev_base_lock); 1126 write_lock_bh(&dev_base_lock);
1127 hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name)); 1127 hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
1128 write_unlock_bh(&dev_base_lock); 1128 write_unlock_bh(&dev_base_lock);
1129 1129
1130 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev); 1130 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
1131 ret = notifier_to_errno(ret); 1131 ret = notifier_to_errno(ret);
1132 1132
1133 if (ret) { 1133 if (ret) {
1134 /* err >= 0 after dev_alloc_name() or stores the first errno */ 1134 /* err >= 0 after dev_alloc_name() or stores the first errno */
1135 if (err >= 0) { 1135 if (err >= 0) {
1136 err = ret; 1136 err = ret;
1137 write_seqcount_begin(&devnet_rename_seq); 1137 write_seqcount_begin(&devnet_rename_seq);
1138 memcpy(dev->name, oldname, IFNAMSIZ); 1138 memcpy(dev->name, oldname, IFNAMSIZ);
1139 goto rollback; 1139 goto rollback;
1140 } else { 1140 } else {
1141 pr_err("%s: name change rollback failed: %d\n", 1141 pr_err("%s: name change rollback failed: %d\n",
1142 dev->name, ret); 1142 dev->name, ret);
1143 } 1143 }
1144 } 1144 }
1145 1145
1146 return err; 1146 return err;
1147 } 1147 }
1148 1148
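
A sketch of a caller of dev_change_name(), with a hypothetical new name. The device must be down and RTNL held, otherwise -EBUSY is returned as above; a "%d" wildcard in the new name is resolved through dev_get_valid_name():

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static int rename_example(struct net_device *dev)
{
        int err;

        rtnl_lock();
        err = dev_change_name(dev, "backbone%d");
        rtnl_unlock();

        return err;     /* 0 on success; the old name is restored on failure */
}
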
1149 /** 1149 /**
1150 * dev_set_alias - change ifalias of a device 1150 * dev_set_alias - change ifalias of a device
1151 * @dev: device 1151 * @dev: device
1152 * @alias: name up to IFALIASZ 1152 * @alias: name up to IFALIASZ
1153 * @len: limit of bytes to copy from info 1153 * @len: limit of bytes to copy from info
1154 * 1154 *
1155 * Set ifalias for a device, 1155 * Set ifalias for a device,
1156 */ 1156 */
1157 int dev_set_alias(struct net_device *dev, const char *alias, size_t len) 1157 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
1158 { 1158 {
1159 char *new_ifalias; 1159 char *new_ifalias;
1160 1160
1161 ASSERT_RTNL(); 1161 ASSERT_RTNL();
1162 1162
1163 if (len >= IFALIASZ) 1163 if (len >= IFALIASZ)
1164 return -EINVAL; 1164 return -EINVAL;
1165 1165
1166 if (!len) { 1166 if (!len) {
1167 kfree(dev->ifalias); 1167 kfree(dev->ifalias);
1168 dev->ifalias = NULL; 1168 dev->ifalias = NULL;
1169 return 0; 1169 return 0;
1170 } 1170 }
1171 1171
1172 new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); 1172 new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
1173 if (!new_ifalias) 1173 if (!new_ifalias)
1174 return -ENOMEM; 1174 return -ENOMEM;
1175 dev->ifalias = new_ifalias; 1175 dev->ifalias = new_ifalias;
1176 1176
1177 strlcpy(dev->ifalias, alias, len+1); 1177 strlcpy(dev->ifalias, alias, len+1);
1178 return len; 1178 return len;
1179 } 1179 }
1180 1180
1181 1181
1182 /** 1182 /**
1183 * netdev_features_change - device changes features 1183 * netdev_features_change - device changes features
1184 * @dev: device to cause notification 1184 * @dev: device to cause notification
1185 * 1185 *
1186 * Called to indicate a device has changed features. 1186 * Called to indicate a device has changed features.
1187 */ 1187 */
1188 void netdev_features_change(struct net_device *dev) 1188 void netdev_features_change(struct net_device *dev)
1189 { 1189 {
1190 call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev); 1190 call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
1191 } 1191 }
1192 EXPORT_SYMBOL(netdev_features_change); 1192 EXPORT_SYMBOL(netdev_features_change);
1193 1193
1194 /** 1194 /**
1195 * netdev_state_change - device changes state 1195 * netdev_state_change - device changes state
1196 * @dev: device to cause notification 1196 * @dev: device to cause notification
1197 * 1197 *
1198 * Called to indicate a device has changed state. This function calls 1198 * Called to indicate a device has changed state. This function calls
1199 * the notifier chains for netdev_chain and sends a NEWLINK message 1199 * the notifier chains for netdev_chain and sends a NEWLINK message
1200 * to the routing socket. 1200 * to the routing socket.
1201 */ 1201 */
1202 void netdev_state_change(struct net_device *dev) 1202 void netdev_state_change(struct net_device *dev)
1203 { 1203 {
1204 if (dev->flags & IFF_UP) { 1204 if (dev->flags & IFF_UP) {
1205 call_netdevice_notifiers(NETDEV_CHANGE, dev); 1205 call_netdevice_notifiers(NETDEV_CHANGE, dev);
1206 rtmsg_ifinfo(RTM_NEWLINK, dev, 0); 1206 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1207 } 1207 }
1208 } 1208 }
1209 EXPORT_SYMBOL(netdev_state_change); 1209 EXPORT_SYMBOL(netdev_state_change);
1210 1210
1211 /** 1211 /**
1212 * netdev_notify_peers - notify network peers about existence of @dev 1212 * netdev_notify_peers - notify network peers about existence of @dev
1213 * @dev: network device 1213 * @dev: network device
1214 * 1214 *
1215 * Generate traffic such that interested network peers are aware of 1215 * Generate traffic such that interested network peers are aware of
1216 * @dev, such as by generating a gratuitous ARP. This may be used when 1216 * @dev, such as by generating a gratuitous ARP. This may be used when
1217 * a device wants to inform the rest of the network about some sort of 1217 * a device wants to inform the rest of the network about some sort of
1218 * reconfiguration such as a failover event or virtual machine 1218 * reconfiguration such as a failover event or virtual machine
1219 * migration. 1219 * migration.
1220 */ 1220 */
1221 void netdev_notify_peers(struct net_device *dev) 1221 void netdev_notify_peers(struct net_device *dev)
1222 { 1222 {
1223 rtnl_lock(); 1223 rtnl_lock();
1224 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev); 1224 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
1225 rtnl_unlock(); 1225 rtnl_unlock();
1226 } 1226 }
1227 EXPORT_SYMBOL(netdev_notify_peers); 1227 EXPORT_SYMBOL(netdev_notify_peers);
1228 1228
1229 /** 1229 /**
1230 * dev_load - load a network module 1230 * dev_load - load a network module
1231 * @net: the applicable net namespace 1231 * @net: the applicable net namespace
1232 * @name: name of interface 1232 * @name: name of interface
1233 * 1233 *
1234 * If a network interface is not present and the process has suitable 1234 * If a network interface is not present and the process has suitable
1235 * privileges this function loads the module. If module loading is not 1235 * privileges this function loads the module. If module loading is not
1236 * available in this kernel then it becomes a nop. 1236 * available in this kernel then it becomes a nop.
1237 */ 1237 */
1238 1238
1239 void dev_load(struct net *net, const char *name) 1239 void dev_load(struct net *net, const char *name)
1240 { 1240 {
1241 struct net_device *dev; 1241 struct net_device *dev;
1242 int no_module; 1242 int no_module;
1243 1243
1244 rcu_read_lock(); 1244 rcu_read_lock();
1245 dev = dev_get_by_name_rcu(net, name); 1245 dev = dev_get_by_name_rcu(net, name);
1246 rcu_read_unlock(); 1246 rcu_read_unlock();
1247 1247
1248 no_module = !dev; 1248 no_module = !dev;
1249 if (no_module && capable(CAP_NET_ADMIN)) 1249 if (no_module && capable(CAP_NET_ADMIN))
1250 no_module = request_module("netdev-%s", name); 1250 no_module = request_module("netdev-%s", name);
1251 if (no_module && capable(CAP_SYS_MODULE)) { 1251 if (no_module && capable(CAP_SYS_MODULE)) {
1252 if (!request_module("%s", name)) 1252 if (!request_module("%s", name))
1253 pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n", 1253 pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n",
1254 name); 1254 name);
1255 } 1255 }
1256 } 1256 }
1257 EXPORT_SYMBOL(dev_load); 1257 EXPORT_SYMBOL(dev_load);
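
For the autoloading path above to work with CAP_NET_ADMIN alone, the driver that provides a given interface name is expected to advertise the "netdev-<name>" alias, e.g. via the MODULE_ALIAS_NETDEV() helper (assuming it is available in this tree's netdevice.h). A hypothetical driver creating "example0" would declare:

#include <linux/module.h>
#include <linux/netdevice.h>

/* request_module("netdev-example0") from dev_load() resolves to this module. */
MODULE_ALIAS_NETDEV("example0");
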
1258 1258
1259 static int __dev_open(struct net_device *dev) 1259 static int __dev_open(struct net_device *dev)
1260 { 1260 {
1261 const struct net_device_ops *ops = dev->netdev_ops; 1261 const struct net_device_ops *ops = dev->netdev_ops;
1262 int ret; 1262 int ret;
1263 1263
1264 ASSERT_RTNL(); 1264 ASSERT_RTNL();
1265 1265
1266 if (!netif_device_present(dev)) 1266 if (!netif_device_present(dev))
1267 return -ENODEV; 1267 return -ENODEV;
1268 1268
1269 /* Block netpoll from trying to do any rx path servicing. 1269 /* Block netpoll from trying to do any rx path servicing.
1270 * If we don't do this there is a chance ndo_poll_controller 1270 * If we don't do this there is a chance ndo_poll_controller
1271 * or ndo_poll may be running while we open the device 1271 * or ndo_poll may be running while we open the device
1272 */ 1272 */
1273 ret = netpoll_rx_disable(dev); 1273 ret = netpoll_rx_disable(dev);
1274 if (ret) 1274 if (ret)
1275 return ret; 1275 return ret;
1276 1276
1277 ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev); 1277 ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
1278 ret = notifier_to_errno(ret); 1278 ret = notifier_to_errno(ret);
1279 if (ret) 1279 if (ret)
1280 return ret; 1280 return ret;
1281 1281
1282 set_bit(__LINK_STATE_START, &dev->state); 1282 set_bit(__LINK_STATE_START, &dev->state);
1283 1283
1284 if (ops->ndo_validate_addr) 1284 if (ops->ndo_validate_addr)
1285 ret = ops->ndo_validate_addr(dev); 1285 ret = ops->ndo_validate_addr(dev);
1286 1286
1287 if (!ret && ops->ndo_open) 1287 if (!ret && ops->ndo_open)
1288 ret = ops->ndo_open(dev); 1288 ret = ops->ndo_open(dev);
1289 1289
1290 netpoll_rx_enable(dev); 1290 netpoll_rx_enable(dev);
1291 1291
1292 if (ret) 1292 if (ret)
1293 clear_bit(__LINK_STATE_START, &dev->state); 1293 clear_bit(__LINK_STATE_START, &dev->state);
1294 else { 1294 else {
1295 dev->flags |= IFF_UP; 1295 dev->flags |= IFF_UP;
1296 net_dmaengine_get(); 1296 net_dmaengine_get();
1297 dev_set_rx_mode(dev); 1297 dev_set_rx_mode(dev);
1298 dev_activate(dev); 1298 dev_activate(dev);
1299 add_device_randomness(dev->dev_addr, dev->addr_len); 1299 add_device_randomness(dev->dev_addr, dev->addr_len);
1300 } 1300 }
1301 1301
1302 return ret; 1302 return ret;
1303 } 1303 }
1304 1304
1305 /** 1305 /**
1306 * dev_open - prepare an interface for use. 1306 * dev_open - prepare an interface for use.
1307 * @dev: device to open 1307 * @dev: device to open
1308 * 1308 *
1309 * Takes a device from down to up state. The device's private open 1309 * Takes a device from down to up state. The device's private open
1310 * function is invoked and then the multicast lists are loaded. Finally 1310 * function is invoked and then the multicast lists are loaded. Finally
1311 * the device is moved into the up state and a %NETDEV_UP message is 1311 * the device is moved into the up state and a %NETDEV_UP message is
1312 * sent to the netdev notifier chain. 1312 * sent to the netdev notifier chain.
1313 * 1313 *
1314 * Calling this function on an active interface is a nop. On a failure 1314 * Calling this function on an active interface is a nop. On a failure
1315 * a negative errno code is returned. 1315 * a negative errno code is returned.
1316 */ 1316 */
1317 int dev_open(struct net_device *dev) 1317 int dev_open(struct net_device *dev)
1318 { 1318 {
1319 int ret; 1319 int ret;
1320 1320
1321 if (dev->flags & IFF_UP) 1321 if (dev->flags & IFF_UP)
1322 return 0; 1322 return 0;
1323 1323
1324 ret = __dev_open(dev); 1324 ret = __dev_open(dev);
1325 if (ret < 0) 1325 if (ret < 0)
1326 return ret; 1326 return ret;
1327 1327
1328 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); 1328 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
1329 call_netdevice_notifiers(NETDEV_UP, dev); 1329 call_netdevice_notifiers(NETDEV_UP, dev);
1330 1330
1331 return ret; 1331 return ret;
1332 } 1332 }
1333 EXPORT_SYMBOL(dev_open); 1333 EXPORT_SYMBOL(dev_open);
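
A sketch of driving an interface up and back down from kernel code, using dev_open() and its counterpart dev_close() below. Both expect RTNL to be held by the caller; dev_open() on an already-up device and dev_close() on an already-down device are nops:

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static int updown_example(struct net_device *dev)
{
        int err;

        rtnl_lock();
        err = dev_open(dev);            /* NETDEV_UP + RTM_NEWLINK on success */
        if (!err)
                dev_close(dev);         /* NETDEV_GOING_DOWN ... NETDEV_DOWN */
        rtnl_unlock();

        return err;
}
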
1334 1334
1335 static int __dev_close_many(struct list_head *head) 1335 static int __dev_close_many(struct list_head *head)
1336 { 1336 {
1337 struct net_device *dev; 1337 struct net_device *dev;
1338 1338
1339 ASSERT_RTNL(); 1339 ASSERT_RTNL();
1340 might_sleep(); 1340 might_sleep();
1341 1341
1342 list_for_each_entry(dev, head, unreg_list) { 1342 list_for_each_entry(dev, head, unreg_list) {
1343 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); 1343 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1344 1344
1345 clear_bit(__LINK_STATE_START, &dev->state); 1345 clear_bit(__LINK_STATE_START, &dev->state);
1346 1346
1347 /* Synchronize to scheduled poll. We cannot touch poll list, it 1347 /* Synchronize to scheduled poll. We cannot touch poll list, it
1348 * can be even on different cpu. So just clear netif_running(). 1348 * can be even on different cpu. So just clear netif_running().
1349 * 1349 *
1350 * dev->stop() will invoke napi_disable() on all of its 1350 * dev->stop() will invoke napi_disable() on all of its
1351 * napi_struct instances on this device. 1351 * napi_struct instances on this device.
1352 */ 1352 */
1353 smp_mb__after_clear_bit(); /* Commit netif_running(). */ 1353 smp_mb__after_clear_bit(); /* Commit netif_running(). */
1354 } 1354 }
1355 1355
1356 dev_deactivate_many(head); 1356 dev_deactivate_many(head);
1357 1357
1358 list_for_each_entry(dev, head, unreg_list) { 1358 list_for_each_entry(dev, head, unreg_list) {
1359 const struct net_device_ops *ops = dev->netdev_ops; 1359 const struct net_device_ops *ops = dev->netdev_ops;
1360 1360
1361 /* 1361 /*
1362 * Call the device specific close. This cannot fail. 1362 * Call the device specific close. This cannot fail.
1363 * Only if device is UP 1363 * Only if device is UP
1364 * 1364 *
1365 * We allow it to be called even after a DETACH hot-plug 1365 * We allow it to be called even after a DETACH hot-plug
1366 * event. 1366 * event.
1367 */ 1367 */
1368 if (ops->ndo_stop) 1368 if (ops->ndo_stop)
1369 ops->ndo_stop(dev); 1369 ops->ndo_stop(dev);
1370 1370
1371 dev->flags &= ~IFF_UP; 1371 dev->flags &= ~IFF_UP;
1372 net_dmaengine_put(); 1372 net_dmaengine_put();
1373 } 1373 }
1374 1374
1375 return 0; 1375 return 0;
1376 } 1376 }
1377 1377
1378 static int __dev_close(struct net_device *dev) 1378 static int __dev_close(struct net_device *dev)
1379 { 1379 {
1380 int retval; 1380 int retval;
1381 LIST_HEAD(single); 1381 LIST_HEAD(single);
1382 1382
1383 /* Temporarily disable netpoll until the interface is down */ 1383 /* Temporarily disable netpoll until the interface is down */
1384 retval = netpoll_rx_disable(dev); 1384 retval = netpoll_rx_disable(dev);
1385 if (retval) 1385 if (retval)
1386 return retval; 1386 return retval;
1387 1387
1388 list_add(&dev->unreg_list, &single); 1388 list_add(&dev->unreg_list, &single);
1389 retval = __dev_close_many(&single); 1389 retval = __dev_close_many(&single);
1390 list_del(&single); 1390 list_del(&single);
1391 1391
1392 netpoll_rx_enable(dev); 1392 netpoll_rx_enable(dev);
1393 return retval; 1393 return retval;
1394 } 1394 }
1395 1395
1396 static int dev_close_many(struct list_head *head) 1396 static int dev_close_many(struct list_head *head)
1397 { 1397 {
1398 struct net_device *dev, *tmp; 1398 struct net_device *dev, *tmp;
1399 LIST_HEAD(tmp_list); 1399 LIST_HEAD(tmp_list);
1400 1400
1401 list_for_each_entry_safe(dev, tmp, head, unreg_list) 1401 list_for_each_entry_safe(dev, tmp, head, unreg_list)
1402 if (!(dev->flags & IFF_UP)) 1402 if (!(dev->flags & IFF_UP))
1403 list_move(&dev->unreg_list, &tmp_list); 1403 list_move(&dev->unreg_list, &tmp_list);
1404 1404
1405 __dev_close_many(head); 1405 __dev_close_many(head);
1406 1406
1407 list_for_each_entry(dev, head, unreg_list) { 1407 list_for_each_entry(dev, head, unreg_list) {
1408 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); 1408 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
1409 call_netdevice_notifiers(NETDEV_DOWN, dev); 1409 call_netdevice_notifiers(NETDEV_DOWN, dev);
1410 } 1410 }
1411 1411
1412 /* rollback_registered_many needs the complete original list */ 1412 /* rollback_registered_many needs the complete original list */
1413 list_splice(&tmp_list, head); 1413 list_splice(&tmp_list, head);
1414 return 0; 1414 return 0;
1415 } 1415 }
1416 1416
1417 /** 1417 /**
1418 * dev_close - shutdown an interface. 1418 * dev_close - shutdown an interface.
1419 * @dev: device to shutdown 1419 * @dev: device to shutdown
1420 * 1420 *
1421 * This function moves an active device into down state. A 1421 * This function moves an active device into down state. A
1422 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device 1422 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1423 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier 1423 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1424 * chain. 1424 * chain.
1425 */ 1425 */
1426 int dev_close(struct net_device *dev) 1426 int dev_close(struct net_device *dev)
1427 { 1427 {
1428 int ret = 0; 1428 int ret = 0;
1429 if (dev->flags & IFF_UP) { 1429 if (dev->flags & IFF_UP) {
1430 LIST_HEAD(single); 1430 LIST_HEAD(single);
1431 1431
1432 /* Block netpoll rx while the interface is going down */ 1432 /* Block netpoll rx while the interface is going down */
1433 ret = netpoll_rx_disable(dev); 1433 ret = netpoll_rx_disable(dev);
1434 if (ret) 1434 if (ret)
1435 return ret; 1435 return ret;
1436 1436
1437 list_add(&dev->unreg_list, &single); 1437 list_add(&dev->unreg_list, &single);
1438 dev_close_many(&single); 1438 dev_close_many(&single);
1439 list_del(&single); 1439 list_del(&single);
1440 1440
1441 netpoll_rx_enable(dev); 1441 netpoll_rx_enable(dev);
1442 } 1442 }
1443 return ret; 1443 return ret;
1444 } 1444 }
1445 EXPORT_SYMBOL(dev_close); 1445 EXPORT_SYMBOL(dev_close);
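/* Editorial example, not part of this commit: dev_close() must run under
 * RTNL, since call_netdevice_notifiers() asserts it.  A minimal hedged
 * sketch of shutting an interface down from other kernel code, where
 * "target" is a net_device the caller already holds:
 */
static void example_shutdown(struct net_device *target)
{
	rtnl_lock();
	dev_close(target);	/* emits NETDEV_GOING_DOWN, then NETDEV_DOWN */
	rtnl_unlock();
}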
1446 1446
1447 1447
1448 /** 1448 /**
1449 * dev_disable_lro - disable Large Receive Offload on a device 1449 * dev_disable_lro - disable Large Receive Offload on a device
1450 * @dev: device 1450 * @dev: device
1451 * 1451 *
1452 * Disable Large Receive Offload (LRO) on a net device. Must be 1452 * Disable Large Receive Offload (LRO) on a net device. Must be
1453 * called under RTNL. This is needed if received packets may be 1453 * called under RTNL. This is needed if received packets may be
1454 * forwarded to another interface. 1454 * forwarded to another interface.
1455 */ 1455 */
1456 void dev_disable_lro(struct net_device *dev) 1456 void dev_disable_lro(struct net_device *dev)
1457 { 1457 {
1458 /* 1458 /*
1459 * If we're trying to disable lro on a vlan device 1459 * If we're trying to disable lro on a vlan device
1460 * use the underlying physical device instead 1460 * use the underlying physical device instead
1461 */ 1461 */
1462 if (is_vlan_dev(dev)) 1462 if (is_vlan_dev(dev))
1463 dev = vlan_dev_real_dev(dev); 1463 dev = vlan_dev_real_dev(dev);
1464 1464
1465 dev->wanted_features &= ~NETIF_F_LRO; 1465 dev->wanted_features &= ~NETIF_F_LRO;
1466 netdev_update_features(dev); 1466 netdev_update_features(dev);
1467 1467
1468 if (unlikely(dev->features & NETIF_F_LRO)) 1468 if (unlikely(dev->features & NETIF_F_LRO))
1469 netdev_WARN(dev, "failed to disable LRO!\n"); 1469 netdev_WARN(dev, "failed to disable LRO!\n");
1470 } 1470 }
1471 EXPORT_SYMBOL(dev_disable_lro); 1471 EXPORT_SYMBOL(dev_disable_lro);
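/* Editorial example, not part of this commit: per the kernel-doc above, LRO
 * has to be switched off before a device may forward packets, and the call
 * must be made under RTNL.  A hedged sketch of an enslave-style path (the
 * function name is illustrative):
 */
static int example_enslave(struct net_device *slave)
{
	ASSERT_RTNL();
	dev_disable_lro(slave);	/* forwarded traffic must not be LRO-merged */
	return 0;
}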
1472 1472
1473 1473
1474 static int dev_boot_phase = 1; 1474 static int dev_boot_phase = 1;
1475 1475
1476 /** 1476 /**
1477 * register_netdevice_notifier - register a network notifier block 1477 * register_netdevice_notifier - register a network notifier block
1478 * @nb: notifier 1478 * @nb: notifier
1479 * 1479 *
1480 * Register a notifier to be called when network device events occur. 1480 * Register a notifier to be called when network device events occur.
1481 * The notifier passed is linked into the kernel structures and must 1481 * The notifier passed is linked into the kernel structures and must
1482 * not be reused until it has been unregistered. A negative errno code 1482 * not be reused until it has been unregistered. A negative errno code
1483 * is returned on a failure. 1483 * is returned on a failure.
1484 * 1484 *
1485 * When registered all registration and up events are replayed 1485 * When registered all registration and up events are replayed
1486 * to the new notifier to allow the device to have a race-free 1486 * to the new notifier to allow the device to have a race-free
1487 * view of the network device list. 1487 * view of the network device list.
1488 */ 1488 */
1489 1489
1490 int register_netdevice_notifier(struct notifier_block *nb) 1490 int register_netdevice_notifier(struct notifier_block *nb)
1491 { 1491 {
1492 struct net_device *dev; 1492 struct net_device *dev;
1493 struct net_device *last; 1493 struct net_device *last;
1494 struct net *net; 1494 struct net *net;
1495 int err; 1495 int err;
1496 1496
1497 rtnl_lock(); 1497 rtnl_lock();
1498 err = raw_notifier_chain_register(&netdev_chain, nb); 1498 err = raw_notifier_chain_register(&netdev_chain, nb);
1499 if (err) 1499 if (err)
1500 goto unlock; 1500 goto unlock;
1501 if (dev_boot_phase) 1501 if (dev_boot_phase)
1502 goto unlock; 1502 goto unlock;
1503 for_each_net(net) { 1503 for_each_net(net) {
1504 for_each_netdev(net, dev) { 1504 for_each_netdev(net, dev) {
1505 err = nb->notifier_call(nb, NETDEV_REGISTER, dev); 1505 err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1506 err = notifier_to_errno(err); 1506 err = notifier_to_errno(err);
1507 if (err) 1507 if (err)
1508 goto rollback; 1508 goto rollback;
1509 1509
1510 if (!(dev->flags & IFF_UP)) 1510 if (!(dev->flags & IFF_UP))
1511 continue; 1511 continue;
1512 1512
1513 nb->notifier_call(nb, NETDEV_UP, dev); 1513 nb->notifier_call(nb, NETDEV_UP, dev);
1514 } 1514 }
1515 } 1515 }
1516 1516
1517 unlock: 1517 unlock:
1518 rtnl_unlock(); 1518 rtnl_unlock();
1519 return err; 1519 return err;
1520 1520
1521 rollback: 1521 rollback:
1522 last = dev; 1522 last = dev;
1523 for_each_net(net) { 1523 for_each_net(net) {
1524 for_each_netdev(net, dev) { 1524 for_each_netdev(net, dev) {
1525 if (dev == last) 1525 if (dev == last)
1526 goto outroll; 1526 goto outroll;
1527 1527
1528 if (dev->flags & IFF_UP) { 1528 if (dev->flags & IFF_UP) {
1529 nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); 1529 nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1530 nb->notifier_call(nb, NETDEV_DOWN, dev); 1530 nb->notifier_call(nb, NETDEV_DOWN, dev);
1531 } 1531 }
1532 nb->notifier_call(nb, NETDEV_UNREGISTER, dev); 1532 nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1533 } 1533 }
1534 } 1534 }
1535 1535
1536 outroll: 1536 outroll:
1537 raw_notifier_chain_unregister(&netdev_chain, nb); 1537 raw_notifier_chain_unregister(&netdev_chain, nb);
1538 goto unlock; 1538 goto unlock;
1539 } 1539 }
1540 EXPORT_SYMBOL(register_netdevice_notifier); 1540 EXPORT_SYMBOL(register_netdevice_notifier);
1541 1541
1542 /** 1542 /**
1543 * unregister_netdevice_notifier - unregister a network notifier block 1543 * unregister_netdevice_notifier - unregister a network notifier block
1544 * @nb: notifier 1544 * @nb: notifier
1545 * 1545 *
1546 * Unregister a notifier previously registered by 1546 * Unregister a notifier previously registered by
1547 * register_netdevice_notifier(). The notifier is unlinked from the 1547 * register_netdevice_notifier(). The notifier is unlinked from the
1548 * kernel structures and may then be reused. A negative errno code 1548 * kernel structures and may then be reused. A negative errno code
1549 * is returned on a failure. 1549 * is returned on a failure.
1550 * 1550 *
1551 * After unregistering, unregister and down device events are synthesized 1551 * After unregistering, unregister and down device events are synthesized
1552 * for all devices on the device list to the removed notifier to remove 1552 * for all devices on the device list to the removed notifier to remove
1553 * the need for special-case cleanup code. 1553 * the need for special-case cleanup code.
1554 */ 1554 */
1555 1555
1556 int unregister_netdevice_notifier(struct notifier_block *nb) 1556 int unregister_netdevice_notifier(struct notifier_block *nb)
1557 { 1557 {
1558 struct net_device *dev; 1558 struct net_device *dev;
1559 struct net *net; 1559 struct net *net;
1560 int err; 1560 int err;
1561 1561
1562 rtnl_lock(); 1562 rtnl_lock();
1563 err = raw_notifier_chain_unregister(&netdev_chain, nb); 1563 err = raw_notifier_chain_unregister(&netdev_chain, nb);
1564 if (err) 1564 if (err)
1565 goto unlock; 1565 goto unlock;
1566 1566
1567 for_each_net(net) { 1567 for_each_net(net) {
1568 for_each_netdev(net, dev) { 1568 for_each_netdev(net, dev) {
1569 if (dev->flags & IFF_UP) { 1569 if (dev->flags & IFF_UP) {
1570 nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); 1570 nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1571 nb->notifier_call(nb, NETDEV_DOWN, dev); 1571 nb->notifier_call(nb, NETDEV_DOWN, dev);
1572 } 1572 }
1573 nb->notifier_call(nb, NETDEV_UNREGISTER, dev); 1573 nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1574 } 1574 }
1575 } 1575 }
1576 unlock: 1576 unlock:
1577 rtnl_unlock(); 1577 rtnl_unlock();
1578 return err; 1578 return err;
1579 } 1579 }
1580 EXPORT_SYMBOL(unregister_netdevice_notifier); 1580 EXPORT_SYMBOL(unregister_netdevice_notifier);
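/* Editorial example, not part of this commit: a minimal netdevice notifier.
 * As the kernel-doc above notes, registration replays NETDEV_REGISTER and
 * NETDEV_UP for already-existing devices, so the callback must cope with
 * devices it has never seen.  In this kernel the third argument is the
 * struct net_device itself; all names below are illustrative.
 */
static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	switch (event) {
	case NETDEV_UP:
		pr_debug("%s: up\n", dev->name);
		break;
	case NETDEV_GOING_DOWN:
		pr_debug("%s: going down\n", dev->name);
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block example_nb = {
	.notifier_call = example_netdev_event,
};

/* register_netdevice_notifier(&example_nb) in module init,
 * unregister_netdevice_notifier(&example_nb) in module exit.
 */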
1581 1581
1582 /** 1582 /**
1583 * call_netdevice_notifiers - call all network notifier blocks 1583 * call_netdevice_notifiers - call all network notifier blocks
1584 * @val: value passed unmodified to notifier function 1584 * @val: value passed unmodified to notifier function
1585 * @dev: net_device pointer passed unmodified to notifier function 1585 * @dev: net_device pointer passed unmodified to notifier function
1586 * 1586 *
1587 * Call all network notifier blocks. Parameters and return value 1587 * Call all network notifier blocks. Parameters and return value
1588 * are as for raw_notifier_call_chain(). 1588 * are as for raw_notifier_call_chain().
1589 */ 1589 */
1590 1590
1591 int call_netdevice_notifiers(unsigned long val, struct net_device *dev) 1591 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1592 { 1592 {
1593 ASSERT_RTNL(); 1593 ASSERT_RTNL();
1594 return raw_notifier_call_chain(&netdev_chain, val, dev); 1594 return raw_notifier_call_chain(&netdev_chain, val, dev);
1595 } 1595 }
1596 EXPORT_SYMBOL(call_netdevice_notifiers); 1596 EXPORT_SYMBOL(call_netdevice_notifiers);
1597 1597
1598 static struct static_key netstamp_needed __read_mostly; 1598 static struct static_key netstamp_needed __read_mostly;
1599 #ifdef HAVE_JUMP_LABEL 1599 #ifdef HAVE_JUMP_LABEL
1600 /* We are not allowed to call static_key_slow_dec() from irq context. 1600 /* We are not allowed to call static_key_slow_dec() from irq context.
1601 * If net_disable_timestamp() is called from irq context, defer the 1601 * If net_disable_timestamp() is called from irq context, defer the
1602 * static_key_slow_dec() calls. 1602 * static_key_slow_dec() calls.
1603 */ 1603 */
1604 static atomic_t netstamp_needed_deferred; 1604 static atomic_t netstamp_needed_deferred;
1605 #endif 1605 #endif
1606 1606
1607 void net_enable_timestamp(void) 1607 void net_enable_timestamp(void)
1608 { 1608 {
1609 #ifdef HAVE_JUMP_LABEL 1609 #ifdef HAVE_JUMP_LABEL
1610 int deferred = atomic_xchg(&netstamp_needed_deferred, 0); 1610 int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
1611 1611
1612 if (deferred) { 1612 if (deferred) {
1613 while (--deferred) 1613 while (--deferred)
1614 static_key_slow_dec(&netstamp_needed); 1614 static_key_slow_dec(&netstamp_needed);
1615 return; 1615 return;
1616 } 1616 }
1617 #endif 1617 #endif
1618 WARN_ON(in_interrupt()); 1618 WARN_ON(in_interrupt());
1619 static_key_slow_inc(&netstamp_needed); 1619 static_key_slow_inc(&netstamp_needed);
1620 } 1620 }
1621 EXPORT_SYMBOL(net_enable_timestamp); 1621 EXPORT_SYMBOL(net_enable_timestamp);
1622 1622
1623 void net_disable_timestamp(void) 1623 void net_disable_timestamp(void)
1624 { 1624 {
1625 #ifdef HAVE_JUMP_LABEL 1625 #ifdef HAVE_JUMP_LABEL
1626 if (in_interrupt()) { 1626 if (in_interrupt()) {
1627 atomic_inc(&netstamp_needed_deferred); 1627 atomic_inc(&netstamp_needed_deferred);
1628 return; 1628 return;
1629 } 1629 }
1630 #endif 1630 #endif
1631 static_key_slow_dec(&netstamp_needed); 1631 static_key_slow_dec(&netstamp_needed);
1632 } 1632 }
1633 EXPORT_SYMBOL(net_disable_timestamp); 1633 EXPORT_SYMBOL(net_disable_timestamp);
1634 1634
1635 static inline void net_timestamp_set(struct sk_buff *skb) 1635 static inline void net_timestamp_set(struct sk_buff *skb)
1636 { 1636 {
1637 skb->tstamp.tv64 = 0; 1637 skb->tstamp.tv64 = 0;
1638 if (static_key_false(&netstamp_needed)) 1638 if (static_key_false(&netstamp_needed))
1639 __net_timestamp(skb); 1639 __net_timestamp(skb);
1640 } 1640 }
1641 1641
1642 #define net_timestamp_check(COND, SKB) \ 1642 #define net_timestamp_check(COND, SKB) \
1643 if (static_key_false(&netstamp_needed)) { \ 1643 if (static_key_false(&netstamp_needed)) { \
1644 if ((COND) && !(SKB)->tstamp.tv64) \ 1644 if ((COND) && !(SKB)->tstamp.tv64) \
1645 __net_timestamp(SKB); \ 1645 __net_timestamp(SKB); \
1646 } \ 1646 } \
1647 1647
1648 static int net_hwtstamp_validate(struct ifreq *ifr) 1648 static int net_hwtstamp_validate(struct ifreq *ifr)
1649 { 1649 {
1650 struct hwtstamp_config cfg; 1650 struct hwtstamp_config cfg;
1651 enum hwtstamp_tx_types tx_type; 1651 enum hwtstamp_tx_types tx_type;
1652 enum hwtstamp_rx_filters rx_filter; 1652 enum hwtstamp_rx_filters rx_filter;
1653 int tx_type_valid = 0; 1653 int tx_type_valid = 0;
1654 int rx_filter_valid = 0; 1654 int rx_filter_valid = 0;
1655 1655
1656 if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) 1656 if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
1657 return -EFAULT; 1657 return -EFAULT;
1658 1658
1659 if (cfg.flags) /* reserved for future extensions */ 1659 if (cfg.flags) /* reserved for future extensions */
1660 return -EINVAL; 1660 return -EINVAL;
1661 1661
1662 tx_type = cfg.tx_type; 1662 tx_type = cfg.tx_type;
1663 rx_filter = cfg.rx_filter; 1663 rx_filter = cfg.rx_filter;
1664 1664
1665 switch (tx_type) { 1665 switch (tx_type) {
1666 case HWTSTAMP_TX_OFF: 1666 case HWTSTAMP_TX_OFF:
1667 case HWTSTAMP_TX_ON: 1667 case HWTSTAMP_TX_ON:
1668 case HWTSTAMP_TX_ONESTEP_SYNC: 1668 case HWTSTAMP_TX_ONESTEP_SYNC:
1669 tx_type_valid = 1; 1669 tx_type_valid = 1;
1670 break; 1670 break;
1671 } 1671 }
1672 1672
1673 switch (rx_filter) { 1673 switch (rx_filter) {
1674 case HWTSTAMP_FILTER_NONE: 1674 case HWTSTAMP_FILTER_NONE:
1675 case HWTSTAMP_FILTER_ALL: 1675 case HWTSTAMP_FILTER_ALL:
1676 case HWTSTAMP_FILTER_SOME: 1676 case HWTSTAMP_FILTER_SOME:
1677 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: 1677 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
1678 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: 1678 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
1679 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: 1679 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
1680 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: 1680 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
1681 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: 1681 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
1682 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: 1682 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
1683 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: 1683 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
1684 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: 1684 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
1685 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: 1685 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
1686 case HWTSTAMP_FILTER_PTP_V2_EVENT: 1686 case HWTSTAMP_FILTER_PTP_V2_EVENT:
1687 case HWTSTAMP_FILTER_PTP_V2_SYNC: 1687 case HWTSTAMP_FILTER_PTP_V2_SYNC:
1688 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: 1688 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
1689 rx_filter_valid = 1; 1689 rx_filter_valid = 1;
1690 break; 1690 break;
1691 } 1691 }
1692 1692
1693 if (!tx_type_valid || !rx_filter_valid) 1693 if (!tx_type_valid || !rx_filter_valid)
1694 return -ERANGE; 1694 return -ERANGE;
1695 1695
1696 return 0; 1696 return 0;
1697 } 1697 }
1698 1698
1699 static inline bool is_skb_forwardable(struct net_device *dev, 1699 static inline bool is_skb_forwardable(struct net_device *dev,
1700 struct sk_buff *skb) 1700 struct sk_buff *skb)
1701 { 1701 {
1702 unsigned int len; 1702 unsigned int len;
1703 1703
1704 if (!(dev->flags & IFF_UP)) 1704 if (!(dev->flags & IFF_UP))
1705 return false; 1705 return false;
1706 1706
1707 len = dev->mtu + dev->hard_header_len + VLAN_HLEN; 1707 len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
1708 if (skb->len <= len) 1708 if (skb->len <= len)
1709 return true; 1709 return true;
1710 1710
1711 /* if TSO is enabled, we don't care about the length as the packet 1711 /* if TSO is enabled, we don't care about the length as the packet
1712 * could be forwarded without being segmented before 1712 * could be forwarded without being segmented before
1713 */ 1713 */
1714 if (skb_is_gso(skb)) 1714 if (skb_is_gso(skb))
1715 return true; 1715 return true;
1716 1716
1717 return false; 1717 return false;
1718 } 1718 }
1719 1719
1720 /** 1720 /**
1721 * dev_forward_skb - loopback an skb to another netif 1721 * dev_forward_skb - loopback an skb to another netif
1722 * 1722 *
1723 * @dev: destination network device 1723 * @dev: destination network device
1724 * @skb: buffer to forward 1724 * @skb: buffer to forward
1725 * 1725 *
1726 * return values: 1726 * return values:
1727 * NET_RX_SUCCESS (no congestion) 1727 * NET_RX_SUCCESS (no congestion)
1728 * NET_RX_DROP (packet was dropped, but freed) 1728 * NET_RX_DROP (packet was dropped, but freed)
1729 * 1729 *
1730 * dev_forward_skb can be used for injecting an skb from the 1730 * dev_forward_skb can be used for injecting an skb from the
1731 * start_xmit function of one device into the receive queue 1731 * start_xmit function of one device into the receive queue
1732 * of another device. 1732 * of another device.
1733 * 1733 *
1734 * The receiving device may be in another namespace, so 1734 * The receiving device may be in another namespace, so
1735 * we have to clear all information in the skb that could 1735 * we have to clear all information in the skb that could
1736 * impact namespace isolation. 1736 * impact namespace isolation.
1737 */ 1737 */
1738 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) 1738 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1739 { 1739 {
1740 if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { 1740 if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
1741 if (skb_copy_ubufs(skb, GFP_ATOMIC)) { 1741 if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
1742 atomic_long_inc(&dev->rx_dropped); 1742 atomic_long_inc(&dev->rx_dropped);
1743 kfree_skb(skb); 1743 kfree_skb(skb);
1744 return NET_RX_DROP; 1744 return NET_RX_DROP;
1745 } 1745 }
1746 } 1746 }
1747 1747
1748 skb_orphan(skb); 1748 skb_orphan(skb);
1749 nf_reset(skb); 1749 nf_reset(skb);
1750 1750
1751 if (unlikely(!is_skb_forwardable(dev, skb))) { 1751 if (unlikely(!is_skb_forwardable(dev, skb))) {
1752 atomic_long_inc(&dev->rx_dropped); 1752 atomic_long_inc(&dev->rx_dropped);
1753 kfree_skb(skb); 1753 kfree_skb(skb);
1754 return NET_RX_DROP; 1754 return NET_RX_DROP;
1755 } 1755 }
1756 skb->skb_iif = 0; 1756 skb->skb_iif = 0;
1757 skb->dev = dev; 1757 skb->dev = dev;
1758 skb_dst_drop(skb); 1758 skb_dst_drop(skb);
1759 skb->tstamp.tv64 = 0; 1759 skb->tstamp.tv64 = 0;
1760 skb->pkt_type = PACKET_HOST; 1760 skb->pkt_type = PACKET_HOST;
1761 skb->protocol = eth_type_trans(skb, dev); 1761 skb->protocol = eth_type_trans(skb, dev);
1762 skb->mark = 0; 1762 skb->mark = 0;
1763 secpath_reset(skb); 1763 secpath_reset(skb);
1764 nf_reset(skb); 1764 nf_reset(skb);
1765 return netif_rx(skb); 1765 return netif_rx(skb);
1766 } 1766 }
1767 EXPORT_SYMBOL_GPL(dev_forward_skb); 1767 EXPORT_SYMBOL_GPL(dev_forward_skb);
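/* Editorial example, not part of this commit: the veth-style use described
 * in the kernel-doc above, injecting a frame from one device's transmit
 * handler into its peer's receive path.  example_get_peer() is a
 * hypothetical helper standing in for the driver's own peer lookup.
 */
static netdev_tx_t example_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net_device *peer = example_get_peer(dev);
	unsigned int len = skb->len;

	if (dev_forward_skb(peer, skb) == NET_RX_SUCCESS) {
		dev->stats.tx_packets++;
		dev->stats.tx_bytes += len;
	}
	/* on NET_RX_DROP the skb has already been freed for us */
	return NETDEV_TX_OK;
}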
1768 1768
1769 static inline int deliver_skb(struct sk_buff *skb, 1769 static inline int deliver_skb(struct sk_buff *skb,
1770 struct packet_type *pt_prev, 1770 struct packet_type *pt_prev,
1771 struct net_device *orig_dev) 1771 struct net_device *orig_dev)
1772 { 1772 {
1773 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) 1773 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
1774 return -ENOMEM; 1774 return -ENOMEM;
1775 atomic_inc(&skb->users); 1775 atomic_inc(&skb->users);
1776 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); 1776 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1777 } 1777 }
1778 1778
1779 static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb) 1779 static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
1780 { 1780 {
1781 if (!ptype->af_packet_priv || !skb->sk) 1781 if (!ptype->af_packet_priv || !skb->sk)
1782 return false; 1782 return false;
1783 1783
1784 if (ptype->id_match) 1784 if (ptype->id_match)
1785 return ptype->id_match(ptype, skb->sk); 1785 return ptype->id_match(ptype, skb->sk);
1786 else if ((struct sock *)ptype->af_packet_priv == skb->sk) 1786 else if ((struct sock *)ptype->af_packet_priv == skb->sk)
1787 return true; 1787 return true;
1788 1788
1789 return false; 1789 return false;
1790 } 1790 }
1791 1791
1792 /* 1792 /*
1793 * Support routine. Sends outgoing frames to any network 1793 * Support routine. Sends outgoing frames to any network
1794 * taps currently in use. 1794 * taps currently in use.
1795 */ 1795 */
1796 1796
1797 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) 1797 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1798 { 1798 {
1799 struct packet_type *ptype; 1799 struct packet_type *ptype;
1800 struct sk_buff *skb2 = NULL; 1800 struct sk_buff *skb2 = NULL;
1801 struct packet_type *pt_prev = NULL; 1801 struct packet_type *pt_prev = NULL;
1802 1802
1803 rcu_read_lock(); 1803 rcu_read_lock();
1804 list_for_each_entry_rcu(ptype, &ptype_all, list) { 1804 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1805 /* Never send packets back to the socket 1805 /* Never send packets back to the socket
1806 * they originated from - MvS (miquels@drinkel.ow.org) 1806 * they originated from - MvS (miquels@drinkel.ow.org)
1807 */ 1807 */
1808 if ((ptype->dev == dev || !ptype->dev) && 1808 if ((ptype->dev == dev || !ptype->dev) &&
1809 (!skb_loop_sk(ptype, skb))) { 1809 (!skb_loop_sk(ptype, skb))) {
1810 if (pt_prev) { 1810 if (pt_prev) {
1811 deliver_skb(skb2, pt_prev, skb->dev); 1811 deliver_skb(skb2, pt_prev, skb->dev);
1812 pt_prev = ptype; 1812 pt_prev = ptype;
1813 continue; 1813 continue;
1814 } 1814 }
1815 1815
1816 skb2 = skb_clone(skb, GFP_ATOMIC); 1816 skb2 = skb_clone(skb, GFP_ATOMIC);
1817 if (!skb2) 1817 if (!skb2)
1818 break; 1818 break;
1819 1819
1820 net_timestamp_set(skb2); 1820 net_timestamp_set(skb2);
1821 1821
1822 /* skb->nh should be correctly 1822 /* skb->nh should be correctly
1823 set by the sender, so that the second statement is 1823 set by the sender, so that the second statement is
1824 just protection against buggy protocols. 1824 just protection against buggy protocols.
1825 */ 1825 */
1826 skb_reset_mac_header(skb2); 1826 skb_reset_mac_header(skb2);
1827 1827
1828 if (skb_network_header(skb2) < skb2->data || 1828 if (skb_network_header(skb2) < skb2->data ||
1829 skb2->network_header > skb2->tail) { 1829 skb2->network_header > skb2->tail) {
1830 net_crit_ratelimited("protocol %04x is buggy, dev %s\n", 1830 net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
1831 ntohs(skb2->protocol), 1831 ntohs(skb2->protocol),
1832 dev->name); 1832 dev->name);
1833 skb_reset_network_header(skb2); 1833 skb_reset_network_header(skb2);
1834 } 1834 }
1835 1835
1836 skb2->transport_header = skb2->network_header; 1836 skb2->transport_header = skb2->network_header;
1837 skb2->pkt_type = PACKET_OUTGOING; 1837 skb2->pkt_type = PACKET_OUTGOING;
1838 pt_prev = ptype; 1838 pt_prev = ptype;
1839 } 1839 }
1840 } 1840 }
1841 if (pt_prev) 1841 if (pt_prev)
1842 pt_prev->func(skb2, skb->dev, pt_prev, skb->dev); 1842 pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
1843 rcu_read_unlock(); 1843 rcu_read_unlock();
1844 } 1844 }
1845 1845
1846 /** 1846 /**
1847 * netif_setup_tc - Handle tc mappings on real_num_tx_queues change 1847 * netif_setup_tc - Handle tc mappings on real_num_tx_queues change
1848 * @dev: Network device 1848 * @dev: Network device
1849 * @txq: number of queues available 1849 * @txq: number of queues available
1850 * 1850 *
1851 * If real_num_tx_queues is changed the tc mappings may no longer be 1851 * If real_num_tx_queues is changed the tc mappings may no longer be
1852 * valid. To resolve this, verify the tc mapping remains valid and, if 1852 * valid. To resolve this, verify the tc mapping remains valid and, if
1853 * not, NULL the mapping. With no priorities mapping to this 1853 * not, NULL the mapping. With no priorities mapping to this
1854 * offset/count pair it will no longer be used. In the worst case, if TC0 1854 * offset/count pair it will no longer be used. In the worst case, if TC0
1855 * is invalid, nothing can be done, so disable priority mappings. It is 1855 * is invalid, nothing can be done, so disable priority mappings. It is
1856 * expected that drivers will fix this mapping if they can before 1856 * expected that drivers will fix this mapping if they can before
1857 * calling netif_set_real_num_tx_queues. 1857 * calling netif_set_real_num_tx_queues.
1858 */ 1858 */
1859 static void netif_setup_tc(struct net_device *dev, unsigned int txq) 1859 static void netif_setup_tc(struct net_device *dev, unsigned int txq)
1860 { 1860 {
1861 int i; 1861 int i;
1862 struct netdev_tc_txq *tc = &dev->tc_to_txq[0]; 1862 struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
1863 1863
1864 /* If TC0 is invalidated disable TC mapping */ 1864 /* If TC0 is invalidated disable TC mapping */
1865 if (tc->offset + tc->count > txq) { 1865 if (tc->offset + tc->count > txq) {
1866 pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n"); 1866 pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
1867 dev->num_tc = 0; 1867 dev->num_tc = 0;
1868 return; 1868 return;
1869 } 1869 }
1870 1870
1871 /* Invalidated prio to tc mappings set to TC0 */ 1871 /* Invalidated prio to tc mappings set to TC0 */
1872 for (i = 1; i < TC_BITMASK + 1; i++) { 1872 for (i = 1; i < TC_BITMASK + 1; i++) {
1873 int q = netdev_get_prio_tc_map(dev, i); 1873 int q = netdev_get_prio_tc_map(dev, i);
1874 1874
1875 tc = &dev->tc_to_txq[q]; 1875 tc = &dev->tc_to_txq[q];
1876 if (tc->offset + tc->count > txq) { 1876 if (tc->offset + tc->count > txq) {
1877 pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n", 1877 pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
1878 i, q); 1878 i, q);
1879 netdev_set_prio_tc_map(dev, i, 0); 1879 netdev_set_prio_tc_map(dev, i, 0);
1880 } 1880 }
1881 } 1881 }
1882 } 1882 }
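/* Editorial example, not part of this commit: the kernel-doc above expects
 * drivers to fix their own tc mapping before shrinking the queue count.  A
 * hedged sketch of a driver establishing two traffic classes of four queues
 * each (return values ignored for brevity):
 */
static void example_setup_tc(struct net_device *dev)
{
	int prio;

	netdev_set_num_tc(dev, 2);
	netdev_set_tc_queue(dev, 0, 4, 0);	/* TC0: queues 0-3 */
	netdev_set_tc_queue(dev, 1, 4, 4);	/* TC1: queues 4-7 */
	for (prio = 0; prio <= TC_BITMASK; prio++)
		netdev_set_prio_tc_map(dev, prio, prio < 4 ? 0 : 1);
}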
1883 1883
1884 #ifdef CONFIG_XPS 1884 #ifdef CONFIG_XPS
1885 static DEFINE_MUTEX(xps_map_mutex); 1885 static DEFINE_MUTEX(xps_map_mutex);
1886 #define xmap_dereference(P) \ 1886 #define xmap_dereference(P) \
1887 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) 1887 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
1888 1888
1889 static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps, 1889 static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps,
1890 int cpu, u16 index) 1890 int cpu, u16 index)
1891 { 1891 {
1892 struct xps_map *map = NULL; 1892 struct xps_map *map = NULL;
1893 int pos; 1893 int pos;
1894 1894
1895 if (dev_maps) 1895 if (dev_maps)
1896 map = xmap_dereference(dev_maps->cpu_map[cpu]); 1896 map = xmap_dereference(dev_maps->cpu_map[cpu]);
1897 1897
1898 for (pos = 0; map && pos < map->len; pos++) { 1898 for (pos = 0; map && pos < map->len; pos++) {
1899 if (map->queues[pos] == index) { 1899 if (map->queues[pos] == index) {
1900 if (map->len > 1) { 1900 if (map->len > 1) {
1901 map->queues[pos] = map->queues[--map->len]; 1901 map->queues[pos] = map->queues[--map->len];
1902 } else { 1902 } else {
1903 RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL); 1903 RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL);
1904 kfree_rcu(map, rcu); 1904 kfree_rcu(map, rcu);
1905 map = NULL; 1905 map = NULL;
1906 } 1906 }
1907 break; 1907 break;
1908 } 1908 }
1909 } 1909 }
1910 1910
1911 return map; 1911 return map;
1912 } 1912 }
1913 1913
1914 static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index) 1914 static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
1915 { 1915 {
1916 struct xps_dev_maps *dev_maps; 1916 struct xps_dev_maps *dev_maps;
1917 int cpu, i; 1917 int cpu, i;
1918 bool active = false; 1918 bool active = false;
1919 1919
1920 mutex_lock(&xps_map_mutex); 1920 mutex_lock(&xps_map_mutex);
1921 dev_maps = xmap_dereference(dev->xps_maps); 1921 dev_maps = xmap_dereference(dev->xps_maps);
1922 1922
1923 if (!dev_maps) 1923 if (!dev_maps)
1924 goto out_no_maps; 1924 goto out_no_maps;
1925 1925
1926 for_each_possible_cpu(cpu) { 1926 for_each_possible_cpu(cpu) {
1927 for (i = index; i < dev->num_tx_queues; i++) { 1927 for (i = index; i < dev->num_tx_queues; i++) {
1928 if (!remove_xps_queue(dev_maps, cpu, i)) 1928 if (!remove_xps_queue(dev_maps, cpu, i))
1929 break; 1929 break;
1930 } 1930 }
1931 if (i == dev->num_tx_queues) 1931 if (i == dev->num_tx_queues)
1932 active = true; 1932 active = true;
1933 } 1933 }
1934 1934
1935 if (!active) { 1935 if (!active) {
1936 RCU_INIT_POINTER(dev->xps_maps, NULL); 1936 RCU_INIT_POINTER(dev->xps_maps, NULL);
1937 kfree_rcu(dev_maps, rcu); 1937 kfree_rcu(dev_maps, rcu);
1938 } 1938 }
1939 1939
1940 for (i = index; i < dev->num_tx_queues; i++) 1940 for (i = index; i < dev->num_tx_queues; i++)
1941 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i), 1941 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
1942 NUMA_NO_NODE); 1942 NUMA_NO_NODE);
1943 1943
1944 out_no_maps: 1944 out_no_maps:
1945 mutex_unlock(&xps_map_mutex); 1945 mutex_unlock(&xps_map_mutex);
1946 } 1946 }
1947 1947
1948 static struct xps_map *expand_xps_map(struct xps_map *map, 1948 static struct xps_map *expand_xps_map(struct xps_map *map,
1949 int cpu, u16 index) 1949 int cpu, u16 index)
1950 { 1950 {
1951 struct xps_map *new_map; 1951 struct xps_map *new_map;
1952 int alloc_len = XPS_MIN_MAP_ALLOC; 1952 int alloc_len = XPS_MIN_MAP_ALLOC;
1953 int i, pos; 1953 int i, pos;
1954 1954
1955 for (pos = 0; map && pos < map->len; pos++) { 1955 for (pos = 0; map && pos < map->len; pos++) {
1956 if (map->queues[pos] != index) 1956 if (map->queues[pos] != index)
1957 continue; 1957 continue;
1958 return map; 1958 return map;
1959 } 1959 }
1960 1960
1961 /* Need to add queue to this CPU's existing map */ 1961 /* Need to add queue to this CPU's existing map */
1962 if (map) { 1962 if (map) {
1963 if (pos < map->alloc_len) 1963 if (pos < map->alloc_len)
1964 return map; 1964 return map;
1965 1965
1966 alloc_len = map->alloc_len * 2; 1966 alloc_len = map->alloc_len * 2;
1967 } 1967 }
1968 1968
1969 /* Need to allocate new map to store queue on this CPU's map */ 1969 /* Need to allocate new map to store queue on this CPU's map */
1970 new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL, 1970 new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
1971 cpu_to_node(cpu)); 1971 cpu_to_node(cpu));
1972 if (!new_map) 1972 if (!new_map)
1973 return NULL; 1973 return NULL;
1974 1974
1975 for (i = 0; i < pos; i++) 1975 for (i = 0; i < pos; i++)
1976 new_map->queues[i] = map->queues[i]; 1976 new_map->queues[i] = map->queues[i];
1977 new_map->alloc_len = alloc_len; 1977 new_map->alloc_len = alloc_len;
1978 new_map->len = pos; 1978 new_map->len = pos;
1979 1979
1980 return new_map; 1980 return new_map;
1981 } 1981 }
1982 1982
1983 int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index) 1983 int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index)
1984 { 1984 {
1985 struct xps_dev_maps *dev_maps, *new_dev_maps = NULL; 1985 struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
1986 struct xps_map *map, *new_map; 1986 struct xps_map *map, *new_map;
1987 int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES); 1987 int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES);
1988 int cpu, numa_node_id = -2; 1988 int cpu, numa_node_id = -2;
1989 bool active = false; 1989 bool active = false;
1990 1990
1991 mutex_lock(&xps_map_mutex); 1991 mutex_lock(&xps_map_mutex);
1992 1992
1993 dev_maps = xmap_dereference(dev->xps_maps); 1993 dev_maps = xmap_dereference(dev->xps_maps);
1994 1994
1995 /* allocate memory for queue storage */ 1995 /* allocate memory for queue storage */
1996 for_each_online_cpu(cpu) { 1996 for_each_online_cpu(cpu) {
1997 if (!cpumask_test_cpu(cpu, mask)) 1997 if (!cpumask_test_cpu(cpu, mask))
1998 continue; 1998 continue;
1999 1999
2000 if (!new_dev_maps) 2000 if (!new_dev_maps)
2001 new_dev_maps = kzalloc(maps_sz, GFP_KERNEL); 2001 new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
2002 if (!new_dev_maps) 2002 if (!new_dev_maps)
2003 return -ENOMEM; 2003 return -ENOMEM;
2004 2004
2005 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : 2005 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
2006 NULL; 2006 NULL;
2007 2007
2008 map = expand_xps_map(map, cpu, index); 2008 map = expand_xps_map(map, cpu, index);
2009 if (!map) 2009 if (!map)
2010 goto error; 2010 goto error;
2011 2011
2012 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); 2012 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
2013 } 2013 }
2014 2014
2015 if (!new_dev_maps) 2015 if (!new_dev_maps)
2016 goto out_no_new_maps; 2016 goto out_no_new_maps;
2017 2017
2018 for_each_possible_cpu(cpu) { 2018 for_each_possible_cpu(cpu) {
2019 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) { 2019 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
2020 /* add queue to CPU maps */ 2020 /* add queue to CPU maps */
2021 int pos = 0; 2021 int pos = 0;
2022 2022
2023 map = xmap_dereference(new_dev_maps->cpu_map[cpu]); 2023 map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
2024 while ((pos < map->len) && (map->queues[pos] != index)) 2024 while ((pos < map->len) && (map->queues[pos] != index))
2025 pos++; 2025 pos++;
2026 2026
2027 if (pos == map->len) 2027 if (pos == map->len)
2028 map->queues[map->len++] = index; 2028 map->queues[map->len++] = index;
2029 #ifdef CONFIG_NUMA 2029 #ifdef CONFIG_NUMA
2030 if (numa_node_id == -2) 2030 if (numa_node_id == -2)
2031 numa_node_id = cpu_to_node(cpu); 2031 numa_node_id = cpu_to_node(cpu);
2032 else if (numa_node_id != cpu_to_node(cpu)) 2032 else if (numa_node_id != cpu_to_node(cpu))
2033 numa_node_id = -1; 2033 numa_node_id = -1;
2034 #endif 2034 #endif
2035 } else if (dev_maps) { 2035 } else if (dev_maps) {
2036 /* fill in the new device map from the old device map */ 2036 /* fill in the new device map from the old device map */
2037 map = xmap_dereference(dev_maps->cpu_map[cpu]); 2037 map = xmap_dereference(dev_maps->cpu_map[cpu]);
2038 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); 2038 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
2039 } 2039 }
2040 2040
2041 } 2041 }
2042 2042
2043 rcu_assign_pointer(dev->xps_maps, new_dev_maps); 2043 rcu_assign_pointer(dev->xps_maps, new_dev_maps);
2044 2044
2045 /* Cleanup old maps */ 2045 /* Cleanup old maps */
2046 if (dev_maps) { 2046 if (dev_maps) {
2047 for_each_possible_cpu(cpu) { 2047 for_each_possible_cpu(cpu) {
2048 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); 2048 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
2049 map = xmap_dereference(dev_maps->cpu_map[cpu]); 2049 map = xmap_dereference(dev_maps->cpu_map[cpu]);
2050 if (map && map != new_map) 2050 if (map && map != new_map)
2051 kfree_rcu(map, rcu); 2051 kfree_rcu(map, rcu);
2052 } 2052 }
2053 2053
2054 kfree_rcu(dev_maps, rcu); 2054 kfree_rcu(dev_maps, rcu);
2055 } 2055 }
2056 2056
2057 dev_maps = new_dev_maps; 2057 dev_maps = new_dev_maps;
2058 active = true; 2058 active = true;
2059 2059
2060 out_no_new_maps: 2060 out_no_new_maps:
2061 /* update Tx queue numa node */ 2061 /* update Tx queue numa node */
2062 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index), 2062 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
2063 (numa_node_id >= 0) ? numa_node_id : 2063 (numa_node_id >= 0) ? numa_node_id :
2064 NUMA_NO_NODE); 2064 NUMA_NO_NODE);
2065 2065
2066 if (!dev_maps) 2066 if (!dev_maps)
2067 goto out_no_maps; 2067 goto out_no_maps;
2068 2068
2069 /* removes queue from unused CPUs */ 2069 /* removes queue from unused CPUs */
2070 for_each_possible_cpu(cpu) { 2070 for_each_possible_cpu(cpu) {
2071 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) 2071 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu))
2072 continue; 2072 continue;
2073 2073
2074 if (remove_xps_queue(dev_maps, cpu, index)) 2074 if (remove_xps_queue(dev_maps, cpu, index))
2075 active = true; 2075 active = true;
2076 } 2076 }
2077 2077
2078 /* free map if not active */ 2078 /* free map if not active */
2079 if (!active) { 2079 if (!active) {
2080 RCU_INIT_POINTER(dev->xps_maps, NULL); 2080 RCU_INIT_POINTER(dev->xps_maps, NULL);
2081 kfree_rcu(dev_maps, rcu); 2081 kfree_rcu(dev_maps, rcu);
2082 } 2082 }
2083 2083
2084 out_no_maps: 2084 out_no_maps:
2085 mutex_unlock(&xps_map_mutex); 2085 mutex_unlock(&xps_map_mutex);
2086 2086
2087 return 0; 2087 return 0;
2088 error: 2088 error:
2089 /* remove any maps that we added */ 2089 /* remove any maps that we added */
2090 for_each_possible_cpu(cpu) { 2090 for_each_possible_cpu(cpu) {
2091 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); 2091 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
2092 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : 2092 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
2093 NULL; 2093 NULL;
2094 if (new_map && new_map != map) 2094 if (new_map && new_map != map)
2095 kfree(new_map); 2095 kfree(new_map);
2096 } 2096 }
2097 2097
2098 mutex_unlock(&xps_map_mutex); 2098 mutex_unlock(&xps_map_mutex);
2099 2099
2100 kfree(new_dev_maps); 2100 kfree(new_dev_maps);
2101 return -ENOMEM; 2101 return -ENOMEM;
2102 } 2102 }
2103 EXPORT_SYMBOL(netif_set_xps_queue); 2103 EXPORT_SYMBOL(netif_set_xps_queue);
2104 2104
2105 #endif 2105 #endif
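/* Editorial example, not part of this commit: netif_set_xps_queue() gives a
 * driver the same transmit CPU-affinity control that user space has through
 * sysfs.  A hedged sketch pinning each tx queue to one online CPU in turn;
 * error handling is omitted and the function name is illustrative.
 */
static void example_default_xps(struct net_device *dev)
{
	cpumask_var_t mask;
	int cpu;
	u16 q;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return;

	cpu = cpumask_first(cpu_online_mask);
	for (q = 0; q < dev->real_num_tx_queues; q++) {
		cpumask_clear(mask);
		cpumask_set_cpu(cpu, mask);
		netif_set_xps_queue(dev, mask, q);
		cpu = cpumask_next(cpu, cpu_online_mask);
		if (cpu >= nr_cpu_ids)
			cpu = cpumask_first(cpu_online_mask);
	}
	free_cpumask_var(mask);
}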
2106 /* 2106 /*
2107 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues 2107 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
2108 * greater than real_num_tx_queues, stale skbs on the qdisc must be flushed. 2108 * greater than real_num_tx_queues, stale skbs on the qdisc must be flushed.
2109 */ 2109 */
2110 int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) 2110 int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
2111 { 2111 {
2112 int rc; 2112 int rc;
2113 2113
2114 if (txq < 1 || txq > dev->num_tx_queues) 2114 if (txq < 1 || txq > dev->num_tx_queues)
2115 return -EINVAL; 2115 return -EINVAL;
2116 2116
2117 if (dev->reg_state == NETREG_REGISTERED || 2117 if (dev->reg_state == NETREG_REGISTERED ||
2118 dev->reg_state == NETREG_UNREGISTERING) { 2118 dev->reg_state == NETREG_UNREGISTERING) {
2119 ASSERT_RTNL(); 2119 ASSERT_RTNL();
2120 2120
2121 rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, 2121 rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
2122 txq); 2122 txq);
2123 if (rc) 2123 if (rc)
2124 return rc; 2124 return rc;
2125 2125
2126 if (dev->num_tc) 2126 if (dev->num_tc)
2127 netif_setup_tc(dev, txq); 2127 netif_setup_tc(dev, txq);
2128 2128
2129 if (txq < dev->real_num_tx_queues) { 2129 if (txq < dev->real_num_tx_queues) {
2130 qdisc_reset_all_tx_gt(dev, txq); 2130 qdisc_reset_all_tx_gt(dev, txq);
2131 #ifdef CONFIG_XPS 2131 #ifdef CONFIG_XPS
2132 netif_reset_xps_queues_gt(dev, txq); 2132 netif_reset_xps_queues_gt(dev, txq);
2133 #endif 2133 #endif
2134 } 2134 }
2135 } 2135 }
2136 2136
2137 dev->real_num_tx_queues = txq; 2137 dev->real_num_tx_queues = txq;
2138 return 0; 2138 return 0;
2139 } 2139 }
2140 EXPORT_SYMBOL(netif_set_real_num_tx_queues); 2140 EXPORT_SYMBOL(netif_set_real_num_tx_queues);
2141 2141
2142 #ifdef CONFIG_RPS 2142 #ifdef CONFIG_RPS
2143 /** 2143 /**
2144 * netif_set_real_num_rx_queues - set actual number of RX queues used 2144 * netif_set_real_num_rx_queues - set actual number of RX queues used
2145 * @dev: Network device 2145 * @dev: Network device
2146 * @rxq: Actual number of RX queues 2146 * @rxq: Actual number of RX queues
2147 * 2147 *
2148 * This must be called either with the rtnl_lock held or before 2148 * This must be called either with the rtnl_lock held or before
2149 * registration of the net device. Returns 0 on success, or a 2149 * registration of the net device. Returns 0 on success, or a
2150 * negative error code. If called before registration, it always 2150 * negative error code. If called before registration, it always
2151 * succeeds. 2151 * succeeds.
2152 */ 2152 */
2153 int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq) 2153 int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
2154 { 2154 {
2155 int rc; 2155 int rc;
2156 2156
2157 if (rxq < 1 || rxq > dev->num_rx_queues) 2157 if (rxq < 1 || rxq > dev->num_rx_queues)
2158 return -EINVAL; 2158 return -EINVAL;
2159 2159
2160 if (dev->reg_state == NETREG_REGISTERED) { 2160 if (dev->reg_state == NETREG_REGISTERED) {
2161 ASSERT_RTNL(); 2161 ASSERT_RTNL();
2162 2162
2163 rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues, 2163 rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
2164 rxq); 2164 rxq);
2165 if (rc) 2165 if (rc)
2166 return rc; 2166 return rc;
2167 } 2167 }
2168 2168
2169 dev->real_num_rx_queues = rxq; 2169 dev->real_num_rx_queues = rxq;
2170 return 0; 2170 return 0;
2171 } 2171 }
2172 EXPORT_SYMBOL(netif_set_real_num_rx_queues); 2172 EXPORT_SYMBOL(netif_set_real_num_rx_queues);
2173 #endif 2173 #endif
2174 2174
2175 /** 2175 /**
2176 * netif_get_num_default_rss_queues - default number of RSS queues 2176 * netif_get_num_default_rss_queues - default number of RSS queues
2177 * 2177 *
2178 * This routine should set an upper limit on the number of RSS queues 2178 * This routine should set an upper limit on the number of RSS queues
2179 * used by default by multiqueue devices. 2179 * used by default by multiqueue devices.
2180 */ 2180 */
2181 int netif_get_num_default_rss_queues(void) 2181 int netif_get_num_default_rss_queues(void)
2182 { 2182 {
2183 return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus()); 2183 return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
2184 } 2184 }
2185 EXPORT_SYMBOL(netif_get_num_default_rss_queues); 2185 EXPORT_SYMBOL(netif_get_num_default_rss_queues);
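/* Editorial example, not part of this commit: typical driver-side use of the
 * helpers above when sizing queues at probe time.  "hw_queues" is a
 * hypothetical hardware limit; the calls assume RTNL is held or that the
 * device is not yet registered.
 */
static int example_setup_queues(struct net_device *dev, unsigned int hw_queues)
{
	unsigned int n = min_t(unsigned int, hw_queues,
			       netif_get_num_default_rss_queues());
	int err;

	err = netif_set_real_num_tx_queues(dev, n);
	if (err)
		return err;

	return netif_set_real_num_rx_queues(dev, n);
}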
2186 2186
2187 static inline void __netif_reschedule(struct Qdisc *q) 2187 static inline void __netif_reschedule(struct Qdisc *q)
2188 { 2188 {
2189 struct softnet_data *sd; 2189 struct softnet_data *sd;
2190 unsigned long flags; 2190 unsigned long flags;
2191 2191
2192 local_irq_save(flags); 2192 local_irq_save(flags);
2193 sd = &__get_cpu_var(softnet_data); 2193 sd = &__get_cpu_var(softnet_data);
2194 q->next_sched = NULL; 2194 q->next_sched = NULL;
2195 *sd->output_queue_tailp = q; 2195 *sd->output_queue_tailp = q;
2196 sd->output_queue_tailp = &q->next_sched; 2196 sd->output_queue_tailp = &q->next_sched;
2197 raise_softirq_irqoff(NET_TX_SOFTIRQ); 2197 raise_softirq_irqoff(NET_TX_SOFTIRQ);
2198 local_irq_restore(flags); 2198 local_irq_restore(flags);
2199 } 2199 }
2200 2200
2201 void __netif_schedule(struct Qdisc *q) 2201 void __netif_schedule(struct Qdisc *q)
2202 { 2202 {
2203 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) 2203 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
2204 __netif_reschedule(q); 2204 __netif_reschedule(q);
2205 } 2205 }
2206 EXPORT_SYMBOL(__netif_schedule); 2206 EXPORT_SYMBOL(__netif_schedule);
2207 2207
2208 void dev_kfree_skb_irq(struct sk_buff *skb) 2208 void dev_kfree_skb_irq(struct sk_buff *skb)
2209 { 2209 {
2210 if (atomic_dec_and_test(&skb->users)) { 2210 if (atomic_dec_and_test(&skb->users)) {
2211 struct softnet_data *sd; 2211 struct softnet_data *sd;
2212 unsigned long flags; 2212 unsigned long flags;
2213 2213
2214 local_irq_save(flags); 2214 local_irq_save(flags);
2215 sd = &__get_cpu_var(softnet_data); 2215 sd = &__get_cpu_var(softnet_data);
2216 skb->next = sd->completion_queue; 2216 skb->next = sd->completion_queue;
2217 sd->completion_queue = skb; 2217 sd->completion_queue = skb;
2218 raise_softirq_irqoff(NET_TX_SOFTIRQ); 2218 raise_softirq_irqoff(NET_TX_SOFTIRQ);
2219 local_irq_restore(flags); 2219 local_irq_restore(flags);
2220 } 2220 }
2221 } 2221 }
2222 EXPORT_SYMBOL(dev_kfree_skb_irq); 2222 EXPORT_SYMBOL(dev_kfree_skb_irq);
2223 2223
2224 void dev_kfree_skb_any(struct sk_buff *skb) 2224 void dev_kfree_skb_any(struct sk_buff *skb)
2225 { 2225 {
2226 if (in_irq() || irqs_disabled()) 2226 if (in_irq() || irqs_disabled())
2227 dev_kfree_skb_irq(skb); 2227 dev_kfree_skb_irq(skb);
2228 else 2228 else
2229 dev_kfree_skb(skb); 2229 dev_kfree_skb(skb);
2230 } 2230 }
2231 EXPORT_SYMBOL(dev_kfree_skb_any); 2231 EXPORT_SYMBOL(dev_kfree_skb_any);
2232 2232
2233 2233
2234 /** 2234 /**
2235 * netif_device_detach - mark device as removed 2235 * netif_device_detach - mark device as removed
2236 * @dev: network device 2236 * @dev: network device
2237 * 2237 *
2238 * Mark device as removed from the system and therefore no longer available. 2238 * Mark device as removed from the system and therefore no longer available.
2239 */ 2239 */
2240 void netif_device_detach(struct net_device *dev) 2240 void netif_device_detach(struct net_device *dev)
2241 { 2241 {
2242 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && 2242 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
2243 netif_running(dev)) { 2243 netif_running(dev)) {
2244 netif_tx_stop_all_queues(dev); 2244 netif_tx_stop_all_queues(dev);
2245 } 2245 }
2246 } 2246 }
2247 EXPORT_SYMBOL(netif_device_detach); 2247 EXPORT_SYMBOL(netif_device_detach);
2248 2248
2249 /** 2249 /**
2250 * netif_device_attach - mark device as attached 2250 * netif_device_attach - mark device as attached
2251 * @dev: network device 2251 * @dev: network device
2252 * 2252 *
2253 * Mark device as attached to the system and restart if needed. 2253 * Mark device as attached to the system and restart if needed.
2254 */ 2254 */
2255 void netif_device_attach(struct net_device *dev) 2255 void netif_device_attach(struct net_device *dev)
2256 { 2256 {
2257 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && 2257 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
2258 netif_running(dev)) { 2258 netif_running(dev)) {
2259 netif_tx_wake_all_queues(dev); 2259 netif_tx_wake_all_queues(dev);
2260 __netdev_watchdog_up(dev); 2260 __netdev_watchdog_up(dev);
2261 } 2261 }
2262 } 2262 }
2263 EXPORT_SYMBOL(netif_device_attach); 2263 EXPORT_SYMBOL(netif_device_attach);
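/* Editorial example, not part of this commit: the usual suspend/resume
 * pairing for netif_device_detach()/netif_device_attach(), sketched for a
 * PCI driver that keeps its net_device in drvdata (assumes <linux/pci.h>).
 */
static int example_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct net_device *dev = pci_get_drvdata(pdev);

	netif_device_detach(dev);	/* stop tx queues, clear __LINK_STATE_PRESENT */
	/* ... quiesce DMA, save state, power down ... */
	return 0;
}

static int example_resume(struct pci_dev *pdev)
{
	struct net_device *dev = pci_get_drvdata(pdev);

	/* ... power up, restore state, re-enable DMA ... */
	netif_device_attach(dev);	/* wake queues and watchdog if running */
	return 0;
}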
2264 2264
2265 static void skb_warn_bad_offload(const struct sk_buff *skb) 2265 static void skb_warn_bad_offload(const struct sk_buff *skb)
2266 { 2266 {
2267 static const netdev_features_t null_features = 0; 2267 static const netdev_features_t null_features = 0;
2268 struct net_device *dev = skb->dev; 2268 struct net_device *dev = skb->dev;
2269 const char *driver = ""; 2269 const char *driver = "";
2270 2270
2271 if (dev && dev->dev.parent) 2271 if (dev && dev->dev.parent)
2272 driver = dev_driver_string(dev->dev.parent); 2272 driver = dev_driver_string(dev->dev.parent);
2273 2273
2274 WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d " 2274 WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
2275 "gso_type=%d ip_summed=%d\n", 2275 "gso_type=%d ip_summed=%d\n",
2276 driver, dev ? &dev->features : &null_features, 2276 driver, dev ? &dev->features : &null_features,
2277 skb->sk ? &skb->sk->sk_route_caps : &null_features, 2277 skb->sk ? &skb->sk->sk_route_caps : &null_features,
2278 skb->len, skb->data_len, skb_shinfo(skb)->gso_size, 2278 skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
2279 skb_shinfo(skb)->gso_type, skb->ip_summed); 2279 skb_shinfo(skb)->gso_type, skb->ip_summed);
2280 } 2280 }
2281 2281
2282 /* 2282 /*
2283 * Invalidate hardware checksum when packet is to be mangled, and 2283 * Invalidate hardware checksum when packet is to be mangled, and
2284 * complete checksum manually on outgoing path. 2284 * complete checksum manually on outgoing path.
2285 */ 2285 */
2286 int skb_checksum_help(struct sk_buff *skb) 2286 int skb_checksum_help(struct sk_buff *skb)
2287 { 2287 {
2288 __wsum csum; 2288 __wsum csum;
2289 int ret = 0, offset; 2289 int ret = 0, offset;
2290 2290
2291 if (skb->ip_summed == CHECKSUM_COMPLETE) 2291 if (skb->ip_summed == CHECKSUM_COMPLETE)
2292 goto out_set_summed; 2292 goto out_set_summed;
2293 2293
2294 if (unlikely(skb_shinfo(skb)->gso_size)) { 2294 if (unlikely(skb_shinfo(skb)->gso_size)) {
2295 skb_warn_bad_offload(skb); 2295 skb_warn_bad_offload(skb);
2296 return -EINVAL; 2296 return -EINVAL;
2297 } 2297 }
2298 2298
2299 /* Before computing a checksum, we should make sure no frag could 2299 /* Before computing a checksum, we should make sure no frag could
2300 * be modified by an external entity: the checksum could be wrong. 2300 * be modified by an external entity: the checksum could be wrong.
2301 */ 2301 */
2302 if (skb_has_shared_frag(skb)) { 2302 if (skb_has_shared_frag(skb)) {
2303 ret = __skb_linearize(skb); 2303 ret = __skb_linearize(skb);
2304 if (ret) 2304 if (ret)
2305 goto out; 2305 goto out;
2306 } 2306 }
2307 2307
2308 offset = skb_checksum_start_offset(skb); 2308 offset = skb_checksum_start_offset(skb);
2309 BUG_ON(offset >= skb_headlen(skb)); 2309 BUG_ON(offset >= skb_headlen(skb));
2310 csum = skb_checksum(skb, offset, skb->len - offset, 0); 2310 csum = skb_checksum(skb, offset, skb->len - offset, 0);
2311 2311
2312 offset += skb->csum_offset; 2312 offset += skb->csum_offset;
2313 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb)); 2313 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
2314 2314
2315 if (skb_cloned(skb) && 2315 if (skb_cloned(skb) &&
2316 !skb_clone_writable(skb, offset + sizeof(__sum16))) { 2316 !skb_clone_writable(skb, offset + sizeof(__sum16))) {
2317 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); 2317 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
2318 if (ret) 2318 if (ret)
2319 goto out; 2319 goto out;
2320 } 2320 }
2321 2321
2322 *(__sum16 *)(skb->data + offset) = csum_fold(csum); 2322 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
2323 out_set_summed: 2323 out_set_summed:
2324 skb->ip_summed = CHECKSUM_NONE; 2324 skb->ip_summed = CHECKSUM_NONE;
2325 out: 2325 out:
2326 return ret; 2326 return ret;
2327 } 2327 }
2328 EXPORT_SYMBOL(skb_checksum_help); 2328 EXPORT_SYMBOL(skb_checksum_help);
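/* Editorial example, not part of this commit: the usual transmit-side
 * pattern around skb_checksum_help().  When a CHECKSUM_PARTIAL packet is
 * about to reach a path that cannot offload the checksum, it is resolved in
 * software first; the function name is illustrative.
 */
static int example_finish_csum(struct sk_buff *skb, netdev_features_t features)
{
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    !(features & NETIF_F_ALL_CSUM) &&
	    skb_checksum_help(skb))
		return -EIO;	/* caller should drop the packet */
	return 0;
}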
2329 2329
2330 /* openvswitch calls this on rx path, so we need a different check.
2331 */
2332 static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
2333 {
2334 if (tx_path)
2335 return skb->ip_summed != CHECKSUM_PARTIAL;
2336 else
2337 return skb->ip_summed == CHECKSUM_NONE;
2338 }
2339
2330 /** 2340 /**
2331 * skb_gso_segment - Perform segmentation on skb. 2341 * __skb_gso_segment - Perform segmentation on skb.
2332 * @skb: buffer to segment 2342 * @skb: buffer to segment
2333 * @features: features for the output path (see dev->features) 2343 * @features: features for the output path (see dev->features)
2344 * @tx_path: whether it is called in TX path
2334 * 2345 *
2335 * This function segments the given skb and returns a list of segments. 2346 * This function segments the given skb and returns a list of segments.
2336 * 2347 *
2337 * It may return NULL if the skb requires no segmentation. This is 2348 * It may return NULL if the skb requires no segmentation. This is
2338 * only possible when GSO is used for verifying header integrity. 2349 * only possible when GSO is used for verifying header integrity.
2339 */ 2350 */
2340 struct sk_buff *skb_gso_segment(struct sk_buff *skb, 2351 struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
2341 netdev_features_t features) 2352 netdev_features_t features, bool tx_path)
2342 { 2353 {
2343 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); 2354 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
2344 struct packet_offload *ptype; 2355 struct packet_offload *ptype;
2345 __be16 type = skb->protocol; 2356 __be16 type = skb->protocol;
2346 int vlan_depth = ETH_HLEN; 2357 int vlan_depth = ETH_HLEN;
2347 int err; 2358 int err;
2348 2359
2349 while (type == htons(ETH_P_8021Q)) { 2360 while (type == htons(ETH_P_8021Q)) {
2350 struct vlan_hdr *vh; 2361 struct vlan_hdr *vh;
2351 2362
2352 if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) 2363 if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
2353 return ERR_PTR(-EINVAL); 2364 return ERR_PTR(-EINVAL);
2354 2365
2355 vh = (struct vlan_hdr *)(skb->data + vlan_depth); 2366 vh = (struct vlan_hdr *)(skb->data + vlan_depth);
2356 type = vh->h_vlan_encapsulated_proto; 2367 type = vh->h_vlan_encapsulated_proto;
2357 vlan_depth += VLAN_HLEN; 2368 vlan_depth += VLAN_HLEN;
2358 } 2369 }
2359 2370
2360 skb_reset_mac_header(skb); 2371 skb_reset_mac_header(skb);
2361 skb->mac_len = skb->network_header - skb->mac_header; 2372 skb->mac_len = skb->network_header - skb->mac_header;
2362 __skb_pull(skb, skb->mac_len); 2373 __skb_pull(skb, skb->mac_len);
2363 2374
2364 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { 2375 if (unlikely(skb_needs_check(skb, tx_path))) {
2365 skb_warn_bad_offload(skb); 2376 skb_warn_bad_offload(skb);
2366 2377
2367 if (skb_header_cloned(skb) && 2378 if (skb_header_cloned(skb) &&
2368 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) 2379 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
2369 return ERR_PTR(err); 2380 return ERR_PTR(err);
2370 } 2381 }
2371 2382
2372 rcu_read_lock(); 2383 rcu_read_lock();
2373 list_for_each_entry_rcu(ptype, &offload_base, list) { 2384 list_for_each_entry_rcu(ptype, &offload_base, list) {
2374 if (ptype->type == type && ptype->callbacks.gso_segment) { 2385 if (ptype->type == type && ptype->callbacks.gso_segment) {
2375 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { 2386 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
2376 err = ptype->callbacks.gso_send_check(skb); 2387 err = ptype->callbacks.gso_send_check(skb);
2377 segs = ERR_PTR(err); 2388 segs = ERR_PTR(err);
2378 if (err || skb_gso_ok(skb, features)) 2389 if (err || skb_gso_ok(skb, features))
2379 break; 2390 break;
2380 __skb_push(skb, (skb->data - 2391 __skb_push(skb, (skb->data -
2381 skb_network_header(skb))); 2392 skb_network_header(skb)));
2382 } 2393 }
2383 segs = ptype->callbacks.gso_segment(skb, features); 2394 segs = ptype->callbacks.gso_segment(skb, features);
2384 break; 2395 break;
2385 } 2396 }
2386 } 2397 }
2387 rcu_read_unlock(); 2398 rcu_read_unlock();
2388 2399
2389 __skb_push(skb, skb->data - skb_mac_header(skb)); 2400 __skb_push(skb, skb->data - skb_mac_header(skb));
2390 2401
2391 return segs; 2402 return segs;
2392 } 2403 }
2393 EXPORT_SYMBOL(skb_gso_segment); 2404 EXPORT_SYMBOL(__skb_gso_segment);
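/* Editorial note, not part of this commit: dev_gso_segment() further down
 * still calls skb_gso_segment(), so the include/linux/netdevice.h part of
 * this patch presumably keeps that name as a tx-path wrapper along these
 * lines, while an rx-path caller such as openvswitch would pass
 * tx_path = false so that receive-side ->ip_summed values (for example
 * CHECKSUM_UNNECESSARY) no longer trigger the skb_warn_bad_offload()
 * warning guarded by skb_needs_check() above.
 */
static inline struct sk_buff *skb_gso_segment(struct sk_buff *skb,
					      netdev_features_t features)
{
	return __skb_gso_segment(skb, features, true);
}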
2394 2405
2395 /* Take action when hardware reception checksum errors are detected. */ 2406 /* Take action when hardware reception checksum errors are detected. */
2396 #ifdef CONFIG_BUG 2407 #ifdef CONFIG_BUG
2397 void netdev_rx_csum_fault(struct net_device *dev) 2408 void netdev_rx_csum_fault(struct net_device *dev)
2398 { 2409 {
2399 if (net_ratelimit()) { 2410 if (net_ratelimit()) {
2400 pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>"); 2411 pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
2401 dump_stack(); 2412 dump_stack();
2402 } 2413 }
2403 } 2414 }
2404 EXPORT_SYMBOL(netdev_rx_csum_fault); 2415 EXPORT_SYMBOL(netdev_rx_csum_fault);
2405 #endif 2416 #endif
2406 2417
2407 /* Actually, we should eliminate this check as soon as we know that: 2418 /* Actually, we should eliminate this check as soon as we know that:
2408 * 1. IOMMU is present and allows us to map all the memory. 2419 * 1. IOMMU is present and allows us to map all the memory.
2409 * 2. No high memory really exists on this machine. 2420 * 2. No high memory really exists on this machine.
2410 */ 2421 */
2411 2422
2412 static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) 2423 static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
2413 { 2424 {
2414 #ifdef CONFIG_HIGHMEM 2425 #ifdef CONFIG_HIGHMEM
2415 int i; 2426 int i;
2416 if (!(dev->features & NETIF_F_HIGHDMA)) { 2427 if (!(dev->features & NETIF_F_HIGHDMA)) {
2417 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 2428 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2418 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 2429 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2419 if (PageHighMem(skb_frag_page(frag))) 2430 if (PageHighMem(skb_frag_page(frag)))
2420 return 1; 2431 return 1;
2421 } 2432 }
2422 } 2433 }
2423 2434
2424 if (PCI_DMA_BUS_IS_PHYS) { 2435 if (PCI_DMA_BUS_IS_PHYS) {
2425 struct device *pdev = dev->dev.parent; 2436 struct device *pdev = dev->dev.parent;
2426 2437
2427 if (!pdev) 2438 if (!pdev)
2428 return 0; 2439 return 0;
2429 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 2440 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2430 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 2441 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2431 dma_addr_t addr = page_to_phys(skb_frag_page(frag)); 2442 dma_addr_t addr = page_to_phys(skb_frag_page(frag));
2432 if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask) 2443 if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
2433 return 1; 2444 return 1;
2434 } 2445 }
2435 } 2446 }
2436 #endif 2447 #endif
2437 return 0; 2448 return 0;
2438 } 2449 }
2439 2450
2440 struct dev_gso_cb { 2451 struct dev_gso_cb {
2441 void (*destructor)(struct sk_buff *skb); 2452 void (*destructor)(struct sk_buff *skb);
2442 }; 2453 };
2443 2454
2444 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb) 2455 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
2445 2456
2446 static void dev_gso_skb_destructor(struct sk_buff *skb) 2457 static void dev_gso_skb_destructor(struct sk_buff *skb)
2447 { 2458 {
2448 struct dev_gso_cb *cb; 2459 struct dev_gso_cb *cb;
2449 2460
2450 do { 2461 do {
2451 struct sk_buff *nskb = skb->next; 2462 struct sk_buff *nskb = skb->next;
2452 2463
2453 skb->next = nskb->next; 2464 skb->next = nskb->next;
2454 nskb->next = NULL; 2465 nskb->next = NULL;
2455 kfree_skb(nskb); 2466 kfree_skb(nskb);
2456 } while (skb->next); 2467 } while (skb->next);
2457 2468
2458 cb = DEV_GSO_CB(skb); 2469 cb = DEV_GSO_CB(skb);
2459 if (cb->destructor) 2470 if (cb->destructor)
2460 cb->destructor(skb); 2471 cb->destructor(skb);
2461 } 2472 }
2462 2473
2463 /** 2474 /**
2464 * dev_gso_segment - Perform emulated hardware segmentation on skb. 2475 * dev_gso_segment - Perform emulated hardware segmentation on skb.
2465 * @skb: buffer to segment 2476 * @skb: buffer to segment
2466 * @features: device features as applicable to this skb 2477 * @features: device features as applicable to this skb
2467 * 2478 *
2468 * This function segments the given skb and stores the list of segments 2479 * This function segments the given skb and stores the list of segments
2469 * in skb->next. 2480 * in skb->next.
2470 */ 2481 */
2471 static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) 2482 static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
2472 { 2483 {
2473 struct sk_buff *segs; 2484 struct sk_buff *segs;
2474 2485
2475 segs = skb_gso_segment(skb, features); 2486 segs = skb_gso_segment(skb, features);
2476 2487
2477 /* Verifying header integrity only. */ 2488 /* Verifying header integrity only. */
2478 if (!segs) 2489 if (!segs)
2479 return 0; 2490 return 0;
2480 2491
2481 if (IS_ERR(segs)) 2492 if (IS_ERR(segs))
2482 return PTR_ERR(segs); 2493 return PTR_ERR(segs);
2483 2494
2484 skb->next = segs; 2495 skb->next = segs;
2485 DEV_GSO_CB(skb)->destructor = skb->destructor; 2496 DEV_GSO_CB(skb)->destructor = skb->destructor;
2486 skb->destructor = dev_gso_skb_destructor; 2497 skb->destructor = dev_gso_skb_destructor;
2487 2498
2488 return 0; 2499 return 0;
2489 } 2500 }
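
dev_gso_segment() above is the tx-path consumer of skb_gso_segment(); since this patch is motivated by an rx-path consumer (openvswitch queueing packets to user space), a hedged sketch of the generic caller pattern may help. The consume_one() callback and the ownership convention (this helper always consumes the original skb) are illustrative assumptions, not code from this commit:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

static int demo_segment_and_consume(struct sk_buff *skb,
                                    netdev_features_t features,
                                    int (*consume_one)(struct sk_buff *seg))
{
        struct sk_buff *segs, *seg;
        int err;

        segs = skb_gso_segment(skb, features);
        if (IS_ERR(segs)) {
                err = PTR_ERR(segs);
                kfree_skb(skb);
                return err;
        }
        if (!segs)                      /* headers verified, nothing to split */
                return consume_one(skb);

        consume_skb(skb);               /* the segments carry the data from here on */
        err = 0;
        while (segs) {
                seg = segs;
                segs = segs->next;
                seg->next = NULL;
                if (!err)
                        err = consume_one(seg); /* callback takes ownership */
                else
                        kfree_skb(seg);
        }
        return err;
}
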
2490 2501
2491 static bool can_checksum_protocol(netdev_features_t features, __be16 protocol) 2502 static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)
2492 { 2503 {
2493 return ((features & NETIF_F_GEN_CSUM) || 2504 return ((features & NETIF_F_GEN_CSUM) ||
2494 ((features & NETIF_F_V4_CSUM) && 2505 ((features & NETIF_F_V4_CSUM) &&
2495 protocol == htons(ETH_P_IP)) || 2506 protocol == htons(ETH_P_IP)) ||
2496 ((features & NETIF_F_V6_CSUM) && 2507 ((features & NETIF_F_V6_CSUM) &&
2497 protocol == htons(ETH_P_IPV6)) || 2508 protocol == htons(ETH_P_IPV6)) ||
2498 ((features & NETIF_F_FCOE_CRC) && 2509 ((features & NETIF_F_FCOE_CRC) &&
2499 protocol == htons(ETH_P_FCOE))); 2510 protocol == htons(ETH_P_FCOE)));
2500 } 2511 }
2501 2512
2502 static netdev_features_t harmonize_features(struct sk_buff *skb, 2513 static netdev_features_t harmonize_features(struct sk_buff *skb,
2503 __be16 protocol, netdev_features_t features) 2514 __be16 protocol, netdev_features_t features)
2504 { 2515 {
2505 if (skb->ip_summed != CHECKSUM_NONE && 2516 if (skb->ip_summed != CHECKSUM_NONE &&
2506 !can_checksum_protocol(features, protocol)) { 2517 !can_checksum_protocol(features, protocol)) {
2507 features &= ~NETIF_F_ALL_CSUM; 2518 features &= ~NETIF_F_ALL_CSUM;
2508 features &= ~NETIF_F_SG; 2519 features &= ~NETIF_F_SG;
2509 } else if (illegal_highdma(skb->dev, skb)) { 2520 } else if (illegal_highdma(skb->dev, skb)) {
2510 features &= ~NETIF_F_SG; 2521 features &= ~NETIF_F_SG;
2511 } 2522 }
2512 2523
2513 return features; 2524 return features;
2514 } 2525 }
2515 2526
2516 netdev_features_t netif_skb_features(struct sk_buff *skb) 2527 netdev_features_t netif_skb_features(struct sk_buff *skb)
2517 { 2528 {
2518 __be16 protocol = skb->protocol; 2529 __be16 protocol = skb->protocol;
2519 netdev_features_t features = skb->dev->features; 2530 netdev_features_t features = skb->dev->features;
2520 2531
2521 if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) 2532 if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
2522 features &= ~NETIF_F_GSO_MASK; 2533 features &= ~NETIF_F_GSO_MASK;
2523 2534
2524 if (protocol == htons(ETH_P_8021Q)) { 2535 if (protocol == htons(ETH_P_8021Q)) {
2525 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; 2536 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
2526 protocol = veh->h_vlan_encapsulated_proto; 2537 protocol = veh->h_vlan_encapsulated_proto;
2527 } else if (!vlan_tx_tag_present(skb)) { 2538 } else if (!vlan_tx_tag_present(skb)) {
2528 return harmonize_features(skb, protocol, features); 2539 return harmonize_features(skb, protocol, features);
2529 } 2540 }
2530 2541
2531 features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX); 2542 features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX);
2532 2543
2533 if (protocol != htons(ETH_P_8021Q)) { 2544 if (protocol != htons(ETH_P_8021Q)) {
2534 return harmonize_features(skb, protocol, features); 2545 return harmonize_features(skb, protocol, features);
2535 } else { 2546 } else {
2536 features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | 2547 features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
2537 NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX; 2548 NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX;
2538 return harmonize_features(skb, protocol, features); 2549 return harmonize_features(skb, protocol, features);
2539 } 2550 }
2540 } 2551 }
2541 EXPORT_SYMBOL(netif_skb_features); 2552 EXPORT_SYMBOL(netif_skb_features);
2542 2553
2543 /* 2554 /*
2544 * Returns true if either: 2555 * Returns true if either:
2545 * 1. skb has frag_list and the device doesn't support FRAGLIST, or 2556 * 1. skb has frag_list and the device doesn't support FRAGLIST, or
2546 * 2. skb is fragmented and the device does not support SG. 2557 * 2. skb is fragmented and the device does not support SG.
2547 */ 2558 */
2548 static inline int skb_needs_linearize(struct sk_buff *skb, 2559 static inline int skb_needs_linearize(struct sk_buff *skb,
2549 int features) 2560 int features)
2550 { 2561 {
2551 return skb_is_nonlinear(skb) && 2562 return skb_is_nonlinear(skb) &&
2552 ((skb_has_frag_list(skb) && 2563 ((skb_has_frag_list(skb) &&
2553 !(features & NETIF_F_FRAGLIST)) || 2564 !(features & NETIF_F_FRAGLIST)) ||
2554 (skb_shinfo(skb)->nr_frags && 2565 (skb_shinfo(skb)->nr_frags &&
2555 !(features & NETIF_F_SG))); 2566 !(features & NETIF_F_SG)));
2556 } 2567 }
2557 2568
2558 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, 2569 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2559 struct netdev_queue *txq) 2570 struct netdev_queue *txq)
2560 { 2571 {
2561 const struct net_device_ops *ops = dev->netdev_ops; 2572 const struct net_device_ops *ops = dev->netdev_ops;
2562 int rc = NETDEV_TX_OK; 2573 int rc = NETDEV_TX_OK;
2563 unsigned int skb_len; 2574 unsigned int skb_len;
2564 2575
2565 if (likely(!skb->next)) { 2576 if (likely(!skb->next)) {
2566 netdev_features_t features; 2577 netdev_features_t features;
2567 2578
2568 /* 2579 /*
2569 * If device doesn't need skb->dst, release it right now while 2580 * If device doesn't need skb->dst, release it right now while
2570 * its hot in this cpu cache 2581 * its hot in this cpu cache
2571 */ 2582 */
2572 if (dev->priv_flags & IFF_XMIT_DST_RELEASE) 2583 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
2573 skb_dst_drop(skb); 2584 skb_dst_drop(skb);
2574 2585
2575 features = netif_skb_features(skb); 2586 features = netif_skb_features(skb);
2576 2587
2577 if (vlan_tx_tag_present(skb) && 2588 if (vlan_tx_tag_present(skb) &&
2578 !(features & NETIF_F_HW_VLAN_TX)) { 2589 !(features & NETIF_F_HW_VLAN_TX)) {
2579 skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); 2590 skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
2580 if (unlikely(!skb)) 2591 if (unlikely(!skb))
2581 goto out; 2592 goto out;
2582 2593
2583 skb->vlan_tci = 0; 2594 skb->vlan_tci = 0;
2584 } 2595 }
2585 2596
2586 /* If encapsulation offload request, verify we are testing 2597 /* If encapsulation offload request, verify we are testing
2587 * hardware encapsulation features instead of standard 2598 * hardware encapsulation features instead of standard
2588 * features for the netdev 2599 * features for the netdev
2589 */ 2600 */
2590 if (skb->encapsulation) 2601 if (skb->encapsulation)
2591 features &= dev->hw_enc_features; 2602 features &= dev->hw_enc_features;
2592 2603
2593 if (netif_needs_gso(skb, features)) { 2604 if (netif_needs_gso(skb, features)) {
2594 if (unlikely(dev_gso_segment(skb, features))) 2605 if (unlikely(dev_gso_segment(skb, features)))
2595 goto out_kfree_skb; 2606 goto out_kfree_skb;
2596 if (skb->next) 2607 if (skb->next)
2597 goto gso; 2608 goto gso;
2598 } else { 2609 } else {
2599 if (skb_needs_linearize(skb, features) && 2610 if (skb_needs_linearize(skb, features) &&
2600 __skb_linearize(skb)) 2611 __skb_linearize(skb))
2601 goto out_kfree_skb; 2612 goto out_kfree_skb;
2602 2613
2603 /* If packet is not checksummed and device does not 2614 /* If packet is not checksummed and device does not
2604 * support checksumming for this protocol, complete 2615 * support checksumming for this protocol, complete
2605 * checksumming here. 2616 * checksumming here.
2606 */ 2617 */
2607 if (skb->ip_summed == CHECKSUM_PARTIAL) { 2618 if (skb->ip_summed == CHECKSUM_PARTIAL) {
2608 if (skb->encapsulation) 2619 if (skb->encapsulation)
2609 skb_set_inner_transport_header(skb, 2620 skb_set_inner_transport_header(skb,
2610 skb_checksum_start_offset(skb)); 2621 skb_checksum_start_offset(skb));
2611 else 2622 else
2612 skb_set_transport_header(skb, 2623 skb_set_transport_header(skb,
2613 skb_checksum_start_offset(skb)); 2624 skb_checksum_start_offset(skb));
2614 if (!(features & NETIF_F_ALL_CSUM) && 2625 if (!(features & NETIF_F_ALL_CSUM) &&
2615 skb_checksum_help(skb)) 2626 skb_checksum_help(skb))
2616 goto out_kfree_skb; 2627 goto out_kfree_skb;
2617 } 2628 }
2618 } 2629 }
2619 2630
2620 if (!list_empty(&ptype_all)) 2631 if (!list_empty(&ptype_all))
2621 dev_queue_xmit_nit(skb, dev); 2632 dev_queue_xmit_nit(skb, dev);
2622 2633
2623 skb_len = skb->len; 2634 skb_len = skb->len;
2624 rc = ops->ndo_start_xmit(skb, dev); 2635 rc = ops->ndo_start_xmit(skb, dev);
2625 trace_net_dev_xmit(skb, rc, dev, skb_len); 2636 trace_net_dev_xmit(skb, rc, dev, skb_len);
2626 if (rc == NETDEV_TX_OK) 2637 if (rc == NETDEV_TX_OK)
2627 txq_trans_update(txq); 2638 txq_trans_update(txq);
2628 return rc; 2639 return rc;
2629 } 2640 }
2630 2641
2631 gso: 2642 gso:
2632 do { 2643 do {
2633 struct sk_buff *nskb = skb->next; 2644 struct sk_buff *nskb = skb->next;
2634 2645
2635 skb->next = nskb->next; 2646 skb->next = nskb->next;
2636 nskb->next = NULL; 2647 nskb->next = NULL;
2637 2648
2638 /* 2649 /*
2639 * If device doesn't need nskb->dst, release it right now while 2650 * If device doesn't need nskb->dst, release it right now while
2640 * it's hot in this cpu cache 2651 * it's hot in this cpu cache
2641 */ 2652 */
2642 if (dev->priv_flags & IFF_XMIT_DST_RELEASE) 2653 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
2643 skb_dst_drop(nskb); 2654 skb_dst_drop(nskb);
2644 2655
2645 if (!list_empty(&ptype_all)) 2656 if (!list_empty(&ptype_all))
2646 dev_queue_xmit_nit(nskb, dev); 2657 dev_queue_xmit_nit(nskb, dev);
2647 2658
2648 skb_len = nskb->len; 2659 skb_len = nskb->len;
2649 rc = ops->ndo_start_xmit(nskb, dev); 2660 rc = ops->ndo_start_xmit(nskb, dev);
2650 trace_net_dev_xmit(nskb, rc, dev, skb_len); 2661 trace_net_dev_xmit(nskb, rc, dev, skb_len);
2651 if (unlikely(rc != NETDEV_TX_OK)) { 2662 if (unlikely(rc != NETDEV_TX_OK)) {
2652 if (rc & ~NETDEV_TX_MASK) 2663 if (rc & ~NETDEV_TX_MASK)
2653 goto out_kfree_gso_skb; 2664 goto out_kfree_gso_skb;
2654 nskb->next = skb->next; 2665 nskb->next = skb->next;
2655 skb->next = nskb; 2666 skb->next = nskb;
2656 return rc; 2667 return rc;
2657 } 2668 }
2658 txq_trans_update(txq); 2669 txq_trans_update(txq);
2659 if (unlikely(netif_xmit_stopped(txq) && skb->next)) 2670 if (unlikely(netif_xmit_stopped(txq) && skb->next))
2660 return NETDEV_TX_BUSY; 2671 return NETDEV_TX_BUSY;
2661 } while (skb->next); 2672 } while (skb->next);
2662 2673
2663 out_kfree_gso_skb: 2674 out_kfree_gso_skb:
2664 if (likely(skb->next == NULL)) 2675 if (likely(skb->next == NULL))
2665 skb->destructor = DEV_GSO_CB(skb)->destructor; 2676 skb->destructor = DEV_GSO_CB(skb)->destructor;
2666 out_kfree_skb: 2677 out_kfree_skb:
2667 kfree_skb(skb); 2678 kfree_skb(skb);
2668 out: 2679 out:
2669 return rc; 2680 return rc;
2670 } 2681 }
2671 2682
2672 static void qdisc_pkt_len_init(struct sk_buff *skb) 2683 static void qdisc_pkt_len_init(struct sk_buff *skb)
2673 { 2684 {
2674 const struct skb_shared_info *shinfo = skb_shinfo(skb); 2685 const struct skb_shared_info *shinfo = skb_shinfo(skb);
2675 2686
2676 qdisc_skb_cb(skb)->pkt_len = skb->len; 2687 qdisc_skb_cb(skb)->pkt_len = skb->len;
2677 2688
2678 /* To get a more precise estimate of the bytes sent on the wire, 2689 /* To get a more precise estimate of the bytes sent on the wire,
2679 * we add to pkt_len the header size of all segments 2690 * we add to pkt_len the header size of all segments
2680 */ 2691 */
2681 if (shinfo->gso_size) { 2692 if (shinfo->gso_size) {
2682 unsigned int hdr_len; 2693 unsigned int hdr_len;
2683 2694
2684 /* mac layer + network layer */ 2695 /* mac layer + network layer */
2685 hdr_len = skb_transport_header(skb) - skb_mac_header(skb); 2696 hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
2686 2697
2687 /* + transport layer */ 2698 /* + transport layer */
2688 if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) 2699 if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
2689 hdr_len += tcp_hdrlen(skb); 2700 hdr_len += tcp_hdrlen(skb);
2690 else 2701 else
2691 hdr_len += sizeof(struct udphdr); 2702 hdr_len += sizeof(struct udphdr);
2692 qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len; 2703 qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len;
2693 } 2704 }
2694 } 2705 }
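
A hedged worked example of the adjustment above (the numbers are illustrative, not taken from the patch): a TCP GSO skb carrying 45 segments of gso_size 1448 with 14 + 20 + 20 = 54 bytes of Ethernet, IPv4 and TCP headers has skb->len = 54 + 45 * 1448 = 65214, so qdisc_pkt_len_init() records pkt_len = 65214 + (45 - 1) * 54 = 67590, which matches the 45 * (1448 + 54) bytes that will actually hit the wire after segmentation.
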
2695 2706
2696 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, 2707 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2697 struct net_device *dev, 2708 struct net_device *dev,
2698 struct netdev_queue *txq) 2709 struct netdev_queue *txq)
2699 { 2710 {
2700 spinlock_t *root_lock = qdisc_lock(q); 2711 spinlock_t *root_lock = qdisc_lock(q);
2701 bool contended; 2712 bool contended;
2702 int rc; 2713 int rc;
2703 2714
2704 qdisc_pkt_len_init(skb); 2715 qdisc_pkt_len_init(skb);
2705 qdisc_calculate_pkt_len(skb, q); 2716 qdisc_calculate_pkt_len(skb, q);
2706 /* 2717 /*
2707 * Heuristic to force contended enqueues to serialize on a 2718 * Heuristic to force contended enqueues to serialize on a
2708 * separate lock before trying to get qdisc main lock. 2719 * separate lock before trying to get qdisc main lock.
2709 * This permits __QDISC_STATE_RUNNING owner to get the lock more often 2720 * This permits __QDISC_STATE_RUNNING owner to get the lock more often
2710 * and dequeue packets faster. 2721 * and dequeue packets faster.
2711 */ 2722 */
2712 contended = qdisc_is_running(q); 2723 contended = qdisc_is_running(q);
2713 if (unlikely(contended)) 2724 if (unlikely(contended))
2714 spin_lock(&q->busylock); 2725 spin_lock(&q->busylock);
2715 2726
2716 spin_lock(root_lock); 2727 spin_lock(root_lock);
2717 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { 2728 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
2718 kfree_skb(skb); 2729 kfree_skb(skb);
2719 rc = NET_XMIT_DROP; 2730 rc = NET_XMIT_DROP;
2720 } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) && 2731 } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
2721 qdisc_run_begin(q)) { 2732 qdisc_run_begin(q)) {
2722 /* 2733 /*
2723 * This is a work-conserving queue; there are no old skbs 2734 * This is a work-conserving queue; there are no old skbs
2724 * waiting to be sent out; and the qdisc is not running - 2735 * waiting to be sent out; and the qdisc is not running -
2725 * xmit the skb directly. 2736 * xmit the skb directly.
2726 */ 2737 */
2727 if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) 2738 if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
2728 skb_dst_force(skb); 2739 skb_dst_force(skb);
2729 2740
2730 qdisc_bstats_update(q, skb); 2741 qdisc_bstats_update(q, skb);
2731 2742
2732 if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { 2743 if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
2733 if (unlikely(contended)) { 2744 if (unlikely(contended)) {
2734 spin_unlock(&q->busylock); 2745 spin_unlock(&q->busylock);
2735 contended = false; 2746 contended = false;
2736 } 2747 }
2737 __qdisc_run(q); 2748 __qdisc_run(q);
2738 } else 2749 } else
2739 qdisc_run_end(q); 2750 qdisc_run_end(q);
2740 2751
2741 rc = NET_XMIT_SUCCESS; 2752 rc = NET_XMIT_SUCCESS;
2742 } else { 2753 } else {
2743 skb_dst_force(skb); 2754 skb_dst_force(skb);
2744 rc = q->enqueue(skb, q) & NET_XMIT_MASK; 2755 rc = q->enqueue(skb, q) & NET_XMIT_MASK;
2745 if (qdisc_run_begin(q)) { 2756 if (qdisc_run_begin(q)) {
2746 if (unlikely(contended)) { 2757 if (unlikely(contended)) {
2747 spin_unlock(&q->busylock); 2758 spin_unlock(&q->busylock);
2748 contended = false; 2759 contended = false;
2749 } 2760 }
2750 __qdisc_run(q); 2761 __qdisc_run(q);
2751 } 2762 }
2752 } 2763 }
2753 spin_unlock(root_lock); 2764 spin_unlock(root_lock);
2754 if (unlikely(contended)) 2765 if (unlikely(contended))
2755 spin_unlock(&q->busylock); 2766 spin_unlock(&q->busylock);
2756 return rc; 2767 return rc;
2757 } 2768 }
2758 2769
2759 #if IS_ENABLED(CONFIG_NETPRIO_CGROUP) 2770 #if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
2760 static void skb_update_prio(struct sk_buff *skb) 2771 static void skb_update_prio(struct sk_buff *skb)
2761 { 2772 {
2762 struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); 2773 struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
2763 2774
2764 if (!skb->priority && skb->sk && map) { 2775 if (!skb->priority && skb->sk && map) {
2765 unsigned int prioidx = skb->sk->sk_cgrp_prioidx; 2776 unsigned int prioidx = skb->sk->sk_cgrp_prioidx;
2766 2777
2767 if (prioidx < map->priomap_len) 2778 if (prioidx < map->priomap_len)
2768 skb->priority = map->priomap[prioidx]; 2779 skb->priority = map->priomap[prioidx];
2769 } 2780 }
2770 } 2781 }
2771 #else 2782 #else
2772 #define skb_update_prio(skb) 2783 #define skb_update_prio(skb)
2773 #endif 2784 #endif
2774 2785
2775 static DEFINE_PER_CPU(int, xmit_recursion); 2786 static DEFINE_PER_CPU(int, xmit_recursion);
2776 #define RECURSION_LIMIT 10 2787 #define RECURSION_LIMIT 10
2777 2788
2778 /** 2789 /**
2779 * dev_loopback_xmit - loop back @skb 2790 * dev_loopback_xmit - loop back @skb
2780 * @skb: buffer to transmit 2791 * @skb: buffer to transmit
2781 */ 2792 */
2782 int dev_loopback_xmit(struct sk_buff *skb) 2793 int dev_loopback_xmit(struct sk_buff *skb)
2783 { 2794 {
2784 skb_reset_mac_header(skb); 2795 skb_reset_mac_header(skb);
2785 __skb_pull(skb, skb_network_offset(skb)); 2796 __skb_pull(skb, skb_network_offset(skb));
2786 skb->pkt_type = PACKET_LOOPBACK; 2797 skb->pkt_type = PACKET_LOOPBACK;
2787 skb->ip_summed = CHECKSUM_UNNECESSARY; 2798 skb->ip_summed = CHECKSUM_UNNECESSARY;
2788 WARN_ON(!skb_dst(skb)); 2799 WARN_ON(!skb_dst(skb));
2789 skb_dst_force(skb); 2800 skb_dst_force(skb);
2790 netif_rx_ni(skb); 2801 netif_rx_ni(skb);
2791 return 0; 2802 return 0;
2792 } 2803 }
2793 EXPORT_SYMBOL(dev_loopback_xmit); 2804 EXPORT_SYMBOL(dev_loopback_xmit);
2794 2805
2795 /** 2806 /**
2796 * dev_queue_xmit - transmit a buffer 2807 * dev_queue_xmit - transmit a buffer
2797 * @skb: buffer to transmit 2808 * @skb: buffer to transmit
2798 * 2809 *
2799 * Queue a buffer for transmission to a network device. The caller must 2810 * Queue a buffer for transmission to a network device. The caller must
2800 * have set the device and priority and built the buffer before calling 2811 * have set the device and priority and built the buffer before calling
2801 * this function. The function can be called from an interrupt. 2812 * this function. The function can be called from an interrupt.
2802 * 2813 *
2803 * A negative errno code is returned on a failure. A success does not 2814 * A negative errno code is returned on a failure. A success does not
2804 * guarantee the frame will be transmitted as it may be dropped due 2815 * guarantee the frame will be transmitted as it may be dropped due
2805 * to congestion or traffic shaping. 2816 * to congestion or traffic shaping.
2806 * 2817 *
2807 * ----------------------------------------------------------------------------------- 2818 * -----------------------------------------------------------------------------------
2808 * I notice this method can also return errors from the queue disciplines, 2819 * I notice this method can also return errors from the queue disciplines,
2809 * including NET_XMIT_DROP, which is a positive value. So, errors can also 2820 * including NET_XMIT_DROP, which is a positive value. So, errors can also
2810 * be positive. 2821 * be positive.
2811 * 2822 *
2812 * Regardless of the return value, the skb is consumed, so it is currently 2823 * Regardless of the return value, the skb is consumed, so it is currently
2813 * difficult to retry a send to this method. (You can bump the ref count 2824 * difficult to retry a send to this method. (You can bump the ref count
2814 * before sending to hold a reference for retry if you are careful.) 2825 * before sending to hold a reference for retry if you are careful.)
2815 * 2826 *
2816 * When calling this method, interrupts MUST be enabled. This is because 2827 * When calling this method, interrupts MUST be enabled. This is because
2817 * the BH enable code must have IRQs enabled so that it will not deadlock. 2828 * the BH enable code must have IRQs enabled so that it will not deadlock.
2818 * --BLG 2829 * --BLG
2819 */ 2830 */
2820 int dev_queue_xmit(struct sk_buff *skb) 2831 int dev_queue_xmit(struct sk_buff *skb)
2821 { 2832 {
2822 struct net_device *dev = skb->dev; 2833 struct net_device *dev = skb->dev;
2823 struct netdev_queue *txq; 2834 struct netdev_queue *txq;
2824 struct Qdisc *q; 2835 struct Qdisc *q;
2825 int rc = -ENOMEM; 2836 int rc = -ENOMEM;
2826 2837
2827 /* Disable soft irqs for various locks below. Also 2838 /* Disable soft irqs for various locks below. Also
2828 * stops preemption for RCU. 2839 * stops preemption for RCU.
2829 */ 2840 */
2830 rcu_read_lock_bh(); 2841 rcu_read_lock_bh();
2831 2842
2832 skb_update_prio(skb); 2843 skb_update_prio(skb);
2833 2844
2834 txq = netdev_pick_tx(dev, skb); 2845 txq = netdev_pick_tx(dev, skb);
2835 q = rcu_dereference_bh(txq->qdisc); 2846 q = rcu_dereference_bh(txq->qdisc);
2836 2847
2837 #ifdef CONFIG_NET_CLS_ACT 2848 #ifdef CONFIG_NET_CLS_ACT
2838 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); 2849 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
2839 #endif 2850 #endif
2840 trace_net_dev_queue(skb); 2851 trace_net_dev_queue(skb);
2841 if (q->enqueue) { 2852 if (q->enqueue) {
2842 rc = __dev_xmit_skb(skb, q, dev, txq); 2853 rc = __dev_xmit_skb(skb, q, dev, txq);
2843 goto out; 2854 goto out;
2844 } 2855 }
2845 2856
2846 /* The device has no queue. Common case for software devices: 2857 /* The device has no queue. Common case for software devices:
2847 loopback, all the sorts of tunnels... 2858 loopback, all the sorts of tunnels...
2848 2859
2849 Really, it is unlikely that netif_tx_lock protection is necessary 2860 Really, it is unlikely that netif_tx_lock protection is necessary
2850 here. (f.e. loopback and IP tunnels are clean ignoring statistics 2861 here. (f.e. loopback and IP tunnels are clean ignoring statistics
2851 counters.) 2862 counters.)
2852 However, it is possible that they rely on the protection 2863 However, it is possible that they rely on the protection
2853 we provide here. 2864 we provide here.
2854 2865
2855 Check this and take the lock. It is not prone to deadlocks. 2866 Check this and take the lock. It is not prone to deadlocks.
2856 Either way, the noqueue qdisc case is even simpler 8) 2867 Either way, the noqueue qdisc case is even simpler 8)
2857 */ 2868 */
2858 if (dev->flags & IFF_UP) { 2869 if (dev->flags & IFF_UP) {
2859 int cpu = smp_processor_id(); /* ok because BHs are off */ 2870 int cpu = smp_processor_id(); /* ok because BHs are off */
2860 2871
2861 if (txq->xmit_lock_owner != cpu) { 2872 if (txq->xmit_lock_owner != cpu) {
2862 2873
2863 if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT) 2874 if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
2864 goto recursion_alert; 2875 goto recursion_alert;
2865 2876
2866 HARD_TX_LOCK(dev, txq, cpu); 2877 HARD_TX_LOCK(dev, txq, cpu);
2867 2878
2868 if (!netif_xmit_stopped(txq)) { 2879 if (!netif_xmit_stopped(txq)) {
2869 __this_cpu_inc(xmit_recursion); 2880 __this_cpu_inc(xmit_recursion);
2870 rc = dev_hard_start_xmit(skb, dev, txq); 2881 rc = dev_hard_start_xmit(skb, dev, txq);
2871 __this_cpu_dec(xmit_recursion); 2882 __this_cpu_dec(xmit_recursion);
2872 if (dev_xmit_complete(rc)) { 2883 if (dev_xmit_complete(rc)) {
2873 HARD_TX_UNLOCK(dev, txq); 2884 HARD_TX_UNLOCK(dev, txq);
2874 goto out; 2885 goto out;
2875 } 2886 }
2876 } 2887 }
2877 HARD_TX_UNLOCK(dev, txq); 2888 HARD_TX_UNLOCK(dev, txq);
2878 net_crit_ratelimited("Virtual device %s asks to queue packet!\n", 2889 net_crit_ratelimited("Virtual device %s asks to queue packet!\n",
2879 dev->name); 2890 dev->name);
2880 } else { 2891 } else {
2881 /* Recursion is detected! It is possible, 2892 /* Recursion is detected! It is possible,
2882 * unfortunately 2893 * unfortunately
2883 */ 2894 */
2884 recursion_alert: 2895 recursion_alert:
2885 net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n", 2896 net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
2886 dev->name); 2897 dev->name);
2887 } 2898 }
2888 } 2899 }
2889 2900
2890 rc = -ENETDOWN; 2901 rc = -ENETDOWN;
2891 rcu_read_unlock_bh(); 2902 rcu_read_unlock_bh();
2892 2903
2893 kfree_skb(skb); 2904 kfree_skb(skb);
2894 return rc; 2905 return rc;
2895 out: 2906 out:
2896 rcu_read_unlock_bh(); 2907 rcu_read_unlock_bh();
2897 return rc; 2908 return rc;
2898 } 2909 }
2899 EXPORT_SYMBOL(dev_queue_xmit); 2910 EXPORT_SYMBOL(dev_queue_xmit);
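
The kerneldoc above points out that the skb is consumed whatever dev_queue_xmit() returns, and that a caller who wants to retry can bump the reference count first. A minimal sketch of that pattern, assuming one retry is acceptable and glossing over whether a retry is actually sensible for the specific failure (this helper is illustrative, not part of the patch):

#include <linux/netdevice.h>
#include <linux/skbuff.h>

static int demo_xmit_hold_for_retry(struct sk_buff *skb)
{
        int rc;

        skb_get(skb);                   /* extra reference: the skb survives a failed attempt */
        rc = dev_queue_xmit(skb);       /* consumes one reference whatever it returns */
        if (rc == NET_XMIT_SUCCESS || rc == NET_XMIT_CN) {
                kfree_skb(skb);         /* queued (possibly with congestion); drop our extra ref */
                return rc;
        }

        /* Dropped or failed with an errno; we still own one reference,
         * so a single retry is possible. This second call consumes it. */
        return dev_queue_xmit(skb);
}
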
2900 2911
2901 2912
2902 /*======================================================================= 2913 /*=======================================================================
2903 Receiver routines 2914 Receiver routines
2904 =======================================================================*/ 2915 =======================================================================*/
2905 2916
2906 int netdev_max_backlog __read_mostly = 1000; 2917 int netdev_max_backlog __read_mostly = 1000;
2907 EXPORT_SYMBOL(netdev_max_backlog); 2918 EXPORT_SYMBOL(netdev_max_backlog);
2908 2919
2909 int netdev_tstamp_prequeue __read_mostly = 1; 2920 int netdev_tstamp_prequeue __read_mostly = 1;
2910 int netdev_budget __read_mostly = 300; 2921 int netdev_budget __read_mostly = 300;
2911 int weight_p __read_mostly = 64; /* old backlog weight */ 2922 int weight_p __read_mostly = 64; /* old backlog weight */
2912 2923
2913 /* Called with irq disabled */ 2924 /* Called with irq disabled */
2914 static inline void ____napi_schedule(struct softnet_data *sd, 2925 static inline void ____napi_schedule(struct softnet_data *sd,
2915 struct napi_struct *napi) 2926 struct napi_struct *napi)
2916 { 2927 {
2917 list_add_tail(&napi->poll_list, &sd->poll_list); 2928 list_add_tail(&napi->poll_list, &sd->poll_list);
2918 __raise_softirq_irqoff(NET_RX_SOFTIRQ); 2929 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2919 } 2930 }
2920 2931
2921 #ifdef CONFIG_RPS 2932 #ifdef CONFIG_RPS
2922 2933
2923 /* One global table that all flow-based protocols share. */ 2934 /* One global table that all flow-based protocols share. */
2924 struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; 2935 struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
2925 EXPORT_SYMBOL(rps_sock_flow_table); 2936 EXPORT_SYMBOL(rps_sock_flow_table);
2926 2937
2927 struct static_key rps_needed __read_mostly; 2938 struct static_key rps_needed __read_mostly;
2928 2939
2929 static struct rps_dev_flow * 2940 static struct rps_dev_flow *
2930 set_rps_cpu(struct net_device *dev, struct sk_buff *skb, 2941 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2931 struct rps_dev_flow *rflow, u16 next_cpu) 2942 struct rps_dev_flow *rflow, u16 next_cpu)
2932 { 2943 {
2933 if (next_cpu != RPS_NO_CPU) { 2944 if (next_cpu != RPS_NO_CPU) {
2934 #ifdef CONFIG_RFS_ACCEL 2945 #ifdef CONFIG_RFS_ACCEL
2935 struct netdev_rx_queue *rxqueue; 2946 struct netdev_rx_queue *rxqueue;
2936 struct rps_dev_flow_table *flow_table; 2947 struct rps_dev_flow_table *flow_table;
2937 struct rps_dev_flow *old_rflow; 2948 struct rps_dev_flow *old_rflow;
2938 u32 flow_id; 2949 u32 flow_id;
2939 u16 rxq_index; 2950 u16 rxq_index;
2940 int rc; 2951 int rc;
2941 2952
2942 /* Should we steer this flow to a different hardware queue? */ 2953 /* Should we steer this flow to a different hardware queue? */
2943 if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap || 2954 if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
2944 !(dev->features & NETIF_F_NTUPLE)) 2955 !(dev->features & NETIF_F_NTUPLE))
2945 goto out; 2956 goto out;
2946 rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu); 2957 rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
2947 if (rxq_index == skb_get_rx_queue(skb)) 2958 if (rxq_index == skb_get_rx_queue(skb))
2948 goto out; 2959 goto out;
2949 2960
2950 rxqueue = dev->_rx + rxq_index; 2961 rxqueue = dev->_rx + rxq_index;
2951 flow_table = rcu_dereference(rxqueue->rps_flow_table); 2962 flow_table = rcu_dereference(rxqueue->rps_flow_table);
2952 if (!flow_table) 2963 if (!flow_table)
2953 goto out; 2964 goto out;
2954 flow_id = skb->rxhash & flow_table->mask; 2965 flow_id = skb->rxhash & flow_table->mask;
2955 rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, 2966 rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
2956 rxq_index, flow_id); 2967 rxq_index, flow_id);
2957 if (rc < 0) 2968 if (rc < 0)
2958 goto out; 2969 goto out;
2959 old_rflow = rflow; 2970 old_rflow = rflow;
2960 rflow = &flow_table->flows[flow_id]; 2971 rflow = &flow_table->flows[flow_id];
2961 rflow->filter = rc; 2972 rflow->filter = rc;
2962 if (old_rflow->filter == rflow->filter) 2973 if (old_rflow->filter == rflow->filter)
2963 old_rflow->filter = RPS_NO_FILTER; 2974 old_rflow->filter = RPS_NO_FILTER;
2964 out: 2975 out:
2965 #endif 2976 #endif
2966 rflow->last_qtail = 2977 rflow->last_qtail =
2967 per_cpu(softnet_data, next_cpu).input_queue_head; 2978 per_cpu(softnet_data, next_cpu).input_queue_head;
2968 } 2979 }
2969 2980
2970 rflow->cpu = next_cpu; 2981 rflow->cpu = next_cpu;
2971 return rflow; 2982 return rflow;
2972 } 2983 }
2973 2984
2974 /* 2985 /*
2975 * get_rps_cpu is called from netif_receive_skb and returns the target 2986 * get_rps_cpu is called from netif_receive_skb and returns the target
2976 * CPU from the RPS map of the receiving queue for a given skb. 2987 * CPU from the RPS map of the receiving queue for a given skb.
2977 * rcu_read_lock must be held on entry. 2988 * rcu_read_lock must be held on entry.
2978 */ 2989 */
2979 static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, 2990 static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2980 struct rps_dev_flow **rflowp) 2991 struct rps_dev_flow **rflowp)
2981 { 2992 {
2982 struct netdev_rx_queue *rxqueue; 2993 struct netdev_rx_queue *rxqueue;
2983 struct rps_map *map; 2994 struct rps_map *map;
2984 struct rps_dev_flow_table *flow_table; 2995 struct rps_dev_flow_table *flow_table;
2985 struct rps_sock_flow_table *sock_flow_table; 2996 struct rps_sock_flow_table *sock_flow_table;
2986 int cpu = -1; 2997 int cpu = -1;
2987 u16 tcpu; 2998 u16 tcpu;
2988 2999
2989 if (skb_rx_queue_recorded(skb)) { 3000 if (skb_rx_queue_recorded(skb)) {
2990 u16 index = skb_get_rx_queue(skb); 3001 u16 index = skb_get_rx_queue(skb);
2991 if (unlikely(index >= dev->real_num_rx_queues)) { 3002 if (unlikely(index >= dev->real_num_rx_queues)) {
2992 WARN_ONCE(dev->real_num_rx_queues > 1, 3003 WARN_ONCE(dev->real_num_rx_queues > 1,
2993 "%s received packet on queue %u, but number " 3004 "%s received packet on queue %u, but number "
2994 "of RX queues is %u\n", 3005 "of RX queues is %u\n",
2995 dev->name, index, dev->real_num_rx_queues); 3006 dev->name, index, dev->real_num_rx_queues);
2996 goto done; 3007 goto done;
2997 } 3008 }
2998 rxqueue = dev->_rx + index; 3009 rxqueue = dev->_rx + index;
2999 } else 3010 } else
3000 rxqueue = dev->_rx; 3011 rxqueue = dev->_rx;
3001 3012
3002 map = rcu_dereference(rxqueue->rps_map); 3013 map = rcu_dereference(rxqueue->rps_map);
3003 if (map) { 3014 if (map) {
3004 if (map->len == 1 && 3015 if (map->len == 1 &&
3005 !rcu_access_pointer(rxqueue->rps_flow_table)) { 3016 !rcu_access_pointer(rxqueue->rps_flow_table)) {
3006 tcpu = map->cpus[0]; 3017 tcpu = map->cpus[0];
3007 if (cpu_online(tcpu)) 3018 if (cpu_online(tcpu))
3008 cpu = tcpu; 3019 cpu = tcpu;
3009 goto done; 3020 goto done;
3010 } 3021 }
3011 } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) { 3022 } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) {
3012 goto done; 3023 goto done;
3013 } 3024 }
3014 3025
3015 skb_reset_network_header(skb); 3026 skb_reset_network_header(skb);
3016 if (!skb_get_rxhash(skb)) 3027 if (!skb_get_rxhash(skb))
3017 goto done; 3028 goto done;
3018 3029
3019 flow_table = rcu_dereference(rxqueue->rps_flow_table); 3030 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3020 sock_flow_table = rcu_dereference(rps_sock_flow_table); 3031 sock_flow_table = rcu_dereference(rps_sock_flow_table);
3021 if (flow_table && sock_flow_table) { 3032 if (flow_table && sock_flow_table) {
3022 u16 next_cpu; 3033 u16 next_cpu;
3023 struct rps_dev_flow *rflow; 3034 struct rps_dev_flow *rflow;
3024 3035
3025 rflow = &flow_table->flows[skb->rxhash & flow_table->mask]; 3036 rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
3026 tcpu = rflow->cpu; 3037 tcpu = rflow->cpu;
3027 3038
3028 next_cpu = sock_flow_table->ents[skb->rxhash & 3039 next_cpu = sock_flow_table->ents[skb->rxhash &
3029 sock_flow_table->mask]; 3040 sock_flow_table->mask];
3030 3041
3031 /* 3042 /*
3032 * If the desired CPU (where last recvmsg was done) is 3043 * If the desired CPU (where last recvmsg was done) is
3033 * different from current CPU (one in the rx-queue flow 3044 * different from current CPU (one in the rx-queue flow
3034 * table entry), switch if one of the following holds: 3045 * table entry), switch if one of the following holds:
3035 * - Current CPU is unset (equal to RPS_NO_CPU). 3046 * - Current CPU is unset (equal to RPS_NO_CPU).
3036 * - Current CPU is offline. 3047 * - Current CPU is offline.
3037 * - The current CPU's queue tail has advanced beyond the 3048 * - The current CPU's queue tail has advanced beyond the
3038 * last packet that was enqueued using this table entry. 3049 * last packet that was enqueued using this table entry.
3039 * This guarantees that all previous packets for the flow 3050 * This guarantees that all previous packets for the flow
3040 * have been dequeued, thus preserving in-order delivery. 3051 * have been dequeued, thus preserving in-order delivery.
3041 */ 3052 */
3042 if (unlikely(tcpu != next_cpu) && 3053 if (unlikely(tcpu != next_cpu) &&
3043 (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || 3054 (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
3044 ((int)(per_cpu(softnet_data, tcpu).input_queue_head - 3055 ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
3045 rflow->last_qtail)) >= 0)) { 3056 rflow->last_qtail)) >= 0)) {
3046 tcpu = next_cpu; 3057 tcpu = next_cpu;
3047 rflow = set_rps_cpu(dev, skb, rflow, next_cpu); 3058 rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
3048 } 3059 }
3049 3060
3050 if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { 3061 if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
3051 *rflowp = rflow; 3062 *rflowp = rflow;
3052 cpu = tcpu; 3063 cpu = tcpu;
3053 goto done; 3064 goto done;
3054 } 3065 }
3055 } 3066 }
3056 3067
3057 if (map) { 3068 if (map) {
3058 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; 3069 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
3059 3070
3060 if (cpu_online(tcpu)) { 3071 if (cpu_online(tcpu)) {
3061 cpu = tcpu; 3072 cpu = tcpu;
3062 goto done; 3073 goto done;
3063 } 3074 }
3064 } 3075 }
3065 3076
3066 done: 3077 done:
3067 return cpu; 3078 return cpu;
3068 } 3079 }
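
One detail of the switching logic above worth spelling out (an illustration, not from the patch): the (int)(input_queue_head - rflow->last_qtail) >= 0 test compares two free-running unsigned counters by signed difference, so wraparound does not confuse it. For example, if input_queue_head has wrapped around to 5 while last_qtail still reads 0xfffffffb, the unsigned subtraction yields 10, a small positive signed value, and the flow is correctly treated as fully drained and eligible to move to the new CPU.
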
3069 3080
3070 #ifdef CONFIG_RFS_ACCEL 3081 #ifdef CONFIG_RFS_ACCEL
3071 3082
3072 /** 3083 /**
3073 * rps_may_expire_flow - check whether an RFS hardware filter may be removed 3084 * rps_may_expire_flow - check whether an RFS hardware filter may be removed
3074 * @dev: Device on which the filter was set 3085 * @dev: Device on which the filter was set
3075 * @rxq_index: RX queue index 3086 * @rxq_index: RX queue index
3076 * @flow_id: Flow ID passed to ndo_rx_flow_steer() 3087 * @flow_id: Flow ID passed to ndo_rx_flow_steer()
3077 * @filter_id: Filter ID returned by ndo_rx_flow_steer() 3088 * @filter_id: Filter ID returned by ndo_rx_flow_steer()
3078 * 3089 *
3079 * Drivers that implement ndo_rx_flow_steer() should periodically call 3090 * Drivers that implement ndo_rx_flow_steer() should periodically call
3080 * this function for each installed filter and remove the filters for 3091 * this function for each installed filter and remove the filters for
3081 * which it returns %true. 3092 * which it returns %true.
3082 */ 3093 */
3083 bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, 3094 bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
3084 u32 flow_id, u16 filter_id) 3095 u32 flow_id, u16 filter_id)
3085 { 3096 {
3086 struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index; 3097 struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
3087 struct rps_dev_flow_table *flow_table; 3098 struct rps_dev_flow_table *flow_table;
3088 struct rps_dev_flow *rflow; 3099 struct rps_dev_flow *rflow;
3089 bool expire = true; 3100 bool expire = true;
3090 int cpu; 3101 int cpu;
3091 3102
3092 rcu_read_lock(); 3103 rcu_read_lock();
3093 flow_table = rcu_dereference(rxqueue->rps_flow_table); 3104 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3094 if (flow_table && flow_id <= flow_table->mask) { 3105 if (flow_table && flow_id <= flow_table->mask) {
3095 rflow = &flow_table->flows[flow_id]; 3106 rflow = &flow_table->flows[flow_id];
3096 cpu = ACCESS_ONCE(rflow->cpu); 3107 cpu = ACCESS_ONCE(rflow->cpu);
3097 if (rflow->filter == filter_id && cpu != RPS_NO_CPU && 3108 if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
3098 ((int)(per_cpu(softnet_data, cpu).input_queue_head - 3109 ((int)(per_cpu(softnet_data, cpu).input_queue_head -
3099 rflow->last_qtail) < 3110 rflow->last_qtail) <
3100 (int)(10 * flow_table->mask))) 3111 (int)(10 * flow_table->mask)))
3101 expire = false; 3112 expire = false;
3102 } 3113 }
3103 rcu_read_unlock(); 3114 rcu_read_unlock();
3104 return expire; 3115 return expire;
3105 } 3116 }
3106 EXPORT_SYMBOL(rps_may_expire_flow); 3117 EXPORT_SYMBOL(rps_may_expire_flow);
3107 3118
3108 #endif /* CONFIG_RFS_ACCEL */ 3119 #endif /* CONFIG_RFS_ACCEL */
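
To make the kerneldoc for rps_may_expire_flow() above concrete, here is a hedged sketch of the periodic scan a driver might run over its installed filters. Only rps_may_expire_flow() is a real API; struct demo_filter and demo_hw_remove_filter() stand in for driver-private bookkeeping:

#include <linux/netdevice.h>

struct demo_filter {
        bool installed;
        u16 rxq_index;          /* RX queue the filter steers to */
        u32 flow_id;            /* flow_id passed to ndo_rx_flow_steer() */
        u16 filter_id;          /* value returned by ndo_rx_flow_steer() */
};

/* Hypothetical: remove the hardware steering entry for this flow. */
static void demo_hw_remove_filter(struct net_device *ndev, struct demo_filter *f);

static void demo_expire_rfs_filters(struct net_device *ndev,
                                    struct demo_filter *filters,
                                    unsigned int n_filters)
{
        unsigned int i;

        for (i = 0; i < n_filters; i++) {
                struct demo_filter *f = &filters[i];

                if (!f->installed)
                        continue;
                if (rps_may_expire_flow(ndev, f->rxq_index,
                                        f->flow_id, f->filter_id)) {
                        demo_hw_remove_filter(ndev, f);
                        f->installed = false;
                }
        }
}
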
3109 3120
3110 /* Called from hardirq (IPI) context */ 3121 /* Called from hardirq (IPI) context */
3111 static void rps_trigger_softirq(void *data) 3122 static void rps_trigger_softirq(void *data)
3112 { 3123 {
3113 struct softnet_data *sd = data; 3124 struct softnet_data *sd = data;
3114 3125
3115 ____napi_schedule(sd, &sd->backlog); 3126 ____napi_schedule(sd, &sd->backlog);
3116 sd->received_rps++; 3127 sd->received_rps++;
3117 } 3128 }
3118 3129
3119 #endif /* CONFIG_RPS */ 3130 #endif /* CONFIG_RPS */
3120 3131
3121 /* 3132 /*
3122 * Check if this softnet_data structure belongs to another cpu 3133 * Check if this softnet_data structure belongs to another cpu
3123 * If yes, queue it to our IPI list and return 1 3134 * If yes, queue it to our IPI list and return 1
3124 * If no, return 0 3135 * If no, return 0
3125 */ 3136 */
3126 static int rps_ipi_queued(struct softnet_data *sd) 3137 static int rps_ipi_queued(struct softnet_data *sd)
3127 { 3138 {
3128 #ifdef CONFIG_RPS 3139 #ifdef CONFIG_RPS
3129 struct softnet_data *mysd = &__get_cpu_var(softnet_data); 3140 struct softnet_data *mysd = &__get_cpu_var(softnet_data);
3130 3141
3131 if (sd != mysd) { 3142 if (sd != mysd) {
3132 sd->rps_ipi_next = mysd->rps_ipi_list; 3143 sd->rps_ipi_next = mysd->rps_ipi_list;
3133 mysd->rps_ipi_list = sd; 3144 mysd->rps_ipi_list = sd;
3134 3145
3135 __raise_softirq_irqoff(NET_RX_SOFTIRQ); 3146 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
3136 return 1; 3147 return 1;
3137 } 3148 }
3138 #endif /* CONFIG_RPS */ 3149 #endif /* CONFIG_RPS */
3139 return 0; 3150 return 0;
3140 } 3151 }
3141 3152
3142 /* 3153 /*
3143 * enqueue_to_backlog is called to queue an skb to a per CPU backlog 3154 * enqueue_to_backlog is called to queue an skb to a per CPU backlog
3144 * queue (may be a remote CPU queue). 3155 * queue (may be a remote CPU queue).
3145 */ 3156 */
3146 static int enqueue_to_backlog(struct sk_buff *skb, int cpu, 3157 static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
3147 unsigned int *qtail) 3158 unsigned int *qtail)
3148 { 3159 {
3149 struct softnet_data *sd; 3160 struct softnet_data *sd;
3150 unsigned long flags; 3161 unsigned long flags;
3151 3162
3152 sd = &per_cpu(softnet_data, cpu); 3163 sd = &per_cpu(softnet_data, cpu);
3153 3164
3154 local_irq_save(flags); 3165 local_irq_save(flags);
3155 3166
3156 rps_lock(sd); 3167 rps_lock(sd);
3157 if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) { 3168 if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
3158 if (skb_queue_len(&sd->input_pkt_queue)) { 3169 if (skb_queue_len(&sd->input_pkt_queue)) {
3159 enqueue: 3170 enqueue:
3160 __skb_queue_tail(&sd->input_pkt_queue, skb); 3171 __skb_queue_tail(&sd->input_pkt_queue, skb);
3161 input_queue_tail_incr_save(sd, qtail); 3172 input_queue_tail_incr_save(sd, qtail);
3162 rps_unlock(sd); 3173 rps_unlock(sd);
3163 local_irq_restore(flags); 3174 local_irq_restore(flags);
3164 return NET_RX_SUCCESS; 3175 return NET_RX_SUCCESS;
3165 } 3176 }
3166 3177
3167 /* Schedule NAPI for backlog device 3178 /* Schedule NAPI for backlog device
3168 * We can use a non-atomic operation since we own the queue lock 3179 * We can use a non-atomic operation since we own the queue lock
3169 */ 3180 */
3170 if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) { 3181 if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
3171 if (!rps_ipi_queued(sd)) 3182 if (!rps_ipi_queued(sd))
3172 ____napi_schedule(sd, &sd->backlog); 3183 ____napi_schedule(sd, &sd->backlog);
3173 } 3184 }
3174 goto enqueue; 3185 goto enqueue;
3175 } 3186 }
3176 3187
3177 sd->dropped++; 3188 sd->dropped++;
3178 rps_unlock(sd); 3189 rps_unlock(sd);
3179 3190
3180 local_irq_restore(flags); 3191 local_irq_restore(flags);
3181 3192
3182 atomic_long_inc(&skb->dev->rx_dropped); 3193 atomic_long_inc(&skb->dev->rx_dropped);
3183 kfree_skb(skb); 3194 kfree_skb(skb);
3184 return NET_RX_DROP; 3195 return NET_RX_DROP;
3185 } 3196 }
3186 3197
3187 /** 3198 /**
3188 * netif_rx - post buffer to the network code 3199 * netif_rx - post buffer to the network code
3189 * @skb: buffer to post 3200 * @skb: buffer to post
3190 * 3201 *
3191 * This function receives a packet from a device driver and queues it for 3202 * This function receives a packet from a device driver and queues it for
3192 * the upper (protocol) levels to process. It always succeeds. The buffer 3203 * the upper (protocol) levels to process. It always succeeds. The buffer
3193 * may be dropped during processing for congestion control or by the 3204 * may be dropped during processing for congestion control or by the
3194 * protocol layers. 3205 * protocol layers.
3195 * 3206 *
3196 * return values: 3207 * return values:
3197 * NET_RX_SUCCESS (no congestion) 3208 * NET_RX_SUCCESS (no congestion)
3198 * NET_RX_DROP (packet was dropped) 3209 * NET_RX_DROP (packet was dropped)
3199 * 3210 *
3200 */ 3211 */
3201 3212
3202 int netif_rx(struct sk_buff *skb) 3213 int netif_rx(struct sk_buff *skb)
3203 { 3214 {
3204 int ret; 3215 int ret;
3205 3216
3206 /* if netpoll wants it, pretend we never saw it */ 3217 /* if netpoll wants it, pretend we never saw it */
3207 if (netpoll_rx(skb)) 3218 if (netpoll_rx(skb))
3208 return NET_RX_DROP; 3219 return NET_RX_DROP;
3209 3220
3210 net_timestamp_check(netdev_tstamp_prequeue, skb); 3221 net_timestamp_check(netdev_tstamp_prequeue, skb);
3211 3222
3212 trace_netif_rx(skb); 3223 trace_netif_rx(skb);
3213 #ifdef CONFIG_RPS 3224 #ifdef CONFIG_RPS
3214 if (static_key_false(&rps_needed)) { 3225 if (static_key_false(&rps_needed)) {
3215 struct rps_dev_flow voidflow, *rflow = &voidflow; 3226 struct rps_dev_flow voidflow, *rflow = &voidflow;
3216 int cpu; 3227 int cpu;
3217 3228
3218 preempt_disable(); 3229 preempt_disable();
3219 rcu_read_lock(); 3230 rcu_read_lock();
3220 3231
3221 cpu = get_rps_cpu(skb->dev, skb, &rflow); 3232 cpu = get_rps_cpu(skb->dev, skb, &rflow);
3222 if (cpu < 0) 3233 if (cpu < 0)
3223 cpu = smp_processor_id(); 3234 cpu = smp_processor_id();
3224 3235
3225 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); 3236 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3226 3237
3227 rcu_read_unlock(); 3238 rcu_read_unlock();
3228 preempt_enable(); 3239 preempt_enable();
3229 } else 3240 } else
3230 #endif 3241 #endif
3231 { 3242 {
3232 unsigned int qtail; 3243 unsigned int qtail;
3233 ret = enqueue_to_backlog(skb, get_cpu(), &qtail); 3244 ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
3234 put_cpu(); 3245 put_cpu();
3235 } 3246 }
3236 return ret; 3247 return ret;
3237 } 3248 }
3238 EXPORT_SYMBOL(netif_rx); 3249 EXPORT_SYMBOL(netif_rx);
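
As a hedged illustration of the kerneldoc above: a driver typically builds an skb from the received frame and hands it to netif_rx(). The device and the (data, len) buffer below are hypothetical; the skb helpers are the usual ones:

#include <linux/etherdevice.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/string.h>

static int demo_rx_one(struct net_device *dev, const void *data, unsigned int len)
{
        struct sk_buff *skb;

        skb = netdev_alloc_skb(dev, len + NET_IP_ALIGN);
        if (!skb) {
                dev->stats.rx_dropped++;
                return NET_RX_DROP;
        }

        skb_reserve(skb, NET_IP_ALIGN);         /* keep the IP header aligned */
        memcpy(skb_put(skb, len), data, len);   /* copy the received frame */
        skb->protocol = eth_type_trans(skb, dev);
        skb->ip_summed = CHECKSUM_NONE;         /* no hardware checksum claimed */

        return netif_rx(skb);                   /* NET_RX_SUCCESS or NET_RX_DROP */
}
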
3239 3250
3240 int netif_rx_ni(struct sk_buff *skb) 3251 int netif_rx_ni(struct sk_buff *skb)
3241 { 3252 {
3242 int err; 3253 int err;
3243 3254
3244 preempt_disable(); 3255 preempt_disable();
3245 err = netif_rx(skb); 3256 err = netif_rx(skb);
3246 if (local_softirq_pending()) 3257 if (local_softirq_pending())
3247 do_softirq(); 3258 do_softirq();
3248 preempt_enable(); 3259 preempt_enable();
3249 3260
3250 return err; 3261 return err;
3251 } 3262 }
3252 EXPORT_SYMBOL(netif_rx_ni); 3263 EXPORT_SYMBOL(netif_rx_ni);
3253 3264
3254 static void net_tx_action(struct softirq_action *h) 3265 static void net_tx_action(struct softirq_action *h)
3255 { 3266 {
3256 struct softnet_data *sd = &__get_cpu_var(softnet_data); 3267 struct softnet_data *sd = &__get_cpu_var(softnet_data);
3257 3268
3258 if (sd->completion_queue) { 3269 if (sd->completion_queue) {
3259 struct sk_buff *clist; 3270 struct sk_buff *clist;
3260 3271
3261 local_irq_disable(); 3272 local_irq_disable();
3262 clist = sd->completion_queue; 3273 clist = sd->completion_queue;
3263 sd->completion_queue = NULL; 3274 sd->completion_queue = NULL;
3264 local_irq_enable(); 3275 local_irq_enable();
3265 3276
3266 while (clist) { 3277 while (clist) {
3267 struct sk_buff *skb = clist; 3278 struct sk_buff *skb = clist;
3268 clist = clist->next; 3279 clist = clist->next;
3269 3280
3270 WARN_ON(atomic_read(&skb->users)); 3281 WARN_ON(atomic_read(&skb->users));
3271 trace_kfree_skb(skb, net_tx_action); 3282 trace_kfree_skb(skb, net_tx_action);
3272 __kfree_skb(skb); 3283 __kfree_skb(skb);
3273 } 3284 }
3274 } 3285 }
3275 3286
3276 if (sd->output_queue) { 3287 if (sd->output_queue) {
3277 struct Qdisc *head; 3288 struct Qdisc *head;
3278 3289
3279 local_irq_disable(); 3290 local_irq_disable();
3280 head = sd->output_queue; 3291 head = sd->output_queue;
3281 sd->output_queue = NULL; 3292 sd->output_queue = NULL;
3282 sd->output_queue_tailp = &sd->output_queue; 3293 sd->output_queue_tailp = &sd->output_queue;
3283 local_irq_enable(); 3294 local_irq_enable();
3284 3295
3285 while (head) { 3296 while (head) {
3286 struct Qdisc *q = head; 3297 struct Qdisc *q = head;
3287 spinlock_t *root_lock; 3298 spinlock_t *root_lock;
3288 3299
3289 head = head->next_sched; 3300 head = head->next_sched;
3290 3301
3291 root_lock = qdisc_lock(q); 3302 root_lock = qdisc_lock(q);
3292 if (spin_trylock(root_lock)) { 3303 if (spin_trylock(root_lock)) {
3293 smp_mb__before_clear_bit(); 3304 smp_mb__before_clear_bit();
3294 clear_bit(__QDISC_STATE_SCHED, 3305 clear_bit(__QDISC_STATE_SCHED,
3295 &q->state); 3306 &q->state);
3296 qdisc_run(q); 3307 qdisc_run(q);
3297 spin_unlock(root_lock); 3308 spin_unlock(root_lock);
3298 } else { 3309 } else {
3299 if (!test_bit(__QDISC_STATE_DEACTIVATED, 3310 if (!test_bit(__QDISC_STATE_DEACTIVATED,
3300 &q->state)) { 3311 &q->state)) {
3301 __netif_reschedule(q); 3312 __netif_reschedule(q);
3302 } else { 3313 } else {
3303 smp_mb__before_clear_bit(); 3314 smp_mb__before_clear_bit();
3304 clear_bit(__QDISC_STATE_SCHED, 3315 clear_bit(__QDISC_STATE_SCHED,
3305 &q->state); 3316 &q->state);
3306 } 3317 }
3307 } 3318 }
3308 } 3319 }
3309 } 3320 }
3310 } 3321 }
3311 3322
3312 #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \ 3323 #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
3313 (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)) 3324 (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
3314 /* This hook is defined here for ATM LANE */ 3325 /* This hook is defined here for ATM LANE */
3315 int (*br_fdb_test_addr_hook)(struct net_device *dev, 3326 int (*br_fdb_test_addr_hook)(struct net_device *dev,
3316 unsigned char *addr) __read_mostly; 3327 unsigned char *addr) __read_mostly;
3317 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); 3328 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
3318 #endif 3329 #endif
3319 3330
3320 #ifdef CONFIG_NET_CLS_ACT 3331 #ifdef CONFIG_NET_CLS_ACT
3321 /* TODO: Maybe we should just force sch_ingress to be compiled in 3332 /* TODO: Maybe we should just force sch_ingress to be compiled in
3322 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions 3333 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
3323 * a compare and 2 stores extra right now if we dont have it on 3334 * a compare and 2 stores extra right now if we dont have it on
3324 * but have CONFIG_NET_CLS_ACT 3335 * but have CONFIG_NET_CLS_ACT
3325 * NOTE: This doesn't stop any functionality; if you dont have 3336 * NOTE: This doesn't stop any functionality; if you dont have
3326 * the ingress scheduler, you just can't add policies on ingress. 3337 * the ingress scheduler, you just can't add policies on ingress.
3327 * 3338 *
3328 */ 3339 */
3329 static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq) 3340 static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
3330 { 3341 {
3331 struct net_device *dev = skb->dev; 3342 struct net_device *dev = skb->dev;
3332 u32 ttl = G_TC_RTTL(skb->tc_verd); 3343 u32 ttl = G_TC_RTTL(skb->tc_verd);
3333 int result = TC_ACT_OK; 3344 int result = TC_ACT_OK;
3334 struct Qdisc *q; 3345 struct Qdisc *q;
3335 3346
3336 if (unlikely(MAX_RED_LOOP < ttl++)) { 3347 if (unlikely(MAX_RED_LOOP < ttl++)) {
3337 net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n", 3348 net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n",
3338 skb->skb_iif, dev->ifindex); 3349 skb->skb_iif, dev->ifindex);
3339 return TC_ACT_SHOT; 3350 return TC_ACT_SHOT;
3340 } 3351 }
3341 3352
3342 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); 3353 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
3343 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); 3354 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
3344 3355
3345 q = rxq->qdisc; 3356 q = rxq->qdisc;
3346 if (q != &noop_qdisc) { 3357 if (q != &noop_qdisc) {
3347 spin_lock(qdisc_lock(q)); 3358 spin_lock(qdisc_lock(q));
3348 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) 3359 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
3349 result = qdisc_enqueue_root(skb, q); 3360 result = qdisc_enqueue_root(skb, q);
3350 spin_unlock(qdisc_lock(q)); 3361 spin_unlock(qdisc_lock(q));
3351 } 3362 }
3352 3363
3353 return result; 3364 return result;
3354 } 3365 }
3355 3366
3356 static inline struct sk_buff *handle_ing(struct sk_buff *skb, 3367 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
3357 struct packet_type **pt_prev, 3368 struct packet_type **pt_prev,
3358 int *ret, struct net_device *orig_dev) 3369 int *ret, struct net_device *orig_dev)
3359 { 3370 {
3360 struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue); 3371 struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
3361 3372
3362 if (!rxq || rxq->qdisc == &noop_qdisc) 3373 if (!rxq || rxq->qdisc == &noop_qdisc)
3363 goto out; 3374 goto out;
3364 3375
3365 if (*pt_prev) { 3376 if (*pt_prev) {
3366 *ret = deliver_skb(skb, *pt_prev, orig_dev); 3377 *ret = deliver_skb(skb, *pt_prev, orig_dev);
3367 *pt_prev = NULL; 3378 *pt_prev = NULL;
3368 } 3379 }
3369 3380
3370 switch (ing_filter(skb, rxq)) { 3381 switch (ing_filter(skb, rxq)) {
3371 case TC_ACT_SHOT: 3382 case TC_ACT_SHOT:
3372 case TC_ACT_STOLEN: 3383 case TC_ACT_STOLEN:
3373 kfree_skb(skb); 3384 kfree_skb(skb);
3374 return NULL; 3385 return NULL;
3375 } 3386 }
3376 3387
3377 out: 3388 out:
3378 skb->tc_verd = 0; 3389 skb->tc_verd = 0;
3379 return skb; 3390 return skb;
3380 } 3391 }
3381 #endif 3392 #endif
3382 3393
3383 /** 3394 /**
3384 * netdev_rx_handler_register - register receive handler 3395 * netdev_rx_handler_register - register receive handler
3385 * @dev: device to register a handler for 3396 * @dev: device to register a handler for
3386 * @rx_handler: receive handler to register 3397 * @rx_handler: receive handler to register
3387 * @rx_handler_data: data pointer that is used by rx handler 3398 * @rx_handler_data: data pointer that is used by rx handler
3388 * 3399 *
3389 * Register a receive handler for a device. This handler will then be 3400 * Register a receive handler for a device. This handler will then be
3390 * called from __netif_receive_skb. A negative errno code is returned 3401 * called from __netif_receive_skb. A negative errno code is returned
3391 * on a failure. 3402 * on a failure.
3392 * 3403 *
3393 * The caller must hold the rtnl_mutex. 3404 * The caller must hold the rtnl_mutex.
3394 * 3405 *
3395 * For a general description of rx_handler, see enum rx_handler_result. 3406 * For a general description of rx_handler, see enum rx_handler_result.
3396 */ 3407 */
3397 int netdev_rx_handler_register(struct net_device *dev, 3408 int netdev_rx_handler_register(struct net_device *dev,
3398 rx_handler_func_t *rx_handler, 3409 rx_handler_func_t *rx_handler,
3399 void *rx_handler_data) 3410 void *rx_handler_data)
3400 { 3411 {
3401 ASSERT_RTNL(); 3412 ASSERT_RTNL();
3402 3413
3403 if (dev->rx_handler) 3414 if (dev->rx_handler)
3404 return -EBUSY; 3415 return -EBUSY;
3405 3416
3406 rcu_assign_pointer(dev->rx_handler_data, rx_handler_data); 3417 rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
3407 rcu_assign_pointer(dev->rx_handler, rx_handler); 3418 rcu_assign_pointer(dev->rx_handler, rx_handler);
3408 3419
3409 return 0; 3420 return 0;
3410 } 3421 }
3411 EXPORT_SYMBOL_GPL(netdev_rx_handler_register); 3422 EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
3412 3423
3413 /** 3424 /**
3414 * netdev_rx_handler_unregister - unregister receive handler 3425 * netdev_rx_handler_unregister - unregister receive handler
3415 * @dev: device to unregister a handler from 3426 * @dev: device to unregister a handler from
3416 * 3427 *
3417 * Unregister a receive handler from a device. 3428 * Unregister a receive handler from a device.
3418 * 3429 *
3419 * The caller must hold the rtnl_mutex. 3430 * The caller must hold the rtnl_mutex.
3420 */ 3431 */
3421 void netdev_rx_handler_unregister(struct net_device *dev) 3432 void netdev_rx_handler_unregister(struct net_device *dev)
3422 { 3433 {
3423 3434
3424 ASSERT_RTNL(); 3435 ASSERT_RTNL();
3425 RCU_INIT_POINTER(dev->rx_handler, NULL); 3436 RCU_INIT_POINTER(dev->rx_handler, NULL);
3426 RCU_INIT_POINTER(dev->rx_handler_data, NULL); 3437 RCU_INIT_POINTER(dev->rx_handler_data, NULL);
3427 } 3438 }
3428 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); 3439 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
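
A hedged sketch of how the register/unregister pair above is typically used, always under rtnl_lock() as the kerneldoc requires. demo_handle_frame() and the demo_port_data pointer are hypothetical stand-ins for what bridge or openvswitch would supply:

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static rx_handler_result_t demo_handle_frame(struct sk_buff **pskb)
{
        /* Inspect or take over *pskb here; RX_HANDLER_PASS lets
         * __netif_receive_skb() continue normal delivery. */
        return RX_HANDLER_PASS;
}

static int demo_attach(struct net_device *dev, void *demo_port_data)
{
        int err;

        rtnl_lock();
        err = netdev_rx_handler_register(dev, demo_handle_frame, demo_port_data);
        rtnl_unlock();

        return err;     /* -EBUSY if another handler already owns the device */
}

static void demo_detach(struct net_device *dev)
{
        rtnl_lock();
        netdev_rx_handler_unregister(dev);
        rtnl_unlock();
}
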
3429 3440
3430 /* 3441 /*
3431 * Limit the use of PFMEMALLOC reserves to those protocols that implement 3442 * Limit the use of PFMEMALLOC reserves to those protocols that implement
3432 * the special handling of PFMEMALLOC skbs. 3443 * the special handling of PFMEMALLOC skbs.
3433 */ 3444 */
3434 static bool skb_pfmemalloc_protocol(struct sk_buff *skb) 3445 static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
3435 { 3446 {
3436 switch (skb->protocol) { 3447 switch (skb->protocol) {
3437 case __constant_htons(ETH_P_ARP): 3448 case __constant_htons(ETH_P_ARP):
3438 case __constant_htons(ETH_P_IP): 3449 case __constant_htons(ETH_P_IP):
3439 case __constant_htons(ETH_P_IPV6): 3450 case __constant_htons(ETH_P_IPV6):
3440 case __constant_htons(ETH_P_8021Q): 3451 case __constant_htons(ETH_P_8021Q):
3441 return true; 3452 return true;
3442 default: 3453 default:
3443 return false; 3454 return false;
3444 } 3455 }
3445 } 3456 }
3446 3457
3447 static int __netif_receive_skb(struct sk_buff *skb) 3458 static int __netif_receive_skb(struct sk_buff *skb)
3448 { 3459 {
3449 struct packet_type *ptype, *pt_prev; 3460 struct packet_type *ptype, *pt_prev;
3450 rx_handler_func_t *rx_handler; 3461 rx_handler_func_t *rx_handler;
3451 struct net_device *orig_dev; 3462 struct net_device *orig_dev;
3452 struct net_device *null_or_dev; 3463 struct net_device *null_or_dev;
3453 bool deliver_exact = false; 3464 bool deliver_exact = false;
3454 int ret = NET_RX_DROP; 3465 int ret = NET_RX_DROP;
3455 __be16 type; 3466 __be16 type;
3456 unsigned long pflags = current->flags; 3467 unsigned long pflags = current->flags;
3457 3468
3458 net_timestamp_check(!netdev_tstamp_prequeue, skb); 3469 net_timestamp_check(!netdev_tstamp_prequeue, skb);
3459 3470
3460 trace_netif_receive_skb(skb); 3471 trace_netif_receive_skb(skb);
3461 3472
3462 /* 3473 /*
3463 * PFMEMALLOC skbs are special, they should 3474 * PFMEMALLOC skbs are special, they should
3464 * - be delivered to SOCK_MEMALLOC sockets only 3475 * - be delivered to SOCK_MEMALLOC sockets only
3465 * - stay away from userspace 3476 * - stay away from userspace
3466 * - have bounded memory usage 3477 * - have bounded memory usage
3467 * 3478 *
3468 * Use PF_MEMALLOC as this saves us from propagating the allocation 3479 * Use PF_MEMALLOC as this saves us from propagating the allocation
3469 * context down to all allocation sites. 3480 * context down to all allocation sites.
3470 */ 3481 */
3471 if (sk_memalloc_socks() && skb_pfmemalloc(skb)) 3482 if (sk_memalloc_socks() && skb_pfmemalloc(skb))
3472 current->flags |= PF_MEMALLOC; 3483 current->flags |= PF_MEMALLOC;
3473 3484
3474 /* if we've gotten here through NAPI, check netpoll */ 3485 /* if we've gotten here through NAPI, check netpoll */
3475 if (netpoll_receive_skb(skb)) 3486 if (netpoll_receive_skb(skb))
3476 goto out; 3487 goto out;
3477 3488
3478 orig_dev = skb->dev; 3489 orig_dev = skb->dev;
3479 3490
3480 skb_reset_network_header(skb); 3491 skb_reset_network_header(skb);
3481 if (!skb_transport_header_was_set(skb)) 3492 if (!skb_transport_header_was_set(skb))
3482 skb_reset_transport_header(skb); 3493 skb_reset_transport_header(skb);
3483 skb_reset_mac_len(skb); 3494 skb_reset_mac_len(skb);
3484 3495
3485 pt_prev = NULL; 3496 pt_prev = NULL;
3486 3497
3487 rcu_read_lock(); 3498 rcu_read_lock();
3488 3499
3489 another_round: 3500 another_round:
3490 skb->skb_iif = skb->dev->ifindex; 3501 skb->skb_iif = skb->dev->ifindex;
3491 3502
3492 __this_cpu_inc(softnet_data.processed); 3503 __this_cpu_inc(softnet_data.processed);
3493 3504
3494 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { 3505 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3495 skb = vlan_untag(skb); 3506 skb = vlan_untag(skb);
3496 if (unlikely(!skb)) 3507 if (unlikely(!skb))
3497 goto unlock; 3508 goto unlock;
3498 } 3509 }
3499 3510
3500 #ifdef CONFIG_NET_CLS_ACT 3511 #ifdef CONFIG_NET_CLS_ACT
3501 if (skb->tc_verd & TC_NCLS) { 3512 if (skb->tc_verd & TC_NCLS) {
3502 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); 3513 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
3503 goto ncls; 3514 goto ncls;
3504 } 3515 }
3505 #endif 3516 #endif
3506 3517
3507 if (sk_memalloc_socks() && skb_pfmemalloc(skb)) 3518 if (sk_memalloc_socks() && skb_pfmemalloc(skb))
3508 goto skip_taps; 3519 goto skip_taps;
3509 3520
3510 list_for_each_entry_rcu(ptype, &ptype_all, list) { 3521 list_for_each_entry_rcu(ptype, &ptype_all, list) {
3511 if (!ptype->dev || ptype->dev == skb->dev) { 3522 if (!ptype->dev || ptype->dev == skb->dev) {
3512 if (pt_prev) 3523 if (pt_prev)
3513 ret = deliver_skb(skb, pt_prev, orig_dev); 3524 ret = deliver_skb(skb, pt_prev, orig_dev);
3514 pt_prev = ptype; 3525 pt_prev = ptype;
3515 } 3526 }
3516 } 3527 }
3517 3528
3518 skip_taps: 3529 skip_taps:
3519 #ifdef CONFIG_NET_CLS_ACT 3530 #ifdef CONFIG_NET_CLS_ACT
3520 skb = handle_ing(skb, &pt_prev, &ret, orig_dev); 3531 skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
3521 if (!skb) 3532 if (!skb)
3522 goto unlock; 3533 goto unlock;
3523 ncls: 3534 ncls:
3524 #endif 3535 #endif
3525 3536
3526 if (sk_memalloc_socks() && skb_pfmemalloc(skb) 3537 if (sk_memalloc_socks() && skb_pfmemalloc(skb)
3527 && !skb_pfmemalloc_protocol(skb)) 3538 && !skb_pfmemalloc_protocol(skb))
3528 goto drop; 3539 goto drop;
3529 3540
3530 if (vlan_tx_tag_present(skb)) { 3541 if (vlan_tx_tag_present(skb)) {
3531 if (pt_prev) { 3542 if (pt_prev) {
3532 ret = deliver_skb(skb, pt_prev, orig_dev); 3543 ret = deliver_skb(skb, pt_prev, orig_dev);
3533 pt_prev = NULL; 3544 pt_prev = NULL;
3534 } 3545 }
3535 if (vlan_do_receive(&skb)) 3546 if (vlan_do_receive(&skb))
3536 goto another_round; 3547 goto another_round;
3537 else if (unlikely(!skb)) 3548 else if (unlikely(!skb))
3538 goto unlock; 3549 goto unlock;
3539 } 3550 }
3540 3551
3541 rx_handler = rcu_dereference(skb->dev->rx_handler); 3552 rx_handler = rcu_dereference(skb->dev->rx_handler);
3542 if (rx_handler) { 3553 if (rx_handler) {
3543 if (pt_prev) { 3554 if (pt_prev) {
3544 ret = deliver_skb(skb, pt_prev, orig_dev); 3555 ret = deliver_skb(skb, pt_prev, orig_dev);
3545 pt_prev = NULL; 3556 pt_prev = NULL;
3546 } 3557 }
3547 switch (rx_handler(&skb)) { 3558 switch (rx_handler(&skb)) {
3548 case RX_HANDLER_CONSUMED: 3559 case RX_HANDLER_CONSUMED:
3549 goto unlock; 3560 goto unlock;
3550 case RX_HANDLER_ANOTHER: 3561 case RX_HANDLER_ANOTHER:
3551 goto another_round; 3562 goto another_round;
3552 case RX_HANDLER_EXACT: 3563 case RX_HANDLER_EXACT:
3553 deliver_exact = true; 3564 deliver_exact = true;
3554 case RX_HANDLER_PASS: 3565 case RX_HANDLER_PASS:
3555 break; 3566 break;
3556 default: 3567 default:
3557 BUG(); 3568 BUG();
3558 } 3569 }
3559 } 3570 }
3560 3571
3561 if (vlan_tx_nonzero_tag_present(skb)) 3572 if (vlan_tx_nonzero_tag_present(skb))
3562 skb->pkt_type = PACKET_OTHERHOST; 3573 skb->pkt_type = PACKET_OTHERHOST;
3563 3574
3564 /* deliver only exact match when indicated */ 3575 /* deliver only exact match when indicated */
3565 null_or_dev = deliver_exact ? skb->dev : NULL; 3576 null_or_dev = deliver_exact ? skb->dev : NULL;
3566 3577
3567 type = skb->protocol; 3578 type = skb->protocol;
3568 list_for_each_entry_rcu(ptype, 3579 list_for_each_entry_rcu(ptype,
3569 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { 3580 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
3570 if (ptype->type == type && 3581 if (ptype->type == type &&
3571 (ptype->dev == null_or_dev || ptype->dev == skb->dev || 3582 (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
3572 ptype->dev == orig_dev)) { 3583 ptype->dev == orig_dev)) {
3573 if (pt_prev) 3584 if (pt_prev)
3574 ret = deliver_skb(skb, pt_prev, orig_dev); 3585 ret = deliver_skb(skb, pt_prev, orig_dev);
3575 pt_prev = ptype; 3586 pt_prev = ptype;
3576 } 3587 }
3577 } 3588 }
3578 3589
3579 if (pt_prev) { 3590 if (pt_prev) {
3580 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) 3591 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
3581 goto drop; 3592 goto drop;
3582 else 3593 else
3583 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); 3594 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
3584 } else { 3595 } else {
3585 drop: 3596 drop:
3586 atomic_long_inc(&skb->dev->rx_dropped); 3597 atomic_long_inc(&skb->dev->rx_dropped);
3587 kfree_skb(skb); 3598 kfree_skb(skb);
3588 /* Jamal, now you will not be able to escape explaining 3599 /* Jamal, now you will not be able to escape explaining
3589 * to me how you were going to use this. :-) 3600 * to me how you were going to use this. :-)
3590 */ 3601 */
3591 ret = NET_RX_DROP; 3602 ret = NET_RX_DROP;
3592 } 3603 }
3593 3604
3594 unlock: 3605 unlock:
3595 rcu_read_unlock(); 3606 rcu_read_unlock();
3596 out: 3607 out:
3597 tsk_restore_flags(current, pflags, PF_MEMALLOC); 3608 tsk_restore_flags(current, pflags, PF_MEMALLOC);
3598 return ret; 3609 return ret;
3599 } 3610 }
3600 3611
3601 /** 3612 /**
3602 * netif_receive_skb - process receive buffer from network 3613 * netif_receive_skb - process receive buffer from network
3603 * @skb: buffer to process 3614 * @skb: buffer to process
3604 * 3615 *
3605 * netif_receive_skb() is the main receive data processing function. 3616 * netif_receive_skb() is the main receive data processing function.
3606 * It always succeeds. The buffer may be dropped during processing 3617 * It always succeeds. The buffer may be dropped during processing
3607 * for congestion control or by the protocol layers. 3618 * for congestion control or by the protocol layers.
3608 * 3619 *
3609 * This function may only be called from softirq context and interrupts 3620 * This function may only be called from softirq context and interrupts
3610 * should be enabled. 3621 * should be enabled.
3611 * 3622 *
3612 * Return values (usually ignored): 3623 * Return values (usually ignored):
3613 * NET_RX_SUCCESS: no congestion 3624 * NET_RX_SUCCESS: no congestion
3614 * NET_RX_DROP: packet was dropped 3625 * NET_RX_DROP: packet was dropped
3615 */ 3626 */
3616 int netif_receive_skb(struct sk_buff *skb) 3627 int netif_receive_skb(struct sk_buff *skb)
3617 { 3628 {
3618 net_timestamp_check(netdev_tstamp_prequeue, skb); 3629 net_timestamp_check(netdev_tstamp_prequeue, skb);
3619 3630
3620 if (skb_defer_rx_timestamp(skb)) 3631 if (skb_defer_rx_timestamp(skb))
3621 return NET_RX_SUCCESS; 3632 return NET_RX_SUCCESS;
3622 3633
3623 #ifdef CONFIG_RPS 3634 #ifdef CONFIG_RPS
3624 if (static_key_false(&rps_needed)) { 3635 if (static_key_false(&rps_needed)) {
3625 struct rps_dev_flow voidflow, *rflow = &voidflow; 3636 struct rps_dev_flow voidflow, *rflow = &voidflow;
3626 int cpu, ret; 3637 int cpu, ret;
3627 3638
3628 rcu_read_lock(); 3639 rcu_read_lock();
3629 3640
3630 cpu = get_rps_cpu(skb->dev, skb, &rflow); 3641 cpu = get_rps_cpu(skb->dev, skb, &rflow);
3631 3642
3632 if (cpu >= 0) { 3643 if (cpu >= 0) {
3633 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); 3644 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3634 rcu_read_unlock(); 3645 rcu_read_unlock();
3635 return ret; 3646 return ret;
3636 } 3647 }
3637 rcu_read_unlock(); 3648 rcu_read_unlock();
3638 } 3649 }
3639 #endif 3650 #endif
3640 return __netif_receive_skb(skb); 3651 return __netif_receive_skb(skb);
3641 } 3652 }
3642 EXPORT_SYMBOL(netif_receive_skb); 3653 EXPORT_SYMBOL(netif_receive_skb);
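/* Editor's note: a hypothetical driver-side sketch of the usual call. It
 * runs from softirq context (e.g. a NAPI poll routine) with interrupts
 * enabled, exactly as the kernel-doc above requires.
 */
static void my_rx_one(struct net_device *netdev, struct sk_buff *skb)
{
	skb->protocol = eth_type_trans(skb, netdev);
	netif_receive_skb(skb);		/* return value is usually ignored */
}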
3643 3654
3644 /* Network device is going away, flush any packets still pending 3655 /* Network device is going away, flush any packets still pending
3645 * Called with irqs disabled. 3656 * Called with irqs disabled.
3646 */ 3657 */
3647 static void flush_backlog(void *arg) 3658 static void flush_backlog(void *arg)
3648 { 3659 {
3649 struct net_device *dev = arg; 3660 struct net_device *dev = arg;
3650 struct softnet_data *sd = &__get_cpu_var(softnet_data); 3661 struct softnet_data *sd = &__get_cpu_var(softnet_data);
3651 struct sk_buff *skb, *tmp; 3662 struct sk_buff *skb, *tmp;
3652 3663
3653 rps_lock(sd); 3664 rps_lock(sd);
3654 skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { 3665 skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
3655 if (skb->dev == dev) { 3666 if (skb->dev == dev) {
3656 __skb_unlink(skb, &sd->input_pkt_queue); 3667 __skb_unlink(skb, &sd->input_pkt_queue);
3657 kfree_skb(skb); 3668 kfree_skb(skb);
3658 input_queue_head_incr(sd); 3669 input_queue_head_incr(sd);
3659 } 3670 }
3660 } 3671 }
3661 rps_unlock(sd); 3672 rps_unlock(sd);
3662 3673
3663 skb_queue_walk_safe(&sd->process_queue, skb, tmp) { 3674 skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
3664 if (skb->dev == dev) { 3675 if (skb->dev == dev) {
3665 __skb_unlink(skb, &sd->process_queue); 3676 __skb_unlink(skb, &sd->process_queue);
3666 kfree_skb(skb); 3677 kfree_skb(skb);
3667 input_queue_head_incr(sd); 3678 input_queue_head_incr(sd);
3668 } 3679 }
3669 } 3680 }
3670 } 3681 }
3671 3682
3672 static int napi_gro_complete(struct sk_buff *skb) 3683 static int napi_gro_complete(struct sk_buff *skb)
3673 { 3684 {
3674 struct packet_offload *ptype; 3685 struct packet_offload *ptype;
3675 __be16 type = skb->protocol; 3686 __be16 type = skb->protocol;
3676 struct list_head *head = &offload_base; 3687 struct list_head *head = &offload_base;
3677 int err = -ENOENT; 3688 int err = -ENOENT;
3678 3689
3679 BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb)); 3690 BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
3680 3691
3681 if (NAPI_GRO_CB(skb)->count == 1) { 3692 if (NAPI_GRO_CB(skb)->count == 1) {
3682 skb_shinfo(skb)->gso_size = 0; 3693 skb_shinfo(skb)->gso_size = 0;
3683 goto out; 3694 goto out;
3684 } 3695 }
3685 3696
3686 rcu_read_lock(); 3697 rcu_read_lock();
3687 list_for_each_entry_rcu(ptype, head, list) { 3698 list_for_each_entry_rcu(ptype, head, list) {
3688 if (ptype->type != type || !ptype->callbacks.gro_complete) 3699 if (ptype->type != type || !ptype->callbacks.gro_complete)
3689 continue; 3700 continue;
3690 3701
3691 err = ptype->callbacks.gro_complete(skb); 3702 err = ptype->callbacks.gro_complete(skb);
3692 break; 3703 break;
3693 } 3704 }
3694 rcu_read_unlock(); 3705 rcu_read_unlock();
3695 3706
3696 if (err) { 3707 if (err) {
3697 WARN_ON(&ptype->list == head); 3708 WARN_ON(&ptype->list == head);
3698 kfree_skb(skb); 3709 kfree_skb(skb);
3699 return NET_RX_SUCCESS; 3710 return NET_RX_SUCCESS;
3700 } 3711 }
3701 3712
3702 out: 3713 out:
3703 return netif_receive_skb(skb); 3714 return netif_receive_skb(skb);
3704 } 3715 }
3705 3716
3706 /* napi->gro_list contains packets ordered by age, 3717 /* napi->gro_list contains packets ordered by age,
3707 * with the youngest packets at the head. 3718 * with the youngest packets at the head.
3708 * Complete skbs in reverse order to reduce latencies. 3719 * Complete skbs in reverse order to reduce latencies.
3709 */ 3720 */
3710 void napi_gro_flush(struct napi_struct *napi, bool flush_old) 3721 void napi_gro_flush(struct napi_struct *napi, bool flush_old)
3711 { 3722 {
3712 struct sk_buff *skb, *prev = NULL; 3723 struct sk_buff *skb, *prev = NULL;
3713 3724
3714 /* scan list and build reverse chain */ 3725 /* scan list and build reverse chain */
3715 for (skb = napi->gro_list; skb != NULL; skb = skb->next) { 3726 for (skb = napi->gro_list; skb != NULL; skb = skb->next) {
3716 skb->prev = prev; 3727 skb->prev = prev;
3717 prev = skb; 3728 prev = skb;
3718 } 3729 }
3719 3730
3720 for (skb = prev; skb; skb = prev) { 3731 for (skb = prev; skb; skb = prev) {
3721 skb->next = NULL; 3732 skb->next = NULL;
3722 3733
3723 if (flush_old && NAPI_GRO_CB(skb)->age == jiffies) 3734 if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
3724 return; 3735 return;
3725 3736
3726 prev = skb->prev; 3737 prev = skb->prev;
3727 napi_gro_complete(skb); 3738 napi_gro_complete(skb);
3728 napi->gro_count--; 3739 napi->gro_count--;
3729 } 3740 }
3730 3741
3731 napi->gro_list = NULL; 3742 napi->gro_list = NULL;
3732 } 3743 }
3733 EXPORT_SYMBOL(napi_gro_flush); 3744 EXPORT_SYMBOL(napi_gro_flush);
3734 3745
3735 static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) 3746 static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
3736 { 3747 {
3737 struct sk_buff *p; 3748 struct sk_buff *p;
3738 unsigned int maclen = skb->dev->hard_header_len; 3749 unsigned int maclen = skb->dev->hard_header_len;
3739 3750
3740 for (p = napi->gro_list; p; p = p->next) { 3751 for (p = napi->gro_list; p; p = p->next) {
3741 unsigned long diffs; 3752 unsigned long diffs;
3742 3753
3743 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; 3754 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
3744 diffs |= p->vlan_tci ^ skb->vlan_tci; 3755 diffs |= p->vlan_tci ^ skb->vlan_tci;
3745 if (maclen == ETH_HLEN) 3756 if (maclen == ETH_HLEN)
3746 diffs |= compare_ether_header(skb_mac_header(p), 3757 diffs |= compare_ether_header(skb_mac_header(p),
3747 skb_gro_mac_header(skb)); 3758 skb_gro_mac_header(skb));
3748 else if (!diffs) 3759 else if (!diffs)
3749 diffs = memcmp(skb_mac_header(p), 3760 diffs = memcmp(skb_mac_header(p),
3750 skb_gro_mac_header(skb), 3761 skb_gro_mac_header(skb),
3751 maclen); 3762 maclen);
3752 NAPI_GRO_CB(p)->same_flow = !diffs; 3763 NAPI_GRO_CB(p)->same_flow = !diffs;
3753 NAPI_GRO_CB(p)->flush = 0; 3764 NAPI_GRO_CB(p)->flush = 0;
3754 } 3765 }
3755 } 3766 }
3756 3767
3757 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 3768 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3758 { 3769 {
3759 struct sk_buff **pp = NULL; 3770 struct sk_buff **pp = NULL;
3760 struct packet_offload *ptype; 3771 struct packet_offload *ptype;
3761 __be16 type = skb->protocol; 3772 __be16 type = skb->protocol;
3762 struct list_head *head = &offload_base; 3773 struct list_head *head = &offload_base;
3763 int same_flow; 3774 int same_flow;
3764 int mac_len; 3775 int mac_len;
3765 enum gro_result ret; 3776 enum gro_result ret;
3766 3777
3767 if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) 3778 if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
3768 goto normal; 3779 goto normal;
3769 3780
3770 if (skb_is_gso(skb) || skb_has_frag_list(skb)) 3781 if (skb_is_gso(skb) || skb_has_frag_list(skb))
3771 goto normal; 3782 goto normal;
3772 3783
3773 gro_list_prepare(napi, skb); 3784 gro_list_prepare(napi, skb);
3774 3785
3775 rcu_read_lock(); 3786 rcu_read_lock();
3776 list_for_each_entry_rcu(ptype, head, list) { 3787 list_for_each_entry_rcu(ptype, head, list) {
3777 if (ptype->type != type || !ptype->callbacks.gro_receive) 3788 if (ptype->type != type || !ptype->callbacks.gro_receive)
3778 continue; 3789 continue;
3779 3790
3780 skb_set_network_header(skb, skb_gro_offset(skb)); 3791 skb_set_network_header(skb, skb_gro_offset(skb));
3781 mac_len = skb->network_header - skb->mac_header; 3792 mac_len = skb->network_header - skb->mac_header;
3782 skb->mac_len = mac_len; 3793 skb->mac_len = mac_len;
3783 NAPI_GRO_CB(skb)->same_flow = 0; 3794 NAPI_GRO_CB(skb)->same_flow = 0;
3784 NAPI_GRO_CB(skb)->flush = 0; 3795 NAPI_GRO_CB(skb)->flush = 0;
3785 NAPI_GRO_CB(skb)->free = 0; 3796 NAPI_GRO_CB(skb)->free = 0;
3786 3797
3787 pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); 3798 pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
3788 break; 3799 break;
3789 } 3800 }
3790 rcu_read_unlock(); 3801 rcu_read_unlock();
3791 3802
3792 if (&ptype->list == head) 3803 if (&ptype->list == head)
3793 goto normal; 3804 goto normal;
3794 3805
3795 same_flow = NAPI_GRO_CB(skb)->same_flow; 3806 same_flow = NAPI_GRO_CB(skb)->same_flow;
3796 ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED; 3807 ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
3797 3808
3798 if (pp) { 3809 if (pp) {
3799 struct sk_buff *nskb = *pp; 3810 struct sk_buff *nskb = *pp;
3800 3811
3801 *pp = nskb->next; 3812 *pp = nskb->next;
3802 nskb->next = NULL; 3813 nskb->next = NULL;
3803 napi_gro_complete(nskb); 3814 napi_gro_complete(nskb);
3804 napi->gro_count--; 3815 napi->gro_count--;
3805 } 3816 }
3806 3817
3807 if (same_flow) 3818 if (same_flow)
3808 goto ok; 3819 goto ok;
3809 3820
3810 if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS) 3821 if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
3811 goto normal; 3822 goto normal;
3812 3823
3813 napi->gro_count++; 3824 napi->gro_count++;
3814 NAPI_GRO_CB(skb)->count = 1; 3825 NAPI_GRO_CB(skb)->count = 1;
3815 NAPI_GRO_CB(skb)->age = jiffies; 3826 NAPI_GRO_CB(skb)->age = jiffies;
3816 skb_shinfo(skb)->gso_size = skb_gro_len(skb); 3827 skb_shinfo(skb)->gso_size = skb_gro_len(skb);
3817 skb->next = napi->gro_list; 3828 skb->next = napi->gro_list;
3818 napi->gro_list = skb; 3829 napi->gro_list = skb;
3819 ret = GRO_HELD; 3830 ret = GRO_HELD;
3820 3831
3821 pull: 3832 pull:
3822 if (skb_headlen(skb) < skb_gro_offset(skb)) { 3833 if (skb_headlen(skb) < skb_gro_offset(skb)) {
3823 int grow = skb_gro_offset(skb) - skb_headlen(skb); 3834 int grow = skb_gro_offset(skb) - skb_headlen(skb);
3824 3835
3825 BUG_ON(skb->end - skb->tail < grow); 3836 BUG_ON(skb->end - skb->tail < grow);
3826 3837
3827 memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow); 3838 memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
3828 3839
3829 skb->tail += grow; 3840 skb->tail += grow;
3830 skb->data_len -= grow; 3841 skb->data_len -= grow;
3831 3842
3832 skb_shinfo(skb)->frags[0].page_offset += grow; 3843 skb_shinfo(skb)->frags[0].page_offset += grow;
3833 skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow); 3844 skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow);
3834 3845
3835 if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) { 3846 if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) {
3836 skb_frag_unref(skb, 0); 3847 skb_frag_unref(skb, 0);
3837 memmove(skb_shinfo(skb)->frags, 3848 memmove(skb_shinfo(skb)->frags,
3838 skb_shinfo(skb)->frags + 1, 3849 skb_shinfo(skb)->frags + 1,
3839 --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); 3850 --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
3840 } 3851 }
3841 } 3852 }
3842 3853
3843 ok: 3854 ok:
3844 return ret; 3855 return ret;
3845 3856
3846 normal: 3857 normal:
3847 ret = GRO_NORMAL; 3858 ret = GRO_NORMAL;
3848 goto pull; 3859 goto pull;
3849 } 3860 }
3850 3861
3851 3862
3852 static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) 3863 static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
3853 { 3864 {
3854 switch (ret) { 3865 switch (ret) {
3855 case GRO_NORMAL: 3866 case GRO_NORMAL:
3856 if (netif_receive_skb(skb)) 3867 if (netif_receive_skb(skb))
3857 ret = GRO_DROP; 3868 ret = GRO_DROP;
3858 break; 3869 break;
3859 3870
3860 case GRO_DROP: 3871 case GRO_DROP:
3861 kfree_skb(skb); 3872 kfree_skb(skb);
3862 break; 3873 break;
3863 3874
3864 case GRO_MERGED_FREE: 3875 case GRO_MERGED_FREE:
3865 if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) 3876 if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
3866 kmem_cache_free(skbuff_head_cache, skb); 3877 kmem_cache_free(skbuff_head_cache, skb);
3867 else 3878 else
3868 __kfree_skb(skb); 3879 __kfree_skb(skb);
3869 break; 3880 break;
3870 3881
3871 case GRO_HELD: 3882 case GRO_HELD:
3872 case GRO_MERGED: 3883 case GRO_MERGED:
3873 break; 3884 break;
3874 } 3885 }
3875 3886
3876 return ret; 3887 return ret;
3877 } 3888 }
3878 3889
3879 static void skb_gro_reset_offset(struct sk_buff *skb) 3890 static void skb_gro_reset_offset(struct sk_buff *skb)
3880 { 3891 {
3881 const struct skb_shared_info *pinfo = skb_shinfo(skb); 3892 const struct skb_shared_info *pinfo = skb_shinfo(skb);
3882 const skb_frag_t *frag0 = &pinfo->frags[0]; 3893 const skb_frag_t *frag0 = &pinfo->frags[0];
3883 3894
3884 NAPI_GRO_CB(skb)->data_offset = 0; 3895 NAPI_GRO_CB(skb)->data_offset = 0;
3885 NAPI_GRO_CB(skb)->frag0 = NULL; 3896 NAPI_GRO_CB(skb)->frag0 = NULL;
3886 NAPI_GRO_CB(skb)->frag0_len = 0; 3897 NAPI_GRO_CB(skb)->frag0_len = 0;
3887 3898
3888 if (skb->mac_header == skb->tail && 3899 if (skb->mac_header == skb->tail &&
3889 pinfo->nr_frags && 3900 pinfo->nr_frags &&
3890 !PageHighMem(skb_frag_page(frag0))) { 3901 !PageHighMem(skb_frag_page(frag0))) {
3891 NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); 3902 NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
3892 NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0); 3903 NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
3893 } 3904 }
3894 } 3905 }
3895 3906
3896 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 3907 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3897 { 3908 {
3898 skb_gro_reset_offset(skb); 3909 skb_gro_reset_offset(skb);
3899 3910
3900 return napi_skb_finish(dev_gro_receive(napi, skb), skb); 3911 return napi_skb_finish(dev_gro_receive(napi, skb), skb);
3901 } 3912 }
3902 EXPORT_SYMBOL(napi_gro_receive); 3913 EXPORT_SYMBOL(napi_gro_receive);
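/* Editor's note: hypothetical sketch of the GRO entry point as a driver
 * sees it (struct my_priv as in the rx_handler sketch above). Inside its
 * NAPI poll loop, a GRO-capable driver hands each completed buffer to
 * napi_gro_receive() instead of netif_receive_skb(); dev_gro_receive()
 * above then merges it, holds it on gro_list, or falls back to the normal
 * receive path.
 */
static void my_gro_rx_one(struct my_priv *priv, struct sk_buff *skb)
{
	skb->protocol = eth_type_trans(skb, priv->netdev);
	napi_gro_receive(&priv->napi, skb);
}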
3903 3914
3904 static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) 3915 static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
3905 { 3916 {
3906 __skb_pull(skb, skb_headlen(skb)); 3917 __skb_pull(skb, skb_headlen(skb));
3907 /* restore the reserve we had after netdev_alloc_skb_ip_align() */ 3918 /* restore the reserve we had after netdev_alloc_skb_ip_align() */
3908 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb)); 3919 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
3909 skb->vlan_tci = 0; 3920 skb->vlan_tci = 0;
3910 skb->dev = napi->dev; 3921 skb->dev = napi->dev;
3911 skb->skb_iif = 0; 3922 skb->skb_iif = 0;
3912 3923
3913 napi->skb = skb; 3924 napi->skb = skb;
3914 } 3925 }
3915 3926
3916 struct sk_buff *napi_get_frags(struct napi_struct *napi) 3927 struct sk_buff *napi_get_frags(struct napi_struct *napi)
3917 { 3928 {
3918 struct sk_buff *skb = napi->skb; 3929 struct sk_buff *skb = napi->skb;
3919 3930
3920 if (!skb) { 3931 if (!skb) {
3921 skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD); 3932 skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
3922 if (skb) 3933 if (skb)
3923 napi->skb = skb; 3934 napi->skb = skb;
3924 } 3935 }
3925 return skb; 3936 return skb;
3926 } 3937 }
3927 EXPORT_SYMBOL(napi_get_frags); 3938 EXPORT_SYMBOL(napi_get_frags);
3928 3939
3929 static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, 3940 static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
3930 gro_result_t ret) 3941 gro_result_t ret)
3931 { 3942 {
3932 switch (ret) { 3943 switch (ret) {
3933 case GRO_NORMAL: 3944 case GRO_NORMAL:
3934 case GRO_HELD: 3945 case GRO_HELD:
3935 skb->protocol = eth_type_trans(skb, skb->dev); 3946 skb->protocol = eth_type_trans(skb, skb->dev);
3936 3947
3937 if (ret == GRO_HELD) 3948 if (ret == GRO_HELD)
3938 skb_gro_pull(skb, -ETH_HLEN); 3949 skb_gro_pull(skb, -ETH_HLEN);
3939 else if (netif_receive_skb(skb)) 3950 else if (netif_receive_skb(skb))
3940 ret = GRO_DROP; 3951 ret = GRO_DROP;
3941 break; 3952 break;
3942 3953
3943 case GRO_DROP: 3954 case GRO_DROP:
3944 case GRO_MERGED_FREE: 3955 case GRO_MERGED_FREE:
3945 napi_reuse_skb(napi, skb); 3956 napi_reuse_skb(napi, skb);
3946 break; 3957 break;
3947 3958
3948 case GRO_MERGED: 3959 case GRO_MERGED:
3949 break; 3960 break;
3950 } 3961 }
3951 3962
3952 return ret; 3963 return ret;
3953 } 3964 }
3954 3965
3955 static struct sk_buff *napi_frags_skb(struct napi_struct *napi) 3966 static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
3956 { 3967 {
3957 struct sk_buff *skb = napi->skb; 3968 struct sk_buff *skb = napi->skb;
3958 struct ethhdr *eth; 3969 struct ethhdr *eth;
3959 unsigned int hlen; 3970 unsigned int hlen;
3960 unsigned int off; 3971 unsigned int off;
3961 3972
3962 napi->skb = NULL; 3973 napi->skb = NULL;
3963 3974
3964 skb_reset_mac_header(skb); 3975 skb_reset_mac_header(skb);
3965 skb_gro_reset_offset(skb); 3976 skb_gro_reset_offset(skb);
3966 3977
3967 off = skb_gro_offset(skb); 3978 off = skb_gro_offset(skb);
3968 hlen = off + sizeof(*eth); 3979 hlen = off + sizeof(*eth);
3969 eth = skb_gro_header_fast(skb, off); 3980 eth = skb_gro_header_fast(skb, off);
3970 if (skb_gro_header_hard(skb, hlen)) { 3981 if (skb_gro_header_hard(skb, hlen)) {
3971 eth = skb_gro_header_slow(skb, hlen, off); 3982 eth = skb_gro_header_slow(skb, hlen, off);
3972 if (unlikely(!eth)) { 3983 if (unlikely(!eth)) {
3973 napi_reuse_skb(napi, skb); 3984 napi_reuse_skb(napi, skb);
3974 skb = NULL; 3985 skb = NULL;
3975 goto out; 3986 goto out;
3976 } 3987 }
3977 } 3988 }
3978 3989
3979 skb_gro_pull(skb, sizeof(*eth)); 3990 skb_gro_pull(skb, sizeof(*eth));
3980 3991
3981 /* 3992 /*
3982 * This works because the only protocols we care about don't require 3993 * This works because the only protocols we care about don't require
3983 * special handling. We'll fix it up properly at the end. 3994 * special handling. We'll fix it up properly at the end.
3984 */ 3995 */
3985 skb->protocol = eth->h_proto; 3996 skb->protocol = eth->h_proto;
3986 3997
3987 out: 3998 out:
3988 return skb; 3999 return skb;
3989 } 4000 }
3990 4001
3991 gro_result_t napi_gro_frags(struct napi_struct *napi) 4002 gro_result_t napi_gro_frags(struct napi_struct *napi)
3992 { 4003 {
3993 struct sk_buff *skb = napi_frags_skb(napi); 4004 struct sk_buff *skb = napi_frags_skb(napi);
3994 4005
3995 if (!skb) 4006 if (!skb)
3996 return GRO_DROP; 4007 return GRO_DROP;
3997 4008
3998 return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb)); 4009 return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
3999 } 4010 }
4000 EXPORT_SYMBOL(napi_gro_frags); 4011 EXPORT_SYMBOL(napi_gro_frags);
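/* Editor's note: hypothetical sketch of the page-fragment variant. Drivers
 * that receive straight into pages use napi_get_frags() to obtain a
 * (possibly recycled) skb, attach their fragments, and let napi_gro_frags()
 * parse the Ethernet header (napi_frags_skb() above) and feed GRO.
 */
static void my_rx_frag(struct napi_struct *napi, struct page *page,
		       unsigned int offset, unsigned int frag_len)
{
	struct sk_buff *skb = napi_get_frags(napi);

	if (!skb)
		return;			/* allocation failure: drop this buffer */

	skb_fill_page_desc(skb, 0, page, offset, frag_len);
	skb->len += frag_len;
	skb->data_len += frag_len;
	skb->truesize += PAGE_SIZE;	/* rough accounting for the sketch */

	napi_gro_frags(napi);		/* consumes or recycles the skb */
}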
4001 4012
4002 /* 4013 /*
4003 * net_rps_action sends any pending IPIs for RPS. 4014 * net_rps_action sends any pending IPIs for RPS.
4004 * Note: called with local irq disabled, but exits with local irq enabled. 4015 * Note: called with local irq disabled, but exits with local irq enabled.
4005 */ 4016 */
4006 static void net_rps_action_and_irq_enable(struct softnet_data *sd) 4017 static void net_rps_action_and_irq_enable(struct softnet_data *sd)
4007 { 4018 {
4008 #ifdef CONFIG_RPS 4019 #ifdef CONFIG_RPS
4009 struct softnet_data *remsd = sd->rps_ipi_list; 4020 struct softnet_data *remsd = sd->rps_ipi_list;
4010 4021
4011 if (remsd) { 4022 if (remsd) {
4012 sd->rps_ipi_list = NULL; 4023 sd->rps_ipi_list = NULL;
4013 4024
4014 local_irq_enable(); 4025 local_irq_enable();
4015 4026
4016 /* Send pending IPIs to kick RPS processing on remote cpus. */ 4027 /* Send pending IPIs to kick RPS processing on remote cpus. */
4017 while (remsd) { 4028 while (remsd) {
4018 struct softnet_data *next = remsd->rps_ipi_next; 4029 struct softnet_data *next = remsd->rps_ipi_next;
4019 4030
4020 if (cpu_online(remsd->cpu)) 4031 if (cpu_online(remsd->cpu))
4021 __smp_call_function_single(remsd->cpu, 4032 __smp_call_function_single(remsd->cpu,
4022 &remsd->csd, 0); 4033 &remsd->csd, 0);
4023 remsd = next; 4034 remsd = next;
4024 } 4035 }
4025 } else 4036 } else
4026 #endif 4037 #endif
4027 local_irq_enable(); 4038 local_irq_enable();
4028 } 4039 }
4029 4040
4030 static int process_backlog(struct napi_struct *napi, int quota) 4041 static int process_backlog(struct napi_struct *napi, int quota)
4031 { 4042 {
4032 int work = 0; 4043 int work = 0;
4033 struct softnet_data *sd = container_of(napi, struct softnet_data, backlog); 4044 struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
4034 4045
4035 #ifdef CONFIG_RPS 4046 #ifdef CONFIG_RPS
4036 /* Check if we have pending IPIs; it's better to send them now 4047 /* Check if we have pending IPIs; it's better to send them now
4037 * rather than waiting for net_rx_action() to end. 4048 * rather than waiting for net_rx_action() to end.
4038 */ 4049 */
4039 if (sd->rps_ipi_list) { 4050 if (sd->rps_ipi_list) {
4040 local_irq_disable(); 4051 local_irq_disable();
4041 net_rps_action_and_irq_enable(sd); 4052 net_rps_action_and_irq_enable(sd);
4042 } 4053 }
4043 #endif 4054 #endif
4044 napi->weight = weight_p; 4055 napi->weight = weight_p;
4045 local_irq_disable(); 4056 local_irq_disable();
4046 while (work < quota) { 4057 while (work < quota) {
4047 struct sk_buff *skb; 4058 struct sk_buff *skb;
4048 unsigned int qlen; 4059 unsigned int qlen;
4049 4060
4050 while ((skb = __skb_dequeue(&sd->process_queue))) { 4061 while ((skb = __skb_dequeue(&sd->process_queue))) {
4051 local_irq_enable(); 4062 local_irq_enable();
4052 __netif_receive_skb(skb); 4063 __netif_receive_skb(skb);
4053 local_irq_disable(); 4064 local_irq_disable();
4054 input_queue_head_incr(sd); 4065 input_queue_head_incr(sd);
4055 if (++work >= quota) { 4066 if (++work >= quota) {
4056 local_irq_enable(); 4067 local_irq_enable();
4057 return work; 4068 return work;
4058 } 4069 }
4059 } 4070 }
4060 4071
4061 rps_lock(sd); 4072 rps_lock(sd);
4062 qlen = skb_queue_len(&sd->input_pkt_queue); 4073 qlen = skb_queue_len(&sd->input_pkt_queue);
4063 if (qlen) 4074 if (qlen)
4064 skb_queue_splice_tail_init(&sd->input_pkt_queue, 4075 skb_queue_splice_tail_init(&sd->input_pkt_queue,
4065 &sd->process_queue); 4076 &sd->process_queue);
4066 4077
4067 if (qlen < quota - work) { 4078 if (qlen < quota - work) {
4068 /* 4079 /*
4069 * Inline a custom version of __napi_complete(). 4080 * Inline a custom version of __napi_complete().
4070 * Only the current cpu owns and manipulates this napi, 4081 * Only the current cpu owns and manipulates this napi,
4071 * and NAPI_STATE_SCHED is the only possible flag set on backlog. 4082 * and NAPI_STATE_SCHED is the only possible flag set on backlog.
4072 * We can use a plain write instead of clear_bit(), 4083 * We can use a plain write instead of clear_bit(),
4073 * and we don't need an smp_mb() memory barrier. 4084 * and we don't need an smp_mb() memory barrier.
4074 */ 4085 */
4075 list_del(&napi->poll_list); 4086 list_del(&napi->poll_list);
4076 napi->state = 0; 4087 napi->state = 0;
4077 4088
4078 quota = work + qlen; 4089 quota = work + qlen;
4079 } 4090 }
4080 rps_unlock(sd); 4091 rps_unlock(sd);
4081 } 4092 }
4082 local_irq_enable(); 4093 local_irq_enable();
4083 4094
4084 return work; 4095 return work;
4085 } 4096 }
4086 4097
4087 /** 4098 /**
4088 * __napi_schedule - schedule for receive 4099 * __napi_schedule - schedule for receive
4089 * @n: entry to schedule 4100 * @n: entry to schedule
4090 * 4101 *
4091 * The entry's receive function will be scheduled to run 4102 * The entry's receive function will be scheduled to run
4092 */ 4103 */
4093 void __napi_schedule(struct napi_struct *n) 4104 void __napi_schedule(struct napi_struct *n)
4094 { 4105 {
4095 unsigned long flags; 4106 unsigned long flags;
4096 4107
4097 local_irq_save(flags); 4108 local_irq_save(flags);
4098 ____napi_schedule(&__get_cpu_var(softnet_data), n); 4109 ____napi_schedule(&__get_cpu_var(softnet_data), n);
4099 local_irq_restore(flags); 4110 local_irq_restore(flags);
4100 } 4111 }
4101 EXPORT_SYMBOL(__napi_schedule); 4112 EXPORT_SYMBOL(__napi_schedule);
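/* Editor's note: hypothetical sketch of the canonical interrupt-side
 * pairing (struct my_priv as above). napi_schedule_prep() wins the
 * NAPI_STATE_SCHED test-and-set, and only the winner calls
 * __napi_schedule(), which queues the napi on this cpu's softnet_data poll
 * list and raises NET_RX_SOFTIRQ.
 */
static irqreturn_t my_interrupt(int irq, void *dev_id)
{
	struct my_priv *priv = dev_id;

	if (napi_schedule_prep(&priv->napi)) {
		my_disable_rx_irq(priv);	/* hypothetical helper */
		__napi_schedule(&priv->napi);
	}
	return IRQ_HANDLED;
}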
4102 4113
4103 void __napi_complete(struct napi_struct *n) 4114 void __napi_complete(struct napi_struct *n)
4104 { 4115 {
4105 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); 4116 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
4106 BUG_ON(n->gro_list); 4117 BUG_ON(n->gro_list);
4107 4118
4108 list_del(&n->poll_list); 4119 list_del(&n->poll_list);
4109 smp_mb__before_clear_bit(); 4120 smp_mb__before_clear_bit();
4110 clear_bit(NAPI_STATE_SCHED, &n->state); 4121 clear_bit(NAPI_STATE_SCHED, &n->state);
4111 } 4122 }
4112 EXPORT_SYMBOL(__napi_complete); 4123 EXPORT_SYMBOL(__napi_complete);
4113 4124
4114 void napi_complete(struct napi_struct *n) 4125 void napi_complete(struct napi_struct *n)
4115 { 4126 {
4116 unsigned long flags; 4127 unsigned long flags;
4117 4128
4118 /* 4129 /*
4119 * don't let napi dequeue from the cpu poll list 4130 * don't let napi dequeue from the cpu poll list
4120 * just in case it's running on a different cpu 4131 * just in case it's running on a different cpu
4121 */ 4132 */
4122 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state))) 4133 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
4123 return; 4134 return;
4124 4135
4125 napi_gro_flush(n, false); 4136 napi_gro_flush(n, false);
4126 local_irq_save(flags); 4137 local_irq_save(flags);
4127 __napi_complete(n); 4138 __napi_complete(n);
4128 local_irq_restore(flags); 4139 local_irq_restore(flags);
4129 } 4140 }
4130 EXPORT_SYMBOL(napi_complete); 4141 EXPORT_SYMBOL(napi_complete);
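/* Editor's note: hypothetical sketch of the poll-side half of the NAPI
 * contract. A driver calls napi_complete() only when it did less work than
 * its budget; otherwise net_rx_action() below keeps it on the poll list.
 */
static int my_poll(struct napi_struct *napi, int budget)
{
	struct my_priv *priv = container_of(napi, struct my_priv, napi);
	int work = my_clean_rx_ring(priv, budget);	/* hypothetical helper */

	if (work < budget) {
		napi_complete(napi);
		my_enable_rx_irq(priv);			/* hypothetical helper */
	}
	return work;
}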
4131 4142
4132 void netif_napi_add(struct net_device *dev, struct napi_struct *napi, 4143 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
4133 int (*poll)(struct napi_struct *, int), int weight) 4144 int (*poll)(struct napi_struct *, int), int weight)
4134 { 4145 {
4135 INIT_LIST_HEAD(&napi->poll_list); 4146 INIT_LIST_HEAD(&napi->poll_list);
4136 napi->gro_count = 0; 4147 napi->gro_count = 0;
4137 napi->gro_list = NULL; 4148 napi->gro_list = NULL;
4138 napi->skb = NULL; 4149 napi->skb = NULL;
4139 napi->poll = poll; 4150 napi->poll = poll;
4140 napi->weight = weight; 4151 napi->weight = weight;
4141 list_add(&napi->dev_list, &dev->napi_list); 4152 list_add(&napi->dev_list, &dev->napi_list);
4142 napi->dev = dev; 4153 napi->dev = dev;
4143 #ifdef CONFIG_NETPOLL 4154 #ifdef CONFIG_NETPOLL
4144 spin_lock_init(&napi->poll_lock); 4155 spin_lock_init(&napi->poll_lock);
4145 napi->poll_owner = -1; 4156 napi->poll_owner = -1;
4146 #endif 4157 #endif
4147 set_bit(NAPI_STATE_SCHED, &napi->state); 4158 set_bit(NAPI_STATE_SCHED, &napi->state);
4148 } 4159 }
4149 EXPORT_SYMBOL(netif_napi_add); 4160 EXPORT_SYMBOL(netif_napi_add);
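/* Editor's note: hypothetical setup sketch using my_poll from the sketch
 * above. Drivers register their poll routine once at probe time (weight 64
 * is the conventional value), undo it with netif_napi_del() on removal, and
 * bracket actual use with napi_enable()/napi_disable() from ndo_open/ndo_stop.
 */
static int my_setup(struct net_device *netdev, struct my_priv *priv)
{
	netif_napi_add(netdev, &priv->napi, my_poll, 64);
	return register_netdev(netdev);
}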
4150 4161
4151 void netif_napi_del(struct napi_struct *napi) 4162 void netif_napi_del(struct napi_struct *napi)
4152 { 4163 {
4153 struct sk_buff *skb, *next; 4164 struct sk_buff *skb, *next;
4154 4165
4155 list_del_init(&napi->dev_list); 4166 list_del_init(&napi->dev_list);
4156 napi_free_frags(napi); 4167 napi_free_frags(napi);
4157 4168
4158 for (skb = napi->gro_list; skb; skb = next) { 4169 for (skb = napi->gro_list; skb; skb = next) {
4159 next = skb->next; 4170 next = skb->next;
4160 skb->next = NULL; 4171 skb->next = NULL;
4161 kfree_skb(skb); 4172 kfree_skb(skb);
4162 } 4173 }
4163 4174
4164 napi->gro_list = NULL; 4175 napi->gro_list = NULL;
4165 napi->gro_count = 0; 4176 napi->gro_count = 0;
4166 } 4177 }
4167 EXPORT_SYMBOL(netif_napi_del); 4178 EXPORT_SYMBOL(netif_napi_del);
4168 4179
4169 static void net_rx_action(struct softirq_action *h) 4180 static void net_rx_action(struct softirq_action *h)
4170 { 4181 {
4171 struct softnet_data *sd = &__get_cpu_var(softnet_data); 4182 struct softnet_data *sd = &__get_cpu_var(softnet_data);
4172 unsigned long time_limit = jiffies + 2; 4183 unsigned long time_limit = jiffies + 2;
4173 int budget = netdev_budget; 4184 int budget = netdev_budget;
4174 void *have; 4185 void *have;
4175 4186
4176 local_irq_disable(); 4187 local_irq_disable();
4177 4188
4178 while (!list_empty(&sd->poll_list)) { 4189 while (!list_empty(&sd->poll_list)) {
4179 struct napi_struct *n; 4190 struct napi_struct *n;
4180 int work, weight; 4191 int work, weight;
4181 4192
4182 /* If the softirq window is exhausted then punt. 4193 /* If the softirq window is exhausted then punt.
4183 * Allow this to run for 2 jiffies, which allows 4194 * Allow this to run for 2 jiffies, which allows
4184 * an average latency of 1.5/HZ. 4195 * an average latency of 1.5/HZ.
4185 */ 4196 */
4186 if (unlikely(budget <= 0 || time_after(jiffies, time_limit))) 4197 if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
4187 goto softnet_break; 4198 goto softnet_break;
4188 4199
4189 local_irq_enable(); 4200 local_irq_enable();
4190 4201
4191 /* Even though interrupts have been re-enabled, this 4202 /* Even though interrupts have been re-enabled, this
4192 * access is safe because interrupts can only add new 4203 * access is safe because interrupts can only add new
4193 * entries to the tail of this list, and only ->poll() 4204 * entries to the tail of this list, and only ->poll()
4194 * calls can remove this head entry from the list. 4205 * calls can remove this head entry from the list.
4195 */ 4206 */
4196 n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list); 4207 n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
4197 4208
4198 have = netpoll_poll_lock(n); 4209 have = netpoll_poll_lock(n);
4199 4210
4200 weight = n->weight; 4211 weight = n->weight;
4201 4212
4202 /* This NAPI_STATE_SCHED test is for avoiding a race 4213 /* This NAPI_STATE_SCHED test is for avoiding a race
4203 * with netpoll's poll_napi(). Only the entity which 4214 * with netpoll's poll_napi(). Only the entity which
4204 * obtains the lock and sees NAPI_STATE_SCHED set will 4215 * obtains the lock and sees NAPI_STATE_SCHED set will
4205 * actually make the ->poll() call. Therefore we avoid 4216 * actually make the ->poll() call. Therefore we avoid
4206 * accidentally calling ->poll() when NAPI is not scheduled. 4217 * accidentally calling ->poll() when NAPI is not scheduled.
4207 */ 4218 */
4208 work = 0; 4219 work = 0;
4209 if (test_bit(NAPI_STATE_SCHED, &n->state)) { 4220 if (test_bit(NAPI_STATE_SCHED, &n->state)) {
4210 work = n->poll(n, weight); 4221 work = n->poll(n, weight);
4211 trace_napi_poll(n); 4222 trace_napi_poll(n);
4212 } 4223 }
4213 4224
4214 WARN_ON_ONCE(work > weight); 4225 WARN_ON_ONCE(work > weight);
4215 4226
4216 budget -= work; 4227 budget -= work;
4217 4228
4218 local_irq_disable(); 4229 local_irq_disable();
4219 4230
4220 /* Drivers must not modify the NAPI state if they 4231 /* Drivers must not modify the NAPI state if they
4221 * consume the entire weight. In such cases this code 4232 * consume the entire weight. In such cases this code
4222 * still "owns" the NAPI instance and therefore can 4233 * still "owns" the NAPI instance and therefore can
4223 * move the instance around on the list at-will. 4234 * move the instance around on the list at-will.
4224 */ 4235 */
4225 if (unlikely(work == weight)) { 4236 if (unlikely(work == weight)) {
4226 if (unlikely(napi_disable_pending(n))) { 4237 if (unlikely(napi_disable_pending(n))) {
4227 local_irq_enable(); 4238 local_irq_enable();
4228 napi_complete(n); 4239 napi_complete(n);
4229 local_irq_disable(); 4240 local_irq_disable();
4230 } else { 4241 } else {
4231 if (n->gro_list) { 4242 if (n->gro_list) {
4232 /* flush too old packets 4243 /* flush too old packets
4233 * If HZ < 1000, flush all packets. 4244 * If HZ < 1000, flush all packets.
4234 */ 4245 */
4235 local_irq_enable(); 4246 local_irq_enable();
4236 napi_gro_flush(n, HZ >= 1000); 4247 napi_gro_flush(n, HZ >= 1000);
4237 local_irq_disable(); 4248 local_irq_disable();
4238 } 4249 }
4239 list_move_tail(&n->poll_list, &sd->poll_list); 4250 list_move_tail(&n->poll_list, &sd->poll_list);
4240 } 4251 }
4241 } 4252 }
4242 4253
4243 netpoll_poll_unlock(have); 4254 netpoll_poll_unlock(have);
4244 } 4255 }
4245 out: 4256 out:
4246 net_rps_action_and_irq_enable(sd); 4257 net_rps_action_and_irq_enable(sd);
4247 4258
4248 #ifdef CONFIG_NET_DMA 4259 #ifdef CONFIG_NET_DMA
4249 /* 4260 /*
4250 * There may not be any more sk_buffs coming right now, so push 4261 * There may not be any more sk_buffs coming right now, so push
4251 * any pending DMA copies to hardware 4262 * any pending DMA copies to hardware
4252 */ 4263 */
4253 dma_issue_pending_all(); 4264 dma_issue_pending_all();
4254 #endif 4265 #endif
4255 4266
4256 return; 4267 return;
4257 4268
4258 softnet_break: 4269 softnet_break:
4259 sd->time_squeeze++; 4270 sd->time_squeeze++;
4260 __raise_softirq_irqoff(NET_RX_SOFTIRQ); 4271 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
4261 goto out; 4272 goto out;
4262 } 4273 }
4263 4274
4264 static gifconf_func_t *gifconf_list[NPROTO]; 4275 static gifconf_func_t *gifconf_list[NPROTO];
4265 4276
4266 /** 4277 /**
4267 * register_gifconf - register a SIOCGIF handler 4278 * register_gifconf - register a SIOCGIF handler
4268 * @family: Address family 4279 * @family: Address family
4269 * @gifconf: Function handler 4280 * @gifconf: Function handler
4270 * 4281 *
4271 * Register protocol dependent address dumping routines. The handler 4282 * Register protocol dependent address dumping routines. The handler
4272 * that is passed must not be freed or reused until it has been replaced 4283 * that is passed must not be freed or reused until it has been replaced
4273 * by another handler. 4284 * by another handler.
4274 */ 4285 */
4275 int register_gifconf(unsigned int family, gifconf_func_t *gifconf) 4286 int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
4276 { 4287 {
4277 if (family >= NPROTO) 4288 if (family >= NPROTO)
4278 return -EINVAL; 4289 return -EINVAL;
4279 gifconf_list[family] = gifconf; 4290 gifconf_list[family] = gifconf;
4280 return 0; 4291 return 0;
4281 } 4292 }
4282 EXPORT_SYMBOL(register_gifconf); 4293 EXPORT_SYMBOL(register_gifconf);
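/* Editor's note: hypothetical sketch of a gifconf handler. IPv4 registers
 * inet_gifconf for PF_INET at init time, so a real handler would pick its
 * own family; dev_ifconf() below calls the handler with buf == NULL when it
 * only wants the size that would be written.
 */
static int my_gifconf(struct net_device *dev, char __user *buf, int len)
{
	/* write up to len bytes of ifreq records for dev into buf,
	 * or return the space that would be needed when buf is NULL
	 */
	return 0;
}

static int __init my_family_init(void)
{
	return register_gifconf(PF_INET, my_gifconf);	/* family < NPROTO */
}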
4283 4294
4284 4295
4285 /* 4296 /*
4286 * Map an interface index to its name (SIOCGIFNAME) 4297 * Map an interface index to its name (SIOCGIFNAME)
4287 */ 4298 */
4288 4299
4289 /* 4300 /*
4290 * We need this ioctl for efficient implementation of the 4301 * We need this ioctl for efficient implementation of the
4291 * if_indextoname() function required by the IPv6 API. Without 4302 * if_indextoname() function required by the IPv6 API. Without
4292 * it, we would have to search all the interfaces to find a 4303 * it, we would have to search all the interfaces to find a
4293 * match. --pb 4304 * match. --pb
4294 */ 4305 */
4295 4306
4296 static int dev_ifname(struct net *net, struct ifreq __user *arg) 4307 static int dev_ifname(struct net *net, struct ifreq __user *arg)
4297 { 4308 {
4298 struct net_device *dev; 4309 struct net_device *dev;
4299 struct ifreq ifr; 4310 struct ifreq ifr;
4300 unsigned seq; 4311 unsigned seq;
4301 4312
4302 /* 4313 /*
4303 * Fetch the caller's info block. 4314 * Fetch the caller's info block.
4304 */ 4315 */
4305 4316
4306 if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) 4317 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
4307 return -EFAULT; 4318 return -EFAULT;
4308 4319
4309 retry: 4320 retry:
4310 seq = read_seqcount_begin(&devnet_rename_seq); 4321 seq = read_seqcount_begin(&devnet_rename_seq);
4311 rcu_read_lock(); 4322 rcu_read_lock();
4312 dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex); 4323 dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
4313 if (!dev) { 4324 if (!dev) {
4314 rcu_read_unlock(); 4325 rcu_read_unlock();
4315 return -ENODEV; 4326 return -ENODEV;
4316 } 4327 }
4317 4328
4318 strcpy(ifr.ifr_name, dev->name); 4329 strcpy(ifr.ifr_name, dev->name);
4319 rcu_read_unlock(); 4330 rcu_read_unlock();
4320 if (read_seqcount_retry(&devnet_rename_seq, seq)) 4331 if (read_seqcount_retry(&devnet_rename_seq, seq))
4321 goto retry; 4332 goto retry;
4322 4333
4323 if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) 4334 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
4324 return -EFAULT; 4335 return -EFAULT;
4325 return 0; 4336 return 0;
4326 } 4337 }
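/* Editor's note: the userspace counterpart, sketched with hypothetical
 * error handling. Resolving an ifindex to a name via SIOCGIFNAME is
 * essentially what if_indextoname() does underneath.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <net/if.h>

static char *index_to_name(int sock, unsigned int ifindex, char *name)
{
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_ifindex = ifindex;
	if (ioctl(sock, SIOCGIFNAME, &ifr) < 0)
		return NULL;		/* errno set by the kernel */
	return strncpy(name, ifr.ifr_name, IFNAMSIZ);
}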
4327 4338
4328 /* 4339 /*
4329 * Perform a SIOCGIFCONF call. This structure will change 4340 * Perform a SIOCGIFCONF call. This structure will change
4330 * size eventually, and there is nothing I can do about it. 4341 * size eventually, and there is nothing I can do about it.
4331 * Thus we will need a 'compatibility mode'. 4342 * Thus we will need a 'compatibility mode'.
4332 */ 4343 */
4333 4344
4334 static int dev_ifconf(struct net *net, char __user *arg) 4345 static int dev_ifconf(struct net *net, char __user *arg)
4335 { 4346 {
4336 struct ifconf ifc; 4347 struct ifconf ifc;
4337 struct net_device *dev; 4348 struct net_device *dev;
4338 char __user *pos; 4349 char __user *pos;
4339 int len; 4350 int len;
4340 int total; 4351 int total;
4341 int i; 4352 int i;
4342 4353
4343 /* 4354 /*
4344 * Fetch the caller's info block. 4355 * Fetch the caller's info block.
4345 */ 4356 */
4346 4357
4347 if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) 4358 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
4348 return -EFAULT; 4359 return -EFAULT;
4349 4360
4350 pos = ifc.ifc_buf; 4361 pos = ifc.ifc_buf;
4351 len = ifc.ifc_len; 4362 len = ifc.ifc_len;
4352 4363
4353 /* 4364 /*
4354 * Loop over the interfaces, and write an info block for each. 4365 * Loop over the interfaces, and write an info block for each.
4355 */ 4366 */
4356 4367
4357 total = 0; 4368 total = 0;
4358 for_each_netdev(net, dev) { 4369 for_each_netdev(net, dev) {
4359 for (i = 0; i < NPROTO; i++) { 4370 for (i = 0; i < NPROTO; i++) {
4360 if (gifconf_list[i]) { 4371 if (gifconf_list[i]) {
4361 int done; 4372 int done;
4362 if (!pos) 4373 if (!pos)
4363 done = gifconf_list[i](dev, NULL, 0); 4374 done = gifconf_list[i](dev, NULL, 0);
4364 else 4375 else
4365 done = gifconf_list[i](dev, pos + total, 4376 done = gifconf_list[i](dev, pos + total,
4366 len - total); 4377 len - total);
4367 if (done < 0) 4378 if (done < 0)
4368 return -EFAULT; 4379 return -EFAULT;
4369 total += done; 4380 total += done;
4370 } 4381 }
4371 } 4382 }
4372 } 4383 }
4373 4384
4374 /* 4385 /*
4375 * All done. Write the updated control block back to the caller. 4386 * All done. Write the updated control block back to the caller.
4376 */ 4387 */
4377 ifc.ifc_len = total; 4388 ifc.ifc_len = total;
4378 4389
4379 /* 4390 /*
4380 * Both BSD and Solaris return 0 here, so we do too. 4391 * Both BSD and Solaris return 0 here, so we do too.
4381 */ 4392 */
4382 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0; 4393 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
4383 } 4394 }
4384 4395
4385 #ifdef CONFIG_PROC_FS 4396 #ifdef CONFIG_PROC_FS
4386 4397
4387 #define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1) 4398 #define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
4388 4399
4389 #define get_bucket(x) ((x) >> BUCKET_SPACE) 4400 #define get_bucket(x) ((x) >> BUCKET_SPACE)
4390 #define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1)) 4401 #define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
4391 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) 4402 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
4392 4403
4393 static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos) 4404 static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos)
4394 { 4405 {
4395 struct net *net = seq_file_net(seq); 4406 struct net *net = seq_file_net(seq);
4396 struct net_device *dev; 4407 struct net_device *dev;
4397 struct hlist_node *p; 4408 struct hlist_node *p;
4398 struct hlist_head *h; 4409 struct hlist_head *h;
4399 unsigned int count = 0, offset = get_offset(*pos); 4410 unsigned int count = 0, offset = get_offset(*pos);
4400 4411
4401 h = &net->dev_name_head[get_bucket(*pos)]; 4412 h = &net->dev_name_head[get_bucket(*pos)];
4402 hlist_for_each_entry_rcu(dev, p, h, name_hlist) { 4413 hlist_for_each_entry_rcu(dev, p, h, name_hlist) {
4403 if (++count == offset) 4414 if (++count == offset)
4404 return dev; 4415 return dev;
4405 } 4416 }
4406 4417
4407 return NULL; 4418 return NULL;
4408 } 4419 }
4409 4420
4410 static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos) 4421 static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos)
4411 { 4422 {
4412 struct net_device *dev; 4423 struct net_device *dev;
4413 unsigned int bucket; 4424 unsigned int bucket;
4414 4425
4415 do { 4426 do {
4416 dev = dev_from_same_bucket(seq, pos); 4427 dev = dev_from_same_bucket(seq, pos);
4417 if (dev) 4428 if (dev)
4418 return dev; 4429 return dev;
4419 4430
4420 bucket = get_bucket(*pos) + 1; 4431 bucket = get_bucket(*pos) + 1;
4421 *pos = set_bucket_offset(bucket, 1); 4432 *pos = set_bucket_offset(bucket, 1);
4422 } while (bucket < NETDEV_HASHENTRIES); 4433 } while (bucket < NETDEV_HASHENTRIES);
4423 4434
4424 return NULL; 4435 return NULL;
4425 } 4436 }
4426 4437
4427 /* 4438 /*
4428 * This is invoked by the /proc filesystem handler to display a device 4439 * This is invoked by the /proc filesystem handler to display a device
4429 * in detail. 4440 * in detail.
4430 */ 4441 */
4431 void *dev_seq_start(struct seq_file *seq, loff_t *pos) 4442 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
4432 __acquires(RCU) 4443 __acquires(RCU)
4433 { 4444 {
4434 rcu_read_lock(); 4445 rcu_read_lock();
4435 if (!*pos) 4446 if (!*pos)
4436 return SEQ_START_TOKEN; 4447 return SEQ_START_TOKEN;
4437 4448
4438 if (get_bucket(*pos) >= NETDEV_HASHENTRIES) 4449 if (get_bucket(*pos) >= NETDEV_HASHENTRIES)
4439 return NULL; 4450 return NULL;
4440 4451
4441 return dev_from_bucket(seq, pos); 4452 return dev_from_bucket(seq, pos);
4442 } 4453 }
4443 4454
4444 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) 4455 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4445 { 4456 {
4446 ++*pos; 4457 ++*pos;
4447 return dev_from_bucket(seq, pos); 4458 return dev_from_bucket(seq, pos);
4448 } 4459 }
4449 4460
4450 void dev_seq_stop(struct seq_file *seq, void *v) 4461 void dev_seq_stop(struct seq_file *seq, void *v)
4451 __releases(RCU) 4462 __releases(RCU)
4452 { 4463 {
4453 rcu_read_unlock(); 4464 rcu_read_unlock();
4454 } 4465 }
4455 4466
4456 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) 4467 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
4457 { 4468 {
4458 struct rtnl_link_stats64 temp; 4469 struct rtnl_link_stats64 temp;
4459 const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); 4470 const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
4460 4471
4461 seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " 4472 seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
4462 "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", 4473 "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
4463 dev->name, stats->rx_bytes, stats->rx_packets, 4474 dev->name, stats->rx_bytes, stats->rx_packets,
4464 stats->rx_errors, 4475 stats->rx_errors,
4465 stats->rx_dropped + stats->rx_missed_errors, 4476 stats->rx_dropped + stats->rx_missed_errors,
4466 stats->rx_fifo_errors, 4477 stats->rx_fifo_errors,
4467 stats->rx_length_errors + stats->rx_over_errors + 4478 stats->rx_length_errors + stats->rx_over_errors +
4468 stats->rx_crc_errors + stats->rx_frame_errors, 4479 stats->rx_crc_errors + stats->rx_frame_errors,
4469 stats->rx_compressed, stats->multicast, 4480 stats->rx_compressed, stats->multicast,
4470 stats->tx_bytes, stats->tx_packets, 4481 stats->tx_bytes, stats->tx_packets,
4471 stats->tx_errors, stats->tx_dropped, 4482 stats->tx_errors, stats->tx_dropped,
4472 stats->tx_fifo_errors, stats->collisions, 4483 stats->tx_fifo_errors, stats->collisions,
4473 stats->tx_carrier_errors + 4484 stats->tx_carrier_errors +
4474 stats->tx_aborted_errors + 4485 stats->tx_aborted_errors +
4475 stats->tx_window_errors + 4486 stats->tx_window_errors +
4476 stats->tx_heartbeat_errors, 4487 stats->tx_heartbeat_errors,
4477 stats->tx_compressed); 4488 stats->tx_compressed);
4478 } 4489 }
4479 4490
4480 /* 4491 /*
4481 * Called from the PROCfs module. This now uses the new arbitrary sized 4492 * Called from the PROCfs module. This now uses the new arbitrary sized
4482 * /proc/net interface to create /proc/net/dev 4493 * /proc/net interface to create /proc/net/dev
4483 */ 4494 */
4484 static int dev_seq_show(struct seq_file *seq, void *v) 4495 static int dev_seq_show(struct seq_file *seq, void *v)
4485 { 4496 {
4486 if (v == SEQ_START_TOKEN) 4497 if (v == SEQ_START_TOKEN)
4487 seq_puts(seq, "Inter-| Receive " 4498 seq_puts(seq, "Inter-| Receive "
4488 " | Transmit\n" 4499 " | Transmit\n"
4489 " face |bytes packets errs drop fifo frame " 4500 " face |bytes packets errs drop fifo frame "
4490 "compressed multicast|bytes packets errs " 4501 "compressed multicast|bytes packets errs "
4491 "drop fifo colls carrier compressed\n"); 4502 "drop fifo colls carrier compressed\n");
4492 else 4503 else
4493 dev_seq_printf_stats(seq, v); 4504 dev_seq_printf_stats(seq, v);
4494 return 0; 4505 return 0;
4495 } 4506 }
4496 4507
4497 static struct softnet_data *softnet_get_online(loff_t *pos) 4508 static struct softnet_data *softnet_get_online(loff_t *pos)
4498 { 4509 {
4499 struct softnet_data *sd = NULL; 4510 struct softnet_data *sd = NULL;
4500 4511
4501 while (*pos < nr_cpu_ids) 4512 while (*pos < nr_cpu_ids)
4502 if (cpu_online(*pos)) { 4513 if (cpu_online(*pos)) {
4503 sd = &per_cpu(softnet_data, *pos); 4514 sd = &per_cpu(softnet_data, *pos);
4504 break; 4515 break;
4505 } else 4516 } else
4506 ++*pos; 4517 ++*pos;
4507 return sd; 4518 return sd;
4508 } 4519 }
4509 4520
4510 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) 4521 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
4511 { 4522 {
4512 return softnet_get_online(pos); 4523 return softnet_get_online(pos);
4513 } 4524 }
4514 4525
4515 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) 4526 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4516 { 4527 {
4517 ++*pos; 4528 ++*pos;
4518 return softnet_get_online(pos); 4529 return softnet_get_online(pos);
4519 } 4530 }
4520 4531
4521 static void softnet_seq_stop(struct seq_file *seq, void *v) 4532 static void softnet_seq_stop(struct seq_file *seq, void *v)
4522 { 4533 {
4523 } 4534 }
4524 4535
4525 static int softnet_seq_show(struct seq_file *seq, void *v) 4536 static int softnet_seq_show(struct seq_file *seq, void *v)
4526 { 4537 {
4527 struct softnet_data *sd = v; 4538 struct softnet_data *sd = v;
4528 4539
4529 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", 4540 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
4530 sd->processed, sd->dropped, sd->time_squeeze, 0, 4541 sd->processed, sd->dropped, sd->time_squeeze, 0,
4531 0, 0, 0, 0, /* was fastroute */ 4542 0, 0, 0, 0, /* was fastroute */
4532 sd->cpu_collision, sd->received_rps); 4543 sd->cpu_collision, sd->received_rps);
4533 return 0; 4544 return 0;
4534 } 4545 }
4535 4546
4536 static const struct seq_operations dev_seq_ops = { 4547 static const struct seq_operations dev_seq_ops = {
4537 .start = dev_seq_start, 4548 .start = dev_seq_start,
4538 .next = dev_seq_next, 4549 .next = dev_seq_next,
4539 .stop = dev_seq_stop, 4550 .stop = dev_seq_stop,
4540 .show = dev_seq_show, 4551 .show = dev_seq_show,
4541 }; 4552 };
4542 4553
4543 static int dev_seq_open(struct inode *inode, struct file *file) 4554 static int dev_seq_open(struct inode *inode, struct file *file)
4544 { 4555 {
4545 return seq_open_net(inode, file, &dev_seq_ops, 4556 return seq_open_net(inode, file, &dev_seq_ops,
4546 sizeof(struct seq_net_private)); 4557 sizeof(struct seq_net_private));
4547 } 4558 }
4548 4559
4549 static const struct file_operations dev_seq_fops = { 4560 static const struct file_operations dev_seq_fops = {
4550 .owner = THIS_MODULE, 4561 .owner = THIS_MODULE,
4551 .open = dev_seq_open, 4562 .open = dev_seq_open,
4552 .read = seq_read, 4563 .read = seq_read,
4553 .llseek = seq_lseek, 4564 .llseek = seq_lseek,
4554 .release = seq_release_net, 4565 .release = seq_release_net,
4555 }; 4566 };
4556 4567
4557 static const struct seq_operations softnet_seq_ops = { 4568 static const struct seq_operations softnet_seq_ops = {
4558 .start = softnet_seq_start, 4569 .start = softnet_seq_start,
4559 .next = softnet_seq_next, 4570 .next = softnet_seq_next,
4560 .stop = softnet_seq_stop, 4571 .stop = softnet_seq_stop,
4561 .show = softnet_seq_show, 4572 .show = softnet_seq_show,
4562 }; 4573 };
4563 4574
4564 static int softnet_seq_open(struct inode *inode, struct file *file) 4575 static int softnet_seq_open(struct inode *inode, struct file *file)
4565 { 4576 {
4566 return seq_open(file, &softnet_seq_ops); 4577 return seq_open(file, &softnet_seq_ops);
4567 } 4578 }
4568 4579
4569 static const struct file_operations softnet_seq_fops = { 4580 static const struct file_operations softnet_seq_fops = {
4570 .owner = THIS_MODULE, 4581 .owner = THIS_MODULE,
4571 .open = softnet_seq_open, 4582 .open = softnet_seq_open,
4572 .read = seq_read, 4583 .read = seq_read,
4573 .llseek = seq_lseek, 4584 .llseek = seq_lseek,
4574 .release = seq_release, 4585 .release = seq_release,
4575 }; 4586 };
4576 4587
4577 static void *ptype_get_idx(loff_t pos) 4588 static void *ptype_get_idx(loff_t pos)
4578 { 4589 {
4579 struct packet_type *pt = NULL; 4590 struct packet_type *pt = NULL;
4580 loff_t i = 0; 4591 loff_t i = 0;
4581 int t; 4592 int t;
4582 4593
4583 list_for_each_entry_rcu(pt, &ptype_all, list) { 4594 list_for_each_entry_rcu(pt, &ptype_all, list) {
4584 if (i == pos) 4595 if (i == pos)
4585 return pt; 4596 return pt;
4586 ++i; 4597 ++i;
4587 } 4598 }
4588 4599
4589 for (t = 0; t < PTYPE_HASH_SIZE; t++) { 4600 for (t = 0; t < PTYPE_HASH_SIZE; t++) {
4590 list_for_each_entry_rcu(pt, &ptype_base[t], list) { 4601 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
4591 if (i == pos) 4602 if (i == pos)
4592 return pt; 4603 return pt;
4593 ++i; 4604 ++i;
4594 } 4605 }
4595 } 4606 }
4596 return NULL; 4607 return NULL;
4597 } 4608 }
4598 4609
4599 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) 4610 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
4600 __acquires(RCU) 4611 __acquires(RCU)
4601 { 4612 {
4602 rcu_read_lock(); 4613 rcu_read_lock();
4603 return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; 4614 return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
4604 } 4615 }
4605 4616
4606 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) 4617 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4607 { 4618 {
4608 struct packet_type *pt; 4619 struct packet_type *pt;
4609 struct list_head *nxt; 4620 struct list_head *nxt;
4610 int hash; 4621 int hash;
4611 4622
4612 ++*pos; 4623 ++*pos;
4613 if (v == SEQ_START_TOKEN) 4624 if (v == SEQ_START_TOKEN)
4614 return ptype_get_idx(0); 4625 return ptype_get_idx(0);
4615 4626
4616 pt = v; 4627 pt = v;
4617 nxt = pt->list.next; 4628 nxt = pt->list.next;
4618 if (pt->type == htons(ETH_P_ALL)) { 4629 if (pt->type == htons(ETH_P_ALL)) {
4619 if (nxt != &ptype_all) 4630 if (nxt != &ptype_all)
4620 goto found; 4631 goto found;
4621 hash = 0; 4632 hash = 0;
4622 nxt = ptype_base[0].next; 4633 nxt = ptype_base[0].next;
4623 } else 4634 } else
4624 hash = ntohs(pt->type) & PTYPE_HASH_MASK; 4635 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
4625 4636
4626 while (nxt == &ptype_base[hash]) { 4637 while (nxt == &ptype_base[hash]) {
4627 if (++hash >= PTYPE_HASH_SIZE) 4638 if (++hash >= PTYPE_HASH_SIZE)
4628 return NULL; 4639 return NULL;
4629 nxt = ptype_base[hash].next; 4640 nxt = ptype_base[hash].next;
4630 } 4641 }
4631 found: 4642 found:
4632 return list_entry(nxt, struct packet_type, list); 4643 return list_entry(nxt, struct packet_type, list);
4633 } 4644 }
4634 4645
4635 static void ptype_seq_stop(struct seq_file *seq, void *v) 4646 static void ptype_seq_stop(struct seq_file *seq, void *v)
4636 __releases(RCU) 4647 __releases(RCU)
4637 { 4648 {
4638 rcu_read_unlock(); 4649 rcu_read_unlock();
4639 } 4650 }
4640 4651
4641 static int ptype_seq_show(struct seq_file *seq, void *v) 4652 static int ptype_seq_show(struct seq_file *seq, void *v)
4642 { 4653 {
4643 struct packet_type *pt = v; 4654 struct packet_type *pt = v;
4644 4655
4645 if (v == SEQ_START_TOKEN) 4656 if (v == SEQ_START_TOKEN)
4646 seq_puts(seq, "Type Device Function\n"); 4657 seq_puts(seq, "Type Device Function\n");
4647 else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { 4658 else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
4648 if (pt->type == htons(ETH_P_ALL)) 4659 if (pt->type == htons(ETH_P_ALL))
4649 seq_puts(seq, "ALL "); 4660 seq_puts(seq, "ALL ");
4650 else 4661 else
4651 seq_printf(seq, "%04x", ntohs(pt->type)); 4662 seq_printf(seq, "%04x", ntohs(pt->type));
4652 4663
4653 seq_printf(seq, " %-8s %pF\n", 4664 seq_printf(seq, " %-8s %pF\n",
4654 pt->dev ? pt->dev->name : "", pt->func); 4665 pt->dev ? pt->dev->name : "", pt->func);
4655 } 4666 }
4656 4667
4657 return 0; 4668 return 0;
4658 } 4669 }
4659 4670
4660 static const struct seq_operations ptype_seq_ops = { 4671 static const struct seq_operations ptype_seq_ops = {
4661 .start = ptype_seq_start, 4672 .start = ptype_seq_start,
4662 .next = ptype_seq_next, 4673 .next = ptype_seq_next,
4663 .stop = ptype_seq_stop, 4674 .stop = ptype_seq_stop,
4664 .show = ptype_seq_show, 4675 .show = ptype_seq_show,
4665 }; 4676 };
4666 4677
4667 static int ptype_seq_open(struct inode *inode, struct file *file) 4678 static int ptype_seq_open(struct inode *inode, struct file *file)
4668 { 4679 {
4669 return seq_open_net(inode, file, &ptype_seq_ops, 4680 return seq_open_net(inode, file, &ptype_seq_ops,
4670 sizeof(struct seq_net_private)); 4681 sizeof(struct seq_net_private));
4671 } 4682 }
4672 4683
4673 static const struct file_operations ptype_seq_fops = { 4684 static const struct file_operations ptype_seq_fops = {
4674 .owner = THIS_MODULE, 4685 .owner = THIS_MODULE,
4675 .open = ptype_seq_open, 4686 .open = ptype_seq_open,
4676 .read = seq_read, 4687 .read = seq_read,
4677 .llseek = seq_lseek, 4688 .llseek = seq_lseek,
4678 .release = seq_release_net, 4689 .release = seq_release_net,
4679 }; 4690 };
4680 4691
4681 4692
4682 static int __net_init dev_proc_net_init(struct net *net) 4693 static int __net_init dev_proc_net_init(struct net *net)
4683 { 4694 {
4684 int rc = -ENOMEM; 4695 int rc = -ENOMEM;
4685 4696
4686 if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops)) 4697 if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
4687 goto out; 4698 goto out;
4688 if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops)) 4699 if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
4689 goto out_dev; 4700 goto out_dev;
4690 if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops)) 4701 if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
4691 goto out_softnet; 4702 goto out_softnet;
4692 4703
4693 if (wext_proc_init(net)) 4704 if (wext_proc_init(net))
4694 goto out_ptype; 4705 goto out_ptype;
4695 rc = 0; 4706 rc = 0;
4696 out: 4707 out:
4697 return rc; 4708 return rc;
4698 out_ptype: 4709 out_ptype:
4699 proc_net_remove(net, "ptype"); 4710 proc_net_remove(net, "ptype");
4700 out_softnet: 4711 out_softnet:
4701 proc_net_remove(net, "softnet_stat"); 4712 proc_net_remove(net, "softnet_stat");
4702 out_dev: 4713 out_dev:
4703 proc_net_remove(net, "dev"); 4714 proc_net_remove(net, "dev");
4704 goto out; 4715 goto out;
4705 } 4716 }
4706 4717
4707 static void __net_exit dev_proc_net_exit(struct net *net) 4718 static void __net_exit dev_proc_net_exit(struct net *net)
4708 { 4719 {
4709 wext_proc_exit(net); 4720 wext_proc_exit(net);
4710 4721
4711 proc_net_remove(net, "ptype"); 4722 proc_net_remove(net, "ptype");
4712 proc_net_remove(net, "softnet_stat"); 4723 proc_net_remove(net, "softnet_stat");
4713 proc_net_remove(net, "dev"); 4724 proc_net_remove(net, "dev");
4714 } 4725 }
4715 4726
4716 static struct pernet_operations __net_initdata dev_proc_ops = { 4727 static struct pernet_operations __net_initdata dev_proc_ops = {
4717 .init = dev_proc_net_init, 4728 .init = dev_proc_net_init,
4718 .exit = dev_proc_net_exit, 4729 .exit = dev_proc_net_exit,
4719 }; 4730 };
4720 4731
4721 static int __init dev_proc_init(void) 4732 static int __init dev_proc_init(void)
4722 { 4733 {
4723 return register_pernet_subsys(&dev_proc_ops); 4734 return register_pernet_subsys(&dev_proc_ops);
4724 } 4735 }
4725 #else 4736 #else
4726 #define dev_proc_init() 0 4737 #define dev_proc_init() 0
4727 #endif /* CONFIG_PROC_FS */ 4738 #endif /* CONFIG_PROC_FS */
4728 4739
4729 4740
4730 struct netdev_upper { 4741 struct netdev_upper {
4731 struct net_device *dev; 4742 struct net_device *dev;
4732 bool master; 4743 bool master;
4733 struct list_head list; 4744 struct list_head list;
4734 struct rcu_head rcu; 4745 struct rcu_head rcu;
4735 struct list_head search_list; 4746 struct list_head search_list;
4736 }; 4747 };
4737 4748
4738 static void __append_search_uppers(struct list_head *search_list, 4749 static void __append_search_uppers(struct list_head *search_list,
4739 struct net_device *dev) 4750 struct net_device *dev)
4740 { 4751 {
4741 struct netdev_upper *upper; 4752 struct netdev_upper *upper;
4742 4753
4743 list_for_each_entry(upper, &dev->upper_dev_list, list) { 4754 list_for_each_entry(upper, &dev->upper_dev_list, list) {
4744 /* check if this upper is not already in search list */ 4755 /* check if this upper is not already in search list */
4745 if (list_empty(&upper->search_list)) 4756 if (list_empty(&upper->search_list))
4746 list_add_tail(&upper->search_list, search_list); 4757 list_add_tail(&upper->search_list, search_list);
4747 } 4758 }
4748 } 4759 }
4749 4760
4750 static bool __netdev_search_upper_dev(struct net_device *dev, 4761 static bool __netdev_search_upper_dev(struct net_device *dev,
4751 struct net_device *upper_dev) 4762 struct net_device *upper_dev)
4752 { 4763 {
4753 LIST_HEAD(search_list); 4764 LIST_HEAD(search_list);
4754 struct netdev_upper *upper; 4765 struct netdev_upper *upper;
4755 struct netdev_upper *tmp; 4766 struct netdev_upper *tmp;
4756 bool ret = false; 4767 bool ret = false;
4757 4768
4758 __append_search_uppers(&search_list, dev); 4769 __append_search_uppers(&search_list, dev);
4759 list_for_each_entry(upper, &search_list, search_list) { 4770 list_for_each_entry(upper, &search_list, search_list) {
4760 if (upper->dev == upper_dev) { 4771 if (upper->dev == upper_dev) {
4761 ret = true; 4772 ret = true;
4762 break; 4773 break;
4763 } 4774 }
4764 __append_search_uppers(&search_list, upper->dev); 4775 __append_search_uppers(&search_list, upper->dev);
4765 } 4776 }
4766 list_for_each_entry_safe(upper, tmp, &search_list, search_list) 4777 list_for_each_entry_safe(upper, tmp, &search_list, search_list)
4767 INIT_LIST_HEAD(&upper->search_list); 4778 INIT_LIST_HEAD(&upper->search_list);
4768 return ret; 4779 return ret;
4769 } 4780 }
4770 4781
4771 static struct netdev_upper *__netdev_find_upper(struct net_device *dev, 4782 static struct netdev_upper *__netdev_find_upper(struct net_device *dev,
4772 struct net_device *upper_dev) 4783 struct net_device *upper_dev)
4773 { 4784 {
4774 struct netdev_upper *upper; 4785 struct netdev_upper *upper;
4775 4786
4776 list_for_each_entry(upper, &dev->upper_dev_list, list) { 4787 list_for_each_entry(upper, &dev->upper_dev_list, list) {
4777 if (upper->dev == upper_dev) 4788 if (upper->dev == upper_dev)
4778 return upper; 4789 return upper;
4779 } 4790 }
4780 return NULL; 4791 return NULL;
4781 } 4792 }
4782 4793
4783 /** 4794 /**
4784 * netdev_has_upper_dev - Check if device is linked to an upper device 4795 * netdev_has_upper_dev - Check if device is linked to an upper device
4785 * @dev: device 4796 * @dev: device
4786 * @upper_dev: upper device to check 4797 * @upper_dev: upper device to check
4787 * 4798 *
4788 * Find out if a device is linked to specified upper device and return true 4799 * Find out if a device is linked to specified upper device and return true
4789 * in case it is. Note that this checks only immediate upper device, 4800 * in case it is. Note that this checks only immediate upper device,
4790 * not through a complete stack of devices. The caller must hold the RTNL lock. 4801 * not through a complete stack of devices. The caller must hold the RTNL lock.
4791 */ 4802 */
4792 bool netdev_has_upper_dev(struct net_device *dev, 4803 bool netdev_has_upper_dev(struct net_device *dev,
4793 struct net_device *upper_dev) 4804 struct net_device *upper_dev)
4794 { 4805 {
4795 ASSERT_RTNL(); 4806 ASSERT_RTNL();
4796 4807
4797 return __netdev_find_upper(dev, upper_dev); 4808 return __netdev_find_upper(dev, upper_dev);
4798 } 4809 }
4799 EXPORT_SYMBOL(netdev_has_upper_dev); 4810 EXPORT_SYMBOL(netdev_has_upper_dev);
4800 4811
4801 /** 4812 /**
4802 * netdev_has_any_upper_dev - Check if device is linked to some device 4813 * netdev_has_any_upper_dev - Check if device is linked to some device
4803 * @dev: device 4814 * @dev: device
4804 * 4815 *
4805 * Find out if a device is linked to an upper device and return true in case 4816 * Find out if a device is linked to an upper device and return true in case
4806 * it is. The caller must hold the RTNL lock. 4817 * it is. The caller must hold the RTNL lock.
4807 */ 4818 */
4808 bool netdev_has_any_upper_dev(struct net_device *dev) 4819 bool netdev_has_any_upper_dev(struct net_device *dev)
4809 { 4820 {
4810 ASSERT_RTNL(); 4821 ASSERT_RTNL();
4811 4822
4812 return !list_empty(&dev->upper_dev_list); 4823 return !list_empty(&dev->upper_dev_list);
4813 } 4824 }
4814 EXPORT_SYMBOL(netdev_has_any_upper_dev); 4825 EXPORT_SYMBOL(netdev_has_any_upper_dev);
4815 4826
4816 /** 4827 /**
4817 * netdev_master_upper_dev_get - Get master upper device 4828 * netdev_master_upper_dev_get - Get master upper device
4818 * @dev: device 4829 * @dev: device
4819 * 4830 *
4820 * Find a master upper device and return pointer to it or NULL in case 4831 * Find a master upper device and return pointer to it or NULL in case
4821 * it's not there. The caller must hold the RTNL lock. 4832 * it's not there. The caller must hold the RTNL lock.
4822 */ 4833 */
4823 struct net_device *netdev_master_upper_dev_get(struct net_device *dev) 4834 struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
4824 { 4835 {
4825 struct netdev_upper *upper; 4836 struct netdev_upper *upper;
4826 4837
4827 ASSERT_RTNL(); 4838 ASSERT_RTNL();
4828 4839
4829 if (list_empty(&dev->upper_dev_list)) 4840 if (list_empty(&dev->upper_dev_list))
4830 return NULL; 4841 return NULL;
4831 4842
4832 upper = list_first_entry(&dev->upper_dev_list, 4843 upper = list_first_entry(&dev->upper_dev_list,
4833 struct netdev_upper, list); 4844 struct netdev_upper, list);
4834 if (likely(upper->master)) 4845 if (likely(upper->master))
4835 return upper->dev; 4846 return upper->dev;
4836 return NULL; 4847 return NULL;
4837 } 4848 }
4838 EXPORT_SYMBOL(netdev_master_upper_dev_get); 4849 EXPORT_SYMBOL(netdev_master_upper_dev_get);
4839 4850
4840 /** 4851 /**
4841 * netdev_master_upper_dev_get_rcu - Get master upper device 4852 * netdev_master_upper_dev_get_rcu - Get master upper device
4842 * @dev: device 4853 * @dev: device
4843 * 4854 *
4844 * Find a master upper device and return pointer to it or NULL in case 4855 * Find a master upper device and return pointer to it or NULL in case
4845 * it's not there. The caller must hold the RCU read lock. 4856 * it's not there. The caller must hold the RCU read lock.
4846 */ 4857 */
4847 struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev) 4858 struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
4848 { 4859 {
4849 struct netdev_upper *upper; 4860 struct netdev_upper *upper;
4850 4861
4851 upper = list_first_or_null_rcu(&dev->upper_dev_list, 4862 upper = list_first_or_null_rcu(&dev->upper_dev_list,
4852 struct netdev_upper, list); 4863 struct netdev_upper, list);
4853 if (upper && likely(upper->master)) 4864 if (upper && likely(upper->master))
4854 return upper->dev; 4865 return upper->dev;
4855 return NULL; 4866 return NULL;
4856 } 4867 }
4857 EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu); 4868 EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
4858 4869
4859 static int __netdev_upper_dev_link(struct net_device *dev, 4870 static int __netdev_upper_dev_link(struct net_device *dev,
4860 struct net_device *upper_dev, bool master) 4871 struct net_device *upper_dev, bool master)
4861 { 4872 {
4862 struct netdev_upper *upper; 4873 struct netdev_upper *upper;
4863 4874
4864 ASSERT_RTNL(); 4875 ASSERT_RTNL();
4865 4876
4866 if (dev == upper_dev) 4877 if (dev == upper_dev)
4867 return -EBUSY; 4878 return -EBUSY;
4868 4879
4869 /* To prevent loops, check if dev is not upper device to upper_dev. */ 4880 /* To prevent loops, check if dev is not upper device to upper_dev. */
4870 if (__netdev_search_upper_dev(upper_dev, dev)) 4881 if (__netdev_search_upper_dev(upper_dev, dev))
4871 return -EBUSY; 4882 return -EBUSY;
4872 4883
4873 if (__netdev_find_upper(dev, upper_dev)) 4884 if (__netdev_find_upper(dev, upper_dev))
4874 return -EEXIST; 4885 return -EEXIST;
4875 4886
4876 if (master && netdev_master_upper_dev_get(dev)) 4887 if (master && netdev_master_upper_dev_get(dev))
4877 return -EBUSY; 4888 return -EBUSY;
4878 4889
4879 upper = kmalloc(sizeof(*upper), GFP_KERNEL); 4890 upper = kmalloc(sizeof(*upper), GFP_KERNEL);
4880 if (!upper) 4891 if (!upper)
4881 return -ENOMEM; 4892 return -ENOMEM;
4882 4893
4883 upper->dev = upper_dev; 4894 upper->dev = upper_dev;
4884 upper->master = master; 4895 upper->master = master;
4885 INIT_LIST_HEAD(&upper->search_list); 4896 INIT_LIST_HEAD(&upper->search_list);
4886 4897
4887 /* Ensure that master upper link is always the first item in list. */ 4898 /* Ensure that master upper link is always the first item in list. */
4888 if (master) 4899 if (master)
4889 list_add_rcu(&upper->list, &dev->upper_dev_list); 4900 list_add_rcu(&upper->list, &dev->upper_dev_list);
4890 else 4901 else
4891 list_add_tail_rcu(&upper->list, &dev->upper_dev_list); 4902 list_add_tail_rcu(&upper->list, &dev->upper_dev_list);
4892 dev_hold(upper_dev); 4903 dev_hold(upper_dev);
4893 4904
4894 return 0; 4905 return 0;
4895 } 4906 }
4896 4907
4897 /** 4908 /**
4898 * netdev_upper_dev_link - Add a link to the upper device 4909 * netdev_upper_dev_link - Add a link to the upper device
4899 * @dev: device 4910 * @dev: device
4900 * @upper_dev: new upper device 4911 * @upper_dev: new upper device
4901 * 4912 *
4902 * Adds a link to device which is upper to this one. The caller must hold 4913 * Adds a link to device which is upper to this one. The caller must hold
4903 * the RTNL lock. On a failure a negative errno code is returned. 4914 * the RTNL lock. On a failure a negative errno code is returned.
4904 * On success the reference counts are adjusted and the function 4915 * On success the reference counts are adjusted and the function
4905 * returns zero. 4916 * returns zero.
4906 */ 4917 */
4907 int netdev_upper_dev_link(struct net_device *dev, 4918 int netdev_upper_dev_link(struct net_device *dev,
4908 struct net_device *upper_dev) 4919 struct net_device *upper_dev)
4909 { 4920 {
4910 return __netdev_upper_dev_link(dev, upper_dev, false); 4921 return __netdev_upper_dev_link(dev, upper_dev, false);
4911 } 4922 }
4912 EXPORT_SYMBOL(netdev_upper_dev_link); 4923 EXPORT_SYMBOL(netdev_upper_dev_link);
4913 4924
4914 /** 4925 /**
4915 * netdev_master_upper_dev_link - Add a master link to the upper device 4926 * netdev_master_upper_dev_link - Add a master link to the upper device
4916 * @dev: device 4927 * @dev: device
4917 * @upper_dev: new upper device 4928 * @upper_dev: new upper device
4918 * 4929 *
4919 * Adds a link to device which is upper to this one. In this case, only 4930 * Adds a link to device which is upper to this one. In this case, only
4920 * one master upper device can be linked, although other non-master devices 4931 * one master upper device can be linked, although other non-master devices
4921 * might be linked as well. The caller must hold the RTNL lock. 4932 * might be linked as well. The caller must hold the RTNL lock.
4922 * On a failure a negative errno code is returned. On success the reference 4933 * On a failure a negative errno code is returned. On success the reference
4923 * counts are adjusted and the function returns zero. 4934 * counts are adjusted and the function returns zero.
4924 */ 4935 */
4925 int netdev_master_upper_dev_link(struct net_device *dev, 4936 int netdev_master_upper_dev_link(struct net_device *dev,
4926 struct net_device *upper_dev) 4937 struct net_device *upper_dev)
4927 { 4938 {
4928 return __netdev_upper_dev_link(dev, upper_dev, true); 4939 return __netdev_upper_dev_link(dev, upper_dev, true);
4929 } 4940 }
4930 EXPORT_SYMBOL(netdev_master_upper_dev_link); 4941 EXPORT_SYMBOL(netdev_master_upper_dev_link);
4931 4942
4932 /** 4943 /**
4933 * netdev_upper_dev_unlink - Removes a link to upper device 4944 * netdev_upper_dev_unlink - Removes a link to upper device
4934 * @dev: device 4945 * @dev: device
4935 * @upper_dev: new upper device 4946 * @upper_dev: new upper device
4936 * 4947 *
4937 * Removes a link to device which is upper to this one. The caller must hold 4948 * Removes a link to device which is upper to this one. The caller must hold
4938 * the RTNL lock. 4949 * the RTNL lock.
4939 */ 4950 */
4940 void netdev_upper_dev_unlink(struct net_device *dev, 4951 void netdev_upper_dev_unlink(struct net_device *dev,
4941 struct net_device *upper_dev) 4952 struct net_device *upper_dev)
4942 { 4953 {
4943 struct netdev_upper *upper; 4954 struct netdev_upper *upper;
4944 4955
4945 ASSERT_RTNL(); 4956 ASSERT_RTNL();
4946 4957
4947 upper = __netdev_find_upper(dev, upper_dev); 4958 upper = __netdev_find_upper(dev, upper_dev);
4948 if (!upper) 4959 if (!upper)
4949 return; 4960 return;
4950 list_del_rcu(&upper->list); 4961 list_del_rcu(&upper->list);
4951 dev_put(upper_dev); 4962 dev_put(upper_dev);
4952 kfree_rcu(upper, rcu); 4963 kfree_rcu(upper, rcu);
4953 } 4964 }
4954 EXPORT_SYMBOL(netdev_upper_dev_unlink); 4965 EXPORT_SYMBOL(netdev_upper_dev_unlink);
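Illustrative sketch, not from this commit: how a stacking driver (bonding/bridge style) would typically use the upper-device helpers above. The example_* function names are hypothetical; netdev_master_upper_dev_link(), netdev_master_upper_dev_get(), netdev_upper_dev_unlink() and the RTNL requirement are taken from the code above.

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

/* Hypothetical "enslave" path: record @master as the single master upper
 * device of @slave.  Must run under RTNL, as the helpers assert.
 */
static int example_enslave(struct net_device *master, struct net_device *slave)
{
	ASSERT_RTNL();

	/* Fails with -EBUSY if @slave already has a master or if the link
	 * would create a loop, and with -EEXIST if already linked.
	 */
	return netdev_master_upper_dev_link(slave, master);
}

/* Matching teardown: drop the link and the reference taken on @master. */
static void example_release(struct net_device *master, struct net_device *slave)
{
	ASSERT_RTNL();

	if (netdev_master_upper_dev_get(slave) == master)
		netdev_upper_dev_unlink(slave, master);
}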
4955 4966
4956 static void dev_change_rx_flags(struct net_device *dev, int flags) 4967 static void dev_change_rx_flags(struct net_device *dev, int flags)
4957 { 4968 {
4958 const struct net_device_ops *ops = dev->netdev_ops; 4969 const struct net_device_ops *ops = dev->netdev_ops;
4959 4970
4960 if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags) 4971 if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
4961 ops->ndo_change_rx_flags(dev, flags); 4972 ops->ndo_change_rx_flags(dev, flags);
4962 } 4973 }
4963 4974
4964 static int __dev_set_promiscuity(struct net_device *dev, int inc) 4975 static int __dev_set_promiscuity(struct net_device *dev, int inc)
4965 { 4976 {
4966 unsigned int old_flags = dev->flags; 4977 unsigned int old_flags = dev->flags;
4967 kuid_t uid; 4978 kuid_t uid;
4968 kgid_t gid; 4979 kgid_t gid;
4969 4980
4970 ASSERT_RTNL(); 4981 ASSERT_RTNL();
4971 4982
4972 dev->flags |= IFF_PROMISC; 4983 dev->flags |= IFF_PROMISC;
4973 dev->promiscuity += inc; 4984 dev->promiscuity += inc;
4974 if (dev->promiscuity == 0) { 4985 if (dev->promiscuity == 0) {
4975 /* 4986 /*
4976 * Avoid overflow. 4987 * Avoid overflow.
4977 * If inc causes overflow, untouch promisc and return error. 4988 * If inc causes overflow, untouch promisc and return error.
4978 */ 4989 */
4979 if (inc < 0) 4990 if (inc < 0)
4980 dev->flags &= ~IFF_PROMISC; 4991 dev->flags &= ~IFF_PROMISC;
4981 else { 4992 else {
4982 dev->promiscuity -= inc; 4993 dev->promiscuity -= inc;
4983 pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n", 4994 pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n",
4984 dev->name); 4995 dev->name);
4985 return -EOVERFLOW; 4996 return -EOVERFLOW;
4986 } 4997 }
4987 } 4998 }
4988 if (dev->flags != old_flags) { 4999 if (dev->flags != old_flags) {
4989 pr_info("device %s %s promiscuous mode\n", 5000 pr_info("device %s %s promiscuous mode\n",
4990 dev->name, 5001 dev->name,
4991 dev->flags & IFF_PROMISC ? "entered" : "left"); 5002 dev->flags & IFF_PROMISC ? "entered" : "left");
4992 if (audit_enabled) { 5003 if (audit_enabled) {
4993 current_uid_gid(&uid, &gid); 5004 current_uid_gid(&uid, &gid);
4994 audit_log(current->audit_context, GFP_ATOMIC, 5005 audit_log(current->audit_context, GFP_ATOMIC,
4995 AUDIT_ANOM_PROMISCUOUS, 5006 AUDIT_ANOM_PROMISCUOUS,
4996 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u", 5007 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
4997 dev->name, (dev->flags & IFF_PROMISC), 5008 dev->name, (dev->flags & IFF_PROMISC),
4998 (old_flags & IFF_PROMISC), 5009 (old_flags & IFF_PROMISC),
4999 from_kuid(&init_user_ns, audit_get_loginuid(current)), 5010 from_kuid(&init_user_ns, audit_get_loginuid(current)),
5000 from_kuid(&init_user_ns, uid), 5011 from_kuid(&init_user_ns, uid),
5001 from_kgid(&init_user_ns, gid), 5012 from_kgid(&init_user_ns, gid),
5002 audit_get_sessionid(current)); 5013 audit_get_sessionid(current));
5003 } 5014 }
5004 5015
5005 dev_change_rx_flags(dev, IFF_PROMISC); 5016 dev_change_rx_flags(dev, IFF_PROMISC);
5006 } 5017 }
5007 return 0; 5018 return 0;
5008 } 5019 }
5009 5020
5010 /** 5021 /**
5011 * dev_set_promiscuity - update promiscuity count on a device 5022 * dev_set_promiscuity - update promiscuity count on a device
5012 * @dev: device 5023 * @dev: device
5013 * @inc: modifier 5024 * @inc: modifier
5014 * 5025 *
5015 * Add or remove promiscuity from a device. While the count in the device 5026 * Add or remove promiscuity from a device. While the count in the device
5016 * remains above zero the interface remains promiscuous. Once it hits zero 5027 * remains above zero the interface remains promiscuous. Once it hits zero
5017 * the device reverts back to normal filtering operation. A negative inc 5028 * the device reverts back to normal filtering operation. A negative inc
5018 * value is used to drop promiscuity on the device. 5029 * value is used to drop promiscuity on the device.
5019 * Return 0 if successful or a negative errno code on error. 5030 * Return 0 if successful or a negative errno code on error.
5020 */ 5031 */
5021 int dev_set_promiscuity(struct net_device *dev, int inc) 5032 int dev_set_promiscuity(struct net_device *dev, int inc)
5022 { 5033 {
5023 unsigned int old_flags = dev->flags; 5034 unsigned int old_flags = dev->flags;
5024 int err; 5035 int err;
5025 5036
5026 err = __dev_set_promiscuity(dev, inc); 5037 err = __dev_set_promiscuity(dev, inc);
5027 if (err < 0) 5038 if (err < 0)
5028 return err; 5039 return err;
5029 if (dev->flags != old_flags) 5040 if (dev->flags != old_flags)
5030 dev_set_rx_mode(dev); 5041 dev_set_rx_mode(dev);
5031 return err; 5042 return err;
5032 } 5043 }
5033 EXPORT_SYMBOL(dev_set_promiscuity); 5044 EXPORT_SYMBOL(dev_set_promiscuity);
5034 5045
5035 /** 5046 /**
5036 * dev_set_allmulti - update allmulti count on a device 5047 * dev_set_allmulti - update allmulti count on a device
5037 * @dev: device 5048 * @dev: device
5038 * @inc: modifier 5049 * @inc: modifier
5039 * 5050 *
5040 * Add or remove reception of all multicast frames to a device. While the 5051 * Add or remove reception of all multicast frames to a device. While the
5041 * count in the device remains above zero the interface remains listening 5052 * count in the device remains above zero the interface remains listening
5042 * to all multicast frames. Once it hits zero the device reverts back to normal 5053 * to all multicast frames. Once it hits zero the device reverts back to normal
5043 * filtering operation. A negative @inc value is used to drop the counter 5054 * filtering operation. A negative @inc value is used to drop the counter
5044 * when releasing a resource needing all multicasts. 5055 * when releasing a resource needing all multicasts.
5045 * Return 0 if successful or a negative errno code on error. 5056 * Return 0 if successful or a negative errno code on error.
5046 */ 5057 */
5047 5058
5048 int dev_set_allmulti(struct net_device *dev, int inc) 5059 int dev_set_allmulti(struct net_device *dev, int inc)
5049 { 5060 {
5050 unsigned int old_flags = dev->flags; 5061 unsigned int old_flags = dev->flags;
5051 5062
5052 ASSERT_RTNL(); 5063 ASSERT_RTNL();
5053 5064
5054 dev->flags |= IFF_ALLMULTI; 5065 dev->flags |= IFF_ALLMULTI;
5055 dev->allmulti += inc; 5066 dev->allmulti += inc;
5056 if (dev->allmulti == 0) { 5067 if (dev->allmulti == 0) {
5057 /* 5068 /*
5058 * Avoid overflow. 5069 * Avoid overflow.
5059 * If inc causes overflow, untouch allmulti and return error. 5070 * If inc causes overflow, untouch allmulti and return error.
5060 */ 5071 */
5061 if (inc < 0) 5072 if (inc < 0)
5062 dev->flags &= ~IFF_ALLMULTI; 5073 dev->flags &= ~IFF_ALLMULTI;
5063 else { 5074 else {
5064 dev->allmulti -= inc; 5075 dev->allmulti -= inc;
5065 pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n", 5076 pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n",
5066 dev->name); 5077 dev->name);
5067 return -EOVERFLOW; 5078 return -EOVERFLOW;
5068 } 5079 }
5069 } 5080 }
5070 if (dev->flags ^ old_flags) { 5081 if (dev->flags ^ old_flags) {
5071 dev_change_rx_flags(dev, IFF_ALLMULTI); 5082 dev_change_rx_flags(dev, IFF_ALLMULTI);
5072 dev_set_rx_mode(dev); 5083 dev_set_rx_mode(dev);
5073 } 5084 }
5074 return 0; 5085 return 0;
5075 } 5086 }
5076 EXPORT_SYMBOL(dev_set_allmulti); 5087 EXPORT_SYMBOL(dev_set_allmulti);
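Illustrative sketch, not part of this commit: both dev->promiscuity and dev->allmulti above are reference counts, so every +1 must eventually be matched by a -1. A hypothetical capture-style user of the two helpers might do the following under RTNL; the example_* names are invented, dev_set_promiscuity() and dev_set_allmulti() are the helpers defined above.

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

/* Start capturing on @dev: bump both reference counts by one. */
static int example_capture_start(struct net_device *dev)
{
	int err;

	ASSERT_RTNL();

	err = dev_set_promiscuity(dev, 1);
	if (err)
		return err;

	err = dev_set_allmulti(dev, 1);
	if (err) {
		dev_set_promiscuity(dev, -1);	/* undo on failure */
		return err;
	}
	return 0;
}

/* Stop capturing: drop exactly what was taken, never more. */
static void example_capture_stop(struct net_device *dev)
{
	ASSERT_RTNL();
	dev_set_allmulti(dev, -1);
	dev_set_promiscuity(dev, -1);
}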
5077 5088
5078 /* 5089 /*
5079 * Upload unicast and multicast address lists to device and 5090 * Upload unicast and multicast address lists to device and
5080 * configure RX filtering. When the device doesn't support unicast 5091 * configure RX filtering. When the device doesn't support unicast
5081 * filtering it is put in promiscuous mode while unicast addresses 5092 * filtering it is put in promiscuous mode while unicast addresses
5082 * are present. 5093 * are present.
5083 */ 5094 */
5084 void __dev_set_rx_mode(struct net_device *dev) 5095 void __dev_set_rx_mode(struct net_device *dev)
5085 { 5096 {
5086 const struct net_device_ops *ops = dev->netdev_ops; 5097 const struct net_device_ops *ops = dev->netdev_ops;
5087 5098
5088 /* dev_open will call this function so the list will stay sane. */ 5099 /* dev_open will call this function so the list will stay sane. */
5089 if (!(dev->flags&IFF_UP)) 5100 if (!(dev->flags&IFF_UP))
5090 return; 5101 return;
5091 5102
5092 if (!netif_device_present(dev)) 5103 if (!netif_device_present(dev))
5093 return; 5104 return;
5094 5105
5095 if (!(dev->priv_flags & IFF_UNICAST_FLT)) { 5106 if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
5096 /* Unicast addresses changes may only happen under the rtnl, 5107 /* Unicast addresses changes may only happen under the rtnl,
5097 * therefore calling __dev_set_promiscuity here is safe. 5108 * therefore calling __dev_set_promiscuity here is safe.
5098 */ 5109 */
5099 if (!netdev_uc_empty(dev) && !dev->uc_promisc) { 5110 if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
5100 __dev_set_promiscuity(dev, 1); 5111 __dev_set_promiscuity(dev, 1);
5101 dev->uc_promisc = true; 5112 dev->uc_promisc = true;
5102 } else if (netdev_uc_empty(dev) && dev->uc_promisc) { 5113 } else if (netdev_uc_empty(dev) && dev->uc_promisc) {
5103 __dev_set_promiscuity(dev, -1); 5114 __dev_set_promiscuity(dev, -1);
5104 dev->uc_promisc = false; 5115 dev->uc_promisc = false;
5105 } 5116 }
5106 } 5117 }
5107 5118
5108 if (ops->ndo_set_rx_mode) 5119 if (ops->ndo_set_rx_mode)
5109 ops->ndo_set_rx_mode(dev); 5120 ops->ndo_set_rx_mode(dev);
5110 } 5121 }
5111 5122
5112 void dev_set_rx_mode(struct net_device *dev) 5123 void dev_set_rx_mode(struct net_device *dev)
5113 { 5124 {
5114 netif_addr_lock_bh(dev); 5125 netif_addr_lock_bh(dev);
5115 __dev_set_rx_mode(dev); 5126 __dev_set_rx_mode(dev);
5116 netif_addr_unlock_bh(dev); 5127 netif_addr_unlock_bh(dev);
5117 } 5128 }
5118 5129
5119 /** 5130 /**
5120 * dev_get_flags - get flags reported to userspace 5131 * dev_get_flags - get flags reported to userspace
5121 * @dev: device 5132 * @dev: device
5122 * 5133 *
5123 * Get the combination of flag bits exported through APIs to userspace. 5134 * Get the combination of flag bits exported through APIs to userspace.
5124 */ 5135 */
5125 unsigned int dev_get_flags(const struct net_device *dev) 5136 unsigned int dev_get_flags(const struct net_device *dev)
5126 { 5137 {
5127 unsigned int flags; 5138 unsigned int flags;
5128 5139
5129 flags = (dev->flags & ~(IFF_PROMISC | 5140 flags = (dev->flags & ~(IFF_PROMISC |
5130 IFF_ALLMULTI | 5141 IFF_ALLMULTI |
5131 IFF_RUNNING | 5142 IFF_RUNNING |
5132 IFF_LOWER_UP | 5143 IFF_LOWER_UP |
5133 IFF_DORMANT)) | 5144 IFF_DORMANT)) |
5134 (dev->gflags & (IFF_PROMISC | 5145 (dev->gflags & (IFF_PROMISC |
5135 IFF_ALLMULTI)); 5146 IFF_ALLMULTI));
5136 5147
5137 if (netif_running(dev)) { 5148 if (netif_running(dev)) {
5138 if (netif_oper_up(dev)) 5149 if (netif_oper_up(dev))
5139 flags |= IFF_RUNNING; 5150 flags |= IFF_RUNNING;
5140 if (netif_carrier_ok(dev)) 5151 if (netif_carrier_ok(dev))
5141 flags |= IFF_LOWER_UP; 5152 flags |= IFF_LOWER_UP;
5142 if (netif_dormant(dev)) 5153 if (netif_dormant(dev))
5143 flags |= IFF_DORMANT; 5154 flags |= IFF_DORMANT;
5144 } 5155 }
5145 5156
5146 return flags; 5157 return flags;
5147 } 5158 }
5148 EXPORT_SYMBOL(dev_get_flags); 5159 EXPORT_SYMBOL(dev_get_flags);
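A small sketch (assumption, not from the patch) of how the userspace-visible flag word built by dev_get_flags() is typically consumed: IFF_RUNNING, IFF_LOWER_UP and IFF_DORMANT are synthesized from operstate and carrier inside the function, the remaining bits come straight from dev->flags and dev->gflags.

#include <linux/netdevice.h>
#include <linux/if.h>

/* Report whether @dev would appear "up and running" to userspace,
 * using the same flag word that SIOCGIFFLAGS returns.
 */
static bool example_link_is_usable(const struct net_device *dev)
{
	unsigned int flags = dev_get_flags(dev);

	return (flags & IFF_UP) &&
	       (flags & IFF_RUNNING) &&
	       (flags & IFF_LOWER_UP);
}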
5149 5160
5150 int __dev_change_flags(struct net_device *dev, unsigned int flags) 5161 int __dev_change_flags(struct net_device *dev, unsigned int flags)
5151 { 5162 {
5152 unsigned int old_flags = dev->flags; 5163 unsigned int old_flags = dev->flags;
5153 int ret; 5164 int ret;
5154 5165
5155 ASSERT_RTNL(); 5166 ASSERT_RTNL();
5156 5167
5157 /* 5168 /*
5158 * Set the flags on our device. 5169 * Set the flags on our device.
5159 */ 5170 */
5160 5171
5161 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP | 5172 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
5162 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL | 5173 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
5163 IFF_AUTOMEDIA)) | 5174 IFF_AUTOMEDIA)) |
5164 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC | 5175 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
5165 IFF_ALLMULTI)); 5176 IFF_ALLMULTI));
5166 5177
5167 /* 5178 /*
5168 * Load in the correct multicast list now the flags have changed. 5179 * Load in the correct multicast list now the flags have changed.
5169 */ 5180 */
5170 5181
5171 if ((old_flags ^ flags) & IFF_MULTICAST) 5182 if ((old_flags ^ flags) & IFF_MULTICAST)
5172 dev_change_rx_flags(dev, IFF_MULTICAST); 5183 dev_change_rx_flags(dev, IFF_MULTICAST);
5173 5184
5174 dev_set_rx_mode(dev); 5185 dev_set_rx_mode(dev);
5175 5186
5176 /* 5187 /*
5177 * Have we downed the interface? We handle IFF_UP ourselves 5188 * Have we downed the interface? We handle IFF_UP ourselves
5178 * according to user attempts to set it, rather than blindly 5189 * according to user attempts to set it, rather than blindly
5179 * setting it. 5190 * setting it.
5180 */ 5191 */
5181 5192
5182 ret = 0; 5193 ret = 0;
5183 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */ 5194 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
5184 ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev); 5195 ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
5185 5196
5186 if (!ret) 5197 if (!ret)
5187 dev_set_rx_mode(dev); 5198 dev_set_rx_mode(dev);
5188 } 5199 }
5189 5200
5190 if ((flags ^ dev->gflags) & IFF_PROMISC) { 5201 if ((flags ^ dev->gflags) & IFF_PROMISC) {
5191 int inc = (flags & IFF_PROMISC) ? 1 : -1; 5202 int inc = (flags & IFF_PROMISC) ? 1 : -1;
5192 5203
5193 dev->gflags ^= IFF_PROMISC; 5204 dev->gflags ^= IFF_PROMISC;
5194 dev_set_promiscuity(dev, inc); 5205 dev_set_promiscuity(dev, inc);
5195 } 5206 }
5196 5207
5197 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI 5208 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
5198 is important. Some (broken) drivers set IFF_PROMISC, when 5209 is important. Some (broken) drivers set IFF_PROMISC, when
5199 IFF_ALLMULTI is requested not asking us and not reporting. 5210 IFF_ALLMULTI is requested not asking us and not reporting.
5200 */ 5211 */
5201 if ((flags ^ dev->gflags) & IFF_ALLMULTI) { 5212 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
5202 int inc = (flags & IFF_ALLMULTI) ? 1 : -1; 5213 int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
5203 5214
5204 dev->gflags ^= IFF_ALLMULTI; 5215 dev->gflags ^= IFF_ALLMULTI;
5205 dev_set_allmulti(dev, inc); 5216 dev_set_allmulti(dev, inc);
5206 } 5217 }
5207 5218
5208 return ret; 5219 return ret;
5209 } 5220 }
5210 5221
5211 void __dev_notify_flags(struct net_device *dev, unsigned int old_flags) 5222 void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
5212 { 5223 {
5213 unsigned int changes = dev->flags ^ old_flags; 5224 unsigned int changes = dev->flags ^ old_flags;
5214 5225
5215 if (changes & IFF_UP) { 5226 if (changes & IFF_UP) {
5216 if (dev->flags & IFF_UP) 5227 if (dev->flags & IFF_UP)
5217 call_netdevice_notifiers(NETDEV_UP, dev); 5228 call_netdevice_notifiers(NETDEV_UP, dev);
5218 else 5229 else
5219 call_netdevice_notifiers(NETDEV_DOWN, dev); 5230 call_netdevice_notifiers(NETDEV_DOWN, dev);
5220 } 5231 }
5221 5232
5222 if (dev->flags & IFF_UP && 5233 if (dev->flags & IFF_UP &&
5223 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) 5234 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE)))
5224 call_netdevice_notifiers(NETDEV_CHANGE, dev); 5235 call_netdevice_notifiers(NETDEV_CHANGE, dev);
5225 } 5236 }
5226 5237
5227 /** 5238 /**
5228 * dev_change_flags - change device settings 5239 * dev_change_flags - change device settings
5229 * @dev: device 5240 * @dev: device
5230 * @flags: device state flags 5241 * @flags: device state flags
5231 * 5242 *
5232 * Change settings on a device based on state flags. The flags are 5243 * Change settings on a device based on state flags. The flags are
5233 * in the userspace exported format. 5244 * in the userspace exported format.
5234 */ 5245 */
5235 int dev_change_flags(struct net_device *dev, unsigned int flags) 5246 int dev_change_flags(struct net_device *dev, unsigned int flags)
5236 { 5247 {
5237 int ret; 5248 int ret;
5238 unsigned int changes, old_flags = dev->flags; 5249 unsigned int changes, old_flags = dev->flags;
5239 5250
5240 ret = __dev_change_flags(dev, flags); 5251 ret = __dev_change_flags(dev, flags);
5241 if (ret < 0) 5252 if (ret < 0)
5242 return ret; 5253 return ret;
5243 5254
5244 changes = old_flags ^ dev->flags; 5255 changes = old_flags ^ dev->flags;
5245 if (changes) 5256 if (changes)
5246 rtmsg_ifinfo(RTM_NEWLINK, dev, changes); 5257 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
5247 5258
5248 __dev_notify_flags(dev, old_flags); 5259 __dev_notify_flags(dev, old_flags);
5249 return ret; 5260 return ret;
5250 } 5261 }
5251 EXPORT_SYMBOL(dev_change_flags); 5262 EXPORT_SYMBOL(dev_change_flags);
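Illustrative sketch, not part of this commit: dev_change_flags() is the RTNL-protected entry point that both applies the flag changes (via __dev_change_flags()) and emits the RTM_NEWLINK notification (via __dev_notify_flags()). A minimal in-kernel caller bringing a device administratively up might look like this; example_bring_up() is a hypothetical name.

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

/* Administratively bring @dev up, the same way SIOCSIFFLAGS does:
 * take the current flags, set IFF_UP, and let dev_change_flags()
 * open the device and send the netlink notification.
 */
static int example_bring_up(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_change_flags(dev, dev->flags | IFF_UP);
	rtnl_unlock();

	return err;
}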
5252 5263
5253 /** 5264 /**
5254 * dev_set_mtu - Change maximum transfer unit 5265 * dev_set_mtu - Change maximum transfer unit
5255 * @dev: device 5266 * @dev: device
5256 * @new_mtu: new transfer unit 5267 * @new_mtu: new transfer unit
5257 * 5268 *
5258 * Change the maximum transfer size of the network device. 5269 * Change the maximum transfer size of the network device.
5259 */ 5270 */
5260 int dev_set_mtu(struct net_device *dev, int new_mtu) 5271 int dev_set_mtu(struct net_device *dev, int new_mtu)
5261 { 5272 {
5262 const struct net_device_ops *ops = dev->netdev_ops; 5273 const struct net_device_ops *ops = dev->netdev_ops;
5263 int err; 5274 int err;
5264 5275
5265 if (new_mtu == dev->mtu) 5276 if (new_mtu == dev->mtu)
5266 return 0; 5277 return 0;
5267 5278
5268 /* MTU must be positive. */ 5279 /* MTU must be positive. */
5269 if (new_mtu < 0) 5280 if (new_mtu < 0)
5270 return -EINVAL; 5281 return -EINVAL;
5271 5282
5272 if (!netif_device_present(dev)) 5283 if (!netif_device_present(dev))
5273 return -ENODEV; 5284 return -ENODEV;
5274 5285
5275 err = 0; 5286 err = 0;
5276 if (ops->ndo_change_mtu) 5287 if (ops->ndo_change_mtu)
5277 err = ops->ndo_change_mtu(dev, new_mtu); 5288 err = ops->ndo_change_mtu(dev, new_mtu);
5278 else 5289 else
5279 dev->mtu = new_mtu; 5290 dev->mtu = new_mtu;
5280 5291
5281 if (!err) 5292 if (!err)
5282 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); 5293 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
5283 return err; 5294 return err;
5284 } 5295 }
5285 EXPORT_SYMBOL(dev_set_mtu); 5296 EXPORT_SYMBOL(dev_set_mtu);
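Illustrative sketch, not part of this commit: dev_set_mtu() simply defers to the driver's ndo_change_mtu() (or writes dev->mtu directly) and fires NETDEV_CHANGEMTU, and it is normally called with RTNL held, as the SIOCSIFMTU path below does. The example_sync_mtu() name and the tunnel-overhead scenario are assumptions.

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

/* Clamp a tunnel-style upper device to the MTU of its lower device,
 * minus the encapsulation overhead.
 */
static int example_sync_mtu(struct net_device *upper, struct net_device *lower,
			    int overhead)
{
	int err;

	rtnl_lock();
	err = dev_set_mtu(upper, lower->mtu - overhead);
	rtnl_unlock();

	return err;
}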
5286 5297
5287 /** 5298 /**
5288 * dev_set_group - Change group this device belongs to 5299 * dev_set_group - Change group this device belongs to
5289 * @dev: device 5300 * @dev: device
5290 * @new_group: group this device should belong to 5301 * @new_group: group this device should belong to
5291 */ 5302 */
5292 void dev_set_group(struct net_device *dev, int new_group) 5303 void dev_set_group(struct net_device *dev, int new_group)
5293 { 5304 {
5294 dev->group = new_group; 5305 dev->group = new_group;
5295 } 5306 }
5296 EXPORT_SYMBOL(dev_set_group); 5307 EXPORT_SYMBOL(dev_set_group);
5297 5308
5298 /** 5309 /**
5299 * dev_set_mac_address - Change Media Access Control Address 5310 * dev_set_mac_address - Change Media Access Control Address
5300 * @dev: device 5311 * @dev: device
5301 * @sa: new address 5312 * @sa: new address
5302 * 5313 *
5303 * Change the hardware (MAC) address of the device 5314 * Change the hardware (MAC) address of the device
5304 */ 5315 */
5305 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) 5316 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
5306 { 5317 {
5307 const struct net_device_ops *ops = dev->netdev_ops; 5318 const struct net_device_ops *ops = dev->netdev_ops;
5308 int err; 5319 int err;
5309 5320
5310 if (!ops->ndo_set_mac_address) 5321 if (!ops->ndo_set_mac_address)
5311 return -EOPNOTSUPP; 5322 return -EOPNOTSUPP;
5312 if (sa->sa_family != dev->type) 5323 if (sa->sa_family != dev->type)
5313 return -EINVAL; 5324 return -EINVAL;
5314 if (!netif_device_present(dev)) 5325 if (!netif_device_present(dev))
5315 return -ENODEV; 5326 return -ENODEV;
5316 err = ops->ndo_set_mac_address(dev, sa); 5327 err = ops->ndo_set_mac_address(dev, sa);
5317 if (err) 5328 if (err)
5318 return err; 5329 return err;
5319 dev->addr_assign_type = NET_ADDR_SET; 5330 dev->addr_assign_type = NET_ADDR_SET;
5320 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); 5331 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
5321 add_device_randomness(dev->dev_addr, dev->addr_len); 5332 add_device_randomness(dev->dev_addr, dev->addr_len);
5322 return 0; 5333 return 0;
5323 } 5334 }
5324 EXPORT_SYMBOL(dev_set_mac_address); 5335 EXPORT_SYMBOL(dev_set_mac_address);
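Illustrative sketch, not part of this commit: dev_set_mac_address() rejects a sockaddr whose sa_family does not match dev->type, so an in-kernel caller fills the family from the device itself and the hardware address into sa_data, just as the SIOCSIFHWADDR ioctl path below does. example_set_mac() is a hypothetical name.

#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/rtnetlink.h>

/* Assign a new Ethernet MAC to @dev through the same helper the
 * SIOCSIFHWADDR ioctl uses.
 */
static int example_set_mac(struct net_device *dev, const u8 addr[ETH_ALEN])
{
	struct sockaddr sa;
	int err;

	sa.sa_family = dev->type;
	memcpy(sa.sa_data, addr, ETH_ALEN);

	rtnl_lock();
	err = dev_set_mac_address(dev, &sa);
	rtnl_unlock();

	return err;
}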
5325 5336
5326 /** 5337 /**
5327 * dev_change_carrier - Change device carrier 5338 * dev_change_carrier - Change device carrier
5328 * @dev: device 5339 * @dev: device
5329 * @new_carrier: new value 5340 * @new_carrier: new value
5330 * 5341 *
5331 * Change device carrier 5342 * Change device carrier
5332 */ 5343 */
5333 int dev_change_carrier(struct net_device *dev, bool new_carrier) 5344 int dev_change_carrier(struct net_device *dev, bool new_carrier)
5334 { 5345 {
5335 const struct net_device_ops *ops = dev->netdev_ops; 5346 const struct net_device_ops *ops = dev->netdev_ops;
5336 5347
5337 if (!ops->ndo_change_carrier) 5348 if (!ops->ndo_change_carrier)
5338 return -EOPNOTSUPP; 5349 return -EOPNOTSUPP;
5339 if (!netif_device_present(dev)) 5350 if (!netif_device_present(dev))
5340 return -ENODEV; 5351 return -ENODEV;
5341 return ops->ndo_change_carrier(dev, new_carrier); 5352 return ops->ndo_change_carrier(dev, new_carrier);
5342 } 5353 }
5343 EXPORT_SYMBOL(dev_change_carrier); 5354 EXPORT_SYMBOL(dev_change_carrier);
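Illustrative sketch, not part of this commit: dev_change_carrier() only checks device presence and forwards to the driver's ndo_change_carrier(), returning -EOPNOTSUPP when the driver provides none. A management-path caller forcing the reported link state might look like this under RTNL; example_force_carrier() is a hypothetical name.

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

/* Force the carrier state of @dev, e.g. when a management decision says
 * the link should be reported down regardless of the physical state.
 */
static int example_force_carrier(struct net_device *dev, bool on)
{
	int err;

	rtnl_lock();
	err = dev_change_carrier(dev, on);
	rtnl_unlock();

	return err;
}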
5344 5355
5345 /* 5356 /*
5346 * Perform the SIOCxIFxxx calls, inside rcu_read_lock() 5357 * Perform the SIOCxIFxxx calls, inside rcu_read_lock()
5347 */ 5358 */
5348 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd) 5359 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
5349 { 5360 {
5350 int err; 5361 int err;
5351 struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name); 5362 struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
5352 5363
5353 if (!dev) 5364 if (!dev)
5354 return -ENODEV; 5365 return -ENODEV;
5355 5366
5356 switch (cmd) { 5367 switch (cmd) {
5357 case SIOCGIFFLAGS: /* Get interface flags */ 5368 case SIOCGIFFLAGS: /* Get interface flags */
5358 ifr->ifr_flags = (short) dev_get_flags(dev); 5369 ifr->ifr_flags = (short) dev_get_flags(dev);
5359 return 0; 5370 return 0;
5360 5371
5361 case SIOCGIFMETRIC: /* Get the metric on the interface 5372 case SIOCGIFMETRIC: /* Get the metric on the interface
5362 (currently unused) */ 5373 (currently unused) */
5363 ifr->ifr_metric = 0; 5374 ifr->ifr_metric = 0;
5364 return 0; 5375 return 0;
5365 5376
5366 case SIOCGIFMTU: /* Get the MTU of a device */ 5377 case SIOCGIFMTU: /* Get the MTU of a device */
5367 ifr->ifr_mtu = dev->mtu; 5378 ifr->ifr_mtu = dev->mtu;
5368 return 0; 5379 return 0;
5369 5380
5370 case SIOCGIFHWADDR: 5381 case SIOCGIFHWADDR:
5371 if (!dev->addr_len) 5382 if (!dev->addr_len)
5372 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); 5383 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
5373 else 5384 else
5374 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, 5385 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
5375 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); 5386 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
5376 ifr->ifr_hwaddr.sa_family = dev->type; 5387 ifr->ifr_hwaddr.sa_family = dev->type;
5377 return 0; 5388 return 0;
5378 5389
5379 case SIOCGIFSLAVE: 5390 case SIOCGIFSLAVE:
5380 err = -EINVAL; 5391 err = -EINVAL;
5381 break; 5392 break;
5382 5393
5383 case SIOCGIFMAP: 5394 case SIOCGIFMAP:
5384 ifr->ifr_map.mem_start = dev->mem_start; 5395 ifr->ifr_map.mem_start = dev->mem_start;
5385 ifr->ifr_map.mem_end = dev->mem_end; 5396 ifr->ifr_map.mem_end = dev->mem_end;
5386 ifr->ifr_map.base_addr = dev->base_addr; 5397 ifr->ifr_map.base_addr = dev->base_addr;
5387 ifr->ifr_map.irq = dev->irq; 5398 ifr->ifr_map.irq = dev->irq;
5388 ifr->ifr_map.dma = dev->dma; 5399 ifr->ifr_map.dma = dev->dma;
5389 ifr->ifr_map.port = dev->if_port; 5400 ifr->ifr_map.port = dev->if_port;
5390 return 0; 5401 return 0;
5391 5402
5392 case SIOCGIFINDEX: 5403 case SIOCGIFINDEX:
5393 ifr->ifr_ifindex = dev->ifindex; 5404 ifr->ifr_ifindex = dev->ifindex;
5394 return 0; 5405 return 0;
5395 5406
5396 case SIOCGIFTXQLEN: 5407 case SIOCGIFTXQLEN:
5397 ifr->ifr_qlen = dev->tx_queue_len; 5408 ifr->ifr_qlen = dev->tx_queue_len;
5398 return 0; 5409 return 0;
5399 5410
5400 default: 5411 default:
5401 /* dev_ioctl() should ensure this case 5412 /* dev_ioctl() should ensure this case
5402 * is never reached 5413 * is never reached
5403 */ 5414 */
5404 WARN_ON(1); 5415 WARN_ON(1);
5405 err = -ENOTTY; 5416 err = -ENOTTY;
5406 break; 5417 break;
5407 5418
5408 } 5419 }
5409 return err; 5420 return err;
5410 } 5421 }
5411 5422
5412 /* 5423 /*
5413 * Perform the SIOCxIFxxx calls, inside rtnl_lock() 5424 * Perform the SIOCxIFxxx calls, inside rtnl_lock()
5414 */ 5425 */
5415 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) 5426 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
5416 { 5427 {
5417 int err; 5428 int err;
5418 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); 5429 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
5419 const struct net_device_ops *ops; 5430 const struct net_device_ops *ops;
5420 5431
5421 if (!dev) 5432 if (!dev)
5422 return -ENODEV; 5433 return -ENODEV;
5423 5434
5424 ops = dev->netdev_ops; 5435 ops = dev->netdev_ops;
5425 5436
5426 switch (cmd) { 5437 switch (cmd) {
5427 case SIOCSIFFLAGS: /* Set interface flags */ 5438 case SIOCSIFFLAGS: /* Set interface flags */
5428 return dev_change_flags(dev, ifr->ifr_flags); 5439 return dev_change_flags(dev, ifr->ifr_flags);
5429 5440
5430 case SIOCSIFMETRIC: /* Set the metric on the interface 5441 case SIOCSIFMETRIC: /* Set the metric on the interface
5431 (currently unused) */ 5442 (currently unused) */
5432 return -EOPNOTSUPP; 5443 return -EOPNOTSUPP;
5433 5444
5434 case SIOCSIFMTU: /* Set the MTU of a device */ 5445 case SIOCSIFMTU: /* Set the MTU of a device */
5435 return dev_set_mtu(dev, ifr->ifr_mtu); 5446 return dev_set_mtu(dev, ifr->ifr_mtu);
5436 5447
5437 case SIOCSIFHWADDR: 5448 case SIOCSIFHWADDR:
5438 return dev_set_mac_address(dev, &ifr->ifr_hwaddr); 5449 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
5439 5450
5440 case SIOCSIFHWBROADCAST: 5451 case SIOCSIFHWBROADCAST:
5441 if (ifr->ifr_hwaddr.sa_family != dev->type) 5452 if (ifr->ifr_hwaddr.sa_family != dev->type)
5442 return -EINVAL; 5453 return -EINVAL;
5443 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, 5454 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
5444 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); 5455 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
5445 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); 5456 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
5446 return 0; 5457 return 0;
5447 5458
5448 case SIOCSIFMAP: 5459 case SIOCSIFMAP:
5449 if (ops->ndo_set_config) { 5460 if (ops->ndo_set_config) {
5450 if (!netif_device_present(dev)) 5461 if (!netif_device_present(dev))
5451 return -ENODEV; 5462 return -ENODEV;
5452 return ops->ndo_set_config(dev, &ifr->ifr_map); 5463 return ops->ndo_set_config(dev, &ifr->ifr_map);
5453 } 5464 }
5454 return -EOPNOTSUPP; 5465 return -EOPNOTSUPP;
5455 5466
5456 case SIOCADDMULTI: 5467 case SIOCADDMULTI:
5457 if (!ops->ndo_set_rx_mode || 5468 if (!ops->ndo_set_rx_mode ||
5458 ifr->ifr_hwaddr.sa_family != AF_UNSPEC) 5469 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
5459 return -EINVAL; 5470 return -EINVAL;
5460 if (!netif_device_present(dev)) 5471 if (!netif_device_present(dev))
5461 return -ENODEV; 5472 return -ENODEV;
5462 return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); 5473 return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
5463 5474
5464 case SIOCDELMULTI: 5475 case SIOCDELMULTI:
5465 if (!ops->ndo_set_rx_mode || 5476 if (!ops->ndo_set_rx_mode ||
5466 ifr->ifr_hwaddr.sa_family != AF_UNSPEC) 5477 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
5467 return -EINVAL; 5478 return -EINVAL;
5468 if (!netif_device_present(dev)) 5479 if (!netif_device_present(dev))
5469 return -ENODEV; 5480 return -ENODEV;
5470 return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data); 5481 return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
5471 5482
5472 case SIOCSIFTXQLEN: 5483 case SIOCSIFTXQLEN:
5473 if (ifr->ifr_qlen < 0) 5484 if (ifr->ifr_qlen < 0)
5474 return -EINVAL; 5485 return -EINVAL;
5475 dev->tx_queue_len = ifr->ifr_qlen; 5486 dev->tx_queue_len = ifr->ifr_qlen;
5476 return 0; 5487 return 0;
5477 5488
5478 case SIOCSIFNAME: 5489 case SIOCSIFNAME:
5479 ifr->ifr_newname[IFNAMSIZ-1] = '\0'; 5490 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
5480 return dev_change_name(dev, ifr->ifr_newname); 5491 return dev_change_name(dev, ifr->ifr_newname);
5481 5492
5482 case SIOCSHWTSTAMP: 5493 case SIOCSHWTSTAMP:
5483 err = net_hwtstamp_validate(ifr); 5494 err = net_hwtstamp_validate(ifr);
5484 if (err) 5495 if (err)
5485 return err; 5496 return err;
5486 /* fall through */ 5497 /* fall through */
5487 5498
5488 /* 5499 /*
5489 * Unknown or private ioctl 5500 * Unknown or private ioctl
5490 */ 5501 */
5491 default: 5502 default:
5492 if ((cmd >= SIOCDEVPRIVATE && 5503 if ((cmd >= SIOCDEVPRIVATE &&
5493 cmd <= SIOCDEVPRIVATE + 15) || 5504 cmd <= SIOCDEVPRIVATE + 15) ||
5494 cmd == SIOCBONDENSLAVE || 5505 cmd == SIOCBONDENSLAVE ||
5495 cmd == SIOCBONDRELEASE || 5506 cmd == SIOCBONDRELEASE ||
5496 cmd == SIOCBONDSETHWADDR || 5507 cmd == SIOCBONDSETHWADDR ||
5497 cmd == SIOCBONDSLAVEINFOQUERY || 5508 cmd == SIOCBONDSLAVEINFOQUERY ||
5498 cmd == SIOCBONDINFOQUERY || 5509 cmd == SIOCBONDINFOQUERY ||
5499 cmd == SIOCBONDCHANGEACTIVE || 5510 cmd == SIOCBONDCHANGEACTIVE ||
5500 cmd == SIOCGMIIPHY || 5511 cmd == SIOCGMIIPHY ||
5501 cmd == SIOCGMIIREG || 5512 cmd == SIOCGMIIREG ||
5502 cmd == SIOCSMIIREG || 5513 cmd == SIOCSMIIREG ||
5503 cmd == SIOCBRADDIF || 5514 cmd == SIOCBRADDIF ||
5504 cmd == SIOCBRDELIF || 5515 cmd == SIOCBRDELIF ||
5505 cmd == SIOCSHWTSTAMP || 5516 cmd == SIOCSHWTSTAMP ||
5506 cmd == SIOCWANDEV) { 5517 cmd == SIOCWANDEV) {
5507 err = -EOPNOTSUPP; 5518 err = -EOPNOTSUPP;
5508 if (ops->ndo_do_ioctl) { 5519 if (ops->ndo_do_ioctl) {
5509 if (netif_device_present(dev)) 5520 if (netif_device_present(dev))
5510 err = ops->ndo_do_ioctl(dev, ifr, cmd); 5521 err = ops->ndo_do_ioctl(dev, ifr, cmd);
5511 else 5522 else
5512 err = -ENODEV; 5523 err = -ENODEV;
5513 } 5524 }
5514 } else 5525 } else
5515 err = -EINVAL; 5526 err = -EINVAL;
5516 5527
5517 } 5528 }
5518 return err; 5529 return err;
5519 } 5530 }
5520 5531
5521 /* 5532 /*
5522 * This function handles all "interface"-type I/O control requests. The actual 5533 * This function handles all "interface"-type I/O control requests. The actual
5523 * 'doing' part of this is dev_ifsioc above. 5534 * 'doing' part of this is dev_ifsioc above.
5524 */ 5535 */
5525 5536
5526 /** 5537 /**
5527 * dev_ioctl - network device ioctl 5538 * dev_ioctl - network device ioctl
5528 * @net: the applicable net namespace 5539 * @net: the applicable net namespace
5529 * @cmd: command to issue 5540 * @cmd: command to issue
5530 * @arg: pointer to a struct ifreq in user space 5541 * @arg: pointer to a struct ifreq in user space
5531 * 5542 *
5532 * Issue ioctl functions to devices. This is normally called by the 5543 * Issue ioctl functions to devices. This is normally called by the
5533 * user space syscall interfaces but can sometimes be useful for 5544 * user space syscall interfaces but can sometimes be useful for
5534 * other purposes. The return value is the return from the syscall if 5545 * other purposes. The return value is the return from the syscall if
5535 * positive or a negative errno code on error. 5546 * positive or a negative errno code on error.
5536 */ 5547 */
5537 5548
5538 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) 5549 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
5539 { 5550 {
5540 struct ifreq ifr; 5551 struct ifreq ifr;
5541 int ret; 5552 int ret;
5542 char *colon; 5553 char *colon;
5543 5554
5544 /* One special case: SIOCGIFCONF takes ifconf argument 5555 /* One special case: SIOCGIFCONF takes ifconf argument
5545 and requires shared lock, because it sleeps writing 5556 and requires shared lock, because it sleeps writing
5546 to user space. 5557 to user space.
5547 */ 5558 */
5548 5559
5549 if (cmd == SIOCGIFCONF) { 5560 if (cmd == SIOCGIFCONF) {
5550 rtnl_lock(); 5561 rtnl_lock();
5551 ret = dev_ifconf(net, (char __user *) arg); 5562 ret = dev_ifconf(net, (char __user *) arg);
5552 rtnl_unlock(); 5563 rtnl_unlock();
5553 return ret; 5564 return ret;
5554 } 5565 }
5555 if (cmd == SIOCGIFNAME) 5566 if (cmd == SIOCGIFNAME)
5556 return dev_ifname(net, (struct ifreq __user *)arg); 5567 return dev_ifname(net, (struct ifreq __user *)arg);
5557 5568
5558 if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) 5569 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
5559 return -EFAULT; 5570 return -EFAULT;
5560 5571
5561 ifr.ifr_name[IFNAMSIZ-1] = 0; 5572 ifr.ifr_name[IFNAMSIZ-1] = 0;
5562 5573
5563 colon = strchr(ifr.ifr_name, ':'); 5574 colon = strchr(ifr.ifr_name, ':');
5564 if (colon) 5575 if (colon)
5565 *colon = 0; 5576 *colon = 0;
5566 5577
5567 /* 5578 /*
5568 * See which interface the caller is talking about. 5579 * See which interface the caller is talking about.
5569 */ 5580 */
5570 5581
5571 switch (cmd) { 5582 switch (cmd) {
5572 /* 5583 /*
5573 * These ioctl calls: 5584 * These ioctl calls:
5574 * - can be done by all. 5585 * - can be done by all.
5575 * - atomic and do not require locking. 5586 * - atomic and do not require locking.
5576 * - return a value 5587 * - return a value
5577 */ 5588 */
5578 case SIOCGIFFLAGS: 5589 case SIOCGIFFLAGS:
5579 case SIOCGIFMETRIC: 5590 case SIOCGIFMETRIC:
5580 case SIOCGIFMTU: 5591 case SIOCGIFMTU:
5581 case SIOCGIFHWADDR: 5592 case SIOCGIFHWADDR:
5582 case SIOCGIFSLAVE: 5593 case SIOCGIFSLAVE:
5583 case SIOCGIFMAP: 5594 case SIOCGIFMAP:
5584 case SIOCGIFINDEX: 5595 case SIOCGIFINDEX:
5585 case SIOCGIFTXQLEN: 5596 case SIOCGIFTXQLEN:
5586 dev_load(net, ifr.ifr_name); 5597 dev_load(net, ifr.ifr_name);
5587 rcu_read_lock(); 5598 rcu_read_lock();
5588 ret = dev_ifsioc_locked(net, &ifr, cmd); 5599 ret = dev_ifsioc_locked(net, &ifr, cmd);
5589 rcu_read_unlock(); 5600 rcu_read_unlock();
5590 if (!ret) { 5601 if (!ret) {
5591 if (colon) 5602 if (colon)
5592 *colon = ':'; 5603 *colon = ':';
5593 if (copy_to_user(arg, &ifr, 5604 if (copy_to_user(arg, &ifr,
5594 sizeof(struct ifreq))) 5605 sizeof(struct ifreq)))
5595 ret = -EFAULT; 5606 ret = -EFAULT;
5596 } 5607 }
5597 return ret; 5608 return ret;
5598 5609
5599 case SIOCETHTOOL: 5610 case SIOCETHTOOL:
5600 dev_load(net, ifr.ifr_name); 5611 dev_load(net, ifr.ifr_name);
5601 rtnl_lock(); 5612 rtnl_lock();
5602 ret = dev_ethtool(net, &ifr); 5613 ret = dev_ethtool(net, &ifr);
5603 rtnl_unlock(); 5614 rtnl_unlock();
5604 if (!ret) { 5615 if (!ret) {
5605 if (colon) 5616 if (colon)
5606 *colon = ':'; 5617 *colon = ':';
5607 if (copy_to_user(arg, &ifr, 5618 if (copy_to_user(arg, &ifr,
5608 sizeof(struct ifreq))) 5619 sizeof(struct ifreq)))
5609 ret = -EFAULT; 5620 ret = -EFAULT;
5610 } 5621 }
5611 return ret; 5622 return ret;
5612 5623
5613 /* 5624 /*
5614 * These ioctl calls: 5625 * These ioctl calls:
5615 * - require superuser power. 5626 * - require superuser power.
5616 * - require strict serialization. 5627 * - require strict serialization.
5617 * - return a value 5628 * - return a value
5618 */ 5629 */
5619 case SIOCGMIIPHY: 5630 case SIOCGMIIPHY:
5620 case SIOCGMIIREG: 5631 case SIOCGMIIREG:
5621 case SIOCSIFNAME: 5632 case SIOCSIFNAME:
5622 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 5633 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
5623 return -EPERM; 5634 return -EPERM;
5624 dev_load(net, ifr.ifr_name); 5635 dev_load(net, ifr.ifr_name);
5625 rtnl_lock(); 5636 rtnl_lock();
5626 ret = dev_ifsioc(net, &ifr, cmd); 5637 ret = dev_ifsioc(net, &ifr, cmd);
5627 rtnl_unlock(); 5638 rtnl_unlock();
5628 if (!ret) { 5639 if (!ret) {
5629 if (colon) 5640 if (colon)
5630 *colon = ':'; 5641 *colon = ':';
5631 if (copy_to_user(arg, &ifr, 5642 if (copy_to_user(arg, &ifr,
5632 sizeof(struct ifreq))) 5643 sizeof(struct ifreq)))
5633 ret = -EFAULT; 5644 ret = -EFAULT;
5634 } 5645 }
5635 return ret; 5646 return ret;
5636 5647
5637 /* 5648 /*
5638 * These ioctl calls: 5649 * These ioctl calls:
5639 * - require superuser power. 5650 * - require superuser power.
5640 * - require strict serialization. 5651 * - require strict serialization.
5641 * - do not return a value 5652 * - do not return a value
5642 */ 5653 */
5643 case SIOCSIFMAP: 5654 case SIOCSIFMAP:
5644 case SIOCSIFTXQLEN: 5655 case SIOCSIFTXQLEN:
5645 if (!capable(CAP_NET_ADMIN)) 5656 if (!capable(CAP_NET_ADMIN))
5646 return -EPERM; 5657 return -EPERM;
5647 /* fall through */ 5658 /* fall through */
5648 /* 5659 /*
5649 * These ioctl calls: 5660 * These ioctl calls:
5650 * - require local superuser power. 5661 * - require local superuser power.
5651 * - require strict serialization. 5662 * - require strict serialization.
5652 * - do not return a value 5663 * - do not return a value
5653 */ 5664 */
5654 case SIOCSIFFLAGS: 5665 case SIOCSIFFLAGS:
5655 case SIOCSIFMETRIC: 5666 case SIOCSIFMETRIC:
5656 case SIOCSIFMTU: 5667 case SIOCSIFMTU:
5657 case SIOCSIFHWADDR: 5668 case SIOCSIFHWADDR:
5658 case SIOCSIFSLAVE: 5669 case SIOCSIFSLAVE:
5659 case SIOCADDMULTI: 5670 case SIOCADDMULTI:
5660 case SIOCDELMULTI: 5671 case SIOCDELMULTI:
5661 case SIOCSIFHWBROADCAST: 5672 case SIOCSIFHWBROADCAST:
5662 case SIOCSMIIREG: 5673 case SIOCSMIIREG:
5663 case SIOCBONDENSLAVE: 5674 case SIOCBONDENSLAVE:
5664 case SIOCBONDRELEASE: 5675 case SIOCBONDRELEASE:
5665 case SIOCBONDSETHWADDR: 5676 case SIOCBONDSETHWADDR:
5666 case SIOCBONDCHANGEACTIVE: 5677 case SIOCBONDCHANGEACTIVE:
5667 case SIOCBRADDIF: 5678 case SIOCBRADDIF:
5668 case SIOCBRDELIF: 5679 case SIOCBRDELIF:
5669 case SIOCSHWTSTAMP: 5680 case SIOCSHWTSTAMP:
5670 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 5681 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
5671 return -EPERM; 5682 return -EPERM;
5672 /* fall through */ 5683 /* fall through */
5673 case SIOCBONDSLAVEINFOQUERY: 5684 case SIOCBONDSLAVEINFOQUERY:
5674 case SIOCBONDINFOQUERY: 5685 case SIOCBONDINFOQUERY:
5675 dev_load(net, ifr.ifr_name); 5686 dev_load(net, ifr.ifr_name);
5676 rtnl_lock(); 5687 rtnl_lock();
5677 ret = dev_ifsioc(net, &ifr, cmd); 5688 ret = dev_ifsioc(net, &ifr, cmd);
5678 rtnl_unlock(); 5689 rtnl_unlock();
5679 return ret; 5690 return ret;
5680 5691
5681 case SIOCGIFMEM: 5692 case SIOCGIFMEM:
5682 /* Get the per device memory space. We can add this but 5693 /* Get the per device memory space. We can add this but
5683 * currently do not support it */ 5694 * currently do not support it */
5684 case SIOCSIFMEM: 5695 case SIOCSIFMEM:
5685 /* Set the per device memory buffer space. 5696 /* Set the per device memory buffer space.
5686 * Not applicable in our case */ 5697 * Not applicable in our case */
5687 case SIOCSIFLINK: 5698 case SIOCSIFLINK:
5688 return -ENOTTY; 5699 return -ENOTTY;
5689 5700
5690 /* 5701 /*
5691 * Unknown or private ioctl. 5702 * Unknown or private ioctl.
5692 */ 5703 */
5693 default: 5704 default:
5694 if (cmd == SIOCWANDEV || 5705 if (cmd == SIOCWANDEV ||
5695 (cmd >= SIOCDEVPRIVATE && 5706 (cmd >= SIOCDEVPRIVATE &&
5696 cmd <= SIOCDEVPRIVATE + 15)) { 5707 cmd <= SIOCDEVPRIVATE + 15)) {
5697 dev_load(net, ifr.ifr_name); 5708 dev_load(net, ifr.ifr_name);
5698 rtnl_lock(); 5709 rtnl_lock();
5699 ret = dev_ifsioc(net, &ifr, cmd); 5710 ret = dev_ifsioc(net, &ifr, cmd);
5700 rtnl_unlock(); 5711 rtnl_unlock();
5701 if (!ret && copy_to_user(arg, &ifr, 5712 if (!ret && copy_to_user(arg, &ifr,
5702 sizeof(struct ifreq))) 5713 sizeof(struct ifreq)))
5703 ret = -EFAULT; 5714 ret = -EFAULT;
5704 return ret; 5715 return ret;
5705 } 5716 }
5706 /* Take care of Wireless Extensions */ 5717 /* Take care of Wireless Extensions */
5707 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) 5718 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
5708 return wext_handle_ioctl(net, &ifr, cmd, arg); 5719 return wext_handle_ioctl(net, &ifr, cmd, arg);
5709 return -ENOTTY; 5720 return -ENOTTY;
5710 } 5721 }
5711 } 5722 }
5712 5723
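For context, a hedged user-space sketch (not part of this commit) of what reaches the dev_ioctl() entry point above through the socket ioctl path; it exercises the SIOCGIFFLAGS branch handled by dev_ifsioc_locked(), and the interface name "eth0" is a placeholder:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>

int main(void)
{
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);	/* any socket works for these ioctls */

	if (fd < 0)
		return 1;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);	/* placeholder interface name */

	/* Handled in the kernel by dev_ioctl() -> dev_ifsioc_locked() */
	if (ioctl(fd, SIOCGIFFLAGS, &ifr) == 0)
		printf("flags: 0x%x\n", ifr.ifr_flags);

	close(fd);
	return 0;
}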
5713 5724
5714 /** 5725 /**
5715 * dev_new_index - allocate an ifindex 5726 * dev_new_index - allocate an ifindex
5716 * @net: the applicable net namespace 5727 * @net: the applicable net namespace
5717 * 5728 *
5718 * Returns a suitable unique value for a new device interface 5729 * Returns a suitable unique value for a new device interface
5719 * number. The caller must hold the rtnl semaphore or the 5730 * number. The caller must hold the rtnl semaphore or the
5720 * dev_base_lock to be sure it remains unique. 5731 * dev_base_lock to be sure it remains unique.
5721 */ 5732 */
5722 static int dev_new_index(struct net *net) 5733 static int dev_new_index(struct net *net)
5723 { 5734 {
5724 int ifindex = net->ifindex; 5735 int ifindex = net->ifindex;
5725 for (;;) { 5736 for (;;) {
5726 if (++ifindex <= 0) 5737 if (++ifindex <= 0)
5727 ifindex = 1; 5738 ifindex = 1;
5728 if (!__dev_get_by_index(net, ifindex)) 5739 if (!__dev_get_by_index(net, ifindex))
5729 return net->ifindex = ifindex; 5740 return net->ifindex = ifindex;
5730 } 5741 }
5731 } 5742 }
5732 5743
5733 /* Delayed registration/unregisteration */ 5744 /* Delayed registration/unregisteration */
5734 static LIST_HEAD(net_todo_list); 5745 static LIST_HEAD(net_todo_list);
5735 5746
5736 static void net_set_todo(struct net_device *dev) 5747 static void net_set_todo(struct net_device *dev)
5737 { 5748 {
5738 list_add_tail(&dev->todo_list, &net_todo_list); 5749 list_add_tail(&dev->todo_list, &net_todo_list);
5739 } 5750 }
5740 5751
5741 static void rollback_registered_many(struct list_head *head) 5752 static void rollback_registered_many(struct list_head *head)
5742 { 5753 {
5743 struct net_device *dev, *tmp; 5754 struct net_device *dev, *tmp;
5744 5755
5745 BUG_ON(dev_boot_phase); 5756 BUG_ON(dev_boot_phase);
5746 ASSERT_RTNL(); 5757 ASSERT_RTNL();
5747 5758
5748 list_for_each_entry_safe(dev, tmp, head, unreg_list) { 5759 list_for_each_entry_safe(dev, tmp, head, unreg_list) {
5749 /* Some devices call without registering 5760 /* Some devices call without registering
5750 * for initialization unwind. Remove those 5761 * for initialization unwind. Remove those
5751 * devices and proceed with the remaining. 5762 * devices and proceed with the remaining.
5752 */ 5763 */
5753 if (dev->reg_state == NETREG_UNINITIALIZED) { 5764 if (dev->reg_state == NETREG_UNINITIALIZED) {
5754 pr_debug("unregister_netdevice: device %s/%p never was registered\n", 5765 pr_debug("unregister_netdevice: device %s/%p never was registered\n",
5755 dev->name, dev); 5766 dev->name, dev);
5756 5767
5757 WARN_ON(1); 5768 WARN_ON(1);
5758 list_del(&dev->unreg_list); 5769 list_del(&dev->unreg_list);
5759 continue; 5770 continue;
5760 } 5771 }
5761 dev->dismantle = true; 5772 dev->dismantle = true;
5762 BUG_ON(dev->reg_state != NETREG_REGISTERED); 5773 BUG_ON(dev->reg_state != NETREG_REGISTERED);
5763 } 5774 }
5764 5775
5765 /* If device is running, close it first. */ 5776 /* If device is running, close it first. */
5766 dev_close_many(head); 5777 dev_close_many(head);
5767 5778
5768 list_for_each_entry(dev, head, unreg_list) { 5779 list_for_each_entry(dev, head, unreg_list) {
5769 /* And unlink it from device chain. */ 5780 /* And unlink it from device chain. */
5770 unlist_netdevice(dev); 5781 unlist_netdevice(dev);
5771 5782
5772 dev->reg_state = NETREG_UNREGISTERING; 5783 dev->reg_state = NETREG_UNREGISTERING;
5773 } 5784 }
5774 5785
5775 synchronize_net(); 5786 synchronize_net();
5776 5787
5777 list_for_each_entry(dev, head, unreg_list) { 5788 list_for_each_entry(dev, head, unreg_list) {
5778 /* Shutdown queueing discipline. */ 5789 /* Shutdown queueing discipline. */
5779 dev_shutdown(dev); 5790 dev_shutdown(dev);
5780 5791
5781 5792
5782 /* Notify protocols, that we are about to destroy 5793 /* Notify protocols, that we are about to destroy
5783 this device. They should clean all the things. 5794 this device. They should clean all the things.
5784 */ 5795 */
5785 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 5796 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5786 5797
5787 if (!dev->rtnl_link_ops || 5798 if (!dev->rtnl_link_ops ||
5788 dev->rtnl_link_state == RTNL_LINK_INITIALIZED) 5799 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5789 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); 5800 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
5790 5801
5791 /* 5802 /*
5792 * Flush the unicast and multicast chains 5803 * Flush the unicast and multicast chains
5793 */ 5804 */
5794 dev_uc_flush(dev); 5805 dev_uc_flush(dev);
5795 dev_mc_flush(dev); 5806 dev_mc_flush(dev);
5796 5807
5797 if (dev->netdev_ops->ndo_uninit) 5808 if (dev->netdev_ops->ndo_uninit)
5798 dev->netdev_ops->ndo_uninit(dev); 5809 dev->netdev_ops->ndo_uninit(dev);
5799 5810
5800 /* Notifier chain MUST detach all upper devices from us. */ 5811 /* Notifier chain MUST detach all upper devices from us. */
5801 WARN_ON(netdev_has_any_upper_dev(dev)); 5812 WARN_ON(netdev_has_any_upper_dev(dev));
5802 5813
5803 /* Remove entries from kobject tree */ 5814 /* Remove entries from kobject tree */
5804 netdev_unregister_kobject(dev); 5815 netdev_unregister_kobject(dev);
5805 #ifdef CONFIG_XPS 5816 #ifdef CONFIG_XPS
5806 /* Remove XPS queueing entries */ 5817 /* Remove XPS queueing entries */
5807 netif_reset_xps_queues_gt(dev, 0); 5818 netif_reset_xps_queues_gt(dev, 0);
5808 #endif 5819 #endif
5809 } 5820 }
5810 5821
5811 synchronize_net(); 5822 synchronize_net();
5812 5823
5813 list_for_each_entry(dev, head, unreg_list) 5824 list_for_each_entry(dev, head, unreg_list)
5814 dev_put(dev); 5825 dev_put(dev);
5815 } 5826 }
5816 5827
5817 static void rollback_registered(struct net_device *dev) 5828 static void rollback_registered(struct net_device *dev)
5818 { 5829 {
5819 LIST_HEAD(single); 5830 LIST_HEAD(single);
5820 5831
5821 list_add(&dev->unreg_list, &single); 5832 list_add(&dev->unreg_list, &single);
5822 rollback_registered_many(&single); 5833 rollback_registered_many(&single);
5823 list_del(&single); 5834 list_del(&single);
5824 } 5835 }
5825 5836
5826 static netdev_features_t netdev_fix_features(struct net_device *dev, 5837 static netdev_features_t netdev_fix_features(struct net_device *dev,
5827 netdev_features_t features) 5838 netdev_features_t features)
5828 { 5839 {
5829 /* Fix illegal checksum combinations */ 5840 /* Fix illegal checksum combinations */
5830 if ((features & NETIF_F_HW_CSUM) && 5841 if ((features & NETIF_F_HW_CSUM) &&
5831 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { 5842 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5832 netdev_warn(dev, "mixed HW and IP checksum settings.\n"); 5843 netdev_warn(dev, "mixed HW and IP checksum settings.\n");
5833 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); 5844 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
5834 } 5845 }
5835 5846
5836 /* Fix illegal SG+CSUM combinations. */ 5847 /* Fix illegal SG+CSUM combinations. */
5837 if ((features & NETIF_F_SG) && 5848 if ((features & NETIF_F_SG) &&
5838 !(features & NETIF_F_ALL_CSUM)) { 5849 !(features & NETIF_F_ALL_CSUM)) {
5839 netdev_dbg(dev, 5850 netdev_dbg(dev,
5840 "Dropping NETIF_F_SG since no checksum feature.\n"); 5851 "Dropping NETIF_F_SG since no checksum feature.\n");
5841 features &= ~NETIF_F_SG; 5852 features &= ~NETIF_F_SG;
5842 } 5853 }
5843 5854
5844 /* TSO requires that SG is present as well. */ 5855 /* TSO requires that SG is present as well. */
5845 if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) { 5856 if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
5846 netdev_dbg(dev, "Dropping TSO features since no SG feature.\n"); 5857 netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
5847 features &= ~NETIF_F_ALL_TSO; 5858 features &= ~NETIF_F_ALL_TSO;
5848 } 5859 }
5849 5860
5850 /* TSO ECN requires that TSO is present as well. */ 5861 /* TSO ECN requires that TSO is present as well. */
5851 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN) 5862 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
5852 features &= ~NETIF_F_TSO_ECN; 5863 features &= ~NETIF_F_TSO_ECN;
5853 5864
5854 /* Software GSO depends on SG. */ 5865 /* Software GSO depends on SG. */
5855 if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) { 5866 if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
5856 netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n"); 5867 netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
5857 features &= ~NETIF_F_GSO; 5868 features &= ~NETIF_F_GSO;
5858 } 5869 }
5859 5870
5860 /* UFO needs SG and checksumming */ 5871 /* UFO needs SG and checksumming */
5861 if (features & NETIF_F_UFO) { 5872 if (features & NETIF_F_UFO) {
5862 /* maybe split UFO into V4 and V6? */ 5873 /* maybe split UFO into V4 and V6? */
5863 if (!((features & NETIF_F_GEN_CSUM) || 5874 if (!((features & NETIF_F_GEN_CSUM) ||
5864 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) 5875 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
5865 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { 5876 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5866 netdev_dbg(dev, 5877 netdev_dbg(dev,
5867 "Dropping NETIF_F_UFO since no checksum offload features.\n"); 5878 "Dropping NETIF_F_UFO since no checksum offload features.\n");
5868 features &= ~NETIF_F_UFO; 5879 features &= ~NETIF_F_UFO;
5869 } 5880 }
5870 5881
5871 if (!(features & NETIF_F_SG)) { 5882 if (!(features & NETIF_F_SG)) {
5872 netdev_dbg(dev, 5883 netdev_dbg(dev,
5873 "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n"); 5884 "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
5874 features &= ~NETIF_F_UFO; 5885 features &= ~NETIF_F_UFO;
5875 } 5886 }
5876 } 5887 }
5877 5888
5878 return features; 5889 return features;
5879 } 5890 }
5880 5891
5881 int __netdev_update_features(struct net_device *dev) 5892 int __netdev_update_features(struct net_device *dev)
5882 { 5893 {
5883 netdev_features_t features; 5894 netdev_features_t features;
5884 int err = 0; 5895 int err = 0;
5885 5896
5886 ASSERT_RTNL(); 5897 ASSERT_RTNL();
5887 5898
5888 features = netdev_get_wanted_features(dev); 5899 features = netdev_get_wanted_features(dev);
5889 5900
5890 if (dev->netdev_ops->ndo_fix_features) 5901 if (dev->netdev_ops->ndo_fix_features)
5891 features = dev->netdev_ops->ndo_fix_features(dev, features); 5902 features = dev->netdev_ops->ndo_fix_features(dev, features);
5892 5903
5893 /* driver might be less strict about feature dependencies */ 5904 /* driver might be less strict about feature dependencies */
5894 features = netdev_fix_features(dev, features); 5905 features = netdev_fix_features(dev, features);
5895 5906
5896 if (dev->features == features) 5907 if (dev->features == features)
5897 return 0; 5908 return 0;
5898 5909
5899 netdev_dbg(dev, "Features changed: %pNF -> %pNF\n", 5910 netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
5900 &dev->features, &features); 5911 &dev->features, &features);
5901 5912
5902 if (dev->netdev_ops->ndo_set_features) 5913 if (dev->netdev_ops->ndo_set_features)
5903 err = dev->netdev_ops->ndo_set_features(dev, features); 5914 err = dev->netdev_ops->ndo_set_features(dev, features);
5904 5915
5905 if (unlikely(err < 0)) { 5916 if (unlikely(err < 0)) {
5906 netdev_err(dev, 5917 netdev_err(dev,
5907 "set_features() failed (%d); wanted %pNF, left %pNF\n", 5918 "set_features() failed (%d); wanted %pNF, left %pNF\n",
5908 err, &features, &dev->features); 5919 err, &features, &dev->features);
5909 return -1; 5920 return -1;
5910 } 5921 }
5911 5922
5912 if (!err) 5923 if (!err)
5913 dev->features = features; 5924 dev->features = features;
5914 5925
5915 return 1; 5926 return 1;
5916 } 5927 }
5917 5928
5918 /** 5929 /**
5919 * netdev_update_features - recalculate device features 5930 * netdev_update_features - recalculate device features
5920 * @dev: the device to check 5931 * @dev: the device to check
5921 * 5932 *
5922 * Recalculate dev->features set and send notifications if it 5933 * Recalculate dev->features set and send notifications if it
5923 * has changed. Should be called whenever driver- or hardware-dependent 5934 * has changed. Should be called whenever driver- or hardware-dependent
5924 * conditions that influence the features might have changed. 5935 * conditions that influence the features might have changed.
5925 */ 5936 */
5926 void netdev_update_features(struct net_device *dev) 5937 void netdev_update_features(struct net_device *dev)
5927 { 5938 {
5928 if (__netdev_update_features(dev)) 5939 if (__netdev_update_features(dev))
5929 netdev_features_change(dev); 5940 netdev_features_change(dev);
5930 } 5941 }
5931 EXPORT_SYMBOL(netdev_update_features); 5942 EXPORT_SYMBOL(netdev_update_features);
5932 5943
5933 /** 5944 /**
5934 * netdev_change_features - recalculate device features 5945 * netdev_change_features - recalculate device features
5935 * @dev: the device to check 5946 * @dev: the device to check
5936 * 5947 *
5937 * Recalculate dev->features set and send notifications even 5948 * Recalculate dev->features set and send notifications even
5938 * if they have not changed. Should be called instead of 5949 * if they have not changed. Should be called instead of
5939 * netdev_update_features() if dev->vlan_features might also 5950 * netdev_update_features() if dev->vlan_features might also
5940 * have changed, to allow the changes to be propagated to stacked 5951 * have changed, to allow the changes to be propagated to stacked
5941 * VLAN devices. 5952 * VLAN devices.
5942 */ 5953 */
5943 void netdev_change_features(struct net_device *dev) 5954 void netdev_change_features(struct net_device *dev)
5944 { 5955 {
5945 __netdev_update_features(dev); 5956 __netdev_update_features(dev);
5946 netdev_features_change(dev); 5957 netdev_features_change(dev);
5947 } 5958 }
5948 EXPORT_SYMBOL(netdev_change_features); 5959 EXPORT_SYMBOL(netdev_change_features);
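A hedged sketch of how these helpers are meant to be used by a driver (the function name my_dev_reset and the triggering event are hypothetical): after a hardware event that may change offload capabilities, the driver adjusts dev->hw_features under rtnl_lock and lets the core recompute and send notifications.

/* Hypothetical driver fragment, for illustration only. */
static void my_dev_reset(struct net_device *dev)
{
	rtnl_lock();
	/* assume the hardware lost TSO support after the reset */
	dev->hw_features &= ~NETIF_F_ALL_TSO;
	netdev_update_features(dev);	/* recompute features, notify if changed */
	rtnl_unlock();
}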
5949 5960
5950 /** 5961 /**
5951 * netif_stacked_transfer_operstate - transfer operstate 5962 * netif_stacked_transfer_operstate - transfer operstate
5952 * @rootdev: the root or lower level device to transfer state from 5963 * @rootdev: the root or lower level device to transfer state from
5953 * @dev: the device to transfer operstate to 5964 * @dev: the device to transfer operstate to
5954 * 5965 *
5955 * Transfer operational state from root to device. This is normally 5966 * Transfer operational state from root to device. This is normally
5956 * called when a stacking relationship exists between the root 5967 * called when a stacking relationship exists between the root
5957 * device and the device (a leaf device). 5968 * device and the device (a leaf device).
5958 */ 5969 */
5959 void netif_stacked_transfer_operstate(const struct net_device *rootdev, 5970 void netif_stacked_transfer_operstate(const struct net_device *rootdev,
5960 struct net_device *dev) 5971 struct net_device *dev)
5961 { 5972 {
5962 if (rootdev->operstate == IF_OPER_DORMANT) 5973 if (rootdev->operstate == IF_OPER_DORMANT)
5963 netif_dormant_on(dev); 5974 netif_dormant_on(dev);
5964 else 5975 else
5965 netif_dormant_off(dev); 5976 netif_dormant_off(dev);
5966 5977
5967 if (netif_carrier_ok(rootdev)) { 5978 if (netif_carrier_ok(rootdev)) {
5968 if (!netif_carrier_ok(dev)) 5979 if (!netif_carrier_ok(dev))
5969 netif_carrier_on(dev); 5980 netif_carrier_on(dev);
5970 } else { 5981 } else {
5971 if (netif_carrier_ok(dev)) 5982 if (netif_carrier_ok(dev))
5972 netif_carrier_off(dev); 5983 netif_carrier_off(dev);
5973 } 5984 }
5974 } 5985 }
5975 EXPORT_SYMBOL(netif_stacked_transfer_operstate); 5986 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
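As a small illustrative fragment (hypothetical names, not from this diff), a stacked-device driver such as a VLAN-like upper device would mirror the lower device's state from its notifier handling:

/* lower = real device, upper = stacked device built on top of it */
static void my_sync_state(struct net_device *lower, struct net_device *upper)
{
	netif_stacked_transfer_operstate(lower, upper);
}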
5976 5987
5977 #ifdef CONFIG_RPS 5988 #ifdef CONFIG_RPS
5978 static int netif_alloc_rx_queues(struct net_device *dev) 5989 static int netif_alloc_rx_queues(struct net_device *dev)
5979 { 5990 {
5980 unsigned int i, count = dev->num_rx_queues; 5991 unsigned int i, count = dev->num_rx_queues;
5981 struct netdev_rx_queue *rx; 5992 struct netdev_rx_queue *rx;
5982 5993
5983 BUG_ON(count < 1); 5994 BUG_ON(count < 1);
5984 5995
5985 rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL); 5996 rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
5986 if (!rx) 5997 if (!rx)
5987 return -ENOMEM; 5998 return -ENOMEM;
5988 5999
5989 dev->_rx = rx; 6000 dev->_rx = rx;
5990 6001
5991 for (i = 0; i < count; i++) 6002 for (i = 0; i < count; i++)
5992 rx[i].dev = dev; 6003 rx[i].dev = dev;
5993 return 0; 6004 return 0;
5994 } 6005 }
5995 #endif 6006 #endif
5996 6007
5997 static void netdev_init_one_queue(struct net_device *dev, 6008 static void netdev_init_one_queue(struct net_device *dev,
5998 struct netdev_queue *queue, void *_unused) 6009 struct netdev_queue *queue, void *_unused)
5999 { 6010 {
6000 /* Initialize queue lock */ 6011 /* Initialize queue lock */
6001 spin_lock_init(&queue->_xmit_lock); 6012 spin_lock_init(&queue->_xmit_lock);
6002 netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); 6013 netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
6003 queue->xmit_lock_owner = -1; 6014 queue->xmit_lock_owner = -1;
6004 netdev_queue_numa_node_write(queue, NUMA_NO_NODE); 6015 netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
6005 queue->dev = dev; 6016 queue->dev = dev;
6006 #ifdef CONFIG_BQL 6017 #ifdef CONFIG_BQL
6007 dql_init(&queue->dql, HZ); 6018 dql_init(&queue->dql, HZ);
6008 #endif 6019 #endif
6009 } 6020 }
6010 6021
6011 static int netif_alloc_netdev_queues(struct net_device *dev) 6022 static int netif_alloc_netdev_queues(struct net_device *dev)
6012 { 6023 {
6013 unsigned int count = dev->num_tx_queues; 6024 unsigned int count = dev->num_tx_queues;
6014 struct netdev_queue *tx; 6025 struct netdev_queue *tx;
6015 6026
6016 BUG_ON(count < 1); 6027 BUG_ON(count < 1);
6017 6028
6018 tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL); 6029 tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
6019 if (!tx) 6030 if (!tx)
6020 return -ENOMEM; 6031 return -ENOMEM;
6021 6032
6022 dev->_tx = tx; 6033 dev->_tx = tx;
6023 6034
6024 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); 6035 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
6025 spin_lock_init(&dev->tx_global_lock); 6036 spin_lock_init(&dev->tx_global_lock);
6026 6037
6027 return 0; 6038 return 0;
6028 } 6039 }
6029 6040
6030 /** 6041 /**
6031 * register_netdevice - register a network device 6042 * register_netdevice - register a network device
6032 * @dev: device to register 6043 * @dev: device to register
6033 * 6044 *
6034 * Take a completed network device structure and add it to the kernel 6045 * Take a completed network device structure and add it to the kernel
6035 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier 6046 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
6036 * chain. 0 is returned on success. A negative errno code is returned 6047 * chain. 0 is returned on success. A negative errno code is returned
6037 * on a failure to set up the device, or if the name is a duplicate. 6048 * on a failure to set up the device, or if the name is a duplicate.
6038 * 6049 *
6039 * Callers must hold the rtnl semaphore. You may want 6050 * Callers must hold the rtnl semaphore. You may want
6040 * register_netdev() instead of this. 6051 * register_netdev() instead of this.
6041 * 6052 *
6042 * BUGS: 6053 * BUGS:
6043 * The locking appears insufficient to guarantee two parallel registers 6054 * The locking appears insufficient to guarantee two parallel registers
6044 * will not get the same name. 6055 * will not get the same name.
6045 */ 6056 */
6046 6057
6047 int register_netdevice(struct net_device *dev) 6058 int register_netdevice(struct net_device *dev)
6048 { 6059 {
6049 int ret; 6060 int ret;
6050 struct net *net = dev_net(dev); 6061 struct net *net = dev_net(dev);
6051 6062
6052 BUG_ON(dev_boot_phase); 6063 BUG_ON(dev_boot_phase);
6053 ASSERT_RTNL(); 6064 ASSERT_RTNL();
6054 6065
6055 might_sleep(); 6066 might_sleep();
6056 6067
6057 /* When net_device's are persistent, this will be fatal. */ 6068 /* When net_device's are persistent, this will be fatal. */
6058 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); 6069 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
6059 BUG_ON(!net); 6070 BUG_ON(!net);
6060 6071
6061 spin_lock_init(&dev->addr_list_lock); 6072 spin_lock_init(&dev->addr_list_lock);
6062 netdev_set_addr_lockdep_class(dev); 6073 netdev_set_addr_lockdep_class(dev);
6063 6074
6064 dev->iflink = -1; 6075 dev->iflink = -1;
6065 6076
6066 ret = dev_get_valid_name(net, dev, dev->name); 6077 ret = dev_get_valid_name(net, dev, dev->name);
6067 if (ret < 0) 6078 if (ret < 0)
6068 goto out; 6079 goto out;
6069 6080
6070 /* Init, if this function is available */ 6081 /* Init, if this function is available */
6071 if (dev->netdev_ops->ndo_init) { 6082 if (dev->netdev_ops->ndo_init) {
6072 ret = dev->netdev_ops->ndo_init(dev); 6083 ret = dev->netdev_ops->ndo_init(dev);
6073 if (ret) { 6084 if (ret) {
6074 if (ret > 0) 6085 if (ret > 0)
6075 ret = -EIO; 6086 ret = -EIO;
6076 goto out; 6087 goto out;
6077 } 6088 }
6078 } 6089 }
6079 6090
6080 if (((dev->hw_features | dev->features) & NETIF_F_HW_VLAN_FILTER) && 6091 if (((dev->hw_features | dev->features) & NETIF_F_HW_VLAN_FILTER) &&
6081 (!dev->netdev_ops->ndo_vlan_rx_add_vid || 6092 (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
6082 !dev->netdev_ops->ndo_vlan_rx_kill_vid)) { 6093 !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
6083 netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n"); 6094 netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
6084 ret = -EINVAL; 6095 ret = -EINVAL;
6085 goto err_uninit; 6096 goto err_uninit;
6086 } 6097 }
6087 6098
6088 ret = -EBUSY; 6099 ret = -EBUSY;
6089 if (!dev->ifindex) 6100 if (!dev->ifindex)
6090 dev->ifindex = dev_new_index(net); 6101 dev->ifindex = dev_new_index(net);
6091 else if (__dev_get_by_index(net, dev->ifindex)) 6102 else if (__dev_get_by_index(net, dev->ifindex))
6092 goto err_uninit; 6103 goto err_uninit;
6093 6104
6094 if (dev->iflink == -1) 6105 if (dev->iflink == -1)
6095 dev->iflink = dev->ifindex; 6106 dev->iflink = dev->ifindex;
6096 6107
6097 /* Transfer changeable features to wanted_features and enable 6108 /* Transfer changeable features to wanted_features and enable
6098 * software offloads (GSO and GRO). 6109 * software offloads (GSO and GRO).
6099 */ 6110 */
6100 dev->hw_features |= NETIF_F_SOFT_FEATURES; 6111 dev->hw_features |= NETIF_F_SOFT_FEATURES;
6101 dev->features |= NETIF_F_SOFT_FEATURES; 6112 dev->features |= NETIF_F_SOFT_FEATURES;
6102 dev->wanted_features = dev->features & dev->hw_features; 6113 dev->wanted_features = dev->features & dev->hw_features;
6103 6114
6104 /* Turn on no cache copy if HW is doing checksum */ 6115 /* Turn on no cache copy if HW is doing checksum */
6105 if (!(dev->flags & IFF_LOOPBACK)) { 6116 if (!(dev->flags & IFF_LOOPBACK)) {
6106 dev->hw_features |= NETIF_F_NOCACHE_COPY; 6117 dev->hw_features |= NETIF_F_NOCACHE_COPY;
6107 if (dev->features & NETIF_F_ALL_CSUM) { 6118 if (dev->features & NETIF_F_ALL_CSUM) {
6108 dev->wanted_features |= NETIF_F_NOCACHE_COPY; 6119 dev->wanted_features |= NETIF_F_NOCACHE_COPY;
6109 dev->features |= NETIF_F_NOCACHE_COPY; 6120 dev->features |= NETIF_F_NOCACHE_COPY;
6110 } 6121 }
6111 } 6122 }
6112 6123
6113 /* Make NETIF_F_HIGHDMA inheritable to VLAN devices. 6124 /* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
6114 */ 6125 */
6115 dev->vlan_features |= NETIF_F_HIGHDMA; 6126 dev->vlan_features |= NETIF_F_HIGHDMA;
6116 6127
6117 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); 6128 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
6118 ret = notifier_to_errno(ret); 6129 ret = notifier_to_errno(ret);
6119 if (ret) 6130 if (ret)
6120 goto err_uninit; 6131 goto err_uninit;
6121 6132
6122 ret = netdev_register_kobject(dev); 6133 ret = netdev_register_kobject(dev);
6123 if (ret) 6134 if (ret)
6124 goto err_uninit; 6135 goto err_uninit;
6125 dev->reg_state = NETREG_REGISTERED; 6136 dev->reg_state = NETREG_REGISTERED;
6126 6137
6127 __netdev_update_features(dev); 6138 __netdev_update_features(dev);
6128 6139
6129 /* 6140 /*
6130 * Default initial state at registration is that the 6141 * Default initial state at registration is that the
6131 * device is present. 6142 * device is present.
6132 */ 6143 */
6133 6144
6134 set_bit(__LINK_STATE_PRESENT, &dev->state); 6145 set_bit(__LINK_STATE_PRESENT, &dev->state);
6135 6146
6136 linkwatch_init_dev(dev); 6147 linkwatch_init_dev(dev);
6137 6148
6138 dev_init_scheduler(dev); 6149 dev_init_scheduler(dev);
6139 dev_hold(dev); 6150 dev_hold(dev);
6140 list_netdevice(dev); 6151 list_netdevice(dev);
6141 add_device_randomness(dev->dev_addr, dev->addr_len); 6152 add_device_randomness(dev->dev_addr, dev->addr_len);
6142 6153
6143 /* If the device has permanent device address, driver should 6154 /* If the device has permanent device address, driver should
6144 * set dev_addr and also addr_assign_type should be set to 6155 * set dev_addr and also addr_assign_type should be set to
6145 * NET_ADDR_PERM (default value). 6156 * NET_ADDR_PERM (default value).
6146 */ 6157 */
6147 if (dev->addr_assign_type == NET_ADDR_PERM) 6158 if (dev->addr_assign_type == NET_ADDR_PERM)
6148 memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); 6159 memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
6149 6160
6150 /* Notify protocols, that a new device appeared. */ 6161 /* Notify protocols, that a new device appeared. */
6151 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); 6162 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
6152 ret = notifier_to_errno(ret); 6163 ret = notifier_to_errno(ret);
6153 if (ret) { 6164 if (ret) {
6154 rollback_registered(dev); 6165 rollback_registered(dev);
6155 dev->reg_state = NETREG_UNREGISTERED; 6166 dev->reg_state = NETREG_UNREGISTERED;
6156 } 6167 }
6157 /* 6168 /*
6158 * Prevent userspace races by waiting until the network 6169 * Prevent userspace races by waiting until the network
6159 * device is fully setup before sending notifications. 6170 * device is fully setup before sending notifications.
6160 */ 6171 */
6161 if (!dev->rtnl_link_ops || 6172 if (!dev->rtnl_link_ops ||
6162 dev->rtnl_link_state == RTNL_LINK_INITIALIZED) 6173 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
6163 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); 6174 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
6164 6175
6165 out: 6176 out:
6166 return ret; 6177 return ret;
6167 6178
6168 err_uninit: 6179 err_uninit:
6169 if (dev->netdev_ops->ndo_uninit) 6180 if (dev->netdev_ops->ndo_uninit)
6170 dev->netdev_ops->ndo_uninit(dev); 6181 dev->netdev_ops->ndo_uninit(dev);
6171 goto out; 6182 goto out;
6172 } 6183 }
6173 EXPORT_SYMBOL(register_netdevice); 6184 EXPORT_SYMBOL(register_netdevice);
6174 6185
6175 /** 6186 /**
6176 * init_dummy_netdev - init a dummy network device for NAPI 6187 * init_dummy_netdev - init a dummy network device for NAPI
6177 * @dev: device to init 6188 * @dev: device to init
6178 * 6189 *
6179 * This takes a network device structure and initializes the minimum 6190 * This takes a network device structure and initializes the minimum
6180 * number of fields so it can be used to schedule NAPI polls without 6191 * number of fields so it can be used to schedule NAPI polls without
6181 * registering a full blown interface. This is to be used by drivers 6192 * registering a full blown interface. This is to be used by drivers
6182 * that need to tie several hardware interfaces to a single NAPI 6193 * that need to tie several hardware interfaces to a single NAPI
6183 * poll scheduler due to HW limitations. 6194 * poll scheduler due to HW limitations.
6184 */ 6195 */
6185 int init_dummy_netdev(struct net_device *dev) 6196 int init_dummy_netdev(struct net_device *dev)
6186 { 6197 {
6187 /* Clear everything. Note we don't initialize spinlocks 6198 /* Clear everything. Note we don't initialize spinlocks
6188 * as they aren't supposed to be taken by any of the 6199 * as they aren't supposed to be taken by any of the
6189 * NAPI code and this dummy netdev is supposed to be 6200 * NAPI code and this dummy netdev is supposed to be
6190 * only ever used for NAPI polls 6201 * only ever used for NAPI polls
6191 */ 6202 */
6192 memset(dev, 0, sizeof(struct net_device)); 6203 memset(dev, 0, sizeof(struct net_device));
6193 6204
6194 /* make sure we BUG if trying to hit standard 6205 /* make sure we BUG if trying to hit standard
6195 * register/unregister code path 6206 * register/unregister code path
6196 */ 6207 */
6197 dev->reg_state = NETREG_DUMMY; 6208 dev->reg_state = NETREG_DUMMY;
6198 6209
6199 /* NAPI wants this */ 6210 /* NAPI wants this */
6200 INIT_LIST_HEAD(&dev->napi_list); 6211 INIT_LIST_HEAD(&dev->napi_list);
6201 6212
6202 /* a dummy interface is started by default */ 6213 /* a dummy interface is started by default */
6203 set_bit(__LINK_STATE_PRESENT, &dev->state); 6214 set_bit(__LINK_STATE_PRESENT, &dev->state);
6204 set_bit(__LINK_STATE_START, &dev->state); 6215 set_bit(__LINK_STATE_START, &dev->state);
6205 6216
6206 /* Note : We don't allocate pcpu_refcnt for dummy devices, 6217 /* Note : We don't allocate pcpu_refcnt for dummy devices,
6207 * because users of this 'device' don't need to change 6218 * because users of this 'device' don't need to change
6208 * its refcount. 6219 * its refcount.
6209 */ 6220 */
6210 6221
6211 return 0; 6222 return 0;
6212 } 6223 }
6213 EXPORT_SYMBOL_GPL(init_dummy_netdev); 6224 EXPORT_SYMBOL_GPL(init_dummy_netdev);
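A hedged sketch of the intended use (my_adapter, my_poll and the queue weight are hypothetical): a driver that multiplexes several hardware ports onto one NAPI context hangs that context off a dummy netdev which is never registered.

struct my_adapter {
	struct net_device napi_dev;	/* dummy device, used only for NAPI */
	struct napi_struct napi;
};

static int my_poll(struct napi_struct *napi, int budget);	/* driver poll, defined elsewhere */

static void my_adapter_setup_napi(struct my_adapter *ad)
{
	init_dummy_netdev(&ad->napi_dev);
	netif_napi_add(&ad->napi_dev, &ad->napi, my_poll, 64);
}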
6214 6225
6215 6226
6216 /** 6227 /**
6217 * register_netdev - register a network device 6228 * register_netdev - register a network device
6218 * @dev: device to register 6229 * @dev: device to register
6219 * 6230 *
6220 * Take a completed network device structure and add it to the kernel 6231 * Take a completed network device structure and add it to the kernel
6221 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier 6232 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
6222 * chain. 0 is returned on success. A negative errno code is returned 6233 * chain. 0 is returned on success. A negative errno code is returned
6223 * on a failure to set up the device, or if the name is a duplicate. 6234 * on a failure to set up the device, or if the name is a duplicate.
6224 * 6235 *
6225 * This is a wrapper around register_netdevice that takes the rtnl semaphore 6236 * This is a wrapper around register_netdevice that takes the rtnl semaphore
6226 * and expands the device name if you passed a format string to 6237 * and expands the device name if you passed a format string to
6227 * alloc_netdev. 6238 * alloc_netdev.
6228 */ 6239 */
6229 int register_netdev(struct net_device *dev) 6240 int register_netdev(struct net_device *dev)
6230 { 6241 {
6231 int err; 6242 int err;
6232 6243
6233 rtnl_lock(); 6244 rtnl_lock();
6234 err = register_netdevice(dev); 6245 err = register_netdevice(dev);
6235 rtnl_unlock(); 6246 rtnl_unlock();
6236 return err; 6247 return err;
6237 } 6248 }
6238 EXPORT_SYMBOL(register_netdev); 6249 EXPORT_SYMBOL(register_netdev);
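A minimal registration sketch, assuming a hypothetical Ethernet driver (my_setup, my_probe and the "myeth%d" format string are placeholders); alloc_netdev() is the single-queue wrapper around alloc_netdev_mqs() shown further below.

static void my_setup(struct net_device *dev)
{
	ether_setup(dev);		/* generic Ethernet defaults */
}

static int my_probe(void)
{
	struct net_device *dev;
	int err;

	dev = alloc_netdev(0, "myeth%d", my_setup);
	if (!dev)
		return -ENOMEM;

	err = register_netdev(dev);	/* takes and releases the rtnl semaphore */
	if (err) {
		free_netdev(dev);	/* safe to free on registration failure */
		return err;
	}
	return 0;
}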
6239 6250
6240 int netdev_refcnt_read(const struct net_device *dev) 6251 int netdev_refcnt_read(const struct net_device *dev)
6241 { 6252 {
6242 int i, refcnt = 0; 6253 int i, refcnt = 0;
6243 6254
6244 for_each_possible_cpu(i) 6255 for_each_possible_cpu(i)
6245 refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i); 6256 refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
6246 return refcnt; 6257 return refcnt;
6247 } 6258 }
6248 EXPORT_SYMBOL(netdev_refcnt_read); 6259 EXPORT_SYMBOL(netdev_refcnt_read);
6249 6260
6250 /** 6261 /**
6251 * netdev_wait_allrefs - wait until all references are gone. 6262 * netdev_wait_allrefs - wait until all references are gone.
6252 * @dev: target net_device 6263 * @dev: target net_device
6253 * 6264 *
6254 * This is called when unregistering network devices. 6265 * This is called when unregistering network devices.
6255 * 6266 *
6256 * Any protocol or device that holds a reference should register 6267 * Any protocol or device that holds a reference should register
6257 * for netdevice notification, and cleanup and put back the 6268 * for netdevice notification, and cleanup and put back the
6258 * reference if they receive an UNREGISTER event. 6269 * reference if they receive an UNREGISTER event.
6259 * We can get stuck here if buggy protocols don't correctly 6270 * We can get stuck here if buggy protocols don't correctly
6260 * call dev_put. 6271 * call dev_put.
6261 */ 6272 */
6262 static void netdev_wait_allrefs(struct net_device *dev) 6273 static void netdev_wait_allrefs(struct net_device *dev)
6263 { 6274 {
6264 unsigned long rebroadcast_time, warning_time; 6275 unsigned long rebroadcast_time, warning_time;
6265 int refcnt; 6276 int refcnt;
6266 6277
6267 linkwatch_forget_dev(dev); 6278 linkwatch_forget_dev(dev);
6268 6279
6269 rebroadcast_time = warning_time = jiffies; 6280 rebroadcast_time = warning_time = jiffies;
6270 refcnt = netdev_refcnt_read(dev); 6281 refcnt = netdev_refcnt_read(dev);
6271 6282
6272 while (refcnt != 0) { 6283 while (refcnt != 0) {
6273 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { 6284 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
6274 rtnl_lock(); 6285 rtnl_lock();
6275 6286
6276 /* Rebroadcast unregister notification */ 6287 /* Rebroadcast unregister notification */
6277 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 6288 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
6278 6289
6279 __rtnl_unlock(); 6290 __rtnl_unlock();
6280 rcu_barrier(); 6291 rcu_barrier();
6281 rtnl_lock(); 6292 rtnl_lock();
6282 6293
6283 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); 6294 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
6284 if (test_bit(__LINK_STATE_LINKWATCH_PENDING, 6295 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
6285 &dev->state)) { 6296 &dev->state)) {
6286 /* We must not have linkwatch events 6297 /* We must not have linkwatch events
6287 * pending on unregister. If this 6298 * pending on unregister. If this
6288 * happens, we simply run the queue 6299 * happens, we simply run the queue
6289 * unscheduled, resulting in a noop 6300 * unscheduled, resulting in a noop
6290 * for this device. 6301 * for this device.
6291 */ 6302 */
6292 linkwatch_run_queue(); 6303 linkwatch_run_queue();
6293 } 6304 }
6294 6305
6295 __rtnl_unlock(); 6306 __rtnl_unlock();
6296 6307
6297 rebroadcast_time = jiffies; 6308 rebroadcast_time = jiffies;
6298 } 6309 }
6299 6310
6300 msleep(250); 6311 msleep(250);
6301 6312
6302 refcnt = netdev_refcnt_read(dev); 6313 refcnt = netdev_refcnt_read(dev);
6303 6314
6304 if (time_after(jiffies, warning_time + 10 * HZ)) { 6315 if (time_after(jiffies, warning_time + 10 * HZ)) {
6305 pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n", 6316 pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
6306 dev->name, refcnt); 6317 dev->name, refcnt);
6307 warning_time = jiffies; 6318 warning_time = jiffies;
6308 } 6319 }
6309 } 6320 }
6310 } 6321 }
6311 6322
6312 /* The sequence is: 6323 /* The sequence is:
6313 * 6324 *
6314 * rtnl_lock(); 6325 * rtnl_lock();
6315 * ... 6326 * ...
6316 * register_netdevice(x1); 6327 * register_netdevice(x1);
6317 * register_netdevice(x2); 6328 * register_netdevice(x2);
6318 * ... 6329 * ...
6319 * unregister_netdevice(y1); 6330 * unregister_netdevice(y1);
6320 * unregister_netdevice(y2); 6331 * unregister_netdevice(y2);
6321 * ... 6332 * ...
6322 * rtnl_unlock(); 6333 * rtnl_unlock();
6323 * free_netdev(y1); 6334 * free_netdev(y1);
6324 * free_netdev(y2); 6335 * free_netdev(y2);
6325 * 6336 *
6326 * We are invoked by rtnl_unlock(). 6337 * We are invoked by rtnl_unlock().
6327 * This allows us to deal with problems: 6338 * This allows us to deal with problems:
6328 * 1) We can delete sysfs objects which invoke hotplug 6339 * 1) We can delete sysfs objects which invoke hotplug
6329 * without deadlocking with linkwatch via keventd. 6340 * without deadlocking with linkwatch via keventd.
6330 * 2) Since we run with the RTNL semaphore not held, we can sleep 6341 * 2) Since we run with the RTNL semaphore not held, we can sleep
6331 * safely in order to wait for the netdev refcnt to drop to zero. 6342 * safely in order to wait for the netdev refcnt to drop to zero.
6332 * 6343 *
6333 * We must not return until all unregister events added during 6344 * We must not return until all unregister events added during
6334 * the interval the lock was held have been completed. 6345 * the interval the lock was held have been completed.
6335 */ 6346 */
6336 void netdev_run_todo(void) 6347 void netdev_run_todo(void)
6337 { 6348 {
6338 struct list_head list; 6349 struct list_head list;
6339 6350
6340 /* Snapshot list, allow later requests */ 6351 /* Snapshot list, allow later requests */
6341 list_replace_init(&net_todo_list, &list); 6352 list_replace_init(&net_todo_list, &list);
6342 6353
6343 __rtnl_unlock(); 6354 __rtnl_unlock();
6344 6355
6345 6356
6346 /* Wait for rcu callbacks to finish before next phase */ 6357 /* Wait for rcu callbacks to finish before next phase */
6347 if (!list_empty(&list)) 6358 if (!list_empty(&list))
6348 rcu_barrier(); 6359 rcu_barrier();
6349 6360
6350 while (!list_empty(&list)) { 6361 while (!list_empty(&list)) {
6351 struct net_device *dev 6362 struct net_device *dev
6352 = list_first_entry(&list, struct net_device, todo_list); 6363 = list_first_entry(&list, struct net_device, todo_list);
6353 list_del(&dev->todo_list); 6364 list_del(&dev->todo_list);
6354 6365
6355 rtnl_lock(); 6366 rtnl_lock();
6356 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); 6367 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
6357 __rtnl_unlock(); 6368 __rtnl_unlock();
6358 6369
6359 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { 6370 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
6360 pr_err("network todo '%s' but state %d\n", 6371 pr_err("network todo '%s' but state %d\n",
6361 dev->name, dev->reg_state); 6372 dev->name, dev->reg_state);
6362 dump_stack(); 6373 dump_stack();
6363 continue; 6374 continue;
6364 } 6375 }
6365 6376
6366 dev->reg_state = NETREG_UNREGISTERED; 6377 dev->reg_state = NETREG_UNREGISTERED;
6367 6378
6368 on_each_cpu(flush_backlog, dev, 1); 6379 on_each_cpu(flush_backlog, dev, 1);
6369 6380
6370 netdev_wait_allrefs(dev); 6381 netdev_wait_allrefs(dev);
6371 6382
6372 /* paranoia */ 6383 /* paranoia */
6373 BUG_ON(netdev_refcnt_read(dev)); 6384 BUG_ON(netdev_refcnt_read(dev));
6374 WARN_ON(rcu_access_pointer(dev->ip_ptr)); 6385 WARN_ON(rcu_access_pointer(dev->ip_ptr));
6375 WARN_ON(rcu_access_pointer(dev->ip6_ptr)); 6386 WARN_ON(rcu_access_pointer(dev->ip6_ptr));
6376 WARN_ON(dev->dn_ptr); 6387 WARN_ON(dev->dn_ptr);
6377 6388
6378 if (dev->destructor) 6389 if (dev->destructor)
6379 dev->destructor(dev); 6390 dev->destructor(dev);
6380 6391
6381 /* Free network device */ 6392 /* Free network device */
6382 kobject_put(&dev->dev.kobj); 6393 kobject_put(&dev->dev.kobj);
6383 } 6394 }
6384 } 6395 }
6385 6396
6386 /* Convert net_device_stats to rtnl_link_stats64. They have the same 6397 /* Convert net_device_stats to rtnl_link_stats64. They have the same
6387 * fields in the same order, with only the type differing. 6398 * fields in the same order, with only the type differing.
6388 */ 6399 */
6389 void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, 6400 void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
6390 const struct net_device_stats *netdev_stats) 6401 const struct net_device_stats *netdev_stats)
6391 { 6402 {
6392 #if BITS_PER_LONG == 64 6403 #if BITS_PER_LONG == 64
6393 BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats)); 6404 BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
6394 memcpy(stats64, netdev_stats, sizeof(*stats64)); 6405 memcpy(stats64, netdev_stats, sizeof(*stats64));
6395 #else 6406 #else
6396 size_t i, n = sizeof(*stats64) / sizeof(u64); 6407 size_t i, n = sizeof(*stats64) / sizeof(u64);
6397 const unsigned long *src = (const unsigned long *)netdev_stats; 6408 const unsigned long *src = (const unsigned long *)netdev_stats;
6398 u64 *dst = (u64 *)stats64; 6409 u64 *dst = (u64 *)stats64;
6399 6410
6400 BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) != 6411 BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
6401 sizeof(*stats64) / sizeof(u64)); 6412 sizeof(*stats64) / sizeof(u64));
6402 for (i = 0; i < n; i++) 6413 for (i = 0; i < n; i++)
6403 dst[i] = src[i]; 6414 dst[i] = src[i];
6404 #endif 6415 #endif
6405 } 6416 }
6406 EXPORT_SYMBOL(netdev_stats_to_stats64); 6417 EXPORT_SYMBOL(netdev_stats_to_stats64);
6407 6418
6408 /** 6419 /**
6409 * dev_get_stats - get network device statistics 6420 * dev_get_stats - get network device statistics
6410 * @dev: device to get statistics from 6421 * @dev: device to get statistics from
6411 * @storage: place to store stats 6422 * @storage: place to store stats
6412 * 6423 *
6413 * Get network statistics from device. Return @storage. 6424 * Get network statistics from device. Return @storage.
6414 * The device driver may provide its own method by setting 6425 * The device driver may provide its own method by setting
6415 * dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats; 6426 * dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats;
6416 * otherwise the internal statistics structure is used. 6427 * otherwise the internal statistics structure is used.
6417 */ 6428 */
6418 struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, 6429 struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
6419 struct rtnl_link_stats64 *storage) 6430 struct rtnl_link_stats64 *storage)
6420 { 6431 {
6421 const struct net_device_ops *ops = dev->netdev_ops; 6432 const struct net_device_ops *ops = dev->netdev_ops;
6422 6433
6423 if (ops->ndo_get_stats64) { 6434 if (ops->ndo_get_stats64) {
6424 memset(storage, 0, sizeof(*storage)); 6435 memset(storage, 0, sizeof(*storage));
6425 ops->ndo_get_stats64(dev, storage); 6436 ops->ndo_get_stats64(dev, storage);
6426 } else if (ops->ndo_get_stats) { 6437 } else if (ops->ndo_get_stats) {
6427 netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev)); 6438 netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
6428 } else { 6439 } else {
6429 netdev_stats_to_stats64(storage, &dev->stats); 6440 netdev_stats_to_stats64(storage, &dev->stats);
6430 } 6441 }
6431 storage->rx_dropped += atomic_long_read(&dev->rx_dropped); 6442 storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
6432 return storage; 6443 return storage;
6433 } 6444 }
6434 EXPORT_SYMBOL(dev_get_stats); 6445 EXPORT_SYMBOL(dev_get_stats);
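A hedged example of a caller (my_dump_stats is hypothetical): whichever of the driver methods or the fallback above is used, the caller always gets back a filled-in 64-bit snapshot.

static void my_dump_stats(struct net_device *dev)
{
	struct rtnl_link_stats64 stats;

	dev_get_stats(dev, &stats);
	pr_info("%s: rx_packets=%llu tx_packets=%llu rx_dropped=%llu\n",
		dev->name,
		(unsigned long long)stats.rx_packets,
		(unsigned long long)stats.tx_packets,
		(unsigned long long)stats.rx_dropped);
}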
6435 6446
6436 struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) 6447 struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
6437 { 6448 {
6438 struct netdev_queue *queue = dev_ingress_queue(dev); 6449 struct netdev_queue *queue = dev_ingress_queue(dev);
6439 6450
6440 #ifdef CONFIG_NET_CLS_ACT 6451 #ifdef CONFIG_NET_CLS_ACT
6441 if (queue) 6452 if (queue)
6442 return queue; 6453 return queue;
6443 queue = kzalloc(sizeof(*queue), GFP_KERNEL); 6454 queue = kzalloc(sizeof(*queue), GFP_KERNEL);
6444 if (!queue) 6455 if (!queue)
6445 return NULL; 6456 return NULL;
6446 netdev_init_one_queue(dev, queue, NULL); 6457 netdev_init_one_queue(dev, queue, NULL);
6447 queue->qdisc = &noop_qdisc; 6458 queue->qdisc = &noop_qdisc;
6448 queue->qdisc_sleeping = &noop_qdisc; 6459 queue->qdisc_sleeping = &noop_qdisc;
6449 rcu_assign_pointer(dev->ingress_queue, queue); 6460 rcu_assign_pointer(dev->ingress_queue, queue);
6450 #endif 6461 #endif
6451 return queue; 6462 return queue;
6452 } 6463 }
6453 6464
6454 static const struct ethtool_ops default_ethtool_ops; 6465 static const struct ethtool_ops default_ethtool_ops;
6455 6466
6456 void netdev_set_default_ethtool_ops(struct net_device *dev, 6467 void netdev_set_default_ethtool_ops(struct net_device *dev,
6457 const struct ethtool_ops *ops) 6468 const struct ethtool_ops *ops)
6458 { 6469 {
6459 if (dev->ethtool_ops == &default_ethtool_ops) 6470 if (dev->ethtool_ops == &default_ethtool_ops)
6460 dev->ethtool_ops = ops; 6471 dev->ethtool_ops = ops;
6461 } 6472 }
6462 EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops); 6473 EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
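An illustrative use (the ops table and my_attach are hypothetical): a bus or library layer can install fallback ethtool ops this way without clobbering ops a driver has already set.

static const struct ethtool_ops my_fallback_ethtool_ops = {
	.get_link = ethtool_op_get_link,	/* generic carrier-based link state */
};

static void my_attach(struct net_device *dev)
{
	netdev_set_default_ethtool_ops(dev, &my_fallback_ethtool_ops);
}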
6463 6474
6464 /** 6475 /**
6465 * alloc_netdev_mqs - allocate network device 6476 * alloc_netdev_mqs - allocate network device
6466 * @sizeof_priv: size of private data to allocate space for 6477 * @sizeof_priv: size of private data to allocate space for
6467 * @name: device name format string 6478 * @name: device name format string
6468 * @setup: callback to initialize device 6479 * @setup: callback to initialize device
6469 * @txqs: the number of TX subqueues to allocate 6480 * @txqs: the number of TX subqueues to allocate
6470 * @rxqs: the number of RX subqueues to allocate 6481 * @rxqs: the number of RX subqueues to allocate
6471 * 6482 *
6472 * Allocates a struct net_device with private data area for driver use 6483 * Allocates a struct net_device with private data area for driver use
6473 * and performs basic initialization. Also allocates subqueue structs 6484 * and performs basic initialization. Also allocates subqueue structs
6474 * for each queue on the device. 6485 * for each queue on the device.
6475 */ 6486 */
6476 struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, 6487 struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
6477 void (*setup)(struct net_device *), 6488 void (*setup)(struct net_device *),
6478 unsigned int txqs, unsigned int rxqs) 6489 unsigned int txqs, unsigned int rxqs)
6479 { 6490 {
6480 struct net_device *dev; 6491 struct net_device *dev;
6481 size_t alloc_size; 6492 size_t alloc_size;
6482 struct net_device *p; 6493 struct net_device *p;
6483 6494
6484 BUG_ON(strlen(name) >= sizeof(dev->name)); 6495 BUG_ON(strlen(name) >= sizeof(dev->name));
6485 6496
6486 if (txqs < 1) { 6497 if (txqs < 1) {
6487 pr_err("alloc_netdev: Unable to allocate device with zero queues\n"); 6498 pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
6488 return NULL; 6499 return NULL;
6489 } 6500 }
6490 6501
6491 #ifdef CONFIG_RPS 6502 #ifdef CONFIG_RPS
6492 if (rxqs < 1) { 6503 if (rxqs < 1) {
6493 pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n"); 6504 pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
6494 return NULL; 6505 return NULL;
6495 } 6506 }
6496 #endif 6507 #endif
6497 6508
6498 alloc_size = sizeof(struct net_device); 6509 alloc_size = sizeof(struct net_device);
6499 if (sizeof_priv) { 6510 if (sizeof_priv) {
6500 /* ensure 32-byte alignment of private area */ 6511 /* ensure 32-byte alignment of private area */
6501 alloc_size = ALIGN(alloc_size, NETDEV_ALIGN); 6512 alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
6502 alloc_size += sizeof_priv; 6513 alloc_size += sizeof_priv;
6503 } 6514 }
6504 /* ensure 32-byte alignment of whole construct */ 6515 /* ensure 32-byte alignment of whole construct */
6505 alloc_size += NETDEV_ALIGN - 1; 6516 alloc_size += NETDEV_ALIGN - 1;
6506 6517
6507 p = kzalloc(alloc_size, GFP_KERNEL); 6518 p = kzalloc(alloc_size, GFP_KERNEL);
6508 if (!p) 6519 if (!p)
6509 return NULL; 6520 return NULL;
6510 6521
6511 dev = PTR_ALIGN(p, NETDEV_ALIGN); 6522 dev = PTR_ALIGN(p, NETDEV_ALIGN);
6512 dev->padded = (char *)dev - (char *)p; 6523 dev->padded = (char *)dev - (char *)p;
6513 6524
6514 dev->pcpu_refcnt = alloc_percpu(int); 6525 dev->pcpu_refcnt = alloc_percpu(int);
6515 if (!dev->pcpu_refcnt) 6526 if (!dev->pcpu_refcnt)
6516 goto free_p; 6527 goto free_p;
6517 6528
6518 if (dev_addr_init(dev)) 6529 if (dev_addr_init(dev))
6519 goto free_pcpu; 6530 goto free_pcpu;
6520 6531
6521 dev_mc_init(dev); 6532 dev_mc_init(dev);
6522 dev_uc_init(dev); 6533 dev_uc_init(dev);
6523 6534
6524 dev_net_set(dev, &init_net); 6535 dev_net_set(dev, &init_net);
6525 6536
6526 dev->gso_max_size = GSO_MAX_SIZE; 6537 dev->gso_max_size = GSO_MAX_SIZE;
6527 dev->gso_max_segs = GSO_MAX_SEGS; 6538 dev->gso_max_segs = GSO_MAX_SEGS;
6528 6539
6529 INIT_LIST_HEAD(&dev->napi_list); 6540 INIT_LIST_HEAD(&dev->napi_list);
6530 INIT_LIST_HEAD(&dev->unreg_list); 6541 INIT_LIST_HEAD(&dev->unreg_list);
6531 INIT_LIST_HEAD(&dev->link_watch_list); 6542 INIT_LIST_HEAD(&dev->link_watch_list);
6532 INIT_LIST_HEAD(&dev->upper_dev_list); 6543 INIT_LIST_HEAD(&dev->upper_dev_list);
6533 dev->priv_flags = IFF_XMIT_DST_RELEASE; 6544 dev->priv_flags = IFF_XMIT_DST_RELEASE;
6534 setup(dev); 6545 setup(dev);
6535 6546
6536 dev->num_tx_queues = txqs; 6547 dev->num_tx_queues = txqs;
6537 dev->real_num_tx_queues = txqs; 6548 dev->real_num_tx_queues = txqs;
6538 if (netif_alloc_netdev_queues(dev)) 6549 if (netif_alloc_netdev_queues(dev))
6539 goto free_all; 6550 goto free_all;
6540 6551
6541 #ifdef CONFIG_RPS 6552 #ifdef CONFIG_RPS
6542 dev->num_rx_queues = rxqs; 6553 dev->num_rx_queues = rxqs;
6543 dev->real_num_rx_queues = rxqs; 6554 dev->real_num_rx_queues = rxqs;
6544 if (netif_alloc_rx_queues(dev)) 6555 if (netif_alloc_rx_queues(dev))
6545 goto free_all; 6556 goto free_all;
6546 #endif 6557 #endif
6547 6558
6548 strcpy(dev->name, name); 6559 strcpy(dev->name, name);
6549 dev->group = INIT_NETDEV_GROUP; 6560 dev->group = INIT_NETDEV_GROUP;
6550 if (!dev->ethtool_ops) 6561 if (!dev->ethtool_ops)
6551 dev->ethtool_ops = &default_ethtool_ops; 6562 dev->ethtool_ops = &default_ethtool_ops;
6552 return dev; 6563 return dev;
6553 6564
6554 free_all: 6565 free_all:
6555 free_netdev(dev); 6566 free_netdev(dev);
6556 return NULL; 6567 return NULL;
6557 6568
6558 free_pcpu: 6569 free_pcpu:
6559 free_percpu(dev->pcpu_refcnt); 6570 free_percpu(dev->pcpu_refcnt);
6560 kfree(dev->_tx); 6571 kfree(dev->_tx);
6561 #ifdef CONFIG_RPS 6572 #ifdef CONFIG_RPS
6562 kfree(dev->_rx); 6573 kfree(dev->_rx);
6563 #endif 6574 #endif
6564 6575
6565 free_p: 6576 free_p:
6566 kfree(p); 6577 kfree(p);
6567 return NULL; 6578 return NULL;
6568 } 6579 }
6569 EXPORT_SYMBOL(alloc_netdev_mqs); 6580 EXPORT_SYMBOL(alloc_netdev_mqs);
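As a rough illustration of the allocator above: a driver passes its private-structure size, a printf-style name template, a setup callback, and the TX/RX queue counts, then reaches the aligned private area through netdev_priv(). The private structure, name template, and queue counts below are hypothetical, not part of this commit:

	#include <linux/netdevice.h>
	#include <linux/etherdevice.h>

	struct my_priv {			/* hypothetical driver-private area */
		int irq;
	};

	static struct net_device *my_alloc(void)
	{
		struct net_device *dev;
		struct my_priv *priv;

		/* 4 TX and 4 RX queues; ether_setup() fills in Ethernet defaults */
		dev = alloc_netdev_mqs(sizeof(struct my_priv), "myeth%d",
				       ether_setup, 4, 4);
		if (!dev)
			return NULL;

		priv = netdev_priv(dev);	/* inside the NETDEV_ALIGN-aligned tail */
		priv->irq = -1;
		return dev;
	}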
6570 6581
6571 /** 6582 /**
6572 * free_netdev - free network device 6583 * free_netdev - free network device
6573 * @dev: device 6584 * @dev: device
6574 * 6585 *
6575 * This function does the last stage of destroying an allocated device 6586 * This function does the last stage of destroying an allocated device
6576 * interface. The reference to the device object is released. 6587 * interface. The reference to the device object is released.
6577 * If this is the last reference then it will be freed. 6588 * If this is the last reference then it will be freed.
6578 */ 6589 */
6579 void free_netdev(struct net_device *dev) 6590 void free_netdev(struct net_device *dev)
6580 { 6591 {
6581 struct napi_struct *p, *n; 6592 struct napi_struct *p, *n;
6582 6593
6583 release_net(dev_net(dev)); 6594 release_net(dev_net(dev));
6584 6595
6585 kfree(dev->_tx); 6596 kfree(dev->_tx);
6586 #ifdef CONFIG_RPS 6597 #ifdef CONFIG_RPS
6587 kfree(dev->_rx); 6598 kfree(dev->_rx);
6588 #endif 6599 #endif
6589 6600
6590 kfree(rcu_dereference_protected(dev->ingress_queue, 1)); 6601 kfree(rcu_dereference_protected(dev->ingress_queue, 1));
6591 6602
6592 /* Flush device addresses */ 6603 /* Flush device addresses */
6593 dev_addr_flush(dev); 6604 dev_addr_flush(dev);
6594 6605
6595 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) 6606 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
6596 netif_napi_del(p); 6607 netif_napi_del(p);
6597 6608
6598 free_percpu(dev->pcpu_refcnt); 6609 free_percpu(dev->pcpu_refcnt);
6599 dev->pcpu_refcnt = NULL; 6610 dev->pcpu_refcnt = NULL;
6600 6611
6601 /* Compatibility with error handling in drivers */ 6612 /* Compatibility with error handling in drivers */
6602 if (dev->reg_state == NETREG_UNINITIALIZED) { 6613 if (dev->reg_state == NETREG_UNINITIALIZED) {
6603 kfree((char *)dev - dev->padded); 6614 kfree((char *)dev - dev->padded);
6604 return; 6615 return;
6605 } 6616 }
6606 6617
6607 BUG_ON(dev->reg_state != NETREG_UNREGISTERED); 6618 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
6608 dev->reg_state = NETREG_RELEASED; 6619 dev->reg_state = NETREG_RELEASED;
6609 6620
6610 /* will free via device release */ 6621 /* will free via device release */
6611 put_device(&dev->dev); 6622 put_device(&dev->dev);
6612 } 6623 }
6613 EXPORT_SYMBOL(free_netdev); 6624 EXPORT_SYMBOL(free_netdev);
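A sketch of the two cases the reg_state check above distinguishes: a device that never completed registration is simply kfree()d, while a fully unregistered one is released through put_device(). The probe/remove pair below is hypothetical:

	#include <linux/etherdevice.h>
	#include <linux/netdevice.h>

	static int my_probe(void)
	{
		struct net_device *dev;
		int err;

		dev = alloc_etherdev(0);
		if (!dev)
			return -ENOMEM;

		err = register_netdev(dev);
		if (err) {
			/* free_netdev() copes with both early and late
			 * registration failures */
			free_netdev(dev);
			return err;
		}
		return 0;
	}

	static void my_remove(struct net_device *dev)
	{
		unregister_netdev(dev);	/* reg_state is NETREG_UNREGISTERED afterwards */
		free_netdev(dev);	/* released via put_device() */
	}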
6614 6625
6615 /** 6626 /**
6616 * synchronize_net - Synchronize with packet receive processing 6627 * synchronize_net - Synchronize with packet receive processing
6617 * 6628 *
6618 * Wait for packets currently being received to be done. 6629 * Wait for packets currently being received to be done.
6619 * Does not block later packets from starting. 6630 * Does not block later packets from starting.
6620 */ 6631 */
6621 void synchronize_net(void) 6632 void synchronize_net(void)
6622 { 6633 {
6623 might_sleep(); 6634 might_sleep();
6624 if (rtnl_is_locked()) 6635 if (rtnl_is_locked())
6625 synchronize_rcu_expedited(); 6636 synchronize_rcu_expedited();
6626 else 6637 else
6627 synchronize_rcu(); 6638 synchronize_rcu();
6628 } 6639 }
6629 EXPORT_SYMBOL(synchronize_net); 6640 EXPORT_SYMBOL(synchronize_net);
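A sketch of the retract-then-free pattern this helper supports on the receive path; the hook structure and RCU pointer are hypothetical:

	#include <linux/netdevice.h>
	#include <linux/skbuff.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct my_hook {
		void (*fn)(struct sk_buff *skb);
	};

	static struct my_hook __rcu *active_hook;

	static void my_retire_hook(void)
	{
		struct my_hook *old = rcu_dereference_protected(active_hook, 1);

		RCU_INIT_POINTER(active_hook, NULL);
		synchronize_net();	/* receivers still inside rcu_read_lock() finish */
		kfree(old);
	}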
6630 6641
6631 /** 6642 /**
6632 * unregister_netdevice_queue - remove device from the kernel 6643 * unregister_netdevice_queue - remove device from the kernel
6633 * @dev: device 6644 * @dev: device
6634 * @head: list 6645 * @head: list
6635 * 6646 *
6636 * This function shuts down a device interface and removes it 6647 * This function shuts down a device interface and removes it
6637 * from the kernel tables. 6648 * from the kernel tables.
6638 * If head not NULL, device is queued to be unregistered later. 6649 * If head not NULL, device is queued to be unregistered later.
6639 * 6650 *
6640 * Callers must hold the rtnl semaphore. You may want 6651 * Callers must hold the rtnl semaphore. You may want
6641 * unregister_netdev() instead of this. 6652 * unregister_netdev() instead of this.
6642 */ 6653 */
6643 6654
6644 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head) 6655 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
6645 { 6656 {
6646 ASSERT_RTNL(); 6657 ASSERT_RTNL();
6647 6658
6648 if (head) { 6659 if (head) {
6649 list_move_tail(&dev->unreg_list, head); 6660 list_move_tail(&dev->unreg_list, head);
6650 } else { 6661 } else {
6651 rollback_registered(dev); 6662 rollback_registered(dev);
6652 /* Finish processing unregister after unlock */ 6663 /* Finish processing unregister after unlock */
6653 net_set_todo(dev); 6664 net_set_todo(dev);
6654 } 6665 }
6655 } 6666 }
6656 EXPORT_SYMBOL(unregister_netdevice_queue); 6667 EXPORT_SYMBOL(unregister_netdevice_queue);
6657 6668
6658 /** 6669 /**
6659 * unregister_netdevice_many - unregister many devices 6670 * unregister_netdevice_many - unregister many devices
6660 * @head: list of devices 6671 * @head: list of devices
6661 */ 6672 */
6662 void unregister_netdevice_many(struct list_head *head) 6673 void unregister_netdevice_many(struct list_head *head)
6663 { 6674 {
6664 struct net_device *dev; 6675 struct net_device *dev;
6665 6676
6666 if (!list_empty(head)) { 6677 if (!list_empty(head)) {
6667 rollback_registered_many(head); 6678 rollback_registered_many(head);
6668 list_for_each_entry(dev, head, unreg_list) 6679 list_for_each_entry(dev, head, unreg_list)
6669 net_set_todo(dev); 6680 net_set_todo(dev);
6670 } 6681 }
6671 } 6682 }
6672 EXPORT_SYMBOL(unregister_netdevice_many); 6683 EXPORT_SYMBOL(unregister_netdevice_many);
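A sketch of the batching these two helpers are designed for: queue several devices onto one list under a single RTNL hold, then tear them all down in one pass instead of paying the rollback and grace-period cost per device. The array interface here is illustrative only:

	#include <linux/list.h>
	#include <linux/netdevice.h>
	#include <linux/rtnetlink.h>

	static void my_destroy_many(struct net_device *devs[], int n)
	{
		LIST_HEAD(kill_list);
		int i;

		rtnl_lock();
		for (i = 0; i < n; i++)
			unregister_netdevice_queue(devs[i], &kill_list);
		unregister_netdevice_many(&kill_list);	/* one rollback for the batch */
		rtnl_unlock();
	}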
6673 6684
6674 /** 6685 /**
6675 * unregister_netdev - remove device from the kernel 6686 * unregister_netdev - remove device from the kernel
6676 * @dev: device 6687 * @dev: device
6677 * 6688 *
6678 * This function shuts down a device interface and removes it 6689 * This function shuts down a device interface and removes it
6679 * from the kernel tables. 6690 * from the kernel tables.
6680 * 6691 *
6681 * This is just a wrapper for unregister_netdevice that takes 6692 * This is just a wrapper for unregister_netdevice that takes
6682 * the rtnl semaphore. In general you want to use this and not 6693 * the rtnl semaphore. In general you want to use this and not
6683 * unregister_netdevice. 6694 * unregister_netdevice.
6684 */ 6695 */
6685 void unregister_netdev(struct net_device *dev) 6696 void unregister_netdev(struct net_device *dev)
6686 { 6697 {
6687 rtnl_lock(); 6698 rtnl_lock();
6688 unregister_netdevice(dev); 6699 unregister_netdevice(dev);
6689 rtnl_unlock(); 6700 rtnl_unlock();
6690 } 6701 }
6691 EXPORT_SYMBOL(unregister_netdev); 6702 EXPORT_SYMBOL(unregister_netdev);
6692 6703
6693 /** 6704 /**
6694 * dev_change_net_namespace - move device to different network namespace 6705 * dev_change_net_namespace - move device to different network namespace
6695 * @dev: device 6706 * @dev: device
6696 * @net: network namespace 6707 * @net: network namespace
6697 * @pat: If not NULL name pattern to try if the current device name 6708 * @pat: If not NULL name pattern to try if the current device name
6698 * is already taken in the destination network namespace. 6709 * is already taken in the destination network namespace.
6699 * 6710 *
6700 * This function shuts down a device interface and moves it 6711 * This function shuts down a device interface and moves it
6701 * to a new network namespace. On success 0 is returned, on 6712 * to a new network namespace. On success 0 is returned, on
6702 * a failure a negative errno code is returned. 6713 * a failure a negative errno code is returned.
6703 * 6714 *
6704 * Callers must hold the rtnl semaphore. 6715 * Callers must hold the rtnl semaphore.
6705 */ 6716 */
6706 6717
6707 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) 6718 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
6708 { 6719 {
6709 int err; 6720 int err;
6710 6721
6711 ASSERT_RTNL(); 6722 ASSERT_RTNL();
6712 6723
6713 /* Don't allow namespace local devices to be moved. */ 6724 /* Don't allow namespace local devices to be moved. */
6714 err = -EINVAL; 6725 err = -EINVAL;
6715 if (dev->features & NETIF_F_NETNS_LOCAL) 6726 if (dev->features & NETIF_F_NETNS_LOCAL)
6716 goto out; 6727 goto out;
6717 6728
6718 /* Ensure the device has been registered */ 6729 /* Ensure the device has been registered */
6719 if (dev->reg_state != NETREG_REGISTERED) 6730 if (dev->reg_state != NETREG_REGISTERED)
6720 goto out; 6731 goto out;
6721 6732
6722 /* Get out if there is nothing to do */ 6733 /* Get out if there is nothing to do */
6723 err = 0; 6734 err = 0;
6724 if (net_eq(dev_net(dev), net)) 6735 if (net_eq(dev_net(dev), net))
6725 goto out; 6736 goto out;
6726 6737
6727 /* Pick the destination device name, and ensure 6738 /* Pick the destination device name, and ensure
6728 * we can use it in the destination network namespace. 6739 * we can use it in the destination network namespace.
6729 */ 6740 */
6730 err = -EEXIST; 6741 err = -EEXIST;
6731 if (__dev_get_by_name(net, dev->name)) { 6742 if (__dev_get_by_name(net, dev->name)) {
6732 /* We get here if we can't use the current device name */ 6743 /* We get here if we can't use the current device name */
6733 if (!pat) 6744 if (!pat)
6734 goto out; 6745 goto out;
6735 if (dev_get_valid_name(net, dev, pat) < 0) 6746 if (dev_get_valid_name(net, dev, pat) < 0)
6736 goto out; 6747 goto out;
6737 } 6748 }
6738 6749
6739 /* 6750 /*
6740 * And now a mini version of register_netdevice and unregister_netdevice. 6751 * And now a mini version of register_netdevice and unregister_netdevice.
6741 */ 6752 */
6742 6753
6743 /* If device is running close it first. */ 6754 /* If device is running close it first. */
6744 dev_close(dev); 6755 dev_close(dev);
6745 6756
6746 /* And unlink it from device chain */ 6757 /* And unlink it from device chain */
6747 err = -ENODEV; 6758 err = -ENODEV;
6748 unlist_netdevice(dev); 6759 unlist_netdevice(dev);
6749 6760
6750 synchronize_net(); 6761 synchronize_net();
6751 6762
6752 /* Shutdown queueing discipline. */ 6763 /* Shutdown queueing discipline. */
6753 dev_shutdown(dev); 6764 dev_shutdown(dev);
6754 6765
6755 /* Notify protocols that we are about to destroy 6766 /* Notify protocols that we are about to destroy
6756 this device. They should clean all the things. 6767 this device. They should clean all the things.
6757 6768
6758 Note that dev->reg_state stays at NETREG_REGISTERED. 6769 Note that dev->reg_state stays at NETREG_REGISTERED.
6759 This is wanted because this way 8021q and macvlan know 6770 This is wanted because this way 8021q and macvlan know
6760 the device is just moving and can keep their slaves up. 6771 the device is just moving and can keep their slaves up.
6761 */ 6772 */
6762 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 6773 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
6763 rcu_barrier(); 6774 rcu_barrier();
6764 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); 6775 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
6765 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); 6776 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
6766 6777
6767 /* 6778 /*
6768 * Flush the unicast and multicast chains 6779 * Flush the unicast and multicast chains
6769 */ 6780 */
6770 dev_uc_flush(dev); 6781 dev_uc_flush(dev);
6771 dev_mc_flush(dev); 6782 dev_mc_flush(dev);
6772 6783
6773 /* Send a netdev-removed uevent to the old namespace */ 6784 /* Send a netdev-removed uevent to the old namespace */
6774 kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE); 6785 kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE);
6775 6786
6776 /* Actually switch the network namespace */ 6787 /* Actually switch the network namespace */
6777 dev_net_set(dev, net); 6788 dev_net_set(dev, net);
6778 6789
6779 /* If there is an ifindex conflict assign a new one */ 6790 /* If there is an ifindex conflict assign a new one */
6780 if (__dev_get_by_index(net, dev->ifindex)) { 6791 if (__dev_get_by_index(net, dev->ifindex)) {
6781 int iflink = (dev->iflink == dev->ifindex); 6792 int iflink = (dev->iflink == dev->ifindex);
6782 dev->ifindex = dev_new_index(net); 6793 dev->ifindex = dev_new_index(net);
6783 if (iflink) 6794 if (iflink)
6784 dev->iflink = dev->ifindex; 6795 dev->iflink = dev->ifindex;
6785 } 6796 }
6786 6797
6787 /* Send a netdev-add uevent to the new namespace */ 6798 /* Send a netdev-add uevent to the new namespace */
6788 kobject_uevent(&dev->dev.kobj, KOBJ_ADD); 6799 kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
6789 6800
6790 /* Fixup kobjects */ 6801 /* Fixup kobjects */
6791 err = device_rename(&dev->dev, dev->name); 6802 err = device_rename(&dev->dev, dev->name);
6792 WARN_ON(err); 6803 WARN_ON(err);
6793 6804
6794 /* Add the device back in the hashes */ 6805 /* Add the device back in the hashes */
6795 list_netdevice(dev); 6806 list_netdevice(dev);
6796 6807
6797 /* Notify protocols that a new device appeared. */ 6808 /* Notify protocols that a new device appeared. */
6798 call_netdevice_notifiers(NETDEV_REGISTER, dev); 6809 call_netdevice_notifiers(NETDEV_REGISTER, dev);
6799 6810
6800 /* 6811 /*
6801 * Prevent userspace races by waiting until the network 6812 * Prevent userspace races by waiting until the network
6802 * device is fully setup before sending notifications. 6813 * device is fully setup before sending notifications.
6803 */ 6814 */
6804 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); 6815 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
6805 6816
6806 synchronize_net(); 6817 synchronize_net();
6807 err = 0; 6818 err = 0;
6808 out: 6819 out:
6809 return err; 6820 return err;
6810 } 6821 }
6811 EXPORT_SYMBOL_GPL(dev_change_net_namespace); 6822 EXPORT_SYMBOL_GPL(dev_change_net_namespace);
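A sketch of a caller: the move must run under RTNL, and the pattern argument ("eth%d" here, purely illustrative) is only consulted if the device's current name is already taken in the target namespace:

	#include <linux/netdevice.h>
	#include <linux/rtnetlink.h>

	static int my_move_dev(struct net_device *dev, struct net *target)
	{
		int err;

		rtnl_lock();
		err = dev_change_net_namespace(dev, target, "eth%d");
		rtnl_unlock();
		return err;
	}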
6812 6823
6813 static int dev_cpu_callback(struct notifier_block *nfb, 6824 static int dev_cpu_callback(struct notifier_block *nfb,
6814 unsigned long action, 6825 unsigned long action,
6815 void *ocpu) 6826 void *ocpu)
6816 { 6827 {
6817 struct sk_buff **list_skb; 6828 struct sk_buff **list_skb;
6818 struct sk_buff *skb; 6829 struct sk_buff *skb;
6819 unsigned int cpu, oldcpu = (unsigned long)ocpu; 6830 unsigned int cpu, oldcpu = (unsigned long)ocpu;
6820 struct softnet_data *sd, *oldsd; 6831 struct softnet_data *sd, *oldsd;
6821 6832
6822 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) 6833 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
6823 return NOTIFY_OK; 6834 return NOTIFY_OK;
6824 6835
6825 local_irq_disable(); 6836 local_irq_disable();
6826 cpu = smp_processor_id(); 6837 cpu = smp_processor_id();
6827 sd = &per_cpu(softnet_data, cpu); 6838 sd = &per_cpu(softnet_data, cpu);
6828 oldsd = &per_cpu(softnet_data, oldcpu); 6839 oldsd = &per_cpu(softnet_data, oldcpu);
6829 6840
6830 /* Find end of our completion_queue. */ 6841 /* Find end of our completion_queue. */
6831 list_skb = &sd->completion_queue; 6842 list_skb = &sd->completion_queue;
6832 while (*list_skb) 6843 while (*list_skb)
6833 list_skb = &(*list_skb)->next; 6844 list_skb = &(*list_skb)->next;
6834 /* Append completion queue from offline CPU. */ 6845 /* Append completion queue from offline CPU. */
6835 *list_skb = oldsd->completion_queue; 6846 *list_skb = oldsd->completion_queue;
6836 oldsd->completion_queue = NULL; 6847 oldsd->completion_queue = NULL;
6837 6848
6838 /* Append output queue from offline CPU. */ 6849 /* Append output queue from offline CPU. */
6839 if (oldsd->output_queue) { 6850 if (oldsd->output_queue) {
6840 *sd->output_queue_tailp = oldsd->output_queue; 6851 *sd->output_queue_tailp = oldsd->output_queue;
6841 sd->output_queue_tailp = oldsd->output_queue_tailp; 6852 sd->output_queue_tailp = oldsd->output_queue_tailp;
6842 oldsd->output_queue = NULL; 6853 oldsd->output_queue = NULL;
6843 oldsd->output_queue_tailp = &oldsd->output_queue; 6854 oldsd->output_queue_tailp = &oldsd->output_queue;
6844 } 6855 }
6845 /* Append NAPI poll list from offline CPU. */ 6856 /* Append NAPI poll list from offline CPU. */
6846 if (!list_empty(&oldsd->poll_list)) { 6857 if (!list_empty(&oldsd->poll_list)) {
6847 list_splice_init(&oldsd->poll_list, &sd->poll_list); 6858 list_splice_init(&oldsd->poll_list, &sd->poll_list);
6848 raise_softirq_irqoff(NET_RX_SOFTIRQ); 6859 raise_softirq_irqoff(NET_RX_SOFTIRQ);
6849 } 6860 }
6850 6861
6851 raise_softirq_irqoff(NET_TX_SOFTIRQ); 6862 raise_softirq_irqoff(NET_TX_SOFTIRQ);
6852 local_irq_enable(); 6863 local_irq_enable();
6853 6864
6854 /* Process offline CPU's input_pkt_queue */ 6865 /* Process offline CPU's input_pkt_queue */
6855 while ((skb = __skb_dequeue(&oldsd->process_queue))) { 6866 while ((skb = __skb_dequeue(&oldsd->process_queue))) {
6856 netif_rx(skb); 6867 netif_rx(skb);
6857 input_queue_head_incr(oldsd); 6868 input_queue_head_incr(oldsd);
6858 } 6869 }
6859 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { 6870 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
6860 netif_rx(skb); 6871 netif_rx(skb);
6861 input_queue_head_incr(oldsd); 6872 input_queue_head_incr(oldsd);
6862 } 6873 }
6863 6874
6864 return NOTIFY_OK; 6875 return NOTIFY_OK;
6865 } 6876 }
6866 6877
6867 6878
6868 /** 6879 /**
6869 * netdev_increment_features - increment feature set by one 6880 * netdev_increment_features - increment feature set by one
6870 * @all: current feature set 6881 * @all: current feature set
6871 * @one: new feature set 6882 * @one: new feature set
6872 * @mask: mask feature set 6883 * @mask: mask feature set
6873 * 6884 *
6874 * Computes a new feature set after adding a device with feature set 6885 * Computes a new feature set after adding a device with feature set
6875 * @one to the master device with current feature set @all. Will not 6886 * @one to the master device with current feature set @all. Will not
6876 * enable anything that is off in @mask. Returns the new feature set. 6887 * enable anything that is off in @mask. Returns the new feature set.
6877 */ 6888 */
6878 netdev_features_t netdev_increment_features(netdev_features_t all, 6889 netdev_features_t netdev_increment_features(netdev_features_t all,
6879 netdev_features_t one, netdev_features_t mask) 6890 netdev_features_t one, netdev_features_t mask)
6880 { 6891 {
6881 if (mask & NETIF_F_GEN_CSUM) 6892 if (mask & NETIF_F_GEN_CSUM)
6882 mask |= NETIF_F_ALL_CSUM; 6893 mask |= NETIF_F_ALL_CSUM;
6883 mask |= NETIF_F_VLAN_CHALLENGED; 6894 mask |= NETIF_F_VLAN_CHALLENGED;
6884 6895
6885 all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask; 6896 all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
6886 all &= one | ~NETIF_F_ALL_FOR_ALL; 6897 all &= one | ~NETIF_F_ALL_FOR_ALL;
6887 6898
6888 /* If one device supports hw checksumming, set for all. */ 6899 /* If one device supports hw checksumming, set for all. */
6889 if (all & NETIF_F_GEN_CSUM) 6900 if (all & NETIF_F_GEN_CSUM)
6890 all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); 6901 all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
6891 6902
6892 return all; 6903 return all;
6893 } 6904 }
6894 EXPORT_SYMBOL(netdev_increment_features); 6905 EXPORT_SYMBOL(netdev_increment_features);
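A sketch of how an aggregating device (bonding-style) might fold its slaves' feature sets together with this helper; the array interface and the choice of seeding the accumulator with the mask are illustrative:

	#include <linux/netdevice.h>

	static netdev_features_t my_master_features(struct net_device *slaves[],
						    int n, netdev_features_t mask)
	{
		netdev_features_t all = mask;	/* start from what the master allows */
		int i;

		for (i = 0; i < n; i++)
			all = netdev_increment_features(all, slaves[i]->features, mask);

		return all;
	}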
6895 6906
6896 static struct hlist_head *netdev_create_hash(void) 6907 static struct hlist_head *netdev_create_hash(void)
6897 { 6908 {
6898 int i; 6909 int i;
6899 struct hlist_head *hash; 6910 struct hlist_head *hash;
6900 6911
6901 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL); 6912 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
6902 if (hash != NULL) 6913 if (hash != NULL)
6903 for (i = 0; i < NETDEV_HASHENTRIES; i++) 6914 for (i = 0; i < NETDEV_HASHENTRIES; i++)
6904 INIT_HLIST_HEAD(&hash[i]); 6915 INIT_HLIST_HEAD(&hash[i]);
6905 6916
6906 return hash; 6917 return hash;
6907 } 6918 }
6908 6919
6909 /* Initialize per network namespace state */ 6920 /* Initialize per network namespace state */
6910 static int __net_init netdev_init(struct net *net) 6921 static int __net_init netdev_init(struct net *net)
6911 { 6922 {
6912 if (net != &init_net) 6923 if (net != &init_net)
6913 INIT_LIST_HEAD(&net->dev_base_head); 6924 INIT_LIST_HEAD(&net->dev_base_head);
6914 6925
6915 net->dev_name_head = netdev_create_hash(); 6926 net->dev_name_head = netdev_create_hash();
6916 if (net->dev_name_head == NULL) 6927 if (net->dev_name_head == NULL)
6917 goto err_name; 6928 goto err_name;
6918 6929
6919 net->dev_index_head = netdev_create_hash(); 6930 net->dev_index_head = netdev_create_hash();
6920 if (net->dev_index_head == NULL) 6931 if (net->dev_index_head == NULL)
6921 goto err_idx; 6932 goto err_idx;
6922 6933
6923 return 0; 6934 return 0;
6924 6935
6925 err_idx: 6936 err_idx:
6926 kfree(net->dev_name_head); 6937 kfree(net->dev_name_head);
6927 err_name: 6938 err_name:
6928 return -ENOMEM; 6939 return -ENOMEM;
6929 } 6940 }
6930 6941
6931 /** 6942 /**
6932 * netdev_drivername - network driver for the device 6943 * netdev_drivername - network driver for the device
6933 * @dev: network device 6944 * @dev: network device
6934 * 6945 *
6935 * Determine network driver for device. 6946 * Determine network driver for device.
6936 */ 6947 */
6937 const char *netdev_drivername(const struct net_device *dev) 6948 const char *netdev_drivername(const struct net_device *dev)
6938 { 6949 {
6939 const struct device_driver *driver; 6950 const struct device_driver *driver;
6940 const struct device *parent; 6951 const struct device *parent;
6941 const char *empty = ""; 6952 const char *empty = "";
6942 6953
6943 parent = dev->dev.parent; 6954 parent = dev->dev.parent;
6944 if (!parent) 6955 if (!parent)
6945 return empty; 6956 return empty;
6946 6957
6947 driver = parent->driver; 6958 driver = parent->driver;
6948 if (driver && driver->name) 6959 if (driver && driver->name)
6949 return driver->name; 6960 return driver->name;
6950 return empty; 6961 return empty;
6951 } 6962 }
6952 6963
6953 static int __netdev_printk(const char *level, const struct net_device *dev, 6964 static int __netdev_printk(const char *level, const struct net_device *dev,
6954 struct va_format *vaf) 6965 struct va_format *vaf)
6955 { 6966 {
6956 int r; 6967 int r;
6957 6968
6958 if (dev && dev->dev.parent) { 6969 if (dev && dev->dev.parent) {
6959 r = dev_printk_emit(level[1] - '0', 6970 r = dev_printk_emit(level[1] - '0',
6960 dev->dev.parent, 6971 dev->dev.parent,
6961 "%s %s %s: %pV", 6972 "%s %s %s: %pV",
6962 dev_driver_string(dev->dev.parent), 6973 dev_driver_string(dev->dev.parent),
6963 dev_name(dev->dev.parent), 6974 dev_name(dev->dev.parent),
6964 netdev_name(dev), vaf); 6975 netdev_name(dev), vaf);
6965 } else if (dev) { 6976 } else if (dev) {
6966 r = printk("%s%s: %pV", level, netdev_name(dev), vaf); 6977 r = printk("%s%s: %pV", level, netdev_name(dev), vaf);
6967 } else { 6978 } else {
6968 r = printk("%s(NULL net_device): %pV", level, vaf); 6979 r = printk("%s(NULL net_device): %pV", level, vaf);
6969 } 6980 }
6970 6981
6971 return r; 6982 return r;
6972 } 6983 }
6973 6984
6974 int netdev_printk(const char *level, const struct net_device *dev, 6985 int netdev_printk(const char *level, const struct net_device *dev,
6975 const char *format, ...) 6986 const char *format, ...)
6976 { 6987 {
6977 struct va_format vaf; 6988 struct va_format vaf;
6978 va_list args; 6989 va_list args;
6979 int r; 6990 int r;
6980 6991
6981 va_start(args, format); 6992 va_start(args, format);
6982 6993
6983 vaf.fmt = format; 6994 vaf.fmt = format;
6984 vaf.va = &args; 6995 vaf.va = &args;
6985 6996
6986 r = __netdev_printk(level, dev, &vaf); 6997 r = __netdev_printk(level, dev, &vaf);
6987 6998
6988 va_end(args); 6999 va_end(args);
6989 7000
6990 return r; 7001 return r;
6991 } 7002 }
6992 EXPORT_SYMBOL(netdev_printk); 7003 EXPORT_SYMBOL(netdev_printk);
6993 7004
6994 #define define_netdev_printk_level(func, level) \ 7005 #define define_netdev_printk_level(func, level) \
6995 int func(const struct net_device *dev, const char *fmt, ...) \ 7006 int func(const struct net_device *dev, const char *fmt, ...) \
6996 { \ 7007 { \
6997 int r; \ 7008 int r; \
6998 struct va_format vaf; \ 7009 struct va_format vaf; \
6999 va_list args; \ 7010 va_list args; \
7000 \ 7011 \
7001 va_start(args, fmt); \ 7012 va_start(args, fmt); \
7002 \ 7013 \
7003 vaf.fmt = fmt; \ 7014 vaf.fmt = fmt; \
7004 vaf.va = &args; \ 7015 vaf.va = &args; \
7005 \ 7016 \
7006 r = __netdev_printk(level, dev, &vaf); \ 7017 r = __netdev_printk(level, dev, &vaf); \
7007 \ 7018 \
7008 va_end(args); \ 7019 va_end(args); \
7009 \ 7020 \
7010 return r; \ 7021 return r; \
7011 } \ 7022 } \
7012 EXPORT_SYMBOL(func); 7023 EXPORT_SYMBOL(func);
7013 7024
7014 define_netdev_printk_level(netdev_emerg, KERN_EMERG); 7025 define_netdev_printk_level(netdev_emerg, KERN_EMERG);
7015 define_netdev_printk_level(netdev_alert, KERN_ALERT); 7026 define_netdev_printk_level(netdev_alert, KERN_ALERT);
7016 define_netdev_printk_level(netdev_crit, KERN_CRIT); 7027 define_netdev_printk_level(netdev_crit, KERN_CRIT);
7017 define_netdev_printk_level(netdev_err, KERN_ERR); 7028 define_netdev_printk_level(netdev_err, KERN_ERR);
7018 define_netdev_printk_level(netdev_warn, KERN_WARNING); 7029 define_netdev_printk_level(netdev_warn, KERN_WARNING);
7019 define_netdev_printk_level(netdev_notice, KERN_NOTICE); 7030 define_netdev_printk_level(netdev_notice, KERN_NOTICE);
7020 define_netdev_printk_level(netdev_info, KERN_INFO); 7031 define_netdev_printk_level(netdev_info, KERN_INFO);
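The helpers generated above are used like dev_err()/dev_info() but prefix the message with the driver and interface name; a hypothetical ndo_open-style user:

	#include <linux/netdevice.h>

	static int my_open(struct net_device *dev)
	{
		if (!netif_carrier_ok(dev))
			netdev_warn(dev, "opening with no link\n");

		netdev_info(dev, "up, %u TX queue(s)\n", dev->real_num_tx_queues);
		return 0;
	}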
7021 7032
7022 static void __net_exit netdev_exit(struct net *net) 7033 static void __net_exit netdev_exit(struct net *net)
7023 { 7034 {
7024 kfree(net->dev_name_head); 7035 kfree(net->dev_name_head);
7025 kfree(net->dev_index_head); 7036 kfree(net->dev_index_head);
7026 } 7037 }
7027 7038
7028 static struct pernet_operations __net_initdata netdev_net_ops = { 7039 static struct pernet_operations __net_initdata netdev_net_ops = {
7029 .init = netdev_init, 7040 .init = netdev_init,
7030 .exit = netdev_exit, 7041 .exit = netdev_exit,
7031 }; 7042 };
7032 7043
7033 static void __net_exit default_device_exit(struct net *net) 7044 static void __net_exit default_device_exit(struct net *net)
7034 { 7045 {
7035 struct net_device *dev, *aux; 7046 struct net_device *dev, *aux;
7036 /* 7047 /*
7037 * Push all migratable network devices back to the 7048 * Push all migratable network devices back to the
7038 * initial network namespace 7049 * initial network namespace
7039 */ 7050 */
7040 rtnl_lock(); 7051 rtnl_lock();
7041 for_each_netdev_safe(net, dev, aux) { 7052 for_each_netdev_safe(net, dev, aux) {
7042 int err; 7053 int err;
7043 char fb_name[IFNAMSIZ]; 7054 char fb_name[IFNAMSIZ];
7044 7055
7045 /* Ignore unmoveable devices (i.e. loopback) */ 7056 /* Ignore unmoveable devices (i.e. loopback) */
7046 if (dev->features & NETIF_F_NETNS_LOCAL) 7057 if (dev->features & NETIF_F_NETNS_LOCAL)
7047 continue; 7058 continue;
7048 7059
7049 /* Leave virtual devices for the generic cleanup */ 7060 /* Leave virtual devices for the generic cleanup */
7050 if (dev->rtnl_link_ops) 7061 if (dev->rtnl_link_ops)
7051 continue; 7062 continue;
7052 7063
7053 /* Push remaining network devices to init_net */ 7064 /* Push remaining network devices to init_net */
7054 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); 7065 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
7055 err = dev_change_net_namespace(dev, &init_net, fb_name); 7066 err = dev_change_net_namespace(dev, &init_net, fb_name);
7056 if (err) { 7067 if (err) {
7057 pr_emerg("%s: failed to move %s to init_net: %d\n", 7068 pr_emerg("%s: failed to move %s to init_net: %d\n",
7058 __func__, dev->name, err); 7069 __func__, dev->name, err);
7059 BUG(); 7070 BUG();
7060 } 7071 }
7061 } 7072 }
7062 rtnl_unlock(); 7073 rtnl_unlock();
7063 } 7074 }
7064 7075
7065 static void __net_exit default_device_exit_batch(struct list_head *net_list) 7076 static void __net_exit default_device_exit_batch(struct list_head *net_list)
7066 { 7077 {
7067 /* At exit all network devices must be removed from a network 7078 /* At exit all network devices must be removed from a network
7068 * namespace. Do this in the reverse order of registration. 7079 * namespace. Do this in the reverse order of registration.
7069 * Do this across as many network namespaces as possible to 7080 * Do this across as many network namespaces as possible to
7070 * improve batching efficiency. 7081 * improve batching efficiency.
7071 */ 7082 */
7072 struct net_device *dev; 7083 struct net_device *dev;
7073 struct net *net; 7084 struct net *net;
7074 LIST_HEAD(dev_kill_list); 7085 LIST_HEAD(dev_kill_list);
7075 7086
7076 rtnl_lock(); 7087 rtnl_lock();
7077 list_for_each_entry(net, net_list, exit_list) { 7088 list_for_each_entry(net, net_list, exit_list) {
7078 for_each_netdev_reverse(net, dev) { 7089 for_each_netdev_reverse(net, dev) {
7079 if (dev->rtnl_link_ops) 7090 if (dev->rtnl_link_ops)
7080 dev->rtnl_link_ops->dellink(dev, &dev_kill_list); 7091 dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
7081 else 7092 else
7082 unregister_netdevice_queue(dev, &dev_kill_list); 7093 unregister_netdevice_queue(dev, &dev_kill_list);
7083 } 7094 }
7084 } 7095 }
7085 unregister_netdevice_many(&dev_kill_list); 7096 unregister_netdevice_many(&dev_kill_list);
7086 list_del(&dev_kill_list); 7097 list_del(&dev_kill_list);
7087 rtnl_unlock(); 7098 rtnl_unlock();
7088 } 7099 }
7089 7100
7090 static struct pernet_operations __net_initdata default_device_ops = { 7101 static struct pernet_operations __net_initdata default_device_ops = {
7091 .exit = default_device_exit, 7102 .exit = default_device_exit,
7092 .exit_batch = default_device_exit_batch, 7103 .exit_batch = default_device_exit_batch,
7093 }; 7104 };
7094 7105
7095 /* 7106 /*
7096 * Initialize the DEV module. At boot time this walks the device list and 7107 * Initialize the DEV module. At boot time this walks the device list and
7097 * unhooks any devices that fail to initialise (normally hardware not 7108 * unhooks any devices that fail to initialise (normally hardware not
7098 * present) and leaves us with a valid list of present and active devices. 7109 * present) and leaves us with a valid list of present and active devices.
7099 * 7110 *
7100 */ 7111 */
7101 7112
7102 /* 7113 /*
7103 * This is called single threaded during boot, so no need 7114 * This is called single threaded during boot, so no need
7104 * to take the rtnl semaphore. 7115 * to take the rtnl semaphore.
7105 */ 7116 */
7106 static int __init net_dev_init(void) 7117 static int __init net_dev_init(void)
7107 { 7118 {
7108 int i, rc = -ENOMEM; 7119 int i, rc = -ENOMEM;
7109 7120
7110 BUG_ON(!dev_boot_phase); 7121 BUG_ON(!dev_boot_phase);
7111 7122
7112 if (dev_proc_init()) 7123 if (dev_proc_init())
7113 goto out; 7124 goto out;
7114 7125
7115 if (netdev_kobject_init()) 7126 if (netdev_kobject_init())
7116 goto out; 7127 goto out;
7117 7128
7118 INIT_LIST_HEAD(&ptype_all); 7129 INIT_LIST_HEAD(&ptype_all);
7119 for (i = 0; i < PTYPE_HASH_SIZE; i++) 7130 for (i = 0; i < PTYPE_HASH_SIZE; i++)
7120 INIT_LIST_HEAD(&ptype_base[i]); 7131 INIT_LIST_HEAD(&ptype_base[i]);
7121 7132
7122 INIT_LIST_HEAD(&offload_base); 7133 INIT_LIST_HEAD(&offload_base);
7123 7134
7124 if (register_pernet_subsys(&netdev_net_ops)) 7135 if (register_pernet_subsys(&netdev_net_ops))
7125 goto out; 7136 goto out;
7126 7137
7127 /* 7138 /*
7128 * Initialise the packet receive queues. 7139 * Initialise the packet receive queues.
7129 */ 7140 */
7130 7141
7131 for_each_possible_cpu(i) { 7142 for_each_possible_cpu(i) {
7132 struct softnet_data *sd = &per_cpu(softnet_data, i); 7143 struct softnet_data *sd = &per_cpu(softnet_data, i);
7133 7144
7134 memset(sd, 0, sizeof(*sd)); 7145 memset(sd, 0, sizeof(*sd));
7135 skb_queue_head_init(&sd->input_pkt_queue); 7146 skb_queue_head_init(&sd->input_pkt_queue);
7136 skb_queue_head_init(&sd->process_queue); 7147 skb_queue_head_init(&sd->process_queue);
7137 sd->completion_queue = NULL; 7148 sd->completion_queue = NULL;
7138 INIT_LIST_HEAD(&sd->poll_list); 7149 INIT_LIST_HEAD(&sd->poll_list);
7139 sd->output_queue = NULL; 7150 sd->output_queue = NULL;
7140 sd->output_queue_tailp = &sd->output_queue; 7151 sd->output_queue_tailp = &sd->output_queue;
7141 #ifdef CONFIG_RPS 7152 #ifdef CONFIG_RPS
7142 sd->csd.func = rps_trigger_softirq; 7153 sd->csd.func = rps_trigger_softirq;
7143 sd->csd.info = sd; 7154 sd->csd.info = sd;
7144 sd->csd.flags = 0; 7155 sd->csd.flags = 0;
7145 sd->cpu = i; 7156 sd->cpu = i;
7146 #endif 7157 #endif
7147 7158
7148 sd->backlog.poll = process_backlog; 7159 sd->backlog.poll = process_backlog;
7149 sd->backlog.weight = weight_p; 7160 sd->backlog.weight = weight_p;
7150 sd->backlog.gro_list = NULL; 7161 sd->backlog.gro_list = NULL;
7151 sd->backlog.gro_count = 0; 7162 sd->backlog.gro_count = 0;
7152 } 7163 }
7153 7164
7154 dev_boot_phase = 0; 7165 dev_boot_phase = 0;
7155 7166
7156 /* The loopback device is special: if any other network device 7167 /* The loopback device is special: if any other network device
7157 * is present in a network namespace, the loopback device must 7168 * is present in a network namespace, the loopback device must
7158 * be present. Since we now dynamically allocate and free the 7169 * be present. Since we now dynamically allocate and free the
7159 * loopback device, ensure this invariant is maintained by 7170 * loopback device, ensure this invariant is maintained by
7160 * keeping the loopback device as the first device on the 7171 * keeping the loopback device as the first device on the
7161 * list of network devices, so that the loopback device 7172 * list of network devices, so that the loopback device
7162 * is the first device that appears and the last network device 7173 * is the first device that appears and the last network device
7163 * that disappears. 7174 * that disappears.
7164 */ 7175 */
7165 if (register_pernet_device(&loopback_net_ops)) 7176 if (register_pernet_device(&loopback_net_ops))
7166 goto out; 7177 goto out;
7167 7178
7168 if (register_pernet_device(&default_device_ops)) 7179 if (register_pernet_device(&default_device_ops))
7169 goto out; 7180 goto out;
7170 7181
7171 open_softirq(NET_TX_SOFTIRQ, net_tx_action); 7182 open_softirq(NET_TX_SOFTIRQ, net_tx_action);
7172 open_softirq(NET_RX_SOFTIRQ, net_rx_action); 7183 open_softirq(NET_RX_SOFTIRQ, net_rx_action);
7173 7184
7174 hotcpu_notifier(dev_cpu_callback, 0); 7185 hotcpu_notifier(dev_cpu_callback, 0);
7175 dst_init(); 7186 dst_init();
7176 dev_mcast_init(); 7187 dev_mcast_init();
7177 rc = 0; 7188 rc = 0;
7178 out: 7189 out:
7179 return rc; 7190 return rc;
7180 } 7191 }
7181 7192
7182 subsys_initcall(net_dev_init); 7193 subsys_initcall(net_dev_init);
7183 7194
net/openvswitch/datapath.c
1 /* 1 /*
2 * Copyright (c) 2007-2012 Nicira, Inc. 2 * Copyright (c) 2007-2012 Nicira, Inc.
3 * 3 *
4 * This program is free software; you can redistribute it and/or 4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public 5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation. 6 * License as published by the Free Software Foundation.
7 * 7 *
8 * This program is distributed in the hope that it will be useful, but 8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of 9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details. 11 * General Public License for more details.
12 * 12 *
13 * You should have received a copy of the GNU General Public License 13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software 14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA 16 * 02110-1301, USA
17 */ 17 */
18 18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20 20
21 #include <linux/init.h> 21 #include <linux/init.h>
22 #include <linux/module.h> 22 #include <linux/module.h>
23 #include <linux/if_arp.h> 23 #include <linux/if_arp.h>
24 #include <linux/if_vlan.h> 24 #include <linux/if_vlan.h>
25 #include <linux/in.h> 25 #include <linux/in.h>
26 #include <linux/ip.h> 26 #include <linux/ip.h>
27 #include <linux/jhash.h> 27 #include <linux/jhash.h>
28 #include <linux/delay.h> 28 #include <linux/delay.h>
29 #include <linux/time.h> 29 #include <linux/time.h>
30 #include <linux/etherdevice.h> 30 #include <linux/etherdevice.h>
31 #include <linux/genetlink.h> 31 #include <linux/genetlink.h>
32 #include <linux/kernel.h> 32 #include <linux/kernel.h>
33 #include <linux/kthread.h> 33 #include <linux/kthread.h>
34 #include <linux/mutex.h> 34 #include <linux/mutex.h>
35 #include <linux/percpu.h> 35 #include <linux/percpu.h>
36 #include <linux/rcupdate.h> 36 #include <linux/rcupdate.h>
37 #include <linux/tcp.h> 37 #include <linux/tcp.h>
38 #include <linux/udp.h> 38 #include <linux/udp.h>
39 #include <linux/ethtool.h> 39 #include <linux/ethtool.h>
40 #include <linux/wait.h> 40 #include <linux/wait.h>
41 #include <asm/div64.h> 41 #include <asm/div64.h>
42 #include <linux/highmem.h> 42 #include <linux/highmem.h>
43 #include <linux/netfilter_bridge.h> 43 #include <linux/netfilter_bridge.h>
44 #include <linux/netfilter_ipv4.h> 44 #include <linux/netfilter_ipv4.h>
45 #include <linux/inetdevice.h> 45 #include <linux/inetdevice.h>
46 #include <linux/list.h> 46 #include <linux/list.h>
47 #include <linux/openvswitch.h> 47 #include <linux/openvswitch.h>
48 #include <linux/rculist.h> 48 #include <linux/rculist.h>
49 #include <linux/dmi.h> 49 #include <linux/dmi.h>
50 #include <linux/workqueue.h> 50 #include <linux/workqueue.h>
51 #include <net/genetlink.h> 51 #include <net/genetlink.h>
52 #include <net/net_namespace.h> 52 #include <net/net_namespace.h>
53 #include <net/netns/generic.h> 53 #include <net/netns/generic.h>
54 54
55 #include "datapath.h" 55 #include "datapath.h"
56 #include "flow.h" 56 #include "flow.h"
57 #include "vport-internal_dev.h" 57 #include "vport-internal_dev.h"
58 58
59 /** 59 /**
60 * struct ovs_net - Per net-namespace data for ovs. 60 * struct ovs_net - Per net-namespace data for ovs.
61 * @dps: List of datapaths to enable dumping them all out. 61 * @dps: List of datapaths to enable dumping them all out.
62 * Protected by genl_mutex. 62 * Protected by genl_mutex.
63 */ 63 */
64 struct ovs_net { 64 struct ovs_net {
65 struct list_head dps; 65 struct list_head dps;
66 }; 66 };
67 67
68 static int ovs_net_id __read_mostly; 68 static int ovs_net_id __read_mostly;
69 69
70 #define REHASH_FLOW_INTERVAL (10 * 60 * HZ) 70 #define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
71 static void rehash_flow_table(struct work_struct *work); 71 static void rehash_flow_table(struct work_struct *work);
72 static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table); 72 static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);
73 73
74 /** 74 /**
75 * DOC: Locking: 75 * DOC: Locking:
76 * 76 *
77 * Writes to device state (add/remove datapath, port, set operations on vports, 77 * Writes to device state (add/remove datapath, port, set operations on vports,
78 * etc.) are protected by RTNL. 78 * etc.) are protected by RTNL.
79 * 79 *
80 * Writes to other state (flow table modifications, set miscellaneous datapath 80 * Writes to other state (flow table modifications, set miscellaneous datapath
81 * parameters, etc.) are protected by genl_mutex. The RTNL lock nests inside 81 * parameters, etc.) are protected by genl_mutex. The RTNL lock nests inside
82 * genl_mutex. 82 * genl_mutex.
83 * 83 *
84 * Reads are protected by RCU. 84 * Reads are protected by RCU.
85 * 85 *
86 * There are a few special cases (mostly stats) that have their own 86 * There are a few special cases (mostly stats) that have their own
87 * synchronization but they nest under all of above and don't interact with 87 * synchronization but they nest under all of above and don't interact with
88 * each other. 88 * each other.
89 */ 89 */
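A sketch of the nesting the comment above describes: genl_mutex is already held by the generic netlink core when a .doit handler runs, and RTNL is taken inside it for vport/datapath changes. The handler and the elided body are illustrative only:

	#include <net/genetlink.h>
	#include <linux/rtnetlink.h>

	static int my_vport_cmd(struct sk_buff *skb, struct genl_info *info)
	{
		rtnl_lock();		/* RTNL nests inside genl_mutex */
		/* ... modify vport/datapath state here ... */
		rtnl_unlock();
		return 0;
	}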
90 90
91 static struct vport *new_vport(const struct vport_parms *); 91 static struct vport *new_vport(const struct vport_parms *);
92 static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *, 92 static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *,
93 const struct dp_upcall_info *); 93 const struct dp_upcall_info *);
94 static int queue_userspace_packet(struct net *, int dp_ifindex, 94 static int queue_userspace_packet(struct net *, int dp_ifindex,
95 struct sk_buff *, 95 struct sk_buff *,
96 const struct dp_upcall_info *); 96 const struct dp_upcall_info *);
97 97
98 /* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */ 98 /* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
99 static struct datapath *get_dp(struct net *net, int dp_ifindex) 99 static struct datapath *get_dp(struct net *net, int dp_ifindex)
100 { 100 {
101 struct datapath *dp = NULL; 101 struct datapath *dp = NULL;
102 struct net_device *dev; 102 struct net_device *dev;
103 103
104 rcu_read_lock(); 104 rcu_read_lock();
105 dev = dev_get_by_index_rcu(net, dp_ifindex); 105 dev = dev_get_by_index_rcu(net, dp_ifindex);
106 if (dev) { 106 if (dev) {
107 struct vport *vport = ovs_internal_dev_get_vport(dev); 107 struct vport *vport = ovs_internal_dev_get_vport(dev);
108 if (vport) 108 if (vport)
109 dp = vport->dp; 109 dp = vport->dp;
110 } 110 }
111 rcu_read_unlock(); 111 rcu_read_unlock();
112 112
113 return dp; 113 return dp;
114 } 114 }
115 115
116 /* Must be called with rcu_read_lock or RTNL lock. */ 116 /* Must be called with rcu_read_lock or RTNL lock. */
117 const char *ovs_dp_name(const struct datapath *dp) 117 const char *ovs_dp_name(const struct datapath *dp)
118 { 118 {
119 struct vport *vport = ovs_vport_rtnl_rcu(dp, OVSP_LOCAL); 119 struct vport *vport = ovs_vport_rtnl_rcu(dp, OVSP_LOCAL);
120 return vport->ops->get_name(vport); 120 return vport->ops->get_name(vport);
121 } 121 }
122 122
123 static int get_dpifindex(struct datapath *dp) 123 static int get_dpifindex(struct datapath *dp)
124 { 124 {
125 struct vport *local; 125 struct vport *local;
126 int ifindex; 126 int ifindex;
127 127
128 rcu_read_lock(); 128 rcu_read_lock();
129 129
130 local = ovs_vport_rcu(dp, OVSP_LOCAL); 130 local = ovs_vport_rcu(dp, OVSP_LOCAL);
131 if (local) 131 if (local)
132 ifindex = local->ops->get_ifindex(local); 132 ifindex = local->ops->get_ifindex(local);
133 else 133 else
134 ifindex = 0; 134 ifindex = 0;
135 135
136 rcu_read_unlock(); 136 rcu_read_unlock();
137 137
138 return ifindex; 138 return ifindex;
139 } 139 }
140 140
141 static void destroy_dp_rcu(struct rcu_head *rcu) 141 static void destroy_dp_rcu(struct rcu_head *rcu)
142 { 142 {
143 struct datapath *dp = container_of(rcu, struct datapath, rcu); 143 struct datapath *dp = container_of(rcu, struct datapath, rcu);
144 144
145 ovs_flow_tbl_destroy((__force struct flow_table *)dp->table); 145 ovs_flow_tbl_destroy((__force struct flow_table *)dp->table);
146 free_percpu(dp->stats_percpu); 146 free_percpu(dp->stats_percpu);
147 release_net(ovs_dp_get_net(dp)); 147 release_net(ovs_dp_get_net(dp));
148 kfree(dp->ports); 148 kfree(dp->ports);
149 kfree(dp); 149 kfree(dp);
150 } 150 }
151 151
152 static struct hlist_head *vport_hash_bucket(const struct datapath *dp, 152 static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
153 u16 port_no) 153 u16 port_no)
154 { 154 {
155 return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)]; 155 return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
156 } 156 }
157 157
158 struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no) 158 struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
159 { 159 {
160 struct vport *vport; 160 struct vport *vport;
161 struct hlist_node *n; 161 struct hlist_node *n;
162 struct hlist_head *head; 162 struct hlist_head *head;
163 163
164 head = vport_hash_bucket(dp, port_no); 164 head = vport_hash_bucket(dp, port_no);
165 hlist_for_each_entry_rcu(vport, n, head, dp_hash_node) { 165 hlist_for_each_entry_rcu(vport, n, head, dp_hash_node) {
166 if (vport->port_no == port_no) 166 if (vport->port_no == port_no)
167 return vport; 167 return vport;
168 } 168 }
169 return NULL; 169 return NULL;
170 } 170 }
171 171
172 /* Called with RTNL lock and genl_lock. */ 172 /* Called with RTNL lock and genl_lock. */
173 static struct vport *new_vport(const struct vport_parms *parms) 173 static struct vport *new_vport(const struct vport_parms *parms)
174 { 174 {
175 struct vport *vport; 175 struct vport *vport;
176 176
177 vport = ovs_vport_add(parms); 177 vport = ovs_vport_add(parms);
178 if (!IS_ERR(vport)) { 178 if (!IS_ERR(vport)) {
179 struct datapath *dp = parms->dp; 179 struct datapath *dp = parms->dp;
180 struct hlist_head *head = vport_hash_bucket(dp, vport->port_no); 180 struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);
181 181
182 hlist_add_head_rcu(&vport->dp_hash_node, head); 182 hlist_add_head_rcu(&vport->dp_hash_node, head);
183 } 183 }
184 184
185 return vport; 185 return vport;
186 } 186 }
187 187
188 /* Called with RTNL lock. */ 188 /* Called with RTNL lock. */
189 void ovs_dp_detach_port(struct vport *p) 189 void ovs_dp_detach_port(struct vport *p)
190 { 190 {
191 ASSERT_RTNL(); 191 ASSERT_RTNL();
192 192
193 /* First drop references to device. */ 193 /* First drop references to device. */
194 hlist_del_rcu(&p->dp_hash_node); 194 hlist_del_rcu(&p->dp_hash_node);
195 195
196 /* Then destroy it. */ 196 /* Then destroy it. */
197 ovs_vport_del(p); 197 ovs_vport_del(p);
198 } 198 }
199 199
200 /* Must be called with rcu_read_lock. */ 200 /* Must be called with rcu_read_lock. */
201 void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) 201 void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
202 { 202 {
203 struct datapath *dp = p->dp; 203 struct datapath *dp = p->dp;
204 struct sw_flow *flow; 204 struct sw_flow *flow;
205 struct dp_stats_percpu *stats; 205 struct dp_stats_percpu *stats;
206 struct sw_flow_key key; 206 struct sw_flow_key key;
207 u64 *stats_counter; 207 u64 *stats_counter;
208 int error; 208 int error;
209 int key_len; 209 int key_len;
210 210
211 stats = this_cpu_ptr(dp->stats_percpu); 211 stats = this_cpu_ptr(dp->stats_percpu);
212 212
213 /* Extract flow from 'skb' into 'key'. */ 213 /* Extract flow from 'skb' into 'key'. */
214 error = ovs_flow_extract(skb, p->port_no, &key, &key_len); 214 error = ovs_flow_extract(skb, p->port_no, &key, &key_len);
215 if (unlikely(error)) { 215 if (unlikely(error)) {
216 kfree_skb(skb); 216 kfree_skb(skb);
217 return; 217 return;
218 } 218 }
219 219
220 /* Look up flow. */ 220 /* Look up flow. */
221 flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len); 221 flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len);
222 if (unlikely(!flow)) { 222 if (unlikely(!flow)) {
223 struct dp_upcall_info upcall; 223 struct dp_upcall_info upcall;
224 224
225 upcall.cmd = OVS_PACKET_CMD_MISS; 225 upcall.cmd = OVS_PACKET_CMD_MISS;
226 upcall.key = &key; 226 upcall.key = &key;
227 upcall.userdata = NULL; 227 upcall.userdata = NULL;
228 upcall.portid = p->upcall_portid; 228 upcall.portid = p->upcall_portid;
229 ovs_dp_upcall(dp, skb, &upcall); 229 ovs_dp_upcall(dp, skb, &upcall);
230 consume_skb(skb); 230 consume_skb(skb);
231 stats_counter = &stats->n_missed; 231 stats_counter = &stats->n_missed;
232 goto out; 232 goto out;
233 } 233 }
234 234
235 OVS_CB(skb)->flow = flow; 235 OVS_CB(skb)->flow = flow;
236 236
237 stats_counter = &stats->n_hit; 237 stats_counter = &stats->n_hit;
238 ovs_flow_used(OVS_CB(skb)->flow, skb); 238 ovs_flow_used(OVS_CB(skb)->flow, skb);
239 ovs_execute_actions(dp, skb); 239 ovs_execute_actions(dp, skb);
240 240
241 out: 241 out:
242 /* Update datapath statistics. */ 242 /* Update datapath statistics. */
243 u64_stats_update_begin(&stats->sync); 243 u64_stats_update_begin(&stats->sync);
244 (*stats_counter)++; 244 (*stats_counter)++;
245 u64_stats_update_end(&stats->sync); 245 u64_stats_update_end(&stats->sync);
246 } 246 }
247 247
248 static struct genl_family dp_packet_genl_family = { 248 static struct genl_family dp_packet_genl_family = {
249 .id = GENL_ID_GENERATE, 249 .id = GENL_ID_GENERATE,
250 .hdrsize = sizeof(struct ovs_header), 250 .hdrsize = sizeof(struct ovs_header),
251 .name = OVS_PACKET_FAMILY, 251 .name = OVS_PACKET_FAMILY,
252 .version = OVS_PACKET_VERSION, 252 .version = OVS_PACKET_VERSION,
253 .maxattr = OVS_PACKET_ATTR_MAX, 253 .maxattr = OVS_PACKET_ATTR_MAX,
254 .netnsok = true 254 .netnsok = true
255 }; 255 };
256 256
257 int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, 257 int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
258 const struct dp_upcall_info *upcall_info) 258 const struct dp_upcall_info *upcall_info)
259 { 259 {
260 struct dp_stats_percpu *stats; 260 struct dp_stats_percpu *stats;
261 int dp_ifindex; 261 int dp_ifindex;
262 int err; 262 int err;
263 263
264 if (upcall_info->portid == 0) { 264 if (upcall_info->portid == 0) {
265 err = -ENOTCONN; 265 err = -ENOTCONN;
266 goto err; 266 goto err;
267 } 267 }
268 268
269 dp_ifindex = get_dpifindex(dp); 269 dp_ifindex = get_dpifindex(dp);
270 if (!dp_ifindex) { 270 if (!dp_ifindex) {
271 err = -ENODEV; 271 err = -ENODEV;
272 goto err; 272 goto err;
273 } 273 }
274 274
275 if (!skb_is_gso(skb)) 275 if (!skb_is_gso(skb))
276 err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info); 276 err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
277 else 277 else
278 err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info); 278 err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
279 if (err) 279 if (err)
280 goto err; 280 goto err;
281 281
282 return 0; 282 return 0;
283 283
284 err: 284 err:
285 stats = this_cpu_ptr(dp->stats_percpu); 285 stats = this_cpu_ptr(dp->stats_percpu);
286 286
287 u64_stats_update_begin(&stats->sync); 287 u64_stats_update_begin(&stats->sync);
288 stats->n_lost++; 288 stats->n_lost++;
289 u64_stats_update_end(&stats->sync); 289 u64_stats_update_end(&stats->sync);
290 290
291 return err; 291 return err;
292 } 292 }
293 293
294 static int queue_gso_packets(struct net *net, int dp_ifindex, 294 static int queue_gso_packets(struct net *net, int dp_ifindex,
295 struct sk_buff *skb, 295 struct sk_buff *skb,
296 const struct dp_upcall_info *upcall_info) 296 const struct dp_upcall_info *upcall_info)
297 { 297 {
298 unsigned short gso_type = skb_shinfo(skb)->gso_type; 298 unsigned short gso_type = skb_shinfo(skb)->gso_type;
299 struct dp_upcall_info later_info; 299 struct dp_upcall_info later_info;
300 struct sw_flow_key later_key; 300 struct sw_flow_key later_key;
301 struct sk_buff *segs, *nskb; 301 struct sk_buff *segs, *nskb;
302 int err; 302 int err;
303 303
304 segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM); 304 segs = __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM, false);
305 if (IS_ERR(segs)) 305 if (IS_ERR(segs))
306 return PTR_ERR(segs); 306 return PTR_ERR(segs);
307 307
308 /* Queue all of the segments. */ 308 /* Queue all of the segments. */
309 skb = segs; 309 skb = segs;
310 do { 310 do {
311 err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info); 311 err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info);
312 if (err) 312 if (err)
313 break; 313 break;
314 314
315 if (skb == segs && gso_type & SKB_GSO_UDP) { 315 if (skb == segs && gso_type & SKB_GSO_UDP) {
316 /* The initial flow key extracted by ovs_flow_extract() 316 /* The initial flow key extracted by ovs_flow_extract()
317 * in this case is for a first fragment, so we need to 317 * in this case is for a first fragment, so we need to
318 * properly mark later fragments. 318 * properly mark later fragments.
319 */ 319 */
320 later_key = *upcall_info->key; 320 later_key = *upcall_info->key;
321 later_key.ip.frag = OVS_FRAG_TYPE_LATER; 321 later_key.ip.frag = OVS_FRAG_TYPE_LATER;
322 322
323 later_info = *upcall_info; 323 later_info = *upcall_info;
324 later_info.key = &later_key; 324 later_info.key = &later_key;
325 upcall_info = &later_info; 325 upcall_info = &later_info;
326 } 326 }
327 } while ((skb = skb->next)); 327 } while ((skb = skb->next));
328 328
329 /* Free all of the segments. */ 329 /* Free all of the segments. */
330 skb = segs; 330 skb = segs;
331 do { 331 do {
332 nskb = skb->next; 332 nskb = skb->next;
333 if (err) 333 if (err)
334 kfree_skb(skb); 334 kfree_skb(skb);
335 else 335 else
336 consume_skb(skb); 336 consume_skb(skb);
337 } while ((skb = nskb)); 337 } while ((skb = nskb));
338 return err; 338 return err;
339 } 339 }
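The __skb_gso_segment() call above is the point of this commit for openvswitch: it segments a GSO skb it has received, where ->ip_summed does not carry its tx-path meaning, so it passes tx_path = false to skip the tx-only checksum sanity check. Existing tx-path callers keep the old name; the wrapper this commit adds to include/linux/netdevice.h is expected to look roughly like the sketch below (not quoted from the hunk shown here):

	static inline struct sk_buff *skb_gso_segment(struct sk_buff *skb,
						      netdev_features_t features)
	{
		/* tx_path = true preserves the historical ->ip_summed check */
		return __skb_gso_segment(skb, features, true);
	}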
340 340
341 static int queue_userspace_packet(struct net *net, int dp_ifindex, 341 static int queue_userspace_packet(struct net *net, int dp_ifindex,
342 struct sk_buff *skb, 342 struct sk_buff *skb,
343 const struct dp_upcall_info *upcall_info) 343 const struct dp_upcall_info *upcall_info)
344 { 344 {
345 struct ovs_header *upcall; 345 struct ovs_header *upcall;
346 struct sk_buff *nskb = NULL; 346 struct sk_buff *nskb = NULL;
347 struct sk_buff *user_skb; /* to be queued to userspace */ 347 struct sk_buff *user_skb; /* to be queued to userspace */
348 struct nlattr *nla; 348 struct nlattr *nla;
349 unsigned int len; 349 unsigned int len;
350 int err; 350 int err;
351 351
352 if (vlan_tx_tag_present(skb)) { 352 if (vlan_tx_tag_present(skb)) {
353 nskb = skb_clone(skb, GFP_ATOMIC); 353 nskb = skb_clone(skb, GFP_ATOMIC);
354 if (!nskb) 354 if (!nskb)
355 return -ENOMEM; 355 return -ENOMEM;
356 356
357 nskb = __vlan_put_tag(nskb, vlan_tx_tag_get(nskb)); 357 nskb = __vlan_put_tag(nskb, vlan_tx_tag_get(nskb));
358 if (!nskb) 358 if (!nskb)
359 return -ENOMEM; 359 return -ENOMEM;
360 360
361 nskb->vlan_tci = 0; 361 nskb->vlan_tci = 0;
362 skb = nskb; 362 skb = nskb;
363 } 363 }
364 364
365 if (nla_attr_size(skb->len) > USHRT_MAX) { 365 if (nla_attr_size(skb->len) > USHRT_MAX) {
366 err = -EFBIG; 366 err = -EFBIG;
367 goto out; 367 goto out;
368 } 368 }
369 369
370 len = sizeof(struct ovs_header); 370 len = sizeof(struct ovs_header);
371 len += nla_total_size(skb->len); 371 len += nla_total_size(skb->len);
372 len += nla_total_size(FLOW_BUFSIZE); 372 len += nla_total_size(FLOW_BUFSIZE);
373 if (upcall_info->cmd == OVS_PACKET_CMD_ACTION) 373 if (upcall_info->cmd == OVS_PACKET_CMD_ACTION)
374 len += nla_total_size(8); 374 len += nla_total_size(8);
375 375
376 user_skb = genlmsg_new(len, GFP_ATOMIC); 376 user_skb = genlmsg_new(len, GFP_ATOMIC);
377 if (!user_skb) { 377 if (!user_skb) {
378 err = -ENOMEM; 378 err = -ENOMEM;
379 goto out; 379 goto out;
380 } 380 }
381 381
382 upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 382 upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
383 0, upcall_info->cmd); 383 0, upcall_info->cmd);
384 upcall->dp_ifindex = dp_ifindex; 384 upcall->dp_ifindex = dp_ifindex;
385 385
386 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY); 386 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
387 ovs_flow_to_nlattrs(upcall_info->key, user_skb); 387 ovs_flow_to_nlattrs(upcall_info->key, user_skb);
388 nla_nest_end(user_skb, nla); 388 nla_nest_end(user_skb, nla);
389 389
390 if (upcall_info->userdata) 390 if (upcall_info->userdata)
391 nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA, 391 nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA,
392 nla_get_u64(upcall_info->userdata)); 392 nla_get_u64(upcall_info->userdata));
393 393
394 nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len); 394 nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);
395 395
396 skb_copy_and_csum_dev(skb, nla_data(nla)); 396 skb_copy_and_csum_dev(skb, nla_data(nla));
397 397
398 err = genlmsg_unicast(net, user_skb, upcall_info->portid); 398 err = genlmsg_unicast(net, user_skb, upcall_info->portid);
399 399
400 out: 400 out:
401 kfree_skb(nskb); 401 kfree_skb(nskb);
402 return err; 402 return err;
403 } 403 }
404 404
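Editor's note (not part of the commit): queue_userspace_packet() above bounds the packet it forwards because a single Netlink attribute cannot describe more than a u16 worth of data, which is why nla_attr_size(skb->len) is compared against USHRT_MAX before the payload is reserved. A minimal sketch of the relevant definitions, assuming the standard include/uapi/linux/netlink.h layout:

/* struct nlattr stores its total length (header + payload) in a 16-bit
 * field, so anything that cannot be described by nla_attr_size() in a u16
 * cannot become a single OVS_PACKET_ATTR_PACKET attribute. */
struct nlattr {
	__u16 nla_len;   /* NLA_HDRLEN + payload length */
	__u16 nla_type;  /* attribute type, e.g. OVS_PACKET_ATTR_PACKET */
};

/* Sizing helpers used when computing 'len' for genlmsg_new():
 *   nla_attr_size(payload)  = NLA_HDRLEN + payload
 *   nla_total_size(payload) = NLA_ALIGN(nla_attr_size(payload))
 * so the allocation covers the ovs_header, the full packet, a worst-case
 * flow key (FLOW_BUFSIZE) and, for OVS_PACKET_CMD_ACTION, 8 bytes of
 * userdata. */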
405 /* Called with genl_mutex. */ 405 /* Called with genl_mutex. */
406 static int flush_flows(struct datapath *dp) 406 static int flush_flows(struct datapath *dp)
407 { 407 {
408 struct flow_table *old_table; 408 struct flow_table *old_table;
409 struct flow_table *new_table; 409 struct flow_table *new_table;
410 410
411 old_table = genl_dereference(dp->table); 411 old_table = genl_dereference(dp->table);
412 new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS); 412 new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
413 if (!new_table) 413 if (!new_table)
414 return -ENOMEM; 414 return -ENOMEM;
415 415
416 rcu_assign_pointer(dp->table, new_table); 416 rcu_assign_pointer(dp->table, new_table);
417 417
418 ovs_flow_tbl_deferred_destroy(old_table); 418 ovs_flow_tbl_deferred_destroy(old_table);
419 return 0; 419 return 0;
420 } 420 }
421 421
422 static int validate_actions(const struct nlattr *attr, 422 static int validate_actions(const struct nlattr *attr,
423 const struct sw_flow_key *key, int depth); 423 const struct sw_flow_key *key, int depth);
424 424
425 static int validate_sample(const struct nlattr *attr, 425 static int validate_sample(const struct nlattr *attr,
426 const struct sw_flow_key *key, int depth) 426 const struct sw_flow_key *key, int depth)
427 { 427 {
428 const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; 428 const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
429 const struct nlattr *probability, *actions; 429 const struct nlattr *probability, *actions;
430 const struct nlattr *a; 430 const struct nlattr *a;
431 int rem; 431 int rem;
432 432
433 memset(attrs, 0, sizeof(attrs)); 433 memset(attrs, 0, sizeof(attrs));
434 nla_for_each_nested(a, attr, rem) { 434 nla_for_each_nested(a, attr, rem) {
435 int type = nla_type(a); 435 int type = nla_type(a);
436 if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type]) 436 if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
437 return -EINVAL; 437 return -EINVAL;
438 attrs[type] = a; 438 attrs[type] = a;
439 } 439 }
440 if (rem) 440 if (rem)
441 return -EINVAL; 441 return -EINVAL;
442 442
443 probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY]; 443 probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
444 if (!probability || nla_len(probability) != sizeof(u32)) 444 if (!probability || nla_len(probability) != sizeof(u32))
445 return -EINVAL; 445 return -EINVAL;
446 446
447 actions = attrs[OVS_SAMPLE_ATTR_ACTIONS]; 447 actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
448 if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) 448 if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
449 return -EINVAL; 449 return -EINVAL;
450 return validate_actions(actions, key, depth + 1); 450 return validate_actions(actions, key, depth + 1);
451 } 451 }
452 452
453 static int validate_tp_port(const struct sw_flow_key *flow_key) 453 static int validate_tp_port(const struct sw_flow_key *flow_key)
454 { 454 {
455 if (flow_key->eth.type == htons(ETH_P_IP)) { 455 if (flow_key->eth.type == htons(ETH_P_IP)) {
456 if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst) 456 if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
457 return 0; 457 return 0;
458 } else if (flow_key->eth.type == htons(ETH_P_IPV6)) { 458 } else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
459 if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst) 459 if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
460 return 0; 460 return 0;
461 } 461 }
462 462
463 return -EINVAL; 463 return -EINVAL;
464 } 464 }
465 465
466 static int validate_set(const struct nlattr *a, 466 static int validate_set(const struct nlattr *a,
467 const struct sw_flow_key *flow_key) 467 const struct sw_flow_key *flow_key)
468 { 468 {
469 const struct nlattr *ovs_key = nla_data(a); 469 const struct nlattr *ovs_key = nla_data(a);
470 int key_type = nla_type(ovs_key); 470 int key_type = nla_type(ovs_key);
471 471
472 	/* There can be only one key in an action */			472 	/* There can be only one key in an action */
473 if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) 473 if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
474 return -EINVAL; 474 return -EINVAL;
475 475
476 if (key_type > OVS_KEY_ATTR_MAX || 476 if (key_type > OVS_KEY_ATTR_MAX ||
477 nla_len(ovs_key) != ovs_key_lens[key_type]) 477 nla_len(ovs_key) != ovs_key_lens[key_type])
478 return -EINVAL; 478 return -EINVAL;
479 479
480 switch (key_type) { 480 switch (key_type) {
481 const struct ovs_key_ipv4 *ipv4_key; 481 const struct ovs_key_ipv4 *ipv4_key;
482 const struct ovs_key_ipv6 *ipv6_key; 482 const struct ovs_key_ipv6 *ipv6_key;
483 483
484 case OVS_KEY_ATTR_PRIORITY: 484 case OVS_KEY_ATTR_PRIORITY:
485 case OVS_KEY_ATTR_SKB_MARK: 485 case OVS_KEY_ATTR_SKB_MARK:
486 case OVS_KEY_ATTR_ETHERNET: 486 case OVS_KEY_ATTR_ETHERNET:
487 break; 487 break;
488 488
489 case OVS_KEY_ATTR_IPV4: 489 case OVS_KEY_ATTR_IPV4:
490 if (flow_key->eth.type != htons(ETH_P_IP)) 490 if (flow_key->eth.type != htons(ETH_P_IP))
491 return -EINVAL; 491 return -EINVAL;
492 492
493 if (!flow_key->ip.proto) 493 if (!flow_key->ip.proto)
494 return -EINVAL; 494 return -EINVAL;
495 495
496 ipv4_key = nla_data(ovs_key); 496 ipv4_key = nla_data(ovs_key);
497 if (ipv4_key->ipv4_proto != flow_key->ip.proto) 497 if (ipv4_key->ipv4_proto != flow_key->ip.proto)
498 return -EINVAL; 498 return -EINVAL;
499 499
500 if (ipv4_key->ipv4_frag != flow_key->ip.frag) 500 if (ipv4_key->ipv4_frag != flow_key->ip.frag)
501 return -EINVAL; 501 return -EINVAL;
502 502
503 break; 503 break;
504 504
505 case OVS_KEY_ATTR_IPV6: 505 case OVS_KEY_ATTR_IPV6:
506 if (flow_key->eth.type != htons(ETH_P_IPV6)) 506 if (flow_key->eth.type != htons(ETH_P_IPV6))
507 return -EINVAL; 507 return -EINVAL;
508 508
509 if (!flow_key->ip.proto) 509 if (!flow_key->ip.proto)
510 return -EINVAL; 510 return -EINVAL;
511 511
512 ipv6_key = nla_data(ovs_key); 512 ipv6_key = nla_data(ovs_key);
513 if (ipv6_key->ipv6_proto != flow_key->ip.proto) 513 if (ipv6_key->ipv6_proto != flow_key->ip.proto)
514 return -EINVAL; 514 return -EINVAL;
515 515
516 if (ipv6_key->ipv6_frag != flow_key->ip.frag) 516 if (ipv6_key->ipv6_frag != flow_key->ip.frag)
517 return -EINVAL; 517 return -EINVAL;
518 518
519 if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) 519 if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
520 return -EINVAL; 520 return -EINVAL;
521 521
522 break; 522 break;
523 523
524 case OVS_KEY_ATTR_TCP: 524 case OVS_KEY_ATTR_TCP:
525 if (flow_key->ip.proto != IPPROTO_TCP) 525 if (flow_key->ip.proto != IPPROTO_TCP)
526 return -EINVAL; 526 return -EINVAL;
527 527
528 return validate_tp_port(flow_key); 528 return validate_tp_port(flow_key);
529 529
530 case OVS_KEY_ATTR_UDP: 530 case OVS_KEY_ATTR_UDP:
531 if (flow_key->ip.proto != IPPROTO_UDP) 531 if (flow_key->ip.proto != IPPROTO_UDP)
532 return -EINVAL; 532 return -EINVAL;
533 533
534 return validate_tp_port(flow_key); 534 return validate_tp_port(flow_key);
535 535
536 default: 536 default:
537 return -EINVAL; 537 return -EINVAL;
538 } 538 }
539 539
540 return 0; 540 return 0;
541 } 541 }
542 542
543 static int validate_userspace(const struct nlattr *attr) 543 static int validate_userspace(const struct nlattr *attr)
544 { 544 {
545 static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { 545 static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
546 [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, 546 [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
547 [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_U64 }, 547 [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_U64 },
548 }; 548 };
549 struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; 549 struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
550 int error; 550 int error;
551 551
552 error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, 552 error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
553 attr, userspace_policy); 553 attr, userspace_policy);
554 if (error) 554 if (error)
555 return error; 555 return error;
556 556
557 if (!a[OVS_USERSPACE_ATTR_PID] || 557 if (!a[OVS_USERSPACE_ATTR_PID] ||
558 !nla_get_u32(a[OVS_USERSPACE_ATTR_PID])) 558 !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
559 return -EINVAL; 559 return -EINVAL;
560 560
561 return 0; 561 return 0;
562 } 562 }
563 563
564 static int validate_actions(const struct nlattr *attr, 564 static int validate_actions(const struct nlattr *attr,
565 const struct sw_flow_key *key, int depth) 565 const struct sw_flow_key *key, int depth)
566 { 566 {
567 const struct nlattr *a; 567 const struct nlattr *a;
568 int rem, err; 568 int rem, err;
569 569
570 if (depth >= SAMPLE_ACTION_DEPTH) 570 if (depth >= SAMPLE_ACTION_DEPTH)
571 return -EOVERFLOW; 571 return -EOVERFLOW;
572 572
573 nla_for_each_nested(a, attr, rem) { 573 nla_for_each_nested(a, attr, rem) {
574 /* Expected argument lengths, (u32)-1 for variable length. */ 574 /* Expected argument lengths, (u32)-1 for variable length. */
575 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { 575 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
576 [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), 576 [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
577 [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, 577 [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
578 [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), 578 [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
579 [OVS_ACTION_ATTR_POP_VLAN] = 0, 579 [OVS_ACTION_ATTR_POP_VLAN] = 0,
580 [OVS_ACTION_ATTR_SET] = (u32)-1, 580 [OVS_ACTION_ATTR_SET] = (u32)-1,
581 [OVS_ACTION_ATTR_SAMPLE] = (u32)-1 581 [OVS_ACTION_ATTR_SAMPLE] = (u32)-1
582 }; 582 };
583 const struct ovs_action_push_vlan *vlan; 583 const struct ovs_action_push_vlan *vlan;
584 int type = nla_type(a); 584 int type = nla_type(a);
585 585
586 if (type > OVS_ACTION_ATTR_MAX || 586 if (type > OVS_ACTION_ATTR_MAX ||
587 (action_lens[type] != nla_len(a) && 587 (action_lens[type] != nla_len(a) &&
588 action_lens[type] != (u32)-1)) 588 action_lens[type] != (u32)-1))
589 return -EINVAL; 589 return -EINVAL;
590 590
591 switch (type) { 591 switch (type) {
592 case OVS_ACTION_ATTR_UNSPEC: 592 case OVS_ACTION_ATTR_UNSPEC:
593 return -EINVAL; 593 return -EINVAL;
594 594
595 case OVS_ACTION_ATTR_USERSPACE: 595 case OVS_ACTION_ATTR_USERSPACE:
596 err = validate_userspace(a); 596 err = validate_userspace(a);
597 if (err) 597 if (err)
598 return err; 598 return err;
599 break; 599 break;
600 600
601 case OVS_ACTION_ATTR_OUTPUT: 601 case OVS_ACTION_ATTR_OUTPUT:
602 if (nla_get_u32(a) >= DP_MAX_PORTS) 602 if (nla_get_u32(a) >= DP_MAX_PORTS)
603 return -EINVAL; 603 return -EINVAL;
604 break; 604 break;
605 605
606 606
607 case OVS_ACTION_ATTR_POP_VLAN: 607 case OVS_ACTION_ATTR_POP_VLAN:
608 break; 608 break;
609 609
610 case OVS_ACTION_ATTR_PUSH_VLAN: 610 case OVS_ACTION_ATTR_PUSH_VLAN:
611 vlan = nla_data(a); 611 vlan = nla_data(a);
612 if (vlan->vlan_tpid != htons(ETH_P_8021Q)) 612 if (vlan->vlan_tpid != htons(ETH_P_8021Q))
613 return -EINVAL; 613 return -EINVAL;
614 if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) 614 if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
615 return -EINVAL; 615 return -EINVAL;
616 break; 616 break;
617 617
618 case OVS_ACTION_ATTR_SET: 618 case OVS_ACTION_ATTR_SET:
619 err = validate_set(a, key); 619 err = validate_set(a, key);
620 if (err) 620 if (err)
621 return err; 621 return err;
622 break; 622 break;
623 623
624 case OVS_ACTION_ATTR_SAMPLE: 624 case OVS_ACTION_ATTR_SAMPLE:
625 err = validate_sample(a, key, depth); 625 err = validate_sample(a, key, depth);
626 if (err) 626 if (err)
627 return err; 627 return err;
628 break; 628 break;
629 629
630 default: 630 default:
631 return -EINVAL; 631 return -EINVAL;
632 } 632 }
633 } 633 }
634 634
635 if (rem > 0) 635 if (rem > 0)
636 return -EINVAL; 636 return -EINVAL;
637 637
638 return 0; 638 return 0;
639 } 639 }
640 640
641 static void clear_stats(struct sw_flow *flow) 641 static void clear_stats(struct sw_flow *flow)
642 { 642 {
643 flow->used = 0; 643 flow->used = 0;
644 flow->tcp_flags = 0; 644 flow->tcp_flags = 0;
645 flow->packet_count = 0; 645 flow->packet_count = 0;
646 flow->byte_count = 0; 646 flow->byte_count = 0;
647 } 647 }
648 648
649 static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) 649 static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
650 { 650 {
651 struct ovs_header *ovs_header = info->userhdr; 651 struct ovs_header *ovs_header = info->userhdr;
652 struct nlattr **a = info->attrs; 652 struct nlattr **a = info->attrs;
653 struct sw_flow_actions *acts; 653 struct sw_flow_actions *acts;
654 struct sk_buff *packet; 654 struct sk_buff *packet;
655 struct sw_flow *flow; 655 struct sw_flow *flow;
656 struct datapath *dp; 656 struct datapath *dp;
657 struct ethhdr *eth; 657 struct ethhdr *eth;
658 int len; 658 int len;
659 int err; 659 int err;
660 int key_len; 660 int key_len;
661 661
662 err = -EINVAL; 662 err = -EINVAL;
663 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] || 663 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
664 !a[OVS_PACKET_ATTR_ACTIONS] || 664 !a[OVS_PACKET_ATTR_ACTIONS] ||
665 nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN) 665 nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN)
666 goto err; 666 goto err;
667 667
668 len = nla_len(a[OVS_PACKET_ATTR_PACKET]); 668 len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
669 packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL); 669 packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
670 err = -ENOMEM; 670 err = -ENOMEM;
671 if (!packet) 671 if (!packet)
672 goto err; 672 goto err;
673 skb_reserve(packet, NET_IP_ALIGN); 673 skb_reserve(packet, NET_IP_ALIGN);
674 674
675 memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len); 675 memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len);
676 676
677 skb_reset_mac_header(packet); 677 skb_reset_mac_header(packet);
678 eth = eth_hdr(packet); 678 eth = eth_hdr(packet);
679 679
680 /* Normally, setting the skb 'protocol' field would be handled by a 680 /* Normally, setting the skb 'protocol' field would be handled by a
681 * call to eth_type_trans(), but it assumes there's a sending 681 * call to eth_type_trans(), but it assumes there's a sending
682 * device, which we may not have. */ 682 * device, which we may not have. */
683 if (ntohs(eth->h_proto) >= 1536) 683 if (ntohs(eth->h_proto) >= 1536)
684 packet->protocol = eth->h_proto; 684 packet->protocol = eth->h_proto;
685 else 685 else
686 packet->protocol = htons(ETH_P_802_2); 686 packet->protocol = htons(ETH_P_802_2);
687 687
688 /* Build an sw_flow for sending this packet. */ 688 /* Build an sw_flow for sending this packet. */
689 flow = ovs_flow_alloc(); 689 flow = ovs_flow_alloc();
690 err = PTR_ERR(flow); 690 err = PTR_ERR(flow);
691 if (IS_ERR(flow)) 691 if (IS_ERR(flow))
692 goto err_kfree_skb; 692 goto err_kfree_skb;
693 693
694 err = ovs_flow_extract(packet, -1, &flow->key, &key_len); 694 err = ovs_flow_extract(packet, -1, &flow->key, &key_len);
695 if (err) 695 if (err)
696 goto err_flow_free; 696 goto err_flow_free;
697 697
698 err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority, 698 err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority,
699 &flow->key.phy.skb_mark, 699 &flow->key.phy.skb_mark,
700 &flow->key.phy.in_port, 700 &flow->key.phy.in_port,
701 a[OVS_PACKET_ATTR_KEY]); 701 a[OVS_PACKET_ATTR_KEY]);
702 if (err) 702 if (err)
703 goto err_flow_free; 703 goto err_flow_free;
704 704
705 err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0); 705 err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0);
706 if (err) 706 if (err)
707 goto err_flow_free; 707 goto err_flow_free;
708 708
709 flow->hash = ovs_flow_hash(&flow->key, key_len); 709 flow->hash = ovs_flow_hash(&flow->key, key_len);
710 710
711 acts = ovs_flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]); 711 acts = ovs_flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]);
712 err = PTR_ERR(acts); 712 err = PTR_ERR(acts);
713 if (IS_ERR(acts)) 713 if (IS_ERR(acts))
714 goto err_flow_free; 714 goto err_flow_free;
715 rcu_assign_pointer(flow->sf_acts, acts); 715 rcu_assign_pointer(flow->sf_acts, acts);
716 716
717 OVS_CB(packet)->flow = flow; 717 OVS_CB(packet)->flow = flow;
718 packet->priority = flow->key.phy.priority; 718 packet->priority = flow->key.phy.priority;
719 packet->mark = flow->key.phy.skb_mark; 719 packet->mark = flow->key.phy.skb_mark;
720 720
721 rcu_read_lock(); 721 rcu_read_lock();
722 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 722 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
723 err = -ENODEV; 723 err = -ENODEV;
724 if (!dp) 724 if (!dp)
725 goto err_unlock; 725 goto err_unlock;
726 726
727 local_bh_disable(); 727 local_bh_disable();
728 err = ovs_execute_actions(dp, packet); 728 err = ovs_execute_actions(dp, packet);
729 local_bh_enable(); 729 local_bh_enable();
730 rcu_read_unlock(); 730 rcu_read_unlock();
731 731
732 ovs_flow_free(flow); 732 ovs_flow_free(flow);
733 return err; 733 return err;
734 734
735 err_unlock: 735 err_unlock:
736 rcu_read_unlock(); 736 rcu_read_unlock();
737 err_flow_free: 737 err_flow_free:
738 ovs_flow_free(flow); 738 ovs_flow_free(flow);
739 err_kfree_skb: 739 err_kfree_skb:
740 kfree_skb(packet); 740 kfree_skb(packet);
741 err: 741 err:
742 return err; 742 return err;
743 } 743 }
744 744
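Editor's note (not part of the commit): the protocol guess in ovs_packet_cmd_execute() above relies on the Ethernet convention that a type/length field of 1536 (0x0600) or more is an EtherType, while smaller values are 802.3 length fields. A hedged, self-contained sketch of that rule; guess_protocol() is a hypothetical helper written for illustration, assuming kernel context where ntohs()/htons() are available:

#include <linux/if_ether.h>

/* Mirrors what eth_type_trans() decides when a receiving net_device is
 * available: >= 0x0600 means Ethernet II (the field is an EtherType),
 * anything smaller is an 802.3 frame length, so fall back to ETH_P_802_2. */
static __be16 guess_protocol(const struct ethhdr *eth)
{
	if (ntohs(eth->h_proto) >= 1536)
		return eth->h_proto;
	return htons(ETH_P_802_2);
}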
745 static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { 745 static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
746 [OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC }, 746 [OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
747 [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED }, 747 [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
748 [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED }, 748 [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
749 }; 749 };
750 750
751 static struct genl_ops dp_packet_genl_ops[] = { 751 static struct genl_ops dp_packet_genl_ops[] = {
752 { .cmd = OVS_PACKET_CMD_EXECUTE, 752 { .cmd = OVS_PACKET_CMD_EXECUTE,
753 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 753 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
754 .policy = packet_policy, 754 .policy = packet_policy,
755 .doit = ovs_packet_cmd_execute 755 .doit = ovs_packet_cmd_execute
756 } 756 }
757 }; 757 };
758 758
759 static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) 759 static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
760 { 760 {
761 int i; 761 int i;
762 struct flow_table *table = genl_dereference(dp->table); 762 struct flow_table *table = genl_dereference(dp->table);
763 763
764 stats->n_flows = ovs_flow_tbl_count(table); 764 stats->n_flows = ovs_flow_tbl_count(table);
765 765
766 stats->n_hit = stats->n_missed = stats->n_lost = 0; 766 stats->n_hit = stats->n_missed = stats->n_lost = 0;
767 for_each_possible_cpu(i) { 767 for_each_possible_cpu(i) {
768 const struct dp_stats_percpu *percpu_stats; 768 const struct dp_stats_percpu *percpu_stats;
769 struct dp_stats_percpu local_stats; 769 struct dp_stats_percpu local_stats;
770 unsigned int start; 770 unsigned int start;
771 771
772 percpu_stats = per_cpu_ptr(dp->stats_percpu, i); 772 percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
773 773
774 do { 774 do {
775 start = u64_stats_fetch_begin_bh(&percpu_stats->sync); 775 start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
776 local_stats = *percpu_stats; 776 local_stats = *percpu_stats;
777 } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start)); 777 } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
778 778
779 stats->n_hit += local_stats.n_hit; 779 stats->n_hit += local_stats.n_hit;
780 stats->n_missed += local_stats.n_missed; 780 stats->n_missed += local_stats.n_missed;
781 stats->n_lost += local_stats.n_lost; 781 stats->n_lost += local_stats.n_lost;
782 } 782 }
783 } 783 }
784 784
785 static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { 785 static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
786 [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED }, 786 [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
787 [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, 787 [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
788 [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, 788 [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
789 }; 789 };
790 790
791 static struct genl_family dp_flow_genl_family = { 791 static struct genl_family dp_flow_genl_family = {
792 .id = GENL_ID_GENERATE, 792 .id = GENL_ID_GENERATE,
793 .hdrsize = sizeof(struct ovs_header), 793 .hdrsize = sizeof(struct ovs_header),
794 .name = OVS_FLOW_FAMILY, 794 .name = OVS_FLOW_FAMILY,
795 .version = OVS_FLOW_VERSION, 795 .version = OVS_FLOW_VERSION,
796 .maxattr = OVS_FLOW_ATTR_MAX, 796 .maxattr = OVS_FLOW_ATTR_MAX,
797 .netnsok = true 797 .netnsok = true
798 }; 798 };
799 799
800 static struct genl_multicast_group ovs_dp_flow_multicast_group = { 800 static struct genl_multicast_group ovs_dp_flow_multicast_group = {
801 .name = OVS_FLOW_MCGROUP 801 .name = OVS_FLOW_MCGROUP
802 }; 802 };
803 803
804 /* Called with genl_lock. */ 804 /* Called with genl_lock. */
805 static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, 805 static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
806 struct sk_buff *skb, u32 portid, 806 struct sk_buff *skb, u32 portid,
807 u32 seq, u32 flags, u8 cmd) 807 u32 seq, u32 flags, u8 cmd)
808 { 808 {
809 const int skb_orig_len = skb->len; 809 const int skb_orig_len = skb->len;
810 const struct sw_flow_actions *sf_acts; 810 const struct sw_flow_actions *sf_acts;
811 struct ovs_flow_stats stats; 811 struct ovs_flow_stats stats;
812 struct ovs_header *ovs_header; 812 struct ovs_header *ovs_header;
813 struct nlattr *nla; 813 struct nlattr *nla;
814 unsigned long used; 814 unsigned long used;
815 u8 tcp_flags; 815 u8 tcp_flags;
816 int err; 816 int err;
817 817
818 sf_acts = rcu_dereference_protected(flow->sf_acts, 818 sf_acts = rcu_dereference_protected(flow->sf_acts,
819 lockdep_genl_is_held()); 819 lockdep_genl_is_held());
820 820
821 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd); 821 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
822 if (!ovs_header) 822 if (!ovs_header)
823 return -EMSGSIZE; 823 return -EMSGSIZE;
824 824
825 ovs_header->dp_ifindex = get_dpifindex(dp); 825 ovs_header->dp_ifindex = get_dpifindex(dp);
826 826
827 nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY); 827 nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
828 if (!nla) 828 if (!nla)
829 goto nla_put_failure; 829 goto nla_put_failure;
830 err = ovs_flow_to_nlattrs(&flow->key, skb); 830 err = ovs_flow_to_nlattrs(&flow->key, skb);
831 if (err) 831 if (err)
832 goto error; 832 goto error;
833 nla_nest_end(skb, nla); 833 nla_nest_end(skb, nla);
834 834
835 spin_lock_bh(&flow->lock); 835 spin_lock_bh(&flow->lock);
836 used = flow->used; 836 used = flow->used;
837 stats.n_packets = flow->packet_count; 837 stats.n_packets = flow->packet_count;
838 stats.n_bytes = flow->byte_count; 838 stats.n_bytes = flow->byte_count;
839 tcp_flags = flow->tcp_flags; 839 tcp_flags = flow->tcp_flags;
840 spin_unlock_bh(&flow->lock); 840 spin_unlock_bh(&flow->lock);
841 841
842 if (used && 842 if (used &&
843 nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used))) 843 nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
844 goto nla_put_failure; 844 goto nla_put_failure;
845 845
846 if (stats.n_packets && 846 if (stats.n_packets &&
847 nla_put(skb, OVS_FLOW_ATTR_STATS, 847 nla_put(skb, OVS_FLOW_ATTR_STATS,
848 sizeof(struct ovs_flow_stats), &stats)) 848 sizeof(struct ovs_flow_stats), &stats))
849 goto nla_put_failure; 849 goto nla_put_failure;
850 850
851 if (tcp_flags && 851 if (tcp_flags &&
852 nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags)) 852 nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags))
853 goto nla_put_failure; 853 goto nla_put_failure;
854 854
855 /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if 855 /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
856 * this is the first flow to be dumped into 'skb'. This is unusual for 856 * this is the first flow to be dumped into 'skb'. This is unusual for
857 * Netlink but individual action lists can be longer than 857 * Netlink but individual action lists can be longer than
858 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this. 858 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
859 * The userspace caller can always fetch the actions separately if it 859 * The userspace caller can always fetch the actions separately if it
860 * really wants them. (Most userspace callers in fact don't care.) 860 * really wants them. (Most userspace callers in fact don't care.)
861 * 861 *
862 * This can only fail for dump operations because the skb is always 862 * This can only fail for dump operations because the skb is always
863 * properly sized for single flows. 863 * properly sized for single flows.
864 */ 864 */
865 err = nla_put(skb, OVS_FLOW_ATTR_ACTIONS, sf_acts->actions_len, 865 err = nla_put(skb, OVS_FLOW_ATTR_ACTIONS, sf_acts->actions_len,
866 sf_acts->actions); 866 sf_acts->actions);
867 if (err < 0 && skb_orig_len) 867 if (err < 0 && skb_orig_len)
868 goto error; 868 goto error;
869 869
870 return genlmsg_end(skb, ovs_header); 870 return genlmsg_end(skb, ovs_header);
871 871
872 nla_put_failure: 872 nla_put_failure:
873 err = -EMSGSIZE; 873 err = -EMSGSIZE;
874 error: 874 error:
875 genlmsg_cancel(skb, ovs_header); 875 genlmsg_cancel(skb, ovs_header);
876 return err; 876 return err;
877 } 877 }
878 878
879 static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow) 879 static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
880 { 880 {
881 const struct sw_flow_actions *sf_acts; 881 const struct sw_flow_actions *sf_acts;
882 int len; 882 int len;
883 883
884 sf_acts = rcu_dereference_protected(flow->sf_acts, 884 sf_acts = rcu_dereference_protected(flow->sf_acts,
885 lockdep_genl_is_held()); 885 lockdep_genl_is_held());
886 886
887 /* OVS_FLOW_ATTR_KEY */ 887 /* OVS_FLOW_ATTR_KEY */
888 len = nla_total_size(FLOW_BUFSIZE); 888 len = nla_total_size(FLOW_BUFSIZE);
889 /* OVS_FLOW_ATTR_ACTIONS */ 889 /* OVS_FLOW_ATTR_ACTIONS */
890 len += nla_total_size(sf_acts->actions_len); 890 len += nla_total_size(sf_acts->actions_len);
891 /* OVS_FLOW_ATTR_STATS */ 891 /* OVS_FLOW_ATTR_STATS */
892 len += nla_total_size(sizeof(struct ovs_flow_stats)); 892 len += nla_total_size(sizeof(struct ovs_flow_stats));
893 /* OVS_FLOW_ATTR_TCP_FLAGS */ 893 /* OVS_FLOW_ATTR_TCP_FLAGS */
894 len += nla_total_size(1); 894 len += nla_total_size(1);
895 /* OVS_FLOW_ATTR_USED */ 895 /* OVS_FLOW_ATTR_USED */
896 len += nla_total_size(8); 896 len += nla_total_size(8);
897 897
898 len += NLMSG_ALIGN(sizeof(struct ovs_header)); 898 len += NLMSG_ALIGN(sizeof(struct ovs_header));
899 899
900 return genlmsg_new(len, GFP_KERNEL); 900 return genlmsg_new(len, GFP_KERNEL);
901 } 901 }
902 902
903 static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, 903 static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
904 struct datapath *dp, 904 struct datapath *dp,
905 u32 portid, u32 seq, u8 cmd) 905 u32 portid, u32 seq, u8 cmd)
906 { 906 {
907 struct sk_buff *skb; 907 struct sk_buff *skb;
908 int retval; 908 int retval;
909 909
910 skb = ovs_flow_cmd_alloc_info(flow); 910 skb = ovs_flow_cmd_alloc_info(flow);
911 if (!skb) 911 if (!skb)
912 return ERR_PTR(-ENOMEM); 912 return ERR_PTR(-ENOMEM);
913 913
914 retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd); 914 retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd);
915 BUG_ON(retval < 0); 915 BUG_ON(retval < 0);
916 return skb; 916 return skb;
917 } 917 }
918 918
919 static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) 919 static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
920 { 920 {
921 struct nlattr **a = info->attrs; 921 struct nlattr **a = info->attrs;
922 struct ovs_header *ovs_header = info->userhdr; 922 struct ovs_header *ovs_header = info->userhdr;
923 struct sw_flow_key key; 923 struct sw_flow_key key;
924 struct sw_flow *flow; 924 struct sw_flow *flow;
925 struct sk_buff *reply; 925 struct sk_buff *reply;
926 struct datapath *dp; 926 struct datapath *dp;
927 struct flow_table *table; 927 struct flow_table *table;
928 int error; 928 int error;
929 int key_len; 929 int key_len;
930 930
931 /* Extract key. */ 931 /* Extract key. */
932 error = -EINVAL; 932 error = -EINVAL;
933 if (!a[OVS_FLOW_ATTR_KEY]) 933 if (!a[OVS_FLOW_ATTR_KEY])
934 goto error; 934 goto error;
935 error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); 935 error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
936 if (error) 936 if (error)
937 goto error; 937 goto error;
938 938
939 /* Validate actions. */ 939 /* Validate actions. */
940 if (a[OVS_FLOW_ATTR_ACTIONS]) { 940 if (a[OVS_FLOW_ATTR_ACTIONS]) {
941 error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0); 941 error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0);
942 if (error) 942 if (error)
943 goto error; 943 goto error;
944 } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) { 944 } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
945 error = -EINVAL; 945 error = -EINVAL;
946 goto error; 946 goto error;
947 } 947 }
948 948
949 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 949 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
950 error = -ENODEV; 950 error = -ENODEV;
951 if (!dp) 951 if (!dp)
952 goto error; 952 goto error;
953 953
954 table = genl_dereference(dp->table); 954 table = genl_dereference(dp->table);
955 flow = ovs_flow_tbl_lookup(table, &key, key_len); 955 flow = ovs_flow_tbl_lookup(table, &key, key_len);
956 if (!flow) { 956 if (!flow) {
957 struct sw_flow_actions *acts; 957 struct sw_flow_actions *acts;
958 958
959 /* Bail out if we're not allowed to create a new flow. */ 959 /* Bail out if we're not allowed to create a new flow. */
960 error = -ENOENT; 960 error = -ENOENT;
961 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) 961 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
962 goto error; 962 goto error;
963 963
964 /* Expand table, if necessary, to make room. */ 964 /* Expand table, if necessary, to make room. */
965 if (ovs_flow_tbl_need_to_expand(table)) { 965 if (ovs_flow_tbl_need_to_expand(table)) {
966 struct flow_table *new_table; 966 struct flow_table *new_table;
967 967
968 new_table = ovs_flow_tbl_expand(table); 968 new_table = ovs_flow_tbl_expand(table);
969 if (!IS_ERR(new_table)) { 969 if (!IS_ERR(new_table)) {
970 rcu_assign_pointer(dp->table, new_table); 970 rcu_assign_pointer(dp->table, new_table);
971 ovs_flow_tbl_deferred_destroy(table); 971 ovs_flow_tbl_deferred_destroy(table);
972 table = genl_dereference(dp->table); 972 table = genl_dereference(dp->table);
973 } 973 }
974 } 974 }
975 975
976 /* Allocate flow. */ 976 /* Allocate flow. */
977 flow = ovs_flow_alloc(); 977 flow = ovs_flow_alloc();
978 if (IS_ERR(flow)) { 978 if (IS_ERR(flow)) {
979 error = PTR_ERR(flow); 979 error = PTR_ERR(flow);
980 goto error; 980 goto error;
981 } 981 }
982 flow->key = key; 982 flow->key = key;
983 clear_stats(flow); 983 clear_stats(flow);
984 984
985 /* Obtain actions. */ 985 /* Obtain actions. */
986 acts = ovs_flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]); 986 acts = ovs_flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]);
987 error = PTR_ERR(acts); 987 error = PTR_ERR(acts);
988 if (IS_ERR(acts)) 988 if (IS_ERR(acts))
989 goto error_free_flow; 989 goto error_free_flow;
990 rcu_assign_pointer(flow->sf_acts, acts); 990 rcu_assign_pointer(flow->sf_acts, acts);
991 991
992 /* Put flow in bucket. */ 992 /* Put flow in bucket. */
993 flow->hash = ovs_flow_hash(&key, key_len); 993 flow->hash = ovs_flow_hash(&key, key_len);
994 ovs_flow_tbl_insert(table, flow); 994 ovs_flow_tbl_insert(table, flow);
995 995
996 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, 996 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
997 info->snd_seq, 997 info->snd_seq,
998 OVS_FLOW_CMD_NEW); 998 OVS_FLOW_CMD_NEW);
999 } else { 999 } else {
1000 /* We found a matching flow. */ 1000 /* We found a matching flow. */
1001 struct sw_flow_actions *old_acts; 1001 struct sw_flow_actions *old_acts;
1002 struct nlattr *acts_attrs; 1002 struct nlattr *acts_attrs;
1003 1003
1004 /* Bail out if we're not allowed to modify an existing flow. 1004 /* Bail out if we're not allowed to modify an existing flow.
1005 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL 1005 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
1006 * because Generic Netlink treats the latter as a dump 1006 * because Generic Netlink treats the latter as a dump
1007 * request. We also accept NLM_F_EXCL in case that bug ever 1007 * request. We also accept NLM_F_EXCL in case that bug ever
1008 * gets fixed. 1008 * gets fixed.
1009 */ 1009 */
1010 error = -EEXIST; 1010 error = -EEXIST;
1011 if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW && 1011 if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
1012 info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) 1012 info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
1013 goto error; 1013 goto error;
1014 1014
1015 /* Update actions. */ 1015 /* Update actions. */
1016 old_acts = rcu_dereference_protected(flow->sf_acts, 1016 old_acts = rcu_dereference_protected(flow->sf_acts,
1017 lockdep_genl_is_held()); 1017 lockdep_genl_is_held());
1018 acts_attrs = a[OVS_FLOW_ATTR_ACTIONS]; 1018 acts_attrs = a[OVS_FLOW_ATTR_ACTIONS];
1019 if (acts_attrs && 1019 if (acts_attrs &&
1020 (old_acts->actions_len != nla_len(acts_attrs) || 1020 (old_acts->actions_len != nla_len(acts_attrs) ||
1021 memcmp(old_acts->actions, nla_data(acts_attrs), 1021 memcmp(old_acts->actions, nla_data(acts_attrs),
1022 old_acts->actions_len))) { 1022 old_acts->actions_len))) {
1023 struct sw_flow_actions *new_acts; 1023 struct sw_flow_actions *new_acts;
1024 1024
1025 new_acts = ovs_flow_actions_alloc(acts_attrs); 1025 new_acts = ovs_flow_actions_alloc(acts_attrs);
1026 error = PTR_ERR(new_acts); 1026 error = PTR_ERR(new_acts);
1027 if (IS_ERR(new_acts)) 1027 if (IS_ERR(new_acts))
1028 goto error; 1028 goto error;
1029 1029
1030 rcu_assign_pointer(flow->sf_acts, new_acts); 1030 rcu_assign_pointer(flow->sf_acts, new_acts);
1031 ovs_flow_deferred_free_acts(old_acts); 1031 ovs_flow_deferred_free_acts(old_acts);
1032 } 1032 }
1033 1033
1034 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, 1034 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
1035 info->snd_seq, OVS_FLOW_CMD_NEW); 1035 info->snd_seq, OVS_FLOW_CMD_NEW);
1036 1036
1037 /* Clear stats. */ 1037 /* Clear stats. */
1038 if (a[OVS_FLOW_ATTR_CLEAR]) { 1038 if (a[OVS_FLOW_ATTR_CLEAR]) {
1039 spin_lock_bh(&flow->lock); 1039 spin_lock_bh(&flow->lock);
1040 clear_stats(flow); 1040 clear_stats(flow);
1041 spin_unlock_bh(&flow->lock); 1041 spin_unlock_bh(&flow->lock);
1042 } 1042 }
1043 } 1043 }
1044 1044
1045 if (!IS_ERR(reply)) 1045 if (!IS_ERR(reply))
1046 genl_notify(reply, genl_info_net(info), info->snd_portid, 1046 genl_notify(reply, genl_info_net(info), info->snd_portid,
1047 ovs_dp_flow_multicast_group.id, info->nlhdr, 1047 ovs_dp_flow_multicast_group.id, info->nlhdr,
1048 GFP_KERNEL); 1048 GFP_KERNEL);
1049 else 1049 else
1050 netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 1050 netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
1051 ovs_dp_flow_multicast_group.id, PTR_ERR(reply)); 1051 ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
1052 return 0; 1052 return 0;
1053 1053
1054 error_free_flow: 1054 error_free_flow:
1055 ovs_flow_free(flow); 1055 ovs_flow_free(flow);
1056 error: 1056 error:
1057 return error; 1057 return error;
1058 } 1058 }
1059 1059
1060 static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) 1060 static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1061 { 1061 {
1062 struct nlattr **a = info->attrs; 1062 struct nlattr **a = info->attrs;
1063 struct ovs_header *ovs_header = info->userhdr; 1063 struct ovs_header *ovs_header = info->userhdr;
1064 struct sw_flow_key key; 1064 struct sw_flow_key key;
1065 struct sk_buff *reply; 1065 struct sk_buff *reply;
1066 struct sw_flow *flow; 1066 struct sw_flow *flow;
1067 struct datapath *dp; 1067 struct datapath *dp;
1068 struct flow_table *table; 1068 struct flow_table *table;
1069 int err; 1069 int err;
1070 int key_len; 1070 int key_len;
1071 1071
1072 if (!a[OVS_FLOW_ATTR_KEY]) 1072 if (!a[OVS_FLOW_ATTR_KEY])
1073 return -EINVAL; 1073 return -EINVAL;
1074 err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); 1074 err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
1075 if (err) 1075 if (err)
1076 return err; 1076 return err;
1077 1077
1078 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1078 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1079 if (!dp) 1079 if (!dp)
1080 return -ENODEV; 1080 return -ENODEV;
1081 1081
1082 table = genl_dereference(dp->table); 1082 table = genl_dereference(dp->table);
1083 flow = ovs_flow_tbl_lookup(table, &key, key_len); 1083 flow = ovs_flow_tbl_lookup(table, &key, key_len);
1084 if (!flow) 1084 if (!flow)
1085 return -ENOENT; 1085 return -ENOENT;
1086 1086
1087 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, 1087 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
1088 info->snd_seq, OVS_FLOW_CMD_NEW); 1088 info->snd_seq, OVS_FLOW_CMD_NEW);
1089 if (IS_ERR(reply)) 1089 if (IS_ERR(reply))
1090 return PTR_ERR(reply); 1090 return PTR_ERR(reply);
1091 1091
1092 return genlmsg_reply(reply, info); 1092 return genlmsg_reply(reply, info);
1093 } 1093 }
1094 1094
1095 static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) 1095 static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1096 { 1096 {
1097 struct nlattr **a = info->attrs; 1097 struct nlattr **a = info->attrs;
1098 struct ovs_header *ovs_header = info->userhdr; 1098 struct ovs_header *ovs_header = info->userhdr;
1099 struct sw_flow_key key; 1099 struct sw_flow_key key;
1100 struct sk_buff *reply; 1100 struct sk_buff *reply;
1101 struct sw_flow *flow; 1101 struct sw_flow *flow;
1102 struct datapath *dp; 1102 struct datapath *dp;
1103 struct flow_table *table; 1103 struct flow_table *table;
1104 int err; 1104 int err;
1105 int key_len; 1105 int key_len;
1106 1106
1107 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1107 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1108 if (!dp) 1108 if (!dp)
1109 return -ENODEV; 1109 return -ENODEV;
1110 1110
1111 if (!a[OVS_FLOW_ATTR_KEY]) 1111 if (!a[OVS_FLOW_ATTR_KEY])
1112 return flush_flows(dp); 1112 return flush_flows(dp);
1113 1113
1114 err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); 1114 err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
1115 if (err) 1115 if (err)
1116 return err; 1116 return err;
1117 1117
1118 table = genl_dereference(dp->table); 1118 table = genl_dereference(dp->table);
1119 flow = ovs_flow_tbl_lookup(table, &key, key_len); 1119 flow = ovs_flow_tbl_lookup(table, &key, key_len);
1120 if (!flow) 1120 if (!flow)
1121 return -ENOENT; 1121 return -ENOENT;
1122 1122
1123 reply = ovs_flow_cmd_alloc_info(flow); 1123 reply = ovs_flow_cmd_alloc_info(flow);
1124 if (!reply) 1124 if (!reply)
1125 return -ENOMEM; 1125 return -ENOMEM;
1126 1126
1127 ovs_flow_tbl_remove(table, flow); 1127 ovs_flow_tbl_remove(table, flow);
1128 1128
1129 err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, 1129 err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
1130 info->snd_seq, 0, OVS_FLOW_CMD_DEL); 1130 info->snd_seq, 0, OVS_FLOW_CMD_DEL);
1131 BUG_ON(err < 0); 1131 BUG_ON(err < 0);
1132 1132
1133 ovs_flow_deferred_free(flow); 1133 ovs_flow_deferred_free(flow);
1134 1134
1135 genl_notify(reply, genl_info_net(info), info->snd_portid, 1135 genl_notify(reply, genl_info_net(info), info->snd_portid,
1136 ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL); 1136 ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
1137 return 0; 1137 return 0;
1138 } 1138 }
1139 1139
1140 static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) 1140 static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1141 { 1141 {
1142 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); 1142 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1143 struct datapath *dp; 1143 struct datapath *dp;
1144 struct flow_table *table; 1144 struct flow_table *table;
1145 1145
1146 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1146 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1147 if (!dp) 1147 if (!dp)
1148 return -ENODEV; 1148 return -ENODEV;
1149 1149
1150 table = genl_dereference(dp->table); 1150 table = genl_dereference(dp->table);
1151 1151
1152 for (;;) { 1152 for (;;) {
1153 struct sw_flow *flow; 1153 struct sw_flow *flow;
1154 u32 bucket, obj; 1154 u32 bucket, obj;
1155 1155
1156 bucket = cb->args[0]; 1156 bucket = cb->args[0];
1157 obj = cb->args[1]; 1157 obj = cb->args[1];
1158 flow = ovs_flow_tbl_next(table, &bucket, &obj); 1158 flow = ovs_flow_tbl_next(table, &bucket, &obj);
1159 if (!flow) 1159 if (!flow)
1160 break; 1160 break;
1161 1161
1162 if (ovs_flow_cmd_fill_info(flow, dp, skb, 1162 if (ovs_flow_cmd_fill_info(flow, dp, skb,
1163 NETLINK_CB(cb->skb).portid, 1163 NETLINK_CB(cb->skb).portid,
1164 cb->nlh->nlmsg_seq, NLM_F_MULTI, 1164 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1165 OVS_FLOW_CMD_NEW) < 0) 1165 OVS_FLOW_CMD_NEW) < 0)
1166 break; 1166 break;
1167 1167
1168 cb->args[0] = bucket; 1168 cb->args[0] = bucket;
1169 cb->args[1] = obj; 1169 cb->args[1] = obj;
1170 } 1170 }
1171 return skb->len; 1171 return skb->len;
1172 } 1172 }
1173 1173
1174 static struct genl_ops dp_flow_genl_ops[] = { 1174 static struct genl_ops dp_flow_genl_ops[] = {
1175 { .cmd = OVS_FLOW_CMD_NEW, 1175 { .cmd = OVS_FLOW_CMD_NEW,
1176 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1176 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1177 .policy = flow_policy, 1177 .policy = flow_policy,
1178 .doit = ovs_flow_cmd_new_or_set 1178 .doit = ovs_flow_cmd_new_or_set
1179 }, 1179 },
1180 { .cmd = OVS_FLOW_CMD_DEL, 1180 { .cmd = OVS_FLOW_CMD_DEL,
1181 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1181 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1182 .policy = flow_policy, 1182 .policy = flow_policy,
1183 .doit = ovs_flow_cmd_del 1183 .doit = ovs_flow_cmd_del
1184 }, 1184 },
1185 { .cmd = OVS_FLOW_CMD_GET, 1185 { .cmd = OVS_FLOW_CMD_GET,
1186 .flags = 0, /* OK for unprivileged users. */ 1186 .flags = 0, /* OK for unprivileged users. */
1187 .policy = flow_policy, 1187 .policy = flow_policy,
1188 .doit = ovs_flow_cmd_get, 1188 .doit = ovs_flow_cmd_get,
1189 .dumpit = ovs_flow_cmd_dump 1189 .dumpit = ovs_flow_cmd_dump
1190 }, 1190 },
1191 { .cmd = OVS_FLOW_CMD_SET, 1191 { .cmd = OVS_FLOW_CMD_SET,
1192 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1192 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1193 .policy = flow_policy, 1193 .policy = flow_policy,
1194 .doit = ovs_flow_cmd_new_or_set, 1194 .doit = ovs_flow_cmd_new_or_set,
1195 }, 1195 },
1196 }; 1196 };
1197 1197
1198 static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { 1198 static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
1199 [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, 1199 [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1200 [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, 1200 [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
1201 }; 1201 };
1202 1202
1203 static struct genl_family dp_datapath_genl_family = { 1203 static struct genl_family dp_datapath_genl_family = {
1204 .id = GENL_ID_GENERATE, 1204 .id = GENL_ID_GENERATE,
1205 .hdrsize = sizeof(struct ovs_header), 1205 .hdrsize = sizeof(struct ovs_header),
1206 .name = OVS_DATAPATH_FAMILY, 1206 .name = OVS_DATAPATH_FAMILY,
1207 .version = OVS_DATAPATH_VERSION, 1207 .version = OVS_DATAPATH_VERSION,
1208 .maxattr = OVS_DP_ATTR_MAX, 1208 .maxattr = OVS_DP_ATTR_MAX,
1209 .netnsok = true 1209 .netnsok = true
1210 }; 1210 };
1211 1211
1212 static struct genl_multicast_group ovs_dp_datapath_multicast_group = { 1212 static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
1213 .name = OVS_DATAPATH_MCGROUP 1213 .name = OVS_DATAPATH_MCGROUP
1214 }; 1214 };
1215 1215
1216 static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, 1216 static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1217 u32 portid, u32 seq, u32 flags, u8 cmd) 1217 u32 portid, u32 seq, u32 flags, u8 cmd)
1218 { 1218 {
1219 struct ovs_header *ovs_header; 1219 struct ovs_header *ovs_header;
1220 struct ovs_dp_stats dp_stats; 1220 struct ovs_dp_stats dp_stats;
1221 int err; 1221 int err;
1222 1222
1223 ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family, 1223 ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
1224 flags, cmd); 1224 flags, cmd);
1225 if (!ovs_header) 1225 if (!ovs_header)
1226 goto error; 1226 goto error;
1227 1227
1228 ovs_header->dp_ifindex = get_dpifindex(dp); 1228 ovs_header->dp_ifindex = get_dpifindex(dp);
1229 1229
1230 rcu_read_lock(); 1230 rcu_read_lock();
1231 err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp)); 1231 err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
1232 rcu_read_unlock(); 1232 rcu_read_unlock();
1233 if (err) 1233 if (err)
1234 goto nla_put_failure; 1234 goto nla_put_failure;
1235 1235
1236 get_dp_stats(dp, &dp_stats); 1236 get_dp_stats(dp, &dp_stats);
1237 if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats)) 1237 if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats))
1238 goto nla_put_failure; 1238 goto nla_put_failure;
1239 1239
1240 return genlmsg_end(skb, ovs_header); 1240 return genlmsg_end(skb, ovs_header);
1241 1241
1242 nla_put_failure: 1242 nla_put_failure:
1243 genlmsg_cancel(skb, ovs_header); 1243 genlmsg_cancel(skb, ovs_header);
1244 error: 1244 error:
1245 return -EMSGSIZE; 1245 return -EMSGSIZE;
1246 } 1246 }
1247 1247
1248 static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid, 1248 static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
1249 u32 seq, u8 cmd) 1249 u32 seq, u8 cmd)
1250 { 1250 {
1251 struct sk_buff *skb; 1251 struct sk_buff *skb;
1252 int retval; 1252 int retval;
1253 1253
1254 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1254 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1255 if (!skb) 1255 if (!skb)
1256 return ERR_PTR(-ENOMEM); 1256 return ERR_PTR(-ENOMEM);
1257 1257
1258 retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd); 1258 retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd);
1259 if (retval < 0) { 1259 if (retval < 0) {
1260 kfree_skb(skb); 1260 kfree_skb(skb);
1261 return ERR_PTR(retval); 1261 return ERR_PTR(retval);
1262 } 1262 }
1263 return skb; 1263 return skb;
1264 } 1264 }
1265 1265
1266 /* Called with genl_mutex and optionally with RTNL lock also. */ 1266 /* Called with genl_mutex and optionally with RTNL lock also. */
1267 static struct datapath *lookup_datapath(struct net *net, 1267 static struct datapath *lookup_datapath(struct net *net,
1268 struct ovs_header *ovs_header, 1268 struct ovs_header *ovs_header,
1269 struct nlattr *a[OVS_DP_ATTR_MAX + 1]) 1269 struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1270 { 1270 {
1271 struct datapath *dp; 1271 struct datapath *dp;
1272 1272
1273 if (!a[OVS_DP_ATTR_NAME]) 1273 if (!a[OVS_DP_ATTR_NAME])
1274 dp = get_dp(net, ovs_header->dp_ifindex); 1274 dp = get_dp(net, ovs_header->dp_ifindex);
1275 else { 1275 else {
1276 struct vport *vport; 1276 struct vport *vport;
1277 1277
1278 rcu_read_lock(); 1278 rcu_read_lock();
1279 vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME])); 1279 vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
1280 dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL; 1280 dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
1281 rcu_read_unlock(); 1281 rcu_read_unlock();
1282 } 1282 }
1283 return dp ? dp : ERR_PTR(-ENODEV); 1283 return dp ? dp : ERR_PTR(-ENODEV);
1284 } 1284 }
1285 1285
1286 static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) 1286 static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1287 { 1287 {
1288 struct nlattr **a = info->attrs; 1288 struct nlattr **a = info->attrs;
1289 struct vport_parms parms; 1289 struct vport_parms parms;
1290 struct sk_buff *reply; 1290 struct sk_buff *reply;
1291 struct datapath *dp; 1291 struct datapath *dp;
1292 struct vport *vport; 1292 struct vport *vport;
1293 struct ovs_net *ovs_net; 1293 struct ovs_net *ovs_net;
1294 int err, i; 1294 int err, i;
1295 1295
1296 err = -EINVAL; 1296 err = -EINVAL;
1297 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) 1297 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1298 goto err; 1298 goto err;
1299 1299
1300 rtnl_lock(); 1300 rtnl_lock();
1301 1301
1302 err = -ENOMEM; 1302 err = -ENOMEM;
1303 dp = kzalloc(sizeof(*dp), GFP_KERNEL); 1303 dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1304 if (dp == NULL) 1304 if (dp == NULL)
1305 goto err_unlock_rtnl; 1305 goto err_unlock_rtnl;
1306 1306
1307 ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); 1307 ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
1308 1308
1309 /* Allocate table. */ 1309 /* Allocate table. */
1310 err = -ENOMEM; 1310 err = -ENOMEM;
1311 rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS)); 1311 rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS));
1312 if (!dp->table) 1312 if (!dp->table)
1313 goto err_free_dp; 1313 goto err_free_dp;
1314 1314
1315 dp->stats_percpu = alloc_percpu(struct dp_stats_percpu); 1315 dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
1316 if (!dp->stats_percpu) { 1316 if (!dp->stats_percpu) {
1317 err = -ENOMEM; 1317 err = -ENOMEM;
1318 goto err_destroy_table; 1318 goto err_destroy_table;
1319 } 1319 }
1320 1320
1321 dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head), 1321 dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
1322 GFP_KERNEL); 1322 GFP_KERNEL);
1323 if (!dp->ports) { 1323 if (!dp->ports) {
1324 err = -ENOMEM; 1324 err = -ENOMEM;
1325 goto err_destroy_percpu; 1325 goto err_destroy_percpu;
1326 } 1326 }
1327 1327
1328 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) 1328 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1329 INIT_HLIST_HEAD(&dp->ports[i]); 1329 INIT_HLIST_HEAD(&dp->ports[i]);
1330 1330
1331 /* Set up our datapath device. */ 1331 /* Set up our datapath device. */
1332 parms.name = nla_data(a[OVS_DP_ATTR_NAME]); 1332 parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1333 parms.type = OVS_VPORT_TYPE_INTERNAL; 1333 parms.type = OVS_VPORT_TYPE_INTERNAL;
1334 parms.options = NULL; 1334 parms.options = NULL;
1335 parms.dp = dp; 1335 parms.dp = dp;
1336 parms.port_no = OVSP_LOCAL; 1336 parms.port_no = OVSP_LOCAL;
1337 parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]); 1337 parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
1338 1338
1339 vport = new_vport(&parms); 1339 vport = new_vport(&parms);
1340 if (IS_ERR(vport)) { 1340 if (IS_ERR(vport)) {
1341 err = PTR_ERR(vport); 1341 err = PTR_ERR(vport);
1342 if (err == -EBUSY) 1342 if (err == -EBUSY)
1343 err = -EEXIST; 1343 err = -EEXIST;
1344 1344
1345 goto err_destroy_ports_array; 1345 goto err_destroy_ports_array;
1346 } 1346 }
1347 1347
1348 reply = ovs_dp_cmd_build_info(dp, info->snd_portid, 1348 reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1349 info->snd_seq, OVS_DP_CMD_NEW); 1349 info->snd_seq, OVS_DP_CMD_NEW);
1350 err = PTR_ERR(reply); 1350 err = PTR_ERR(reply);
1351 if (IS_ERR(reply)) 1351 if (IS_ERR(reply))
1352 goto err_destroy_local_port; 1352 goto err_destroy_local_port;
1353 1353
1354 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); 1354 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1355 list_add_tail(&dp->list_node, &ovs_net->dps); 1355 list_add_tail(&dp->list_node, &ovs_net->dps);
1356 rtnl_unlock(); 1356 rtnl_unlock();
1357 1357
1358 genl_notify(reply, genl_info_net(info), info->snd_portid, 1358 genl_notify(reply, genl_info_net(info), info->snd_portid,
1359 ovs_dp_datapath_multicast_group.id, info->nlhdr, 1359 ovs_dp_datapath_multicast_group.id, info->nlhdr,
1360 GFP_KERNEL); 1360 GFP_KERNEL);
1361 return 0; 1361 return 0;
1362 1362
1363 err_destroy_local_port: 1363 err_destroy_local_port:
1364 ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL)); 1364 ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));
1365 err_destroy_ports_array: 1365 err_destroy_ports_array:
1366 kfree(dp->ports); 1366 kfree(dp->ports);
1367 err_destroy_percpu: 1367 err_destroy_percpu:
1368 free_percpu(dp->stats_percpu); 1368 free_percpu(dp->stats_percpu);
1369 err_destroy_table: 1369 err_destroy_table:
1370 ovs_flow_tbl_destroy(genl_dereference(dp->table)); 1370 ovs_flow_tbl_destroy(genl_dereference(dp->table));
1371 err_free_dp: 1371 err_free_dp:
1372 release_net(ovs_dp_get_net(dp)); 1372 release_net(ovs_dp_get_net(dp));
1373 kfree(dp); 1373 kfree(dp);
1374 err_unlock_rtnl: 1374 err_unlock_rtnl:
1375 rtnl_unlock(); 1375 rtnl_unlock();
1376 err: 1376 err:
1377 return err; 1377 return err;
1378 } 1378 }
1379 1379
1380 /* Called with genl_mutex. */ 1380 /* Called with genl_mutex. */
1381 static void __dp_destroy(struct datapath *dp) 1381 static void __dp_destroy(struct datapath *dp)
1382 { 1382 {
1383 int i; 1383 int i;
1384 1384
1385 rtnl_lock(); 1385 rtnl_lock();
1386 1386
1387 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { 1387 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1388 struct vport *vport; 1388 struct vport *vport;
1389 struct hlist_node *node, *n; 1389 struct hlist_node *node, *n;
1390 1390
1391 hlist_for_each_entry_safe(vport, node, n, &dp->ports[i], dp_hash_node) 1391 hlist_for_each_entry_safe(vport, node, n, &dp->ports[i], dp_hash_node)
1392 if (vport->port_no != OVSP_LOCAL) 1392 if (vport->port_no != OVSP_LOCAL)
1393 ovs_dp_detach_port(vport); 1393 ovs_dp_detach_port(vport);
1394 } 1394 }
1395 1395
1396 list_del(&dp->list_node); 1396 list_del(&dp->list_node);
1397 ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL)); 1397 ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));
1398 1398
1399 /* rtnl_unlock() will wait until all the references to devices that 1399 /* rtnl_unlock() will wait until all the references to devices that
1400 * are pending unregistration have been dropped. We do it here to 1400 * are pending unregistration have been dropped. We do it here to
1401 * ensure that any internal devices (which contain DP pointers) are 1401 * ensure that any internal devices (which contain DP pointers) are
1402 * fully destroyed before freeing the datapath. 1402 * fully destroyed before freeing the datapath.
1403 */ 1403 */
1404 rtnl_unlock(); 1404 rtnl_unlock();
1405 1405
1406 call_rcu(&dp->rcu, destroy_dp_rcu); 1406 call_rcu(&dp->rcu, destroy_dp_rcu);
1407 } 1407 }
1408 1408
1409 static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) 1409 static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1410 { 1410 {
1411 struct sk_buff *reply; 1411 struct sk_buff *reply;
1412 struct datapath *dp; 1412 struct datapath *dp;
1413 int err; 1413 int err;
1414 1414
1415 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1415 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1416 err = PTR_ERR(dp); 1416 err = PTR_ERR(dp);
1417 if (IS_ERR(dp)) 1417 if (IS_ERR(dp))
1418 return err; 1418 return err;
1419 1419
1420 reply = ovs_dp_cmd_build_info(dp, info->snd_portid, 1420 reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1421 info->snd_seq, OVS_DP_CMD_DEL); 1421 info->snd_seq, OVS_DP_CMD_DEL);
1422 err = PTR_ERR(reply); 1422 err = PTR_ERR(reply);
1423 if (IS_ERR(reply)) 1423 if (IS_ERR(reply))
1424 return err; 1424 return err;
1425 1425
1426 __dp_destroy(dp); 1426 __dp_destroy(dp);
1427 1427
1428 genl_notify(reply, genl_info_net(info), info->snd_portid, 1428 genl_notify(reply, genl_info_net(info), info->snd_portid,
1429 ovs_dp_datapath_multicast_group.id, info->nlhdr, 1429 ovs_dp_datapath_multicast_group.id, info->nlhdr,
1430 GFP_KERNEL); 1430 GFP_KERNEL);
1431 1431
1432 return 0; 1432 return 0;
1433 } 1433 }
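Note the ordering in ovs_dp_cmd_del(): the notification reply is built before __dp_destroy() runs, so every failure point comes before the destructive step and the function cannot fail once the datapath is gone. A short standalone sketch of the same "allocate the reply before the point of no return" shape, with invented names (struct record, build_reply, delete_record):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct record { char name[32]; };

static char *build_reply(const struct record *r)
{
	char *reply = malloc(64);

	if (!reply)
		return NULL;
	snprintf(reply, 64, "deleted %s", r->name);
	return reply;
}

static int delete_record(struct record **slot, char **reply_out)
{
	char *reply = build_reply(*slot);

	if (!reply)
		return -ENOMEM;	/* nothing destroyed yet, caller may retry */

	free(*slot);		/* point of no return */
	*slot = NULL;

	*reply_out = reply;	/* guaranteed to succeed after the commit */
	return 0;
}

int main(void)
{
	struct record *r = calloc(1, sizeof(*r));
	char *reply = NULL;

	if (!r)
		return 1;
	snprintf(r->name, sizeof(r->name), "dp0");
	if (delete_record(&r, &reply) == 0) {
		puts(reply);
		free(reply);
	}
	return 0;
}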
1434 1434
1435 static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) 1435 static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1436 { 1436 {
1437 struct sk_buff *reply; 1437 struct sk_buff *reply;
1438 struct datapath *dp; 1438 struct datapath *dp;
1439 int err; 1439 int err;
1440 1440
1441 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1441 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1442 if (IS_ERR(dp)) 1442 if (IS_ERR(dp))
1443 return PTR_ERR(dp); 1443 return PTR_ERR(dp);
1444 1444
1445 reply = ovs_dp_cmd_build_info(dp, info->snd_portid, 1445 reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1446 info->snd_seq, OVS_DP_CMD_NEW); 1446 info->snd_seq, OVS_DP_CMD_NEW);
1447 if (IS_ERR(reply)) { 1447 if (IS_ERR(reply)) {
1448 err = PTR_ERR(reply); 1448 err = PTR_ERR(reply);
1449 netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 1449 netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
1450 ovs_dp_datapath_multicast_group.id, err); 1450 ovs_dp_datapath_multicast_group.id, err);
1451 return 0; 1451 return 0;
1452 } 1452 }
1453 1453
1454 genl_notify(reply, genl_info_net(info), info->snd_portid, 1454 genl_notify(reply, genl_info_net(info), info->snd_portid,
1455 ovs_dp_datapath_multicast_group.id, info->nlhdr, 1455 ovs_dp_datapath_multicast_group.id, info->nlhdr,
1456 GFP_KERNEL); 1456 GFP_KERNEL);
1457 1457
1458 return 0; 1458 return 0;
1459 } 1459 }
1460 1460
1461 static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) 1461 static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1462 { 1462 {
1463 struct sk_buff *reply; 1463 struct sk_buff *reply;
1464 struct datapath *dp; 1464 struct datapath *dp;
1465 1465
1466 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1466 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1467 if (IS_ERR(dp)) 1467 if (IS_ERR(dp))
1468 return PTR_ERR(dp); 1468 return PTR_ERR(dp);
1469 1469
1470 reply = ovs_dp_cmd_build_info(dp, info->snd_portid, 1470 reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1471 info->snd_seq, OVS_DP_CMD_NEW); 1471 info->snd_seq, OVS_DP_CMD_NEW);
1472 if (IS_ERR(reply)) 1472 if (IS_ERR(reply))
1473 return PTR_ERR(reply); 1473 return PTR_ERR(reply);
1474 1474
1475 return genlmsg_reply(reply, info); 1475 return genlmsg_reply(reply, info);
1476 } 1476 }
1477 1477
1478 static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) 1478 static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1479 { 1479 {
1480 struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id); 1480 struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
1481 struct datapath *dp; 1481 struct datapath *dp;
1482 int skip = cb->args[0]; 1482 int skip = cb->args[0];
1483 int i = 0; 1483 int i = 0;
1484 1484
1485 list_for_each_entry(dp, &ovs_net->dps, list_node) { 1485 list_for_each_entry(dp, &ovs_net->dps, list_node) {
1486 if (i >= skip && 1486 if (i >= skip &&
1487 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid, 1487 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
1488 cb->nlh->nlmsg_seq, NLM_F_MULTI, 1488 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1489 OVS_DP_CMD_NEW) < 0) 1489 OVS_DP_CMD_NEW) < 0)
1490 break; 1490 break;
1491 i++; 1491 i++;
1492 } 1492 }
1493 1493
1494 cb->args[0] = i; 1494 cb->args[0] = i;
1495 1495
1496 return skb->len; 1496 return skb->len;
1497 } 1497 }
1498 1498
1499 static struct genl_ops dp_datapath_genl_ops[] = { 1499 static struct genl_ops dp_datapath_genl_ops[] = {
1500 { .cmd = OVS_DP_CMD_NEW, 1500 { .cmd = OVS_DP_CMD_NEW,
1501 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1501 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1502 .policy = datapath_policy, 1502 .policy = datapath_policy,
1503 .doit = ovs_dp_cmd_new 1503 .doit = ovs_dp_cmd_new
1504 }, 1504 },
1505 { .cmd = OVS_DP_CMD_DEL, 1505 { .cmd = OVS_DP_CMD_DEL,
1506 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1506 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1507 .policy = datapath_policy, 1507 .policy = datapath_policy,
1508 .doit = ovs_dp_cmd_del 1508 .doit = ovs_dp_cmd_del
1509 }, 1509 },
1510 { .cmd = OVS_DP_CMD_GET, 1510 { .cmd = OVS_DP_CMD_GET,
1511 .flags = 0, /* OK for unprivileged users. */ 1511 .flags = 0, /* OK for unprivileged users. */
1512 .policy = datapath_policy, 1512 .policy = datapath_policy,
1513 .doit = ovs_dp_cmd_get, 1513 .doit = ovs_dp_cmd_get,
1514 .dumpit = ovs_dp_cmd_dump 1514 .dumpit = ovs_dp_cmd_dump
1515 }, 1515 },
1516 { .cmd = OVS_DP_CMD_SET, 1516 { .cmd = OVS_DP_CMD_SET,
1517 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1517 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1518 .policy = datapath_policy, 1518 .policy = datapath_policy,
1519 .doit = ovs_dp_cmd_set, 1519 .doit = ovs_dp_cmd_set,
1520 }, 1520 },
1521 }; 1521 };
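The op table above pairs each datapath command with its attribute policy and handler: the mutating commands carry GENL_ADMIN_PERM (CAP_NET_ADMIN required), while OVS_DP_CMD_GET is open to unprivileged callers and additionally provides a dumpit for enumeration. A minimal standalone sketch of flag-gated dispatch over such a table follows; the command names, OP_ADMIN flag and handlers are invented for illustration.

#include <errno.h>
#include <stdio.h>

enum { CMD_NEW, CMD_DEL, CMD_GET, CMD_SET };
#define OP_ADMIN 0x1

struct op {
	int cmd;
	unsigned int flags;
	int (*doit)(void);
};

static int do_new(void) { puts("new"); return 0; }
static int do_del(void) { puts("del"); return 0; }
static int do_get(void) { puts("get"); return 0; }
static int do_set(void) { puts("set"); return 0; }

static const struct op ops[] = {
	{ CMD_NEW, OP_ADMIN, do_new },
	{ CMD_DEL, OP_ADMIN, do_del },
	{ CMD_GET, 0,        do_get },	/* OK for unprivileged users */
	{ CMD_SET, OP_ADMIN, do_set },
};

static int dispatch(int cmd, int is_admin)
{
	for (unsigned int i = 0; i < sizeof(ops) / sizeof(ops[0]); i++) {
		if (ops[i].cmd != cmd)
			continue;
		if ((ops[i].flags & OP_ADMIN) && !is_admin)
			return -EPERM;
		return ops[i].doit();
	}
	return -EOPNOTSUPP;
}

int main(void)
{
	printf("unprivileged GET -> %d\n", dispatch(CMD_GET, 0));
	printf("unprivileged DEL -> %d\n", dispatch(CMD_DEL, 0));
	return 0;
}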
1522 1522
1523 static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { 1523 static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
1524 [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, 1524 [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1525 [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) }, 1525 [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
1526 [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 }, 1526 [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
1527 [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, 1527 [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
1528 [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 }, 1528 [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
1529 [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, 1529 [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
1530 }; 1530 };
1531 1531
1532 static struct genl_family dp_vport_genl_family = { 1532 static struct genl_family dp_vport_genl_family = {
1533 .id = GENL_ID_GENERATE, 1533 .id = GENL_ID_GENERATE,
1534 .hdrsize = sizeof(struct ovs_header), 1534 .hdrsize = sizeof(struct ovs_header),
1535 .name = OVS_VPORT_FAMILY, 1535 .name = OVS_VPORT_FAMILY,
1536 .version = OVS_VPORT_VERSION, 1536 .version = OVS_VPORT_VERSION,
1537 .maxattr = OVS_VPORT_ATTR_MAX, 1537 .maxattr = OVS_VPORT_ATTR_MAX,
1538 .netnsok = true 1538 .netnsok = true
1539 }; 1539 };
1540 1540
1541 struct genl_multicast_group ovs_dp_vport_multicast_group = { 1541 struct genl_multicast_group ovs_dp_vport_multicast_group = {
1542 .name = OVS_VPORT_MCGROUP 1542 .name = OVS_VPORT_MCGROUP
1543 }; 1543 };
1544 1544
1545 /* Called with RTNL lock or RCU read lock. */ 1545 /* Called with RTNL lock or RCU read lock. */
1546 static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, 1546 static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1547 u32 portid, u32 seq, u32 flags, u8 cmd) 1547 u32 portid, u32 seq, u32 flags, u8 cmd)
1548 { 1548 {
1549 struct ovs_header *ovs_header; 1549 struct ovs_header *ovs_header;
1550 struct ovs_vport_stats vport_stats; 1550 struct ovs_vport_stats vport_stats;
1551 int err; 1551 int err;
1552 1552
1553 ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family, 1553 ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
1554 flags, cmd); 1554 flags, cmd);
1555 if (!ovs_header) 1555 if (!ovs_header)
1556 return -EMSGSIZE; 1556 return -EMSGSIZE;
1557 1557
1558 ovs_header->dp_ifindex = get_dpifindex(vport->dp); 1558 ovs_header->dp_ifindex = get_dpifindex(vport->dp);
1559 1559
1560 if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) || 1560 if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
1561 nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) || 1561 nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
1562 nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) || 1562 nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) ||
1563 nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid)) 1563 nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid))
1564 goto nla_put_failure; 1564 goto nla_put_failure;
1565 1565
1566 ovs_vport_get_stats(vport, &vport_stats); 1566 ovs_vport_get_stats(vport, &vport_stats);
1567 if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats), 1567 if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
1568 &vport_stats)) 1568 &vport_stats))
1569 goto nla_put_failure; 1569 goto nla_put_failure;
1570 1570
1571 err = ovs_vport_get_options(vport, skb); 1571 err = ovs_vport_get_options(vport, skb);
1572 if (err == -EMSGSIZE) 1572 if (err == -EMSGSIZE)
1573 goto error; 1573 goto error;
1574 1574
1575 return genlmsg_end(skb, ovs_header); 1575 return genlmsg_end(skb, ovs_header);
1576 1576
1577 nla_put_failure: 1577 nla_put_failure:
1578 err = -EMSGSIZE; 1578 err = -EMSGSIZE;
1579 error: 1579 error:
1580 genlmsg_cancel(skb, ovs_header); 1580 genlmsg_cancel(skb, ovs_header);
1581 return err; 1581 return err;
1582 } 1582 }
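ovs_vport_cmd_fill_info() follows the usual genetlink fill pattern: genlmsg_put() reserves the header, the nla_put*() calls append attributes, and any overflow jumps to nla_put_failure, where genlmsg_cancel() trims the partial message before -EMSGSIZE is returned. A standalone sketch of that put/cancel shape over an invented bounded TLV buffer follows; struct msg, msg_put_attr and friends are not a netlink API, just an illustration of the rollback-on-overflow idea.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct msg {
	uint8_t buf[64];
	size_t  len;
};

static size_t msg_put_header(struct msg *m)	/* returns a rollback mark */
{
	return m->len;
}

static int msg_put_attr(struct msg *m, uint16_t type,
			const void *data, uint16_t dlen)
{
	if (m->len + 4 + dlen > sizeof(m->buf))
		return -EMSGSIZE;		/* caller rolls back */
	memcpy(m->buf + m->len, &type, 2);
	memcpy(m->buf + m->len + 2, &dlen, 2);
	memcpy(m->buf + m->len + 4, data, dlen);
	m->len += 4 + dlen;
	return 0;
}

static void msg_cancel(struct msg *m, size_t mark)
{
	m->len = mark;				/* drop the partial message */
}

int main(void)
{
	struct msg m = { .len = 0 };
	size_t mark = msg_put_header(&m);
	uint32_t port_no = 3;
	char name[40] = "a-fairly-long-port-name";

	if (msg_put_attr(&m, 1, &port_no, sizeof(port_no)) ||
	    msg_put_attr(&m, 2, name, sizeof(name)) ||
	    msg_put_attr(&m, 3, name, sizeof(name))) {
		msg_cancel(&m, mark);
		fprintf(stderr, "message too small, cancelled\n");
		return 1;
	}
	printf("packed %zu bytes\n", m.len);
	return 0;
}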
1583 1583
1584 /* Called with RTNL lock or RCU read lock. */ 1584 /* Called with RTNL lock or RCU read lock. */
1585 struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, 1585 struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1586 u32 seq, u8 cmd) 1586 u32 seq, u8 cmd)
1587 { 1587 {
1588 struct sk_buff *skb; 1588 struct sk_buff *skb;
1589 int retval; 1589 int retval;
1590 1590
1591 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); 1591 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
1592 if (!skb) 1592 if (!skb)
1593 return ERR_PTR(-ENOMEM); 1593 return ERR_PTR(-ENOMEM);
1594 1594
1595 retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd); 1595 retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
1596 if (retval < 0) { 1596 if (retval < 0) {
1597 kfree_skb(skb); 1597 kfree_skb(skb);
1598 return ERR_PTR(retval); 1598 return ERR_PTR(retval);
1599 } 1599 }
1600 return skb; 1600 return skb;
1601 } 1601 }
1602 1602
1603 /* Called with RTNL lock or RCU read lock. */ 1603 /* Called with RTNL lock or RCU read lock. */
1604 static struct vport *lookup_vport(struct net *net, 1604 static struct vport *lookup_vport(struct net *net,
1605 struct ovs_header *ovs_header, 1605 struct ovs_header *ovs_header,
1606 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1]) 1606 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1607 { 1607 {
1608 struct datapath *dp; 1608 struct datapath *dp;
1609 struct vport *vport; 1609 struct vport *vport;
1610 1610
1611 if (a[OVS_VPORT_ATTR_NAME]) { 1611 if (a[OVS_VPORT_ATTR_NAME]) {
1612 vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME])); 1612 vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
1613 if (!vport) 1613 if (!vport)
1614 return ERR_PTR(-ENODEV); 1614 return ERR_PTR(-ENODEV);
1615 if (ovs_header->dp_ifindex && 1615 if (ovs_header->dp_ifindex &&
1616 ovs_header->dp_ifindex != get_dpifindex(vport->dp)) 1616 ovs_header->dp_ifindex != get_dpifindex(vport->dp))
1617 return ERR_PTR(-ENODEV); 1617 return ERR_PTR(-ENODEV);
1618 return vport; 1618 return vport;
1619 } else if (a[OVS_VPORT_ATTR_PORT_NO]) { 1619 } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1620 u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]); 1620 u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1621 1621
1622 if (port_no >= DP_MAX_PORTS) 1622 if (port_no >= DP_MAX_PORTS)
1623 return ERR_PTR(-EFBIG); 1623 return ERR_PTR(-EFBIG);
1624 1624
1625 dp = get_dp(net, ovs_header->dp_ifindex); 1625 dp = get_dp(net, ovs_header->dp_ifindex);
1626 if (!dp) 1626 if (!dp)
1627 return ERR_PTR(-ENODEV); 1627 return ERR_PTR(-ENODEV);
1628 1628
1629 vport = ovs_vport_rtnl_rcu(dp, port_no); 1629 vport = ovs_vport_rtnl_rcu(dp, port_no);
1630 if (!vport) 1630 if (!vport)
1631 return ERR_PTR(-ENOENT); 1631 return ERR_PTR(-ENOENT);
1632 return vport; 1632 return vport;
1633 } else 1633 } else
1634 return ERR_PTR(-EINVAL); 1634 return ERR_PTR(-EINVAL);
1635 } 1635 }
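lookup_vport() never returns NULL for "not found"; instead it encodes the reason into the pointer itself with ERR_PTR(), so callers can distinguish -ENODEV, -EFBIG, -ENOENT and -EINVAL with a single return value. A userspace re-creation of the ERR_PTR()/IS_ERR()/PTR_ERR() helpers, for illustration only (struct vport_like and lookup_by_name are invented):

#include <errno.h>
#include <stdio.h>
#include <string.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error)      { return (void *)error; }
static inline long  PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int   IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

struct vport_like { const char *name; };

static struct vport_like ports[] = { { "dp0-internal" }, { "eth0" } };

static struct vport_like *lookup_by_name(const char *name)
{
	for (size_t i = 0; i < sizeof(ports) / sizeof(ports[0]); i++)
		if (strcmp(ports[i].name, name) == 0)
			return &ports[i];
	return ERR_PTR(-ENODEV);	/* no such device */
}

int main(void)
{
	struct vport_like *vp = lookup_by_name("does-not-exist");

	if (IS_ERR(vp))
		printf("lookup failed: %ld\n", PTR_ERR(vp));
	else
		printf("found %s\n", vp->name);
	return 0;
}

The trick relies on errno values being small: pointers in the top MAX_ERRNO bytes of the address space are never valid objects, so that range can safely carry the negated error code.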
1636 1636
1637 static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) 1637 static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1638 { 1638 {
1639 struct nlattr **a = info->attrs; 1639 struct nlattr **a = info->attrs;
1640 struct ovs_header *ovs_header = info->userhdr; 1640 struct ovs_header *ovs_header = info->userhdr;
1641 struct vport_parms parms; 1641 struct vport_parms parms;
1642 struct sk_buff *reply; 1642 struct sk_buff *reply;
1643 struct vport *vport; 1643 struct vport *vport;
1644 struct datapath *dp; 1644 struct datapath *dp;
1645 u32 port_no; 1645 u32 port_no;
1646 int err; 1646 int err;
1647 1647
1648 err = -EINVAL; 1648 err = -EINVAL;
1649 if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] || 1649 if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
1650 !a[OVS_VPORT_ATTR_UPCALL_PID]) 1650 !a[OVS_VPORT_ATTR_UPCALL_PID])
1651 goto exit; 1651 goto exit;
1652 1652
1653 rtnl_lock(); 1653 rtnl_lock();
1654 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1654 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1655 err = -ENODEV; 1655 err = -ENODEV;
1656 if (!dp) 1656 if (!dp)
1657 goto exit_unlock; 1657 goto exit_unlock;
1658 1658
1659 if (a[OVS_VPORT_ATTR_PORT_NO]) { 1659 if (a[OVS_VPORT_ATTR_PORT_NO]) {
1660 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]); 1660 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1661 1661
1662 err = -EFBIG; 1662 err = -EFBIG;
1663 if (port_no >= DP_MAX_PORTS) 1663 if (port_no >= DP_MAX_PORTS)
1664 goto exit_unlock; 1664 goto exit_unlock;
1665 1665
1666 vport = ovs_vport_rtnl_rcu(dp, port_no); 1666 vport = ovs_vport_rtnl_rcu(dp, port_no);
1667 err = -EBUSY; 1667 err = -EBUSY;
1668 if (vport) 1668 if (vport)
1669 goto exit_unlock; 1669 goto exit_unlock;
1670 } else { 1670 } else {
1671 for (port_no = 1; ; port_no++) { 1671 for (port_no = 1; ; port_no++) {
1672 if (port_no >= DP_MAX_PORTS) { 1672 if (port_no >= DP_MAX_PORTS) {
1673 err = -EFBIG; 1673 err = -EFBIG;
1674 goto exit_unlock; 1674 goto exit_unlock;
1675 } 1675 }
1676 vport = ovs_vport_rtnl(dp, port_no); 1676 vport = ovs_vport_rtnl(dp, port_no);
1677 if (!vport) 1677 if (!vport)
1678 break; 1678 break;
1679 } 1679 }
1680 } 1680 }
1681 1681
1682 parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]); 1682 parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
1683 parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]); 1683 parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
1684 parms.options = a[OVS_VPORT_ATTR_OPTIONS]; 1684 parms.options = a[OVS_VPORT_ATTR_OPTIONS];
1685 parms.dp = dp; 1685 parms.dp = dp;
1686 parms.port_no = port_no; 1686 parms.port_no = port_no;
1687 parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); 1687 parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
1688 1688
1689 vport = new_vport(&parms); 1689 vport = new_vport(&parms);
1690 err = PTR_ERR(vport); 1690 err = PTR_ERR(vport);
1691 if (IS_ERR(vport)) 1691 if (IS_ERR(vport))
1692 goto exit_unlock; 1692 goto exit_unlock;
1693 1693
1694 reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, 1694 reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
1695 OVS_VPORT_CMD_NEW); 1695 OVS_VPORT_CMD_NEW);
1696 if (IS_ERR(reply)) { 1696 if (IS_ERR(reply)) {
1697 err = PTR_ERR(reply); 1697 err = PTR_ERR(reply);
1698 ovs_dp_detach_port(vport); 1698 ovs_dp_detach_port(vport);
1699 goto exit_unlock; 1699 goto exit_unlock;
1700 } 1700 }
1701 genl_notify(reply, genl_info_net(info), info->snd_portid, 1701 genl_notify(reply, genl_info_net(info), info->snd_portid,
1702 ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); 1702 ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1703 1703
1704 exit_unlock: 1704 exit_unlock:
1705 rtnl_unlock(); 1705 rtnl_unlock();
1706 exit: 1706 exit:
1707 return err; 1707 return err;
1708 } 1708 }
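Port-number selection in ovs_vport_cmd_new() has two paths: an explicit OVS_VPORT_ATTR_PORT_NO is validated for range (-EFBIG) and collision (-EBUSY), otherwise the code scans upward from 1 (port 0 is the local port) for the first free slot and fails with -EFBIG when the table is full. A small standalone sketch of the same selection logic; the array size and names (pick_port_no, used[]) are invented.

#include <errno.h>
#include <stdio.h>

#define DP_MAX_PORTS_DEMO 8

static int used[DP_MAX_PORTS_DEMO] = { 1, 0, 1, 0, 0, 0, 0, 0 };

static int pick_port_no(int requested, unsigned int *out)
{
	unsigned int port_no;

	if (requested >= 0) {
		if ((unsigned int)requested >= DP_MAX_PORTS_DEMO)
			return -EFBIG;		/* out of range */
		if (used[requested])
			return -EBUSY;		/* already taken */
		*out = (unsigned int)requested;
		return 0;
	}

	for (port_no = 1; ; port_no++) {	/* auto-allocate */
		if (port_no >= DP_MAX_PORTS_DEMO)
			return -EFBIG;		/* table is full */
		if (!used[port_no]) {
			*out = port_no;
			return 0;
		}
	}
}

int main(void)
{
	unsigned int port_no;

	if (pick_port_no(-1, &port_no) == 0)
		printf("allocated port %u\n", port_no);
	return 0;
}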
1709 1709
1710 static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) 1710 static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1711 { 1711 {
1712 struct nlattr **a = info->attrs; 1712 struct nlattr **a = info->attrs;
1713 struct sk_buff *reply; 1713 struct sk_buff *reply;
1714 struct vport *vport; 1714 struct vport *vport;
1715 int err; 1715 int err;
1716 1716
1717 rtnl_lock(); 1717 rtnl_lock();
1718 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); 1718 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
1719 err = PTR_ERR(vport); 1719 err = PTR_ERR(vport);
1720 if (IS_ERR(vport)) 1720 if (IS_ERR(vport))
1721 goto exit_unlock; 1721 goto exit_unlock;
1722 1722
1723 err = 0; 1723 err = 0;
1724 if (a[OVS_VPORT_ATTR_TYPE] && 1724 if (a[OVS_VPORT_ATTR_TYPE] &&
1725 nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) 1725 nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type)
1726 err = -EINVAL; 1726 err = -EINVAL;
1727 1727
1728 if (!err && a[OVS_VPORT_ATTR_OPTIONS]) 1728 if (!err && a[OVS_VPORT_ATTR_OPTIONS])
1729 err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]); 1729 err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
1730 if (err) 1730 if (err)
1731 goto exit_unlock; 1731 goto exit_unlock;
1732 if (a[OVS_VPORT_ATTR_UPCALL_PID]) 1732 if (a[OVS_VPORT_ATTR_UPCALL_PID])
1733 vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); 1733 vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
1734 1734
1735 reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, 1735 reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
1736 OVS_VPORT_CMD_NEW); 1736 OVS_VPORT_CMD_NEW);
1737 if (IS_ERR(reply)) { 1737 if (IS_ERR(reply)) {
1738 netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 1738 netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
1739 ovs_dp_vport_multicast_group.id, PTR_ERR(reply)); 1739 ovs_dp_vport_multicast_group.id, PTR_ERR(reply));
1740 goto exit_unlock; 1740 goto exit_unlock;
1741 } 1741 }
1742 1742
1743 genl_notify(reply, genl_info_net(info), info->snd_portid, 1743 genl_notify(reply, genl_info_net(info), info->snd_portid,
1744 ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); 1744 ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1745 1745
1746 exit_unlock: 1746 exit_unlock:
1747 rtnl_unlock(); 1747 rtnl_unlock();
1748 return err; 1748 return err;
1749 } 1749 }
1750 1750
1751 static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) 1751 static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
1752 { 1752 {
1753 struct nlattr **a = info->attrs; 1753 struct nlattr **a = info->attrs;
1754 struct sk_buff *reply; 1754 struct sk_buff *reply;
1755 struct vport *vport; 1755 struct vport *vport;
1756 int err; 1756 int err;
1757 1757
1758 rtnl_lock(); 1758 rtnl_lock();
1759 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); 1759 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
1760 err = PTR_ERR(vport); 1760 err = PTR_ERR(vport);
1761 if (IS_ERR(vport)) 1761 if (IS_ERR(vport))
1762 goto exit_unlock; 1762 goto exit_unlock;
1763 1763
1764 if (vport->port_no == OVSP_LOCAL) { 1764 if (vport->port_no == OVSP_LOCAL) {
1765 err = -EINVAL; 1765 err = -EINVAL;
1766 goto exit_unlock; 1766 goto exit_unlock;
1767 } 1767 }
1768 1768
1769 reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, 1769 reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
1770 OVS_VPORT_CMD_DEL); 1770 OVS_VPORT_CMD_DEL);
1771 err = PTR_ERR(reply); 1771 err = PTR_ERR(reply);
1772 if (IS_ERR(reply)) 1772 if (IS_ERR(reply))
1773 goto exit_unlock; 1773 goto exit_unlock;
1774 1774
1775 ovs_dp_detach_port(vport); 1775 ovs_dp_detach_port(vport);
1776 1776
1777 genl_notify(reply, genl_info_net(info), info->snd_portid, 1777 genl_notify(reply, genl_info_net(info), info->snd_portid,
1778 ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); 1778 ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1779 1779
1780 exit_unlock: 1780 exit_unlock:
1781 rtnl_unlock(); 1781 rtnl_unlock();
1782 return err; 1782 return err;
1783 } 1783 }
1784 1784
1785 static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) 1785 static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
1786 { 1786 {
1787 struct nlattr **a = info->attrs; 1787 struct nlattr **a = info->attrs;
1788 struct ovs_header *ovs_header = info->userhdr; 1788 struct ovs_header *ovs_header = info->userhdr;
1789 struct sk_buff *reply; 1789 struct sk_buff *reply;
1790 struct vport *vport; 1790 struct vport *vport;
1791 int err; 1791 int err;
1792 1792
1793 rcu_read_lock(); 1793 rcu_read_lock();
1794 vport = lookup_vport(sock_net(skb->sk), ovs_header, a); 1794 vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
1795 err = PTR_ERR(vport); 1795 err = PTR_ERR(vport);
1796 if (IS_ERR(vport)) 1796 if (IS_ERR(vport))
1797 goto exit_unlock; 1797 goto exit_unlock;
1798 1798
1799 reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, 1799 reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
1800 OVS_VPORT_CMD_NEW); 1800 OVS_VPORT_CMD_NEW);
1801 err = PTR_ERR(reply); 1801 err = PTR_ERR(reply);
1802 if (IS_ERR(reply)) 1802 if (IS_ERR(reply))
1803 goto exit_unlock; 1803 goto exit_unlock;
1804 1804
1805 rcu_read_unlock(); 1805 rcu_read_unlock();
1806 1806
1807 return genlmsg_reply(reply, info); 1807 return genlmsg_reply(reply, info);
1808 1808
1809 exit_unlock: 1809 exit_unlock:
1810 rcu_read_unlock(); 1810 rcu_read_unlock();
1811 return err; 1811 return err;
1812 } 1812 }
1813 1813
1814 static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) 1814 static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1815 { 1815 {
1816 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); 1816 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1817 struct datapath *dp; 1817 struct datapath *dp;
1818 int bucket = cb->args[0], skip = cb->args[1]; 1818 int bucket = cb->args[0], skip = cb->args[1];
1819 int i, j = 0; 1819 int i, j = 0;
1820 1820
1821 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1821 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1822 if (!dp) 1822 if (!dp)
1823 return -ENODEV; 1823 return -ENODEV;
1824 1824
1825 rcu_read_lock(); 1825 rcu_read_lock();
1826 for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) { 1826 for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
1827 struct vport *vport; 1827 struct vport *vport;
1828 struct hlist_node *n; 1828 struct hlist_node *n;
1829 1829
1830 j = 0; 1830 j = 0;
1831 hlist_for_each_entry_rcu(vport, n, &dp->ports[i], dp_hash_node) { 1831 hlist_for_each_entry_rcu(vport, n, &dp->ports[i], dp_hash_node) {
1832 if (j >= skip && 1832 if (j >= skip &&
1833 ovs_vport_cmd_fill_info(vport, skb, 1833 ovs_vport_cmd_fill_info(vport, skb,
1834 NETLINK_CB(cb->skb).portid, 1834 NETLINK_CB(cb->skb).portid,
1835 cb->nlh->nlmsg_seq, 1835 cb->nlh->nlmsg_seq,
1836 NLM_F_MULTI, 1836 NLM_F_MULTI,
1837 OVS_VPORT_CMD_NEW) < 0) 1837 OVS_VPORT_CMD_NEW) < 0)
1838 goto out; 1838 goto out;
1839 1839
1840 j++; 1840 j++;
1841 } 1841 }
1842 skip = 0; 1842 skip = 0;
1843 } 1843 }
1844 out: 1844 out:
1845 rcu_read_unlock(); 1845 rcu_read_unlock();
1846 1846
1847 cb->args[0] = i; 1847 cb->args[0] = i;
1848 cb->args[1] = j; 1848 cb->args[1] = j;
1849 1849
1850 return skb->len; 1850 return skb->len;
1851 } 1851 }
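The vport dump resumes across invocations via the two-level cursor stored in cb->args: the hash bucket index and the position within that bucket, with the in-bucket position reset to 0 whenever a bucket is exhausted. Repeated calls therefore walk the whole table without repeating or skipping entries even though each call can only fill one reply. A standalone sketch of that (bucket, skip) cursor with a per-round item budget; bucket contents and names are invented.

#include <stdio.h>

#define NBUCKETS 4

static int bucket_len[NBUCKETS] = { 3, 0, 2, 4 };

/* Emit at most 'budget' items, resuming from (*bucket, *skip); returns the
 * number of items emitted this round. */
static int dump_round(int *bucket, int *skip, int budget)
{
	int emitted = 0;
	int i, j = 0;

	for (i = *bucket; i < NBUCKETS; i++) {
		for (j = *skip; j < bucket_len[i]; j++) {
			if (emitted == budget)
				goto out;	/* reply is full, stop here */
			printf("  item %d.%d\n", i, j);
			emitted++;
		}
		*skip = 0;			/* next bucket starts at 0 */
	}
out:
	*bucket = i;
	*skip = j;
	return emitted;
}

int main(void)
{
	int bucket = 0, skip = 0, round = 0;

	while (1) {
		printf("round %d:\n", round++);
		if (dump_round(&bucket, &skip, 3) == 0)
			break;			/* nothing left to dump */
	}
	return 0;
}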
1852 1852
1853 static struct genl_ops dp_vport_genl_ops[] = { 1853 static struct genl_ops dp_vport_genl_ops[] = {
1854 { .cmd = OVS_VPORT_CMD_NEW, 1854 { .cmd = OVS_VPORT_CMD_NEW,
1855 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1855 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1856 .policy = vport_policy, 1856 .policy = vport_policy,
1857 .doit = ovs_vport_cmd_new 1857 .doit = ovs_vport_cmd_new
1858 }, 1858 },
1859 { .cmd = OVS_VPORT_CMD_DEL, 1859 { .cmd = OVS_VPORT_CMD_DEL,
1860 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1860 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1861 .policy = vport_policy, 1861 .policy = vport_policy,
1862 .doit = ovs_vport_cmd_del 1862 .doit = ovs_vport_cmd_del
1863 }, 1863 },
1864 { .cmd = OVS_VPORT_CMD_GET, 1864 { .cmd = OVS_VPORT_CMD_GET,
1865 .flags = 0, /* OK for unprivileged users. */ 1865 .flags = 0, /* OK for unprivileged users. */
1866 .policy = vport_policy, 1866 .policy = vport_policy,
1867 .doit = ovs_vport_cmd_get, 1867 .doit = ovs_vport_cmd_get,
1868 .dumpit = ovs_vport_cmd_dump 1868 .dumpit = ovs_vport_cmd_dump
1869 }, 1869 },
1870 { .cmd = OVS_VPORT_CMD_SET, 1870 { .cmd = OVS_VPORT_CMD_SET,
1871 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1871 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1872 .policy = vport_policy, 1872 .policy = vport_policy,
1873 .doit = ovs_vport_cmd_set, 1873 .doit = ovs_vport_cmd_set,
1874 }, 1874 },
1875 }; 1875 };
1876 1876
1877 struct genl_family_and_ops { 1877 struct genl_family_and_ops {
1878 struct genl_family *family; 1878 struct genl_family *family;
1879 struct genl_ops *ops; 1879 struct genl_ops *ops;
1880 int n_ops; 1880 int n_ops;
1881 struct genl_multicast_group *group; 1881 struct genl_multicast_group *group;
1882 }; 1882 };
1883 1883
1884 static const struct genl_family_and_ops dp_genl_families[] = { 1884 static const struct genl_family_and_ops dp_genl_families[] = {
1885 { &dp_datapath_genl_family, 1885 { &dp_datapath_genl_family,
1886 dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops), 1886 dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
1887 &ovs_dp_datapath_multicast_group }, 1887 &ovs_dp_datapath_multicast_group },
1888 { &dp_vport_genl_family, 1888 { &dp_vport_genl_family,
1889 dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops), 1889 dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
1890 &ovs_dp_vport_multicast_group }, 1890 &ovs_dp_vport_multicast_group },
1891 { &dp_flow_genl_family, 1891 { &dp_flow_genl_family,
1892 dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops), 1892 dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
1893 &ovs_dp_flow_multicast_group }, 1893 &ovs_dp_flow_multicast_group },
1894 { &dp_packet_genl_family, 1894 { &dp_packet_genl_family,
1895 dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops), 1895 dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
1896 NULL }, 1896 NULL },
1897 }; 1897 };
1898 1898
1899 static void dp_unregister_genl(int n_families) 1899 static void dp_unregister_genl(int n_families)
1900 { 1900 {
1901 int i; 1901 int i;
1902 1902
1903 for (i = 0; i < n_families; i++) 1903 for (i = 0; i < n_families; i++)
1904 genl_unregister_family(dp_genl_families[i].family); 1904 genl_unregister_family(dp_genl_families[i].family);
1905 } 1905 }
1906 1906
1907 static int dp_register_genl(void) 1907 static int dp_register_genl(void)
1908 { 1908 {
1909 int n_registered; 1909 int n_registered;
1910 int err; 1910 int err;
1911 int i; 1911 int i;
1912 1912
1913 n_registered = 0; 1913 n_registered = 0;
1914 for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) { 1914 for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
1915 const struct genl_family_and_ops *f = &dp_genl_families[i]; 1915 const struct genl_family_and_ops *f = &dp_genl_families[i];
1916 1916
1917 err = genl_register_family_with_ops(f->family, f->ops, 1917 err = genl_register_family_with_ops(f->family, f->ops,
1918 f->n_ops); 1918 f->n_ops);
1919 if (err) 1919 if (err)
1920 goto error; 1920 goto error;
1921 n_registered++; 1921 n_registered++;
1922 1922
1923 if (f->group) { 1923 if (f->group) {
1924 err = genl_register_mc_group(f->family, f->group); 1924 err = genl_register_mc_group(f->family, f->group);
1925 if (err) 1925 if (err)
1926 goto error; 1926 goto error;
1927 } 1927 }
1928 } 1928 }
1929 1929
1930 return 0; 1930 return 0;
1931 1931
1932 error: 1932 error:
1933 dp_unregister_genl(n_registered); 1933 dp_unregister_genl(n_registered);
1934 return err; 1934 return err;
1935 } 1935 }
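dp_register_genl() registers the four families in table order while counting successes, so a failure part-way through lets dp_unregister_genl(n_registered) tear down only the families that actually made it. A standalone sketch of that count-and-rollback shape; the family table, register/unregister stubs and the simulated failure are invented for illustration.

#include <errno.h>
#include <stdio.h>

struct family {
	const char *name;
	int (*reg)(void);
	void (*unreg)(void);
};

static int ok_reg(void)      { return 0; }
static int fail_reg(void)    { return -EBUSY; }
static void noop_unreg(void) { }

static struct family families[] = {
	{ "datapath", ok_reg,   noop_unreg },
	{ "vport",    ok_reg,   noop_unreg },
	{ "flow",     fail_reg, noop_unreg },	/* simulate a failure */
	{ "packet",   ok_reg,   noop_unreg },
};

#define N_FAMILIES (sizeof(families) / sizeof(families[0]))

static void unregister_families(unsigned int n)
{
	for (unsigned int i = 0; i < n; i++) {
		printf("unregistering %s\n", families[i].name);
		families[i].unreg();
	}
}

static int register_families(void)
{
	unsigned int n_registered = 0;
	int err;

	for (unsigned int i = 0; i < N_FAMILIES; i++) {
		err = families[i].reg();
		if (err)
			goto error;
		printf("registered %s\n", families[i].name);
		n_registered++;
	}
	return 0;

error:
	unregister_families(n_registered);	/* only the ones that succeeded */
	return err;
}

int main(void)
{
	printf("register_families: %d\n", register_families());
	return 0;
}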
1936 1936
1937 static void rehash_flow_table(struct work_struct *work) 1937 static void rehash_flow_table(struct work_struct *work)
1938 { 1938 {
1939 struct datapath *dp; 1939 struct datapath *dp;
1940 struct net *net; 1940 struct net *net;
1941 1941
1942 genl_lock(); 1942 genl_lock();
1943 rtnl_lock(); 1943 rtnl_lock();
1944 for_each_net(net) { 1944 for_each_net(net) {
1945 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 1945 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1946 1946
1947 list_for_each_entry(dp, &ovs_net->dps, list_node) { 1947 list_for_each_entry(dp, &ovs_net->dps, list_node) {
1948 struct flow_table *old_table = genl_dereference(dp->table); 1948 struct flow_table *old_table = genl_dereference(dp->table);
1949 struct flow_table *new_table; 1949 struct flow_table *new_table;
1950 1950
1951 new_table = ovs_flow_tbl_rehash(old_table); 1951 new_table = ovs_flow_tbl_rehash(old_table);
1952 if (!IS_ERR(new_table)) { 1952 if (!IS_ERR(new_table)) {
1953 rcu_assign_pointer(dp->table, new_table); 1953 rcu_assign_pointer(dp->table, new_table);
1954 ovs_flow_tbl_deferred_destroy(old_table); 1954 ovs_flow_tbl_deferred_destroy(old_table);
1955 } 1955 }
1956 } 1956 }
1957 } 1957 }
1958 rtnl_unlock(); 1958 rtnl_unlock();
1959 genl_unlock(); 1959 genl_unlock();
1960 1960
1961 schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL); 1961 schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
1962 } 1962 }
1963 1963
1964 static int __net_init ovs_init_net(struct net *net) 1964 static int __net_init ovs_init_net(struct net *net)
1965 { 1965 {
1966 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 1966 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1967 1967
1968 INIT_LIST_HEAD(&ovs_net->dps); 1968 INIT_LIST_HEAD(&ovs_net->dps);
1969 return 0; 1969 return 0;
1970 } 1970 }
1971 1971
1972 static void __net_exit ovs_exit_net(struct net *net) 1972 static void __net_exit ovs_exit_net(struct net *net)
1973 { 1973 {
1974 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 1974 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1975 struct datapath *dp, *dp_next; 1975 struct datapath *dp, *dp_next;
1976 1976
1977 genl_lock(); 1977 genl_lock();
1978 list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) 1978 list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
1979 __dp_destroy(dp); 1979 __dp_destroy(dp);
1980 genl_unlock(); 1980 genl_unlock();
1981 } 1981 }
1982 1982
1983 static struct pernet_operations ovs_net_ops = { 1983 static struct pernet_operations ovs_net_ops = {
1984 .init = ovs_init_net, 1984 .init = ovs_init_net,
1985 .exit = ovs_exit_net, 1985 .exit = ovs_exit_net,
1986 .id = &ovs_net_id, 1986 .id = &ovs_net_id,
1987 .size = sizeof(struct ovs_net), 1987 .size = sizeof(struct ovs_net),
1988 }; 1988 };
1989 1989
1990 static int __init dp_init(void) 1990 static int __init dp_init(void)
1991 { 1991 {
1992 int err; 1992 int err;
1993 1993
1994 BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb)); 1994 BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
1995 1995
1996 pr_info("Open vSwitch switching datapath\n"); 1996 pr_info("Open vSwitch switching datapath\n");
1997 1997
1998 err = ovs_flow_init(); 1998 err = ovs_flow_init();
1999 if (err) 1999 if (err)
2000 goto error; 2000 goto error;
2001 2001
2002 err = ovs_vport_init(); 2002 err = ovs_vport_init();
2003 if (err) 2003 if (err)
2004 goto error_flow_exit; 2004 goto error_flow_exit;
2005 2005
2006 err = register_pernet_device(&ovs_net_ops); 2006 err = register_pernet_device(&ovs_net_ops);
2007 if (err) 2007 if (err)
2008 goto error_vport_exit; 2008 goto error_vport_exit;
2009 2009
2010 err = register_netdevice_notifier(&ovs_dp_device_notifier); 2010 err = register_netdevice_notifier(&ovs_dp_device_notifier);
2011 if (err) 2011 if (err)
2012 goto error_netns_exit; 2012 goto error_netns_exit;
2013 2013
2014 err = dp_register_genl(); 2014 err = dp_register_genl();
2015 if (err < 0) 2015 if (err < 0)
2016 goto error_unreg_notifier; 2016 goto error_unreg_notifier;
2017 2017
2018 schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL); 2018 schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
2019 2019
2020 return 0; 2020 return 0;
2021 2021
2022 error_unreg_notifier: 2022 error_unreg_notifier:
2023 unregister_netdevice_notifier(&ovs_dp_device_notifier); 2023 unregister_netdevice_notifier(&ovs_dp_device_notifier);
2024 error_netns_exit: 2024 error_netns_exit:
2025 unregister_pernet_device(&ovs_net_ops); 2025 unregister_pernet_device(&ovs_net_ops);
2026 error_vport_exit: 2026 error_vport_exit:
2027 ovs_vport_exit(); 2027 ovs_vport_exit();
2028 error_flow_exit: 2028 error_flow_exit:
2029 ovs_flow_exit(); 2029 ovs_flow_exit();
2030 error: 2030 error:
2031 return err; 2031 return err;
2032 } 2032 }
2033 2033
2034 static void dp_cleanup(void) 2034 static void dp_cleanup(void)
2035 { 2035 {
2036 cancel_delayed_work_sync(&rehash_flow_wq); 2036 cancel_delayed_work_sync(&rehash_flow_wq);
2037 dp_unregister_genl(ARRAY_SIZE(dp_genl_families)); 2037 dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2038 unregister_netdevice_notifier(&ovs_dp_device_notifier); 2038 unregister_netdevice_notifier(&ovs_dp_device_notifier);
2039 unregister_pernet_device(&ovs_net_ops); 2039 unregister_pernet_device(&ovs_net_ops);
2040 rcu_barrier(); 2040 rcu_barrier();
2041 ovs_vport_exit(); 2041 ovs_vport_exit();
2042 ovs_flow_exit(); 2042 ovs_flow_exit();
2043 } 2043 }
2044 2044
2045 module_init(dp_init); 2045 module_init(dp_init);
2046 module_exit(dp_cleanup); 2046 module_exit(dp_cleanup);
2047 2047
2048 MODULE_DESCRIPTION("Open vSwitch switching datapath"); 2048 MODULE_DESCRIPTION("Open vSwitch switching datapath");
2049 MODULE_LICENSE("GPL"); 2049 MODULE_LICENSE("GPL");
2050 2050