Commit 12b0004d1d1e2a9aa667412d479041e403bcafae
Committed by: David S. Miller
1 parent: 25060d8f3f
Exists in: smarc-l5.0.0_1.0.0-ga and 5 other branches
net: adjust skb_gso_segment() for calling in rx path
skb_gso_segment() is almost always called in the tx path; the exception is openvswitch, which calls it on the rx path when it receives a packet and tries to queue it to user space. In that case the ->ip_summed check inside skb_gso_segment() no longer holds, because ->ip_summed has a different meaning on the rx path. This patch adjusts skb_gso_segment() so that we can at least avoid the spurious checksum warnings.

Cc: Jesse Gross <jesse@nicira.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
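The hunks that implement this change are not visible in the context shown below, so the following is only a rough sketch of the interface adjustment the message describes, assuming the fix threads a tx_path flag through the segmentation helper and keeps skb_gso_segment() as a tx-path wrapper (the names __skb_gso_segment and the tx_path parameter are taken from mainline but should be checked against the actual hunks):

	/*
	 * Illustrative sketch only, not the verbatim patch: a tx_path flag
	 * lets rx-path callers skip the tx-only ->ip_summed sanity check
	 * that currently triggers the checksum warning.
	 */
	extern struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
						 netdev_features_t features,
						 bool tx_path);

	/* Existing tx-path callers keep their behaviour unchanged. */
	static inline struct sk_buff *skb_gso_segment(struct sk_buff *skb,
						      netdev_features_t features)
	{
		return __skb_gso_segment(skb, features, true);
	}

An rx-path caller such as openvswitch would then presumably segment with the check disabled, e.g. segs = __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_FRAGLIST, false);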
Showing 3 changed files with 26 additions and 8 deletions (inline diff)
include/linux/netdevice.h
1 | /* | 1 | /* |
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | 2 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
3 | * operating system. INET is implemented using the BSD Socket | 3 | * operating system. INET is implemented using the BSD Socket |
4 | * interface as the means of communication with the user level. | 4 | * interface as the means of communication with the user level. |
5 | * | 5 | * |
6 | * Definitions for the Interfaces handler. | 6 | * Definitions for the Interfaces handler. |
7 | * | 7 | * |
8 | * Version: @(#)dev.h 1.0.10 08/12/93 | 8 | * Version: @(#)dev.h 1.0.10 08/12/93 |
9 | * | 9 | * |
10 | * Authors: Ross Biro | 10 | * Authors: Ross Biro |
11 | * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> | 11 | * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> |
12 | * Corey Minyard <wf-rch!minyard@relay.EU.net> | 12 | * Corey Minyard <wf-rch!minyard@relay.EU.net> |
13 | * Donald J. Becker, <becker@cesdis.gsfc.nasa.gov> | 13 | * Donald J. Becker, <becker@cesdis.gsfc.nasa.gov> |
14 | * Alan Cox, <alan@lxorguk.ukuu.org.uk> | 14 | * Alan Cox, <alan@lxorguk.ukuu.org.uk> |
15 | * Bjorn Ekwall. <bj0rn@blox.se> | 15 | * Bjorn Ekwall. <bj0rn@blox.se> |
16 | * Pekka Riikonen <priikone@poseidon.pspt.fi> | 16 | * Pekka Riikonen <priikone@poseidon.pspt.fi> |
17 | * | 17 | * |
18 | * This program is free software; you can redistribute it and/or | 18 | * This program is free software; you can redistribute it and/or |
19 | * modify it under the terms of the GNU General Public License | 19 | * modify it under the terms of the GNU General Public License |
20 | * as published by the Free Software Foundation; either version | 20 | * as published by the Free Software Foundation; either version |
21 | * 2 of the License, or (at your option) any later version. | 21 | * 2 of the License, or (at your option) any later version. |
22 | * | 22 | * |
23 | * Moved to /usr/include/linux for NET3 | 23 | * Moved to /usr/include/linux for NET3 |
24 | */ | 24 | */ |
25 | #ifndef _LINUX_NETDEVICE_H | 25 | #ifndef _LINUX_NETDEVICE_H |
26 | #define _LINUX_NETDEVICE_H | 26 | #define _LINUX_NETDEVICE_H |
27 | 27 | ||
28 | #include <linux/pm_qos.h> | 28 | #include <linux/pm_qos.h> |
29 | #include <linux/timer.h> | 29 | #include <linux/timer.h> |
30 | #include <linux/bug.h> | 30 | #include <linux/bug.h> |
31 | #include <linux/delay.h> | 31 | #include <linux/delay.h> |
32 | #include <linux/atomic.h> | 32 | #include <linux/atomic.h> |
33 | #include <asm/cache.h> | 33 | #include <asm/cache.h> |
34 | #include <asm/byteorder.h> | 34 | #include <asm/byteorder.h> |
35 | 35 | ||
36 | #include <linux/percpu.h> | 36 | #include <linux/percpu.h> |
37 | #include <linux/rculist.h> | 37 | #include <linux/rculist.h> |
38 | #include <linux/dmaengine.h> | 38 | #include <linux/dmaengine.h> |
39 | #include <linux/workqueue.h> | 39 | #include <linux/workqueue.h> |
40 | #include <linux/dynamic_queue_limits.h> | 40 | #include <linux/dynamic_queue_limits.h> |
41 | 41 | ||
42 | #include <linux/ethtool.h> | 42 | #include <linux/ethtool.h> |
43 | #include <net/net_namespace.h> | 43 | #include <net/net_namespace.h> |
44 | #include <net/dsa.h> | 44 | #include <net/dsa.h> |
45 | #ifdef CONFIG_DCB | 45 | #ifdef CONFIG_DCB |
46 | #include <net/dcbnl.h> | 46 | #include <net/dcbnl.h> |
47 | #endif | 47 | #endif |
48 | #include <net/netprio_cgroup.h> | 48 | #include <net/netprio_cgroup.h> |
49 | 49 | ||
50 | #include <linux/netdev_features.h> | 50 | #include <linux/netdev_features.h> |
51 | #include <linux/neighbour.h> | 51 | #include <linux/neighbour.h> |
52 | #include <uapi/linux/netdevice.h> | 52 | #include <uapi/linux/netdevice.h> |
53 | 53 | ||
54 | struct netpoll_info; | 54 | struct netpoll_info; |
55 | struct device; | 55 | struct device; |
56 | struct phy_device; | 56 | struct phy_device; |
57 | /* 802.11 specific */ | 57 | /* 802.11 specific */ |
58 | struct wireless_dev; | 58 | struct wireless_dev; |
59 | /* source back-compat hooks */ | 59 | /* source back-compat hooks */ |
60 | #define SET_ETHTOOL_OPS(netdev,ops) \ | 60 | #define SET_ETHTOOL_OPS(netdev,ops) \ |
61 | ( (netdev)->ethtool_ops = (ops) ) | 61 | ( (netdev)->ethtool_ops = (ops) ) |
62 | 62 | ||
63 | extern void netdev_set_default_ethtool_ops(struct net_device *dev, | 63 | extern void netdev_set_default_ethtool_ops(struct net_device *dev, |
64 | const struct ethtool_ops *ops); | 64 | const struct ethtool_ops *ops); |
65 | 65 | ||
66 | /* hardware address assignment types */ | 66 | /* hardware address assignment types */ |
67 | #define NET_ADDR_PERM 0 /* address is permanent (default) */ | 67 | #define NET_ADDR_PERM 0 /* address is permanent (default) */ |
68 | #define NET_ADDR_RANDOM 1 /* address is generated randomly */ | 68 | #define NET_ADDR_RANDOM 1 /* address is generated randomly */ |
69 | #define NET_ADDR_STOLEN 2 /* address is stolen from other device */ | 69 | #define NET_ADDR_STOLEN 2 /* address is stolen from other device */ |
70 | #define NET_ADDR_SET 3 /* address is set using | 70 | #define NET_ADDR_SET 3 /* address is set using |
71 | * dev_set_mac_address() */ | 71 | * dev_set_mac_address() */ |
72 | 72 | ||
73 | /* Backlog congestion levels */ | 73 | /* Backlog congestion levels */ |
74 | #define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ | 74 | #define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ |
75 | #define NET_RX_DROP 1 /* packet dropped */ | 75 | #define NET_RX_DROP 1 /* packet dropped */ |
76 | 76 | ||
77 | /* | 77 | /* |
78 | * Transmit return codes: transmit return codes originate from three different | 78 | * Transmit return codes: transmit return codes originate from three different |
79 | * namespaces: | 79 | * namespaces: |
80 | * | 80 | * |
81 | * - qdisc return codes | 81 | * - qdisc return codes |
82 | * - driver transmit return codes | 82 | * - driver transmit return codes |
83 | * - errno values | 83 | * - errno values |
84 | * | 84 | * |
85 | * Drivers are allowed to return any one of those in their hard_start_xmit() | 85 | * Drivers are allowed to return any one of those in their hard_start_xmit() |
86 | * function. Real network devices commonly used with qdiscs should only return | 86 | * function. Real network devices commonly used with qdiscs should only return |
87 | * the driver transmit return codes though - when qdiscs are used, the actual | 87 | * the driver transmit return codes though - when qdiscs are used, the actual |
88 | * transmission happens asynchronously, so the value is not propagated to | 88 | * transmission happens asynchronously, so the value is not propagated to |
89 | * higher layers. Virtual network devices transmit synchronously, in this case | 89 | * higher layers. Virtual network devices transmit synchronously, in this case |
90 | * the driver transmit return codes are consumed by dev_queue_xmit(), all | 90 | * the driver transmit return codes are consumed by dev_queue_xmit(), all |
91 | * others are propagated to higher layers. | 91 | * others are propagated to higher layers. |
92 | */ | 92 | */ |
93 | 93 | ||
94 | /* qdisc ->enqueue() return codes. */ | 94 | /* qdisc ->enqueue() return codes. */ |
95 | #define NET_XMIT_SUCCESS 0x00 | 95 | #define NET_XMIT_SUCCESS 0x00 |
96 | #define NET_XMIT_DROP 0x01 /* skb dropped */ | 96 | #define NET_XMIT_DROP 0x01 /* skb dropped */ |
97 | #define NET_XMIT_CN 0x02 /* congestion notification */ | 97 | #define NET_XMIT_CN 0x02 /* congestion notification */ |
98 | #define NET_XMIT_POLICED 0x03 /* skb is shot by police */ | 98 | #define NET_XMIT_POLICED 0x03 /* skb is shot by police */ |
99 | #define NET_XMIT_MASK 0x0f /* qdisc flags in net/sch_generic.h */ | 99 | #define NET_XMIT_MASK 0x0f /* qdisc flags in net/sch_generic.h */ |
100 | 100 | ||
101 | /* NET_XMIT_CN is special. It does not guarantee that this packet is lost. It | 101 | /* NET_XMIT_CN is special. It does not guarantee that this packet is lost. It |
102 | * indicates that the device will soon be dropping packets, or already drops | 102 | * indicates that the device will soon be dropping packets, or already drops |
103 | * some packets of the same priority; prompting us to send less aggressively. */ | 103 | * some packets of the same priority; prompting us to send less aggressively. */ |
104 | #define net_xmit_eval(e) ((e) == NET_XMIT_CN ? 0 : (e)) | 104 | #define net_xmit_eval(e) ((e) == NET_XMIT_CN ? 0 : (e)) |
105 | #define net_xmit_errno(e) ((e) != NET_XMIT_CN ? -ENOBUFS : 0) | 105 | #define net_xmit_errno(e) ((e) != NET_XMIT_CN ? -ENOBUFS : 0) |
106 | 106 | ||
107 | /* Driver transmit return codes */ | 107 | /* Driver transmit return codes */ |
108 | #define NETDEV_TX_MASK 0xf0 | 108 | #define NETDEV_TX_MASK 0xf0 |
109 | 109 | ||
110 | enum netdev_tx { | 110 | enum netdev_tx { |
111 | __NETDEV_TX_MIN = INT_MIN, /* make sure enum is signed */ | 111 | __NETDEV_TX_MIN = INT_MIN, /* make sure enum is signed */ |
112 | NETDEV_TX_OK = 0x00, /* driver took care of packet */ | 112 | NETDEV_TX_OK = 0x00, /* driver took care of packet */ |
113 | NETDEV_TX_BUSY = 0x10, /* driver tx path was busy*/ | 113 | NETDEV_TX_BUSY = 0x10, /* driver tx path was busy*/ |
114 | NETDEV_TX_LOCKED = 0x20, /* driver tx lock was already taken */ | 114 | NETDEV_TX_LOCKED = 0x20, /* driver tx lock was already taken */ |
115 | }; | 115 | }; |
116 | typedef enum netdev_tx netdev_tx_t; | 116 | typedef enum netdev_tx netdev_tx_t; |
117 | 117 | ||
118 | /* | 118 | /* |
119 | * Current order: NETDEV_TX_MASK > NET_XMIT_MASK >= 0 is significant; | 119 | * Current order: NETDEV_TX_MASK > NET_XMIT_MASK >= 0 is significant; |
120 | * hard_start_xmit() return < NET_XMIT_MASK means skb was consumed. | 120 | * hard_start_xmit() return < NET_XMIT_MASK means skb was consumed. |
121 | */ | 121 | */ |
122 | static inline bool dev_xmit_complete(int rc) | 122 | static inline bool dev_xmit_complete(int rc) |
123 | { | 123 | { |
124 | /* | 124 | /* |
125 | * Positive cases with an skb consumed by a driver: | 125 | * Positive cases with an skb consumed by a driver: |
126 | * - successful transmission (rc == NETDEV_TX_OK) | 126 | * - successful transmission (rc == NETDEV_TX_OK) |
127 | * - error while transmitting (rc < 0) | 127 | * - error while transmitting (rc < 0) |
128 | * - error while queueing to a different device (rc & NET_XMIT_MASK) | 128 | * - error while queueing to a different device (rc & NET_XMIT_MASK) |
129 | */ | 129 | */ |
130 | if (likely(rc < NET_XMIT_MASK)) | 130 | if (likely(rc < NET_XMIT_MASK)) |
131 | return true; | 131 | return true; |
132 | 132 | ||
133 | return false; | 133 | return false; |
134 | } | 134 | } |
135 | 135 | ||
136 | /* | 136 | /* |
137 | * Compute the worst case header length according to the protocols | 137 | * Compute the worst case header length according to the protocols |
138 | * used. | 138 | * used. |
139 | */ | 139 | */ |
140 | 140 | ||
141 | #if defined(CONFIG_WLAN) || IS_ENABLED(CONFIG_AX25) | 141 | #if defined(CONFIG_WLAN) || IS_ENABLED(CONFIG_AX25) |
142 | # if defined(CONFIG_MAC80211_MESH) | 142 | # if defined(CONFIG_MAC80211_MESH) |
143 | # define LL_MAX_HEADER 128 | 143 | # define LL_MAX_HEADER 128 |
144 | # else | 144 | # else |
145 | # define LL_MAX_HEADER 96 | 145 | # define LL_MAX_HEADER 96 |
146 | # endif | 146 | # endif |
147 | #elif IS_ENABLED(CONFIG_TR) | 147 | #elif IS_ENABLED(CONFIG_TR) |
148 | # define LL_MAX_HEADER 48 | 148 | # define LL_MAX_HEADER 48 |
149 | #else | 149 | #else |
150 | # define LL_MAX_HEADER 32 | 150 | # define LL_MAX_HEADER 32 |
151 | #endif | 151 | #endif |
152 | 152 | ||
153 | #if !IS_ENABLED(CONFIG_NET_IPIP) && !IS_ENABLED(CONFIG_NET_IPGRE) && \ | 153 | #if !IS_ENABLED(CONFIG_NET_IPIP) && !IS_ENABLED(CONFIG_NET_IPGRE) && \ |
154 | !IS_ENABLED(CONFIG_IPV6_SIT) && !IS_ENABLED(CONFIG_IPV6_TUNNEL) | 154 | !IS_ENABLED(CONFIG_IPV6_SIT) && !IS_ENABLED(CONFIG_IPV6_TUNNEL) |
155 | #define MAX_HEADER LL_MAX_HEADER | 155 | #define MAX_HEADER LL_MAX_HEADER |
156 | #else | 156 | #else |
157 | #define MAX_HEADER (LL_MAX_HEADER + 48) | 157 | #define MAX_HEADER (LL_MAX_HEADER + 48) |
158 | #endif | 158 | #endif |
159 | 159 | ||
160 | /* | 160 | /* |
161 | * Old network device statistics. Fields are native words | 161 | * Old network device statistics. Fields are native words |
162 | * (unsigned long) so they can be read and written atomically. | 162 | * (unsigned long) so they can be read and written atomically. |
163 | */ | 163 | */ |
164 | 164 | ||
165 | struct net_device_stats { | 165 | struct net_device_stats { |
166 | unsigned long rx_packets; | 166 | unsigned long rx_packets; |
167 | unsigned long tx_packets; | 167 | unsigned long tx_packets; |
168 | unsigned long rx_bytes; | 168 | unsigned long rx_bytes; |
169 | unsigned long tx_bytes; | 169 | unsigned long tx_bytes; |
170 | unsigned long rx_errors; | 170 | unsigned long rx_errors; |
171 | unsigned long tx_errors; | 171 | unsigned long tx_errors; |
172 | unsigned long rx_dropped; | 172 | unsigned long rx_dropped; |
173 | unsigned long tx_dropped; | 173 | unsigned long tx_dropped; |
174 | unsigned long multicast; | 174 | unsigned long multicast; |
175 | unsigned long collisions; | 175 | unsigned long collisions; |
176 | unsigned long rx_length_errors; | 176 | unsigned long rx_length_errors; |
177 | unsigned long rx_over_errors; | 177 | unsigned long rx_over_errors; |
178 | unsigned long rx_crc_errors; | 178 | unsigned long rx_crc_errors; |
179 | unsigned long rx_frame_errors; | 179 | unsigned long rx_frame_errors; |
180 | unsigned long rx_fifo_errors; | 180 | unsigned long rx_fifo_errors; |
181 | unsigned long rx_missed_errors; | 181 | unsigned long rx_missed_errors; |
182 | unsigned long tx_aborted_errors; | 182 | unsigned long tx_aborted_errors; |
183 | unsigned long tx_carrier_errors; | 183 | unsigned long tx_carrier_errors; |
184 | unsigned long tx_fifo_errors; | 184 | unsigned long tx_fifo_errors; |
185 | unsigned long tx_heartbeat_errors; | 185 | unsigned long tx_heartbeat_errors; |
186 | unsigned long tx_window_errors; | 186 | unsigned long tx_window_errors; |
187 | unsigned long rx_compressed; | 187 | unsigned long rx_compressed; |
188 | unsigned long tx_compressed; | 188 | unsigned long tx_compressed; |
189 | }; | 189 | }; |
190 | 190 | ||
191 | 191 | ||
192 | #include <linux/cache.h> | 192 | #include <linux/cache.h> |
193 | #include <linux/skbuff.h> | 193 | #include <linux/skbuff.h> |
194 | 194 | ||
195 | #ifdef CONFIG_RPS | 195 | #ifdef CONFIG_RPS |
196 | #include <linux/static_key.h> | 196 | #include <linux/static_key.h> |
197 | extern struct static_key rps_needed; | 197 | extern struct static_key rps_needed; |
198 | #endif | 198 | #endif |
199 | 199 | ||
200 | struct neighbour; | 200 | struct neighbour; |
201 | struct neigh_parms; | 201 | struct neigh_parms; |
202 | struct sk_buff; | 202 | struct sk_buff; |
203 | 203 | ||
204 | struct netdev_hw_addr { | 204 | struct netdev_hw_addr { |
205 | struct list_head list; | 205 | struct list_head list; |
206 | unsigned char addr[MAX_ADDR_LEN]; | 206 | unsigned char addr[MAX_ADDR_LEN]; |
207 | unsigned char type; | 207 | unsigned char type; |
208 | #define NETDEV_HW_ADDR_T_LAN 1 | 208 | #define NETDEV_HW_ADDR_T_LAN 1 |
209 | #define NETDEV_HW_ADDR_T_SAN 2 | 209 | #define NETDEV_HW_ADDR_T_SAN 2 |
210 | #define NETDEV_HW_ADDR_T_SLAVE 3 | 210 | #define NETDEV_HW_ADDR_T_SLAVE 3 |
211 | #define NETDEV_HW_ADDR_T_UNICAST 4 | 211 | #define NETDEV_HW_ADDR_T_UNICAST 4 |
212 | #define NETDEV_HW_ADDR_T_MULTICAST 5 | 212 | #define NETDEV_HW_ADDR_T_MULTICAST 5 |
213 | bool synced; | 213 | bool synced; |
214 | bool global_use; | 214 | bool global_use; |
215 | int refcount; | 215 | int refcount; |
216 | struct rcu_head rcu_head; | 216 | struct rcu_head rcu_head; |
217 | }; | 217 | }; |
218 | 218 | ||
219 | struct netdev_hw_addr_list { | 219 | struct netdev_hw_addr_list { |
220 | struct list_head list; | 220 | struct list_head list; |
221 | int count; | 221 | int count; |
222 | }; | 222 | }; |
223 | 223 | ||
224 | #define netdev_hw_addr_list_count(l) ((l)->count) | 224 | #define netdev_hw_addr_list_count(l) ((l)->count) |
225 | #define netdev_hw_addr_list_empty(l) (netdev_hw_addr_list_count(l) == 0) | 225 | #define netdev_hw_addr_list_empty(l) (netdev_hw_addr_list_count(l) == 0) |
226 | #define netdev_hw_addr_list_for_each(ha, l) \ | 226 | #define netdev_hw_addr_list_for_each(ha, l) \ |
227 | list_for_each_entry(ha, &(l)->list, list) | 227 | list_for_each_entry(ha, &(l)->list, list) |
228 | 228 | ||
229 | #define netdev_uc_count(dev) netdev_hw_addr_list_count(&(dev)->uc) | 229 | #define netdev_uc_count(dev) netdev_hw_addr_list_count(&(dev)->uc) |
230 | #define netdev_uc_empty(dev) netdev_hw_addr_list_empty(&(dev)->uc) | 230 | #define netdev_uc_empty(dev) netdev_hw_addr_list_empty(&(dev)->uc) |
231 | #define netdev_for_each_uc_addr(ha, dev) \ | 231 | #define netdev_for_each_uc_addr(ha, dev) \ |
232 | netdev_hw_addr_list_for_each(ha, &(dev)->uc) | 232 | netdev_hw_addr_list_for_each(ha, &(dev)->uc) |
233 | 233 | ||
234 | #define netdev_mc_count(dev) netdev_hw_addr_list_count(&(dev)->mc) | 234 | #define netdev_mc_count(dev) netdev_hw_addr_list_count(&(dev)->mc) |
235 | #define netdev_mc_empty(dev) netdev_hw_addr_list_empty(&(dev)->mc) | 235 | #define netdev_mc_empty(dev) netdev_hw_addr_list_empty(&(dev)->mc) |
236 | #define netdev_for_each_mc_addr(ha, dev) \ | 236 | #define netdev_for_each_mc_addr(ha, dev) \ |
237 | netdev_hw_addr_list_for_each(ha, &(dev)->mc) | 237 | netdev_hw_addr_list_for_each(ha, &(dev)->mc) |
238 | 238 | ||
239 | struct hh_cache { | 239 | struct hh_cache { |
240 | u16 hh_len; | 240 | u16 hh_len; |
241 | u16 __pad; | 241 | u16 __pad; |
242 | seqlock_t hh_lock; | 242 | seqlock_t hh_lock; |
243 | 243 | ||
244 | /* cached hardware header; allow for machine alignment needs. */ | 244 | /* cached hardware header; allow for machine alignment needs. */ |
245 | #define HH_DATA_MOD 16 | 245 | #define HH_DATA_MOD 16 |
246 | #define HH_DATA_OFF(__len) \ | 246 | #define HH_DATA_OFF(__len) \ |
247 | (HH_DATA_MOD - (((__len - 1) & (HH_DATA_MOD - 1)) + 1)) | 247 | (HH_DATA_MOD - (((__len - 1) & (HH_DATA_MOD - 1)) + 1)) |
248 | #define HH_DATA_ALIGN(__len) \ | 248 | #define HH_DATA_ALIGN(__len) \ |
249 | (((__len)+(HH_DATA_MOD-1))&~(HH_DATA_MOD - 1)) | 249 | (((__len)+(HH_DATA_MOD-1))&~(HH_DATA_MOD - 1)) |
250 | unsigned long hh_data[HH_DATA_ALIGN(LL_MAX_HEADER) / sizeof(long)]; | 250 | unsigned long hh_data[HH_DATA_ALIGN(LL_MAX_HEADER) / sizeof(long)]; |
251 | }; | 251 | }; |
252 | 252 | ||
253 | /* Reserve HH_DATA_MOD byte aligned hard_header_len, but at least that much. | 253 | /* Reserve HH_DATA_MOD byte aligned hard_header_len, but at least that much. |
254 | * Alternative is: | 254 | * Alternative is: |
255 | * dev->hard_header_len ? (dev->hard_header_len + | 255 | * dev->hard_header_len ? (dev->hard_header_len + |
256 | * (HH_DATA_MOD - 1)) & ~(HH_DATA_MOD - 1) : 0 | 256 | * (HH_DATA_MOD - 1)) & ~(HH_DATA_MOD - 1) : 0 |
257 | * | 257 | * |
258 | * We could use other alignment values, but we must maintain the | 258 | * We could use other alignment values, but we must maintain the |
259 | * relationship HH alignment <= LL alignment. | 259 | * relationship HH alignment <= LL alignment. |
260 | */ | 260 | */ |
261 | #define LL_RESERVED_SPACE(dev) \ | 261 | #define LL_RESERVED_SPACE(dev) \ |
262 | ((((dev)->hard_header_len+(dev)->needed_headroom)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD) | 262 | ((((dev)->hard_header_len+(dev)->needed_headroom)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD) |
263 | #define LL_RESERVED_SPACE_EXTRA(dev,extra) \ | 263 | #define LL_RESERVED_SPACE_EXTRA(dev,extra) \ |
264 | ((((dev)->hard_header_len+(dev)->needed_headroom+(extra))&~(HH_DATA_MOD - 1)) + HH_DATA_MOD) | 264 | ((((dev)->hard_header_len+(dev)->needed_headroom+(extra))&~(HH_DATA_MOD - 1)) + HH_DATA_MOD) |
265 | 265 | ||
266 | struct header_ops { | 266 | struct header_ops { |
267 | int (*create) (struct sk_buff *skb, struct net_device *dev, | 267 | int (*create) (struct sk_buff *skb, struct net_device *dev, |
268 | unsigned short type, const void *daddr, | 268 | unsigned short type, const void *daddr, |
269 | const void *saddr, unsigned int len); | 269 | const void *saddr, unsigned int len); |
270 | int (*parse)(const struct sk_buff *skb, unsigned char *haddr); | 270 | int (*parse)(const struct sk_buff *skb, unsigned char *haddr); |
271 | int (*rebuild)(struct sk_buff *skb); | 271 | int (*rebuild)(struct sk_buff *skb); |
272 | int (*cache)(const struct neighbour *neigh, struct hh_cache *hh, __be16 type); | 272 | int (*cache)(const struct neighbour *neigh, struct hh_cache *hh, __be16 type); |
273 | void (*cache_update)(struct hh_cache *hh, | 273 | void (*cache_update)(struct hh_cache *hh, |
274 | const struct net_device *dev, | 274 | const struct net_device *dev, |
275 | const unsigned char *haddr); | 275 | const unsigned char *haddr); |
276 | }; | 276 | }; |
277 | 277 | ||
278 | /* These flag bits are private to the generic network queueing | 278 | /* These flag bits are private to the generic network queueing |
279 | * layer, they may not be explicitly referenced by any other | 279 | * layer, they may not be explicitly referenced by any other |
280 | * code. | 280 | * code. |
281 | */ | 281 | */ |
282 | 282 | ||
283 | enum netdev_state_t { | 283 | enum netdev_state_t { |
284 | __LINK_STATE_START, | 284 | __LINK_STATE_START, |
285 | __LINK_STATE_PRESENT, | 285 | __LINK_STATE_PRESENT, |
286 | __LINK_STATE_NOCARRIER, | 286 | __LINK_STATE_NOCARRIER, |
287 | __LINK_STATE_LINKWATCH_PENDING, | 287 | __LINK_STATE_LINKWATCH_PENDING, |
288 | __LINK_STATE_DORMANT, | 288 | __LINK_STATE_DORMANT, |
289 | }; | 289 | }; |
290 | 290 | ||
291 | 291 | ||
292 | /* | 292 | /* |
293 | * This structure holds at boot time configured netdevice settings. They | 293 | * This structure holds at boot time configured netdevice settings. They |
294 | * are then used in the device probing. | 294 | * are then used in the device probing. |
295 | */ | 295 | */ |
296 | struct netdev_boot_setup { | 296 | struct netdev_boot_setup { |
297 | char name[IFNAMSIZ]; | 297 | char name[IFNAMSIZ]; |
298 | struct ifmap map; | 298 | struct ifmap map; |
299 | }; | 299 | }; |
300 | #define NETDEV_BOOT_SETUP_MAX 8 | 300 | #define NETDEV_BOOT_SETUP_MAX 8 |
301 | 301 | ||
302 | extern int __init netdev_boot_setup(char *str); | 302 | extern int __init netdev_boot_setup(char *str); |
303 | 303 | ||
304 | /* | 304 | /* |
305 | * Structure for NAPI scheduling similar to tasklet but with weighting | 305 | * Structure for NAPI scheduling similar to tasklet but with weighting |
306 | */ | 306 | */ |
307 | struct napi_struct { | 307 | struct napi_struct { |
308 | /* The poll_list must only be managed by the entity which | 308 | /* The poll_list must only be managed by the entity which |
309 | * changes the state of the NAPI_STATE_SCHED bit. This means | 309 | * changes the state of the NAPI_STATE_SCHED bit. This means |
310 | * whoever atomically sets that bit can add this napi_struct | 310 | * whoever atomically sets that bit can add this napi_struct |
311 | * to the per-cpu poll_list, and whoever clears that bit | 311 | * to the per-cpu poll_list, and whoever clears that bit |
312 | * can remove from the list right before clearing the bit. | 312 | * can remove from the list right before clearing the bit. |
313 | */ | 313 | */ |
314 | struct list_head poll_list; | 314 | struct list_head poll_list; |
315 | 315 | ||
316 | unsigned long state; | 316 | unsigned long state; |
317 | int weight; | 317 | int weight; |
318 | unsigned int gro_count; | 318 | unsigned int gro_count; |
319 | int (*poll)(struct napi_struct *, int); | 319 | int (*poll)(struct napi_struct *, int); |
320 | #ifdef CONFIG_NETPOLL | 320 | #ifdef CONFIG_NETPOLL |
321 | spinlock_t poll_lock; | 321 | spinlock_t poll_lock; |
322 | int poll_owner; | 322 | int poll_owner; |
323 | #endif | 323 | #endif |
324 | struct net_device *dev; | 324 | struct net_device *dev; |
325 | struct sk_buff *gro_list; | 325 | struct sk_buff *gro_list; |
326 | struct sk_buff *skb; | 326 | struct sk_buff *skb; |
327 | struct list_head dev_list; | 327 | struct list_head dev_list; |
328 | }; | 328 | }; |
329 | 329 | ||
330 | enum { | 330 | enum { |
331 | NAPI_STATE_SCHED, /* Poll is scheduled */ | 331 | NAPI_STATE_SCHED, /* Poll is scheduled */ |
332 | NAPI_STATE_DISABLE, /* Disable pending */ | 332 | NAPI_STATE_DISABLE, /* Disable pending */ |
333 | NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ | 333 | NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ |
334 | }; | 334 | }; |
335 | 335 | ||
336 | enum gro_result { | 336 | enum gro_result { |
337 | GRO_MERGED, | 337 | GRO_MERGED, |
338 | GRO_MERGED_FREE, | 338 | GRO_MERGED_FREE, |
339 | GRO_HELD, | 339 | GRO_HELD, |
340 | GRO_NORMAL, | 340 | GRO_NORMAL, |
341 | GRO_DROP, | 341 | GRO_DROP, |
342 | }; | 342 | }; |
343 | typedef enum gro_result gro_result_t; | 343 | typedef enum gro_result gro_result_t; |
344 | 344 | ||
345 | /* | 345 | /* |
346 | * enum rx_handler_result - Possible return values for rx_handlers. | 346 | * enum rx_handler_result - Possible return values for rx_handlers. |
347 | * @RX_HANDLER_CONSUMED: skb was consumed by rx_handler, do not process it | 347 | * @RX_HANDLER_CONSUMED: skb was consumed by rx_handler, do not process it |
348 | * further. | 348 | * further. |
349 | * @RX_HANDLER_ANOTHER: Do another round in receive path. This is indicated in | 349 | * @RX_HANDLER_ANOTHER: Do another round in receive path. This is indicated in |
350 | * case skb->dev was changed by rx_handler. | 350 | * case skb->dev was changed by rx_handler. |
351 | * @RX_HANDLER_EXACT: Force exact delivery, no wildcard. | 351 | * @RX_HANDLER_EXACT: Force exact delivery, no wildcard. |
352 | * @RX_HANDLER_PASS: Do nothing, passe the skb as if no rx_handler was called. | 352 | * @RX_HANDLER_PASS: Do nothing, passe the skb as if no rx_handler was called. |
353 | * | 353 | * |
354 | * rx_handlers are functions called from inside __netif_receive_skb(), to do | 354 | * rx_handlers are functions called from inside __netif_receive_skb(), to do |
355 | * special processing of the skb, prior to delivery to protocol handlers. | 355 | * special processing of the skb, prior to delivery to protocol handlers. |
356 | * | 356 | * |
357 | * Currently, a net_device can only have a single rx_handler registered. Trying | 357 | * Currently, a net_device can only have a single rx_handler registered. Trying |
358 | * to register a second rx_handler will return -EBUSY. | 358 | * to register a second rx_handler will return -EBUSY. |
359 | * | 359 | * |
360 | * To register a rx_handler on a net_device, use netdev_rx_handler_register(). | 360 | * To register a rx_handler on a net_device, use netdev_rx_handler_register(). |
361 | * To unregister a rx_handler on a net_device, use | 361 | * To unregister a rx_handler on a net_device, use |
362 | * netdev_rx_handler_unregister(). | 362 | * netdev_rx_handler_unregister(). |
363 | * | 363 | * |
364 | * Upon return, rx_handler is expected to tell __netif_receive_skb() what to | 364 | * Upon return, rx_handler is expected to tell __netif_receive_skb() what to |
365 | * do with the skb. | 365 | * do with the skb. |
366 | * | 366 | * |
367 | * If the rx_handler consumed to skb in some way, it should return | 367 | * If the rx_handler consumed to skb in some way, it should return |
368 | * RX_HANDLER_CONSUMED. This is appropriate when the rx_handler arranged for | 368 | * RX_HANDLER_CONSUMED. This is appropriate when the rx_handler arranged for |
369 | * the skb to be delivered in some other ways. | 369 | * the skb to be delivered in some other ways. |
370 | * | 370 | * |
371 | * If the rx_handler changed skb->dev, to divert the skb to another | 371 | * If the rx_handler changed skb->dev, to divert the skb to another |
372 | * net_device, it should return RX_HANDLER_ANOTHER. The rx_handler for the | 372 | * net_device, it should return RX_HANDLER_ANOTHER. The rx_handler for the |
373 | * new device will be called if it exists. | 373 | * new device will be called if it exists. |
374 | * | 374 | * |
375 | * If the rx_handler consider the skb should be ignored, it should return | 375 | * If the rx_handler consider the skb should be ignored, it should return |
376 | * RX_HANDLER_EXACT. The skb will only be delivered to protocol handlers that | 376 | * RX_HANDLER_EXACT. The skb will only be delivered to protocol handlers that |
377 | * are registered on exact device (ptype->dev == skb->dev). | 377 | * are registered on exact device (ptype->dev == skb->dev). |
378 | * | 378 | * |
379 | * If the rx_handler didn't changed skb->dev, but want the skb to be normally | 379 | * If the rx_handler didn't changed skb->dev, but want the skb to be normally |
380 | * delivered, it should return RX_HANDLER_PASS. | 380 | * delivered, it should return RX_HANDLER_PASS. |
381 | * | 381 | * |
382 | * A device without a registered rx_handler will behave as if rx_handler | 382 | * A device without a registered rx_handler will behave as if rx_handler |
383 | * returned RX_HANDLER_PASS. | 383 | * returned RX_HANDLER_PASS. |
384 | */ | 384 | */ |
385 | 385 | ||
386 | enum rx_handler_result { | 386 | enum rx_handler_result { |
387 | RX_HANDLER_CONSUMED, | 387 | RX_HANDLER_CONSUMED, |
388 | RX_HANDLER_ANOTHER, | 388 | RX_HANDLER_ANOTHER, |
389 | RX_HANDLER_EXACT, | 389 | RX_HANDLER_EXACT, |
390 | RX_HANDLER_PASS, | 390 | RX_HANDLER_PASS, |
391 | }; | 391 | }; |
392 | typedef enum rx_handler_result rx_handler_result_t; | 392 | typedef enum rx_handler_result rx_handler_result_t; |
393 | typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb); | 393 | typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb); |
394 | 394 | ||
395 | extern void __napi_schedule(struct napi_struct *n); | 395 | extern void __napi_schedule(struct napi_struct *n); |
396 | 396 | ||
397 | static inline bool napi_disable_pending(struct napi_struct *n) | 397 | static inline bool napi_disable_pending(struct napi_struct *n) |
398 | { | 398 | { |
399 | return test_bit(NAPI_STATE_DISABLE, &n->state); | 399 | return test_bit(NAPI_STATE_DISABLE, &n->state); |
400 | } | 400 | } |
401 | 401 | ||
402 | /** | 402 | /** |
403 | * napi_schedule_prep - check if napi can be scheduled | 403 | * napi_schedule_prep - check if napi can be scheduled |
404 | * @n: napi context | 404 | * @n: napi context |
405 | * | 405 | * |
406 | * Test if NAPI routine is already running, and if not mark | 406 | * Test if NAPI routine is already running, and if not mark |
407 | * it as running. This is used as a condition variable | 407 | * it as running. This is used as a condition variable |
408 | * insure only one NAPI poll instance runs. We also make | 408 | * insure only one NAPI poll instance runs. We also make |
409 | * sure there is no pending NAPI disable. | 409 | * sure there is no pending NAPI disable. |
410 | */ | 410 | */ |
411 | static inline bool napi_schedule_prep(struct napi_struct *n) | 411 | static inline bool napi_schedule_prep(struct napi_struct *n) |
412 | { | 412 | { |
413 | return !napi_disable_pending(n) && | 413 | return !napi_disable_pending(n) && |
414 | !test_and_set_bit(NAPI_STATE_SCHED, &n->state); | 414 | !test_and_set_bit(NAPI_STATE_SCHED, &n->state); |
415 | } | 415 | } |
416 | 416 | ||
417 | /** | 417 | /** |
418 | * napi_schedule - schedule NAPI poll | 418 | * napi_schedule - schedule NAPI poll |
419 | * @n: napi context | 419 | * @n: napi context |
420 | * | 420 | * |
421 | * Schedule NAPI poll routine to be called if it is not already | 421 | * Schedule NAPI poll routine to be called if it is not already |
422 | * running. | 422 | * running. |
423 | */ | 423 | */ |
424 | static inline void napi_schedule(struct napi_struct *n) | 424 | static inline void napi_schedule(struct napi_struct *n) |
425 | { | 425 | { |
426 | if (napi_schedule_prep(n)) | 426 | if (napi_schedule_prep(n)) |
427 | __napi_schedule(n); | 427 | __napi_schedule(n); |
428 | } | 428 | } |
429 | 429 | ||
430 | /* Try to reschedule poll. Called by dev->poll() after napi_complete(). */ | 430 | /* Try to reschedule poll. Called by dev->poll() after napi_complete(). */ |
431 | static inline bool napi_reschedule(struct napi_struct *napi) | 431 | static inline bool napi_reschedule(struct napi_struct *napi) |
432 | { | 432 | { |
433 | if (napi_schedule_prep(napi)) { | 433 | if (napi_schedule_prep(napi)) { |
434 | __napi_schedule(napi); | 434 | __napi_schedule(napi); |
435 | return true; | 435 | return true; |
436 | } | 436 | } |
437 | return false; | 437 | return false; |
438 | } | 438 | } |
439 | 439 | ||
440 | /** | 440 | /** |
441 | * napi_complete - NAPI processing complete | 441 | * napi_complete - NAPI processing complete |
442 | * @n: napi context | 442 | * @n: napi context |
443 | * | 443 | * |
444 | * Mark NAPI processing as complete. | 444 | * Mark NAPI processing as complete. |
445 | */ | 445 | */ |
446 | extern void __napi_complete(struct napi_struct *n); | 446 | extern void __napi_complete(struct napi_struct *n); |
447 | extern void napi_complete(struct napi_struct *n); | 447 | extern void napi_complete(struct napi_struct *n); |
448 | 448 | ||
449 | /** | 449 | /** |
450 | * napi_disable - prevent NAPI from scheduling | 450 | * napi_disable - prevent NAPI from scheduling |
451 | * @n: napi context | 451 | * @n: napi context |
452 | * | 452 | * |
453 | * Stop NAPI from being scheduled on this context. | 453 | * Stop NAPI from being scheduled on this context. |
454 | * Waits till any outstanding processing completes. | 454 | * Waits till any outstanding processing completes. |
455 | */ | 455 | */ |
456 | static inline void napi_disable(struct napi_struct *n) | 456 | static inline void napi_disable(struct napi_struct *n) |
457 | { | 457 | { |
458 | set_bit(NAPI_STATE_DISABLE, &n->state); | 458 | set_bit(NAPI_STATE_DISABLE, &n->state); |
459 | while (test_and_set_bit(NAPI_STATE_SCHED, &n->state)) | 459 | while (test_and_set_bit(NAPI_STATE_SCHED, &n->state)) |
460 | msleep(1); | 460 | msleep(1); |
461 | clear_bit(NAPI_STATE_DISABLE, &n->state); | 461 | clear_bit(NAPI_STATE_DISABLE, &n->state); |
462 | } | 462 | } |
463 | 463 | ||
464 | /** | 464 | /** |
465 | * napi_enable - enable NAPI scheduling | 465 | * napi_enable - enable NAPI scheduling |
466 | * @n: napi context | 466 | * @n: napi context |
467 | * | 467 | * |
468 | * Resume NAPI from being scheduled on this context. | 468 | * Resume NAPI from being scheduled on this context. |
469 | * Must be paired with napi_disable. | 469 | * Must be paired with napi_disable. |
470 | */ | 470 | */ |
471 | static inline void napi_enable(struct napi_struct *n) | 471 | static inline void napi_enable(struct napi_struct *n) |
472 | { | 472 | { |
473 | BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); | 473 | BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); |
474 | smp_mb__before_clear_bit(); | 474 | smp_mb__before_clear_bit(); |
475 | clear_bit(NAPI_STATE_SCHED, &n->state); | 475 | clear_bit(NAPI_STATE_SCHED, &n->state); |
476 | } | 476 | } |
477 | 477 | ||
478 | #ifdef CONFIG_SMP | 478 | #ifdef CONFIG_SMP |
479 | /** | 479 | /** |
480 | * napi_synchronize - wait until NAPI is not running | 480 | * napi_synchronize - wait until NAPI is not running |
481 | * @n: napi context | 481 | * @n: napi context |
482 | * | 482 | * |
483 | * Wait until NAPI is done being scheduled on this context. | 483 | * Wait until NAPI is done being scheduled on this context. |
484 | * Waits till any outstanding processing completes but | 484 | * Waits till any outstanding processing completes but |
485 | * does not disable future activations. | 485 | * does not disable future activations. |
486 | */ | 486 | */ |
487 | static inline void napi_synchronize(const struct napi_struct *n) | 487 | static inline void napi_synchronize(const struct napi_struct *n) |
488 | { | 488 | { |
489 | while (test_bit(NAPI_STATE_SCHED, &n->state)) | 489 | while (test_bit(NAPI_STATE_SCHED, &n->state)) |
490 | msleep(1); | 490 | msleep(1); |
491 | } | 491 | } |
492 | #else | 492 | #else |
493 | # define napi_synchronize(n) barrier() | 493 | # define napi_synchronize(n) barrier() |
494 | #endif | 494 | #endif |
495 | 495 | ||
496 | enum netdev_queue_state_t { | 496 | enum netdev_queue_state_t { |
497 | __QUEUE_STATE_DRV_XOFF, | 497 | __QUEUE_STATE_DRV_XOFF, |
498 | __QUEUE_STATE_STACK_XOFF, | 498 | __QUEUE_STATE_STACK_XOFF, |
499 | __QUEUE_STATE_FROZEN, | 499 | __QUEUE_STATE_FROZEN, |
500 | #define QUEUE_STATE_ANY_XOFF ((1 << __QUEUE_STATE_DRV_XOFF) | \ | 500 | #define QUEUE_STATE_ANY_XOFF ((1 << __QUEUE_STATE_DRV_XOFF) | \ |
501 | (1 << __QUEUE_STATE_STACK_XOFF)) | 501 | (1 << __QUEUE_STATE_STACK_XOFF)) |
502 | #define QUEUE_STATE_ANY_XOFF_OR_FROZEN (QUEUE_STATE_ANY_XOFF | \ | 502 | #define QUEUE_STATE_ANY_XOFF_OR_FROZEN (QUEUE_STATE_ANY_XOFF | \ |
503 | (1 << __QUEUE_STATE_FROZEN)) | 503 | (1 << __QUEUE_STATE_FROZEN)) |
504 | }; | 504 | }; |
505 | /* | 505 | /* |
506 | * __QUEUE_STATE_DRV_XOFF is used by drivers to stop the transmit queue. The | 506 | * __QUEUE_STATE_DRV_XOFF is used by drivers to stop the transmit queue. The |
507 | * netif_tx_* functions below are used to manipulate this flag. The | 507 | * netif_tx_* functions below are used to manipulate this flag. The |
508 | * __QUEUE_STATE_STACK_XOFF flag is used by the stack to stop the transmit | 508 | * __QUEUE_STATE_STACK_XOFF flag is used by the stack to stop the transmit |
509 | * queue independently. The netif_xmit_*stopped functions below are called | 509 | * queue independently. The netif_xmit_*stopped functions below are called |
510 | * to check if the queue has been stopped by the driver or stack (either | 510 | * to check if the queue has been stopped by the driver or stack (either |
511 | * of the XOFF bits are set in the state). Drivers should not need to call | 511 | * of the XOFF bits are set in the state). Drivers should not need to call |
512 | * netif_xmit*stopped functions, they should only be using netif_tx_*. | 512 | * netif_xmit*stopped functions, they should only be using netif_tx_*. |
513 | */ | 513 | */ |
514 | 514 | ||
515 | struct netdev_queue { | 515 | struct netdev_queue { |
516 | /* | 516 | /* |
517 | * read mostly part | 517 | * read mostly part |
518 | */ | 518 | */ |
519 | struct net_device *dev; | 519 | struct net_device *dev; |
520 | struct Qdisc *qdisc; | 520 | struct Qdisc *qdisc; |
521 | struct Qdisc *qdisc_sleeping; | 521 | struct Qdisc *qdisc_sleeping; |
522 | #ifdef CONFIG_SYSFS | 522 | #ifdef CONFIG_SYSFS |
523 | struct kobject kobj; | 523 | struct kobject kobj; |
524 | #endif | 524 | #endif |
525 | #if defined(CONFIG_XPS) && defined(CONFIG_NUMA) | 525 | #if defined(CONFIG_XPS) && defined(CONFIG_NUMA) |
526 | int numa_node; | 526 | int numa_node; |
527 | #endif | 527 | #endif |
528 | /* | 528 | /* |
529 | * write mostly part | 529 | * write mostly part |
530 | */ | 530 | */ |
531 | spinlock_t _xmit_lock ____cacheline_aligned_in_smp; | 531 | spinlock_t _xmit_lock ____cacheline_aligned_in_smp; |
532 | int xmit_lock_owner; | 532 | int xmit_lock_owner; |
533 | /* | 533 | /* |
534 | * please use this field instead of dev->trans_start | 534 | * please use this field instead of dev->trans_start |
535 | */ | 535 | */ |
536 | unsigned long trans_start; | 536 | unsigned long trans_start; |
537 | 537 | ||
538 | /* | 538 | /* |
539 | * Number of TX timeouts for this queue | 539 | * Number of TX timeouts for this queue |
540 | * (/sys/class/net/DEV/Q/trans_timeout) | 540 | * (/sys/class/net/DEV/Q/trans_timeout) |
541 | */ | 541 | */ |
542 | unsigned long trans_timeout; | 542 | unsigned long trans_timeout; |
543 | 543 | ||
544 | unsigned long state; | 544 | unsigned long state; |
545 | 545 | ||
546 | #ifdef CONFIG_BQL | 546 | #ifdef CONFIG_BQL |
547 | struct dql dql; | 547 | struct dql dql; |
548 | #endif | 548 | #endif |
549 | } ____cacheline_aligned_in_smp; | 549 | } ____cacheline_aligned_in_smp; |
550 | 550 | ||
551 | static inline int netdev_queue_numa_node_read(const struct netdev_queue *q) | 551 | static inline int netdev_queue_numa_node_read(const struct netdev_queue *q) |
552 | { | 552 | { |
553 | #if defined(CONFIG_XPS) && defined(CONFIG_NUMA) | 553 | #if defined(CONFIG_XPS) && defined(CONFIG_NUMA) |
554 | return q->numa_node; | 554 | return q->numa_node; |
555 | #else | 555 | #else |
556 | return NUMA_NO_NODE; | 556 | return NUMA_NO_NODE; |
557 | #endif | 557 | #endif |
558 | } | 558 | } |
559 | 559 | ||
560 | static inline void netdev_queue_numa_node_write(struct netdev_queue *q, int node) | 560 | static inline void netdev_queue_numa_node_write(struct netdev_queue *q, int node) |
561 | { | 561 | { |
562 | #if defined(CONFIG_XPS) && defined(CONFIG_NUMA) | 562 | #if defined(CONFIG_XPS) && defined(CONFIG_NUMA) |
563 | q->numa_node = node; | 563 | q->numa_node = node; |
564 | #endif | 564 | #endif |
565 | } | 565 | } |
566 | 566 | ||
567 | #ifdef CONFIG_RPS | 567 | #ifdef CONFIG_RPS |
568 | /* | 568 | /* |
569 | * This structure holds an RPS map which can be of variable length. The | 569 | * This structure holds an RPS map which can be of variable length. The |
570 | * map is an array of CPUs. | 570 | * map is an array of CPUs. |
571 | */ | 571 | */ |
572 | struct rps_map { | 572 | struct rps_map { |
573 | unsigned int len; | 573 | unsigned int len; |
574 | struct rcu_head rcu; | 574 | struct rcu_head rcu; |
575 | u16 cpus[0]; | 575 | u16 cpus[0]; |
576 | }; | 576 | }; |
577 | #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16))) | 577 | #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16))) |
578 | 578 | ||
579 | /* | 579 | /* |
580 | * The rps_dev_flow structure contains the mapping of a flow to a CPU, the | 580 | * The rps_dev_flow structure contains the mapping of a flow to a CPU, the |
581 | * tail pointer for that CPU's input queue at the time of last enqueue, and | 581 | * tail pointer for that CPU's input queue at the time of last enqueue, and |
582 | * a hardware filter index. | 582 | * a hardware filter index. |
583 | */ | 583 | */ |
584 | struct rps_dev_flow { | 584 | struct rps_dev_flow { |
585 | u16 cpu; | 585 | u16 cpu; |
586 | u16 filter; | 586 | u16 filter; |
587 | unsigned int last_qtail; | 587 | unsigned int last_qtail; |
588 | }; | 588 | }; |
589 | #define RPS_NO_FILTER 0xffff | 589 | #define RPS_NO_FILTER 0xffff |
590 | 590 | ||
591 | /* | 591 | /* |
592 | * The rps_dev_flow_table structure contains a table of flow mappings. | 592 | * The rps_dev_flow_table structure contains a table of flow mappings. |
593 | */ | 593 | */ |
594 | struct rps_dev_flow_table { | 594 | struct rps_dev_flow_table { |
595 | unsigned int mask; | 595 | unsigned int mask; |
596 | struct rcu_head rcu; | 596 | struct rcu_head rcu; |
597 | struct work_struct free_work; | 597 | struct work_struct free_work; |
598 | struct rps_dev_flow flows[0]; | 598 | struct rps_dev_flow flows[0]; |
599 | }; | 599 | }; |
600 | #define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \ | 600 | #define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \ |
601 | ((_num) * sizeof(struct rps_dev_flow))) | 601 | ((_num) * sizeof(struct rps_dev_flow))) |
602 | 602 | ||
603 | /* | 603 | /* |
604 | * The rps_sock_flow_table contains mappings of flows to the last CPU | 604 | * The rps_sock_flow_table contains mappings of flows to the last CPU |
605 | * on which they were processed by the application (set in recvmsg). | 605 | * on which they were processed by the application (set in recvmsg). |
606 | */ | 606 | */ |
607 | struct rps_sock_flow_table { | 607 | struct rps_sock_flow_table { |
608 | unsigned int mask; | 608 | unsigned int mask; |
609 | u16 ents[0]; | 609 | u16 ents[0]; |
610 | }; | 610 | }; |
611 | #define RPS_SOCK_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_sock_flow_table) + \ | 611 | #define RPS_SOCK_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_sock_flow_table) + \ |
612 | ((_num) * sizeof(u16))) | 612 | ((_num) * sizeof(u16))) |
613 | 613 | ||
614 | #define RPS_NO_CPU 0xffff | 614 | #define RPS_NO_CPU 0xffff |
615 | 615 | ||
616 | static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, | 616 | static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, |
617 | u32 hash) | 617 | u32 hash) |
618 | { | 618 | { |
619 | if (table && hash) { | 619 | if (table && hash) { |
620 | unsigned int cpu, index = hash & table->mask; | 620 | unsigned int cpu, index = hash & table->mask; |
621 | 621 | ||
622 | /* We only give a hint, preemption can change cpu under us */ | 622 | /* We only give a hint, preemption can change cpu under us */ |
623 | cpu = raw_smp_processor_id(); | 623 | cpu = raw_smp_processor_id(); |
624 | 624 | ||
625 | if (table->ents[index] != cpu) | 625 | if (table->ents[index] != cpu) |
626 | table->ents[index] = cpu; | 626 | table->ents[index] = cpu; |
627 | } | 627 | } |
628 | } | 628 | } |
629 | 629 | ||
630 | static inline void rps_reset_sock_flow(struct rps_sock_flow_table *table, | 630 | static inline void rps_reset_sock_flow(struct rps_sock_flow_table *table, |
631 | u32 hash) | 631 | u32 hash) |
632 | { | 632 | { |
633 | if (table && hash) | 633 | if (table && hash) |
634 | table->ents[hash & table->mask] = RPS_NO_CPU; | 634 | table->ents[hash & table->mask] = RPS_NO_CPU; |
635 | } | 635 | } |
636 | 636 | ||
637 | extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; | 637 | extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; |
638 | 638 | ||
639 | #ifdef CONFIG_RFS_ACCEL | 639 | #ifdef CONFIG_RFS_ACCEL |
640 | extern bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, | 640 | extern bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, |
641 | u32 flow_id, u16 filter_id); | 641 | u32 flow_id, u16 filter_id); |
642 | #endif | 642 | #endif |
643 | 643 | ||
644 | /* This structure contains an instance of an RX queue. */ | 644 | /* This structure contains an instance of an RX queue. */ |
645 | struct netdev_rx_queue { | 645 | struct netdev_rx_queue { |
646 | struct rps_map __rcu *rps_map; | 646 | struct rps_map __rcu *rps_map; |
647 | struct rps_dev_flow_table __rcu *rps_flow_table; | 647 | struct rps_dev_flow_table __rcu *rps_flow_table; |
648 | struct kobject kobj; | 648 | struct kobject kobj; |
649 | struct net_device *dev; | 649 | struct net_device *dev; |
650 | } ____cacheline_aligned_in_smp; | 650 | } ____cacheline_aligned_in_smp; |
651 | #endif /* CONFIG_RPS */ | 651 | #endif /* CONFIG_RPS */ |
652 | 652 | ||
653 | #ifdef CONFIG_XPS | 653 | #ifdef CONFIG_XPS |
654 | /* | 654 | /* |
655 | * This structure holds an XPS map which can be of variable length. The | 655 | * This structure holds an XPS map which can be of variable length. The |
656 | * map is an array of queues. | 656 | * map is an array of queues. |
657 | */ | 657 | */ |
658 | struct xps_map { | 658 | struct xps_map { |
659 | unsigned int len; | 659 | unsigned int len; |
660 | unsigned int alloc_len; | 660 | unsigned int alloc_len; |
661 | struct rcu_head rcu; | 661 | struct rcu_head rcu; |
662 | u16 queues[0]; | 662 | u16 queues[0]; |
663 | }; | 663 | }; |
664 | #define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + ((_num) * sizeof(u16))) | 664 | #define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + ((_num) * sizeof(u16))) |
665 | #define XPS_MIN_MAP_ALLOC ((L1_CACHE_BYTES - sizeof(struct xps_map)) \ | 665 | #define XPS_MIN_MAP_ALLOC ((L1_CACHE_BYTES - sizeof(struct xps_map)) \ |
666 | / sizeof(u16)) | 666 | / sizeof(u16)) |
667 | 667 | ||
668 | /* | 668 | /* |
669 | * This structure holds all XPS maps for device. Maps are indexed by CPU. | 669 | * This structure holds all XPS maps for device. Maps are indexed by CPU. |
670 | */ | 670 | */ |
671 | struct xps_dev_maps { | 671 | struct xps_dev_maps { |
672 | struct rcu_head rcu; | 672 | struct rcu_head rcu; |
673 | struct xps_map __rcu *cpu_map[0]; | 673 | struct xps_map __rcu *cpu_map[0]; |
674 | }; | 674 | }; |
675 | #define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) + \ | 675 | #define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) + \ |
676 | (nr_cpu_ids * sizeof(struct xps_map *))) | 676 | (nr_cpu_ids * sizeof(struct xps_map *))) |
677 | #endif /* CONFIG_XPS */ | 677 | #endif /* CONFIG_XPS */ |
678 | 678 | ||
679 | #define TC_MAX_QUEUE 16 | 679 | #define TC_MAX_QUEUE 16 |
680 | #define TC_BITMASK 15 | 680 | #define TC_BITMASK 15 |
681 | /* HW offloaded queuing disciplines txq count and offset maps */ | 681 | /* HW offloaded queuing disciplines txq count and offset maps */ |
682 | struct netdev_tc_txq { | 682 | struct netdev_tc_txq { |
683 | u16 count; | 683 | u16 count; |
684 | u16 offset; | 684 | u16 offset; |
685 | }; | 685 | }; |
686 | 686 | ||
687 | #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) | 687 | #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) |
688 | /* | 688 | /* |
689 | * This structure is to hold information about the device | 689 | * This structure is to hold information about the device |
690 | * configured to run FCoE protocol stack. | 690 | * configured to run FCoE protocol stack. |
691 | */ | 691 | */ |
692 | struct netdev_fcoe_hbainfo { | 692 | struct netdev_fcoe_hbainfo { |
693 | char manufacturer[64]; | 693 | char manufacturer[64]; |
694 | char serial_number[64]; | 694 | char serial_number[64]; |
695 | char hardware_version[64]; | 695 | char hardware_version[64]; |
696 | char driver_version[64]; | 696 | char driver_version[64]; |
697 | char optionrom_version[64]; | 697 | char optionrom_version[64]; |
698 | char firmware_version[64]; | 698 | char firmware_version[64]; |
699 | char model[256]; | 699 | char model[256]; |
700 | char model_description[256]; | 700 | char model_description[256]; |
701 | }; | 701 | }; |
702 | #endif | 702 | #endif |
703 | 703 | ||
704 | /* | 704 | /* |
705 | * This structure defines the management hooks for network devices. | 705 | * This structure defines the management hooks for network devices. |
706 | * The following hooks can be defined; unless noted otherwise, they are | 706 | * The following hooks can be defined; unless noted otherwise, they are |
707 | * optional and can be filled with a null pointer. | 707 | * optional and can be filled with a null pointer. |
708 | * | 708 | * |
709 | * int (*ndo_init)(struct net_device *dev); | 709 | * int (*ndo_init)(struct net_device *dev); |
710 | * This function is called once when network device is registered. | 710 | * This function is called once when network device is registered. |
711 | * The network device can use this to any late stage initializaton | 711 | * The network device can use this to any late stage initializaton |
712 | * or semantic validattion. It can fail with an error code which will | 712 | * or semantic validattion. It can fail with an error code which will |
713 | * be propogated back to register_netdev | 713 | * be propogated back to register_netdev |
714 | * | 714 | * |
715 | * void (*ndo_uninit)(struct net_device *dev); | 715 | * void (*ndo_uninit)(struct net_device *dev); |
716 | * This function is called when device is unregistered or when registration | 716 | * This function is called when device is unregistered or when registration |
717 | * fails. It is not called if init fails. | 717 | * fails. It is not called if init fails. |
718 | * | 718 | * |
719 | * int (*ndo_open)(struct net_device *dev); | 719 | * int (*ndo_open)(struct net_device *dev); |
720 | * This function is called when network device transistions to the up | 720 | * This function is called when network device transistions to the up |
721 | * state. | 721 | * state. |
722 | * | 722 | * |
723 | * int (*ndo_stop)(struct net_device *dev); | 723 | * int (*ndo_stop)(struct net_device *dev); |
724 | * This function is called when network device transistions to the down | 724 | * This function is called when network device transistions to the down |
725 | * state. | 725 | * state. |
726 | * | 726 | * |
727 | * netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb, | 727 | * netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb, |
728 | * struct net_device *dev); | 728 | * struct net_device *dev); |
729 | * Called when a packet needs to be transmitted. | 729 | * Called when a packet needs to be transmitted. |
730 | * Must return NETDEV_TX_OK , NETDEV_TX_BUSY. | 730 | * Must return NETDEV_TX_OK , NETDEV_TX_BUSY. |
731 | * (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX) | 731 | * (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX) |
732 | * Required can not be NULL. | 732 | * Required can not be NULL. |
733 | * | 733 | * |
734 | * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb); | 734 | * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb); |
735 | * Called to decide which queue to when device supports multiple | 735 | * Called to decide which queue to when device supports multiple |
736 | * transmit queues. | 736 | * transmit queues. |
737 | * | 737 | * |
738 | * void (*ndo_change_rx_flags)(struct net_device *dev, int flags); | 738 | * void (*ndo_change_rx_flags)(struct net_device *dev, int flags); |
739 | * This function is called to allow device receiver to make | 739 | * This function is called to allow device receiver to make |
740 | * changes to configuration when multicast or promiscious is enabled. | 740 | * changes to configuration when multicast or promiscious is enabled. |
741 | * | 741 | * |
742 | * void (*ndo_set_rx_mode)(struct net_device *dev); | 742 | * void (*ndo_set_rx_mode)(struct net_device *dev); |
743 | * This function is called device changes address list filtering. | 743 | * This function is called device changes address list filtering. |
744 | * If driver handles unicast address filtering, it should set | 744 | * If driver handles unicast address filtering, it should set |
745 | * IFF_UNICAST_FLT to its priv_flags. | 745 | * IFF_UNICAST_FLT to its priv_flags. |
746 | * | 746 | * |
747 | * int (*ndo_set_mac_address)(struct net_device *dev, void *addr); | 747 | * int (*ndo_set_mac_address)(struct net_device *dev, void *addr); |
748 | * This function is called when the Media Access Control address | 748 | * This function is called when the Media Access Control address |
749 | * needs to be changed. If this interface is not defined, the | 749 | * needs to be changed. If this interface is not defined, the |
750 | * mac address can not be changed. | 750 | * mac address can not be changed. |
751 | * | 751 | * |
752 | * int (*ndo_validate_addr)(struct net_device *dev); | 752 | * int (*ndo_validate_addr)(struct net_device *dev); |
753 | * Test if Media Access Control address is valid for the device. | 753 | * Test if Media Access Control address is valid for the device. |
754 | * | 754 | * |
755 | * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); | 755 | * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); |
756 | * Called when a user request an ioctl which can't be handled by | 756 | * Called when a user request an ioctl which can't be handled by |
757 | * the generic interface code. If not defined ioctl's return | 757 | * the generic interface code. If not defined ioctl's return |
758 | * not supported error code. | 758 | * not supported error code. |
759 | * | 759 | * |
760 | * int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); | 760 | * int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); |
761 | * Used to set network devices bus interface parameters. This interface | 761 | * Used to set network devices bus interface parameters. This interface |
762 | * is retained for legacy reason, new devices should use the bus | 762 | * is retained for legacy reason, new devices should use the bus |
763 | * interface (PCI) for low level management. | 763 | * interface (PCI) for low level management. |
764 | * | 764 | * |
765 | * int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); | 765 | * int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); |
766 | * Called when a user wants to change the Maximum Transfer Unit | 766 | * Called when a user wants to change the Maximum Transfer Unit |
767 | * of a device. If not defined, any request to change the MTU will | 767 | * of a device. If not defined, any request to change the MTU will |
768 | * return an error. | 768 | * return an error. |
769 | * | 769 | * |
770 | * void (*ndo_tx_timeout)(struct net_device *dev); | 770 | * void (*ndo_tx_timeout)(struct net_device *dev); |
771 | * Callback used when the transmitter has not made any progress | 771 | * Callback used when the transmitter has not made any progress |
772 | * for dev->watchdog ticks. | 772 | * for dev->watchdog ticks. |
773 | * | 773 | * |
774 | * struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev, | 774 | * struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev, |
775 | * struct rtnl_link_stats64 *storage); | 775 | * struct rtnl_link_stats64 *storage); |
776 | * struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); | 776 | * struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); |
777 | * Called when a user wants to get the network device usage | 777 | * Called when a user wants to get the network device usage |
778 | * statistics. Drivers must do one of the following: | 778 | * statistics. Drivers must do one of the following: |
779 | * 1. Define @ndo_get_stats64 to fill in a zero-initialised | 779 | * 1. Define @ndo_get_stats64 to fill in a zero-initialised |
780 | * rtnl_link_stats64 structure passed by the caller. | 780 | * rtnl_link_stats64 structure passed by the caller. |
781 | * 2. Define @ndo_get_stats to update a net_device_stats structure | 781 | * 2. Define @ndo_get_stats to update a net_device_stats structure |
782 | * (which should normally be dev->stats) and return a pointer to | 782 | * (which should normally be dev->stats) and return a pointer to |
783 | * it. The structure may be changed asynchronously only if each | 783 | * it. The structure may be changed asynchronously only if each |
784 | * field is written atomically. | 784 | * field is written atomically. |
785 | * 3. Update dev->stats asynchronously and atomically, and define | 785 | * 3. Update dev->stats asynchronously and atomically, and define |
786 | * neither operation. | 786 | * neither operation. |
787 | * | 787 | * |
788 | * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, unsigned short vid); | 788 | * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, unsigned short vid); |
789 | * If the device supports VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER) | 789 | * If the device supports VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER) |
790 | * this function is called when a VLAN id is registered. | 790 | * this function is called when a VLAN id is registered. |
791 | * | 791 | * |
792 | * int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, unsigned short vid); | 792 | * int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, unsigned short vid); |
793 | * If the device supports VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER) | 793 | * If the device supports VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER) |
794 | * this function is called when a VLAN id is unregistered. | 794 | * this function is called when a VLAN id is unregistered. |
795 | * | 795 | * |
796 | * void (*ndo_poll_controller)(struct net_device *dev); | 796 | * void (*ndo_poll_controller)(struct net_device *dev); |
797 | * | 797 | * |
798 | * SR-IOV management functions. | 798 | * SR-IOV management functions. |
799 | * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac); | 799 | * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac); |
800 | * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos); | 800 | * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos); |
801 | * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate); | 801 | * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate); |
802 | * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting); | 802 | * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting); |
803 | * int (*ndo_get_vf_config)(struct net_device *dev, | 803 | * int (*ndo_get_vf_config)(struct net_device *dev, |
804 | * int vf, struct ifla_vf_info *ivf); | 804 | * int vf, struct ifla_vf_info *ivf); |
805 | * int (*ndo_set_vf_port)(struct net_device *dev, int vf, | 805 | * int (*ndo_set_vf_port)(struct net_device *dev, int vf, |
806 | * struct nlattr *port[]); | 806 | * struct nlattr *port[]); |
807 | * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); | 807 | * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); |
808 | * int (*ndo_setup_tc)(struct net_device *dev, u8 tc) | 808 | * int (*ndo_setup_tc)(struct net_device *dev, u8 tc) |
809 | * Called to setup 'tc' number of traffic classes in the net device. This | 809 | * Called to setup 'tc' number of traffic classes in the net device. This |
810 | * is always called from the stack with the rtnl lock held and netif tx | 810 | * is always called from the stack with the rtnl lock held and netif tx |
811 | * queues stopped. This allows the netdevice to perform queue management | 811 | * queues stopped. This allows the netdevice to perform queue management |
812 | * safely. | 812 | * safely. |
813 | * | 813 | * |
814 | * Fiber Channel over Ethernet (FCoE) offload functions. | 814 | * Fiber Channel over Ethernet (FCoE) offload functions. |
815 | * int (*ndo_fcoe_enable)(struct net_device *dev); | 815 | * int (*ndo_fcoe_enable)(struct net_device *dev); |
816 | * Called when the FCoE protocol stack wants to start using LLD for FCoE | 816 | * Called when the FCoE protocol stack wants to start using LLD for FCoE |
817 | * so the underlying device can perform whatever needed configuration or | 817 | * so the underlying device can perform whatever needed configuration or |
818 | * initialization to support acceleration of FCoE traffic. | 818 | * initialization to support acceleration of FCoE traffic. |
819 | * | 819 | * |
820 | * int (*ndo_fcoe_disable)(struct net_device *dev); | 820 | * int (*ndo_fcoe_disable)(struct net_device *dev); |
821 | * Called when the FCoE protocol stack wants to stop using LLD for FCoE | 821 | * Called when the FCoE protocol stack wants to stop using LLD for FCoE |
822 | * so the underlying device can perform whatever needed clean-ups to | 822 | * so the underlying device can perform whatever needed clean-ups to |
823 | * stop supporting acceleration of FCoE traffic. | 823 | * stop supporting acceleration of FCoE traffic. |
824 | * | 824 | * |
825 | * int (*ndo_fcoe_ddp_setup)(struct net_device *dev, u16 xid, | 825 | * int (*ndo_fcoe_ddp_setup)(struct net_device *dev, u16 xid, |
826 | * struct scatterlist *sgl, unsigned int sgc); | 826 | * struct scatterlist *sgl, unsigned int sgc); |
827 | * Called when the FCoE Initiator wants to initialize an I/O that | 827 | * Called when the FCoE Initiator wants to initialize an I/O that |
828 | * is a possible candidate for Direct Data Placement (DDP). The LLD can | 828 | * is a possible candidate for Direct Data Placement (DDP). The LLD can |
829 | * perform necessary setup and returns 1 to indicate the device is set up | 829 | * perform necessary setup and returns 1 to indicate the device is set up |
830 | * successfully to perform DDP on this I/O, otherwise this returns 0. | 830 | * successfully to perform DDP on this I/O, otherwise this returns 0. |
831 | * | 831 | * |
832 | * int (*ndo_fcoe_ddp_done)(struct net_device *dev, u16 xid); | 832 | * int (*ndo_fcoe_ddp_done)(struct net_device *dev, u16 xid); |
833 | * Called when the FCoE Initiator/Target is done with the DDPed I/O as | 833 | * Called when the FCoE Initiator/Target is done with the DDPed I/O as |
834 | * indicated by the FC exchange id 'xid', so the underlying device can | 834 | * indicated by the FC exchange id 'xid', so the underlying device can |
835 | * clean up and reuse resources for later DDP requests. | 835 | * clean up and reuse resources for later DDP requests. |
836 | * | 836 | * |
837 | * int (*ndo_fcoe_ddp_target)(struct net_device *dev, u16 xid, | 837 | * int (*ndo_fcoe_ddp_target)(struct net_device *dev, u16 xid, |
838 | * struct scatterlist *sgl, unsigned int sgc); | 838 | * struct scatterlist *sgl, unsigned int sgc); |
839 | * Called when the FCoE Target wants to initialize an I/O that | 839 | * Called when the FCoE Target wants to initialize an I/O that |
840 | * is a possible candidate for Direct Data Placement (DDP). The LLD can | 840 | * is a possible candidate for Direct Data Placement (DDP). The LLD can |
841 | * perform necessary setup and returns 1 to indicate the device is set up | 841 | * perform necessary setup and returns 1 to indicate the device is set up |
842 | * successfully to perform DDP on this I/O, otherwise this returns 0. | 842 | * successfully to perform DDP on this I/O, otherwise this returns 0. |
843 | * | 843 | * |
844 | * int (*ndo_fcoe_get_hbainfo)(struct net_device *dev, | 844 | * int (*ndo_fcoe_get_hbainfo)(struct net_device *dev, |
845 | * struct netdev_fcoe_hbainfo *hbainfo); | 845 | * struct netdev_fcoe_hbainfo *hbainfo); |
846 | * Called when the FCoE Protocol stack wants information on the underlying | 846 | * Called when the FCoE Protocol stack wants information on the underlying |
847 | * device. This information is utilized by the FCoE protocol stack to | 847 | * device. This information is utilized by the FCoE protocol stack to |
848 | * register attributes with Fiber Channel management service as per the | 848 | * register attributes with Fiber Channel management service as per the |
849 | * FC-GS Fabric Device Management Information (FDMI) specification. | 849 | * FC-GS Fabric Device Management Information (FDMI) specification. |
850 | * | 850 | * |
851 | * int (*ndo_fcoe_get_wwn)(struct net_device *dev, u64 *wwn, int type); | 851 | * int (*ndo_fcoe_get_wwn)(struct net_device *dev, u64 *wwn, int type); |
852 | * Called when the underlying device wants to override default World Wide | 852 | * Called when the underlying device wants to override default World Wide |
853 | * Name (WWN) generation mechanism in FCoE protocol stack to pass its own | 853 | * Name (WWN) generation mechanism in FCoE protocol stack to pass its own |
854 | * World Wide Port Name (WWPN) or World Wide Node Name (WWNN) to the FCoE | 854 | * World Wide Port Name (WWPN) or World Wide Node Name (WWNN) to the FCoE |
855 | * protocol stack to use. | 855 | * protocol stack to use. |
856 | * | 856 | * |
857 | * RFS acceleration. | 857 | * RFS acceleration. |
858 | * int (*ndo_rx_flow_steer)(struct net_device *dev, const struct sk_buff *skb, | 858 | * int (*ndo_rx_flow_steer)(struct net_device *dev, const struct sk_buff *skb, |
859 | * u16 rxq_index, u32 flow_id); | 859 | * u16 rxq_index, u32 flow_id); |
860 | * Set hardware filter for RFS. rxq_index is the target queue index; | 860 | * Set hardware filter for RFS. rxq_index is the target queue index; |
861 | * flow_id is a flow ID to be passed to rps_may_expire_flow() later. | 861 | * flow_id is a flow ID to be passed to rps_may_expire_flow() later. |
862 | * Return the filter ID on success, or a negative error code. | 862 | * Return the filter ID on success, or a negative error code. |
863 | * | 863 | * |
864 | * Slave management functions (for bridge, bonding, etc). | 864 | * Slave management functions (for bridge, bonding, etc). |
865 | * int (*ndo_add_slave)(struct net_device *dev, struct net_device *slave_dev); | 865 | * int (*ndo_add_slave)(struct net_device *dev, struct net_device *slave_dev); |
866 | * Called to make another netdev an underling. | 866 | * Called to make another netdev an underling. |
867 | * | 867 | * |
868 | * int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); | 868 | * int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); |
869 | * Called to release previously enslaved netdev. | 869 | * Called to release previously enslaved netdev. |
870 | * | 870 | * |
871 | * Feature/offload setting functions. | 871 | * Feature/offload setting functions. |
872 | * netdev_features_t (*ndo_fix_features)(struct net_device *dev, | 872 | * netdev_features_t (*ndo_fix_features)(struct net_device *dev, |
873 | * netdev_features_t features); | 873 | * netdev_features_t features); |
874 | * Adjusts the requested feature flags according to device-specific | 874 | * Adjusts the requested feature flags according to device-specific |
875 | * constraints, and returns the resulting flags. Must not modify | 875 | * constraints, and returns the resulting flags. Must not modify |
876 | * the device state. | 876 | * the device state. |
877 | * | 877 | * |
878 | * int (*ndo_set_features)(struct net_device *dev, netdev_features_t features); | 878 | * int (*ndo_set_features)(struct net_device *dev, netdev_features_t features); |
879 | * Called to update device configuration to new features. Passed | 879 | * Called to update device configuration to new features. Passed |
880 | * feature set might be less than what was returned by ndo_fix_features(). | 880 | * feature set might be less than what was returned by ndo_fix_features(). |
881 | * Must return >0 or -errno if it changed dev->features itself. | 881 | * Must return >0 or -errno if it changed dev->features itself. |
882 | * | 882 | * |
883 | * int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[], | 883 | * int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[], |
884 | * struct net_device *dev, | 884 | * struct net_device *dev, |
885 | * const unsigned char *addr, u16 flags) | 885 | * const unsigned char *addr, u16 flags) |
886 | * Adds an FDB entry to dev for addr. | 886 | * Adds an FDB entry to dev for addr. |
887 | * int (*ndo_fdb_del)(struct ndmsg *ndm, struct net_device *dev, | 887 | * int (*ndo_fdb_del)(struct ndmsg *ndm, struct net_device *dev, |
888 | * const unsigned char *addr) | 888 | * const unsigned char *addr) |
889 | * Deletes the FDB entry from dev corresponding to addr. | 889 | * Deletes the FDB entry from dev corresponding to addr. |
890 | * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, | 890 | * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, |
891 | * struct net_device *dev, int idx) | 891 | * struct net_device *dev, int idx) |
892 | * Used to add FDB entries to dump requests. Implementers should add | 892 | * Used to add FDB entries to dump requests. Implementers should add |
893 | * entries to skb and update idx with the number of entries. | 893 | * entries to skb and update idx with the number of entries. |
894 | * | 894 | * |
895 | * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh) | 895 | * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh) |
896 | * int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, | 896 | * int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, |
897 | * struct net_device *dev) | 897 | * struct net_device *dev) |
898 | * | 898 | * |
899 | * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); | 899 | * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); |
900 | * Called to change device carrier. Soft-devices (like dummy, team, etc) | 900 | * Called to change device carrier. Soft-devices (like dummy, team, etc) |
901 | * which do not represent real hardware may define this to allow their | 901 | * which do not represent real hardware may define this to allow their |
902 | * userspace components to manage their virtual carrier state. Devices | 902 | * userspace components to manage their virtual carrier state. Devices |
903 | * that determine carrier state from physical hardware properties (eg | 903 | * that determine carrier state from physical hardware properties (eg |
904 | * network cables) or protocol-dependent mechanisms (eg | 904 | * network cables) or protocol-dependent mechanisms (eg |
905 | * USB_CDC_NOTIFY_NETWORK_CONNECTION) should NOT implement this function. | 905 | * USB_CDC_NOTIFY_NETWORK_CONNECTION) should NOT implement this function. |
906 | */ | 906 | */ |
907 | struct net_device_ops { | 907 | struct net_device_ops { |
908 | int (*ndo_init)(struct net_device *dev); | 908 | int (*ndo_init)(struct net_device *dev); |
909 | void (*ndo_uninit)(struct net_device *dev); | 909 | void (*ndo_uninit)(struct net_device *dev); |
910 | int (*ndo_open)(struct net_device *dev); | 910 | int (*ndo_open)(struct net_device *dev); |
911 | int (*ndo_stop)(struct net_device *dev); | 911 | int (*ndo_stop)(struct net_device *dev); |
912 | netdev_tx_t (*ndo_start_xmit) (struct sk_buff *skb, | 912 | netdev_tx_t (*ndo_start_xmit) (struct sk_buff *skb, |
913 | struct net_device *dev); | 913 | struct net_device *dev); |
914 | u16 (*ndo_select_queue)(struct net_device *dev, | 914 | u16 (*ndo_select_queue)(struct net_device *dev, |
915 | struct sk_buff *skb); | 915 | struct sk_buff *skb); |
916 | void (*ndo_change_rx_flags)(struct net_device *dev, | 916 | void (*ndo_change_rx_flags)(struct net_device *dev, |
917 | int flags); | 917 | int flags); |
918 | void (*ndo_set_rx_mode)(struct net_device *dev); | 918 | void (*ndo_set_rx_mode)(struct net_device *dev); |
919 | int (*ndo_set_mac_address)(struct net_device *dev, | 919 | int (*ndo_set_mac_address)(struct net_device *dev, |
920 | void *addr); | 920 | void *addr); |
921 | int (*ndo_validate_addr)(struct net_device *dev); | 921 | int (*ndo_validate_addr)(struct net_device *dev); |
922 | int (*ndo_do_ioctl)(struct net_device *dev, | 922 | int (*ndo_do_ioctl)(struct net_device *dev, |
923 | struct ifreq *ifr, int cmd); | 923 | struct ifreq *ifr, int cmd); |
924 | int (*ndo_set_config)(struct net_device *dev, | 924 | int (*ndo_set_config)(struct net_device *dev, |
925 | struct ifmap *map); | 925 | struct ifmap *map); |
926 | int (*ndo_change_mtu)(struct net_device *dev, | 926 | int (*ndo_change_mtu)(struct net_device *dev, |
927 | int new_mtu); | 927 | int new_mtu); |
928 | int (*ndo_neigh_setup)(struct net_device *dev, | 928 | int (*ndo_neigh_setup)(struct net_device *dev, |
929 | struct neigh_parms *); | 929 | struct neigh_parms *); |
930 | void (*ndo_tx_timeout) (struct net_device *dev); | 930 | void (*ndo_tx_timeout) (struct net_device *dev); |
931 | 931 | ||
932 | struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev, | 932 | struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev, |
933 | struct rtnl_link_stats64 *storage); | 933 | struct rtnl_link_stats64 *storage); |
934 | struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); | 934 | struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); |
935 | 935 | ||
936 | int (*ndo_vlan_rx_add_vid)(struct net_device *dev, | 936 | int (*ndo_vlan_rx_add_vid)(struct net_device *dev, |
937 | unsigned short vid); | 937 | unsigned short vid); |
938 | int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, | 938 | int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, |
939 | unsigned short vid); | 939 | unsigned short vid); |
940 | #ifdef CONFIG_NET_POLL_CONTROLLER | 940 | #ifdef CONFIG_NET_POLL_CONTROLLER |
941 | void (*ndo_poll_controller)(struct net_device *dev); | 941 | void (*ndo_poll_controller)(struct net_device *dev); |
942 | int (*ndo_netpoll_setup)(struct net_device *dev, | 942 | int (*ndo_netpoll_setup)(struct net_device *dev, |
943 | struct netpoll_info *info, | 943 | struct netpoll_info *info, |
944 | gfp_t gfp); | 944 | gfp_t gfp); |
945 | void (*ndo_netpoll_cleanup)(struct net_device *dev); | 945 | void (*ndo_netpoll_cleanup)(struct net_device *dev); |
946 | #endif | 946 | #endif |
947 | int (*ndo_set_vf_mac)(struct net_device *dev, | 947 | int (*ndo_set_vf_mac)(struct net_device *dev, |
948 | int queue, u8 *mac); | 948 | int queue, u8 *mac); |
949 | int (*ndo_set_vf_vlan)(struct net_device *dev, | 949 | int (*ndo_set_vf_vlan)(struct net_device *dev, |
950 | int queue, u16 vlan, u8 qos); | 950 | int queue, u16 vlan, u8 qos); |
951 | int (*ndo_set_vf_tx_rate)(struct net_device *dev, | 951 | int (*ndo_set_vf_tx_rate)(struct net_device *dev, |
952 | int vf, int rate); | 952 | int vf, int rate); |
953 | int (*ndo_set_vf_spoofchk)(struct net_device *dev, | 953 | int (*ndo_set_vf_spoofchk)(struct net_device *dev, |
954 | int vf, bool setting); | 954 | int vf, bool setting); |
955 | int (*ndo_get_vf_config)(struct net_device *dev, | 955 | int (*ndo_get_vf_config)(struct net_device *dev, |
956 | int vf, | 956 | int vf, |
957 | struct ifla_vf_info *ivf); | 957 | struct ifla_vf_info *ivf); |
958 | int (*ndo_set_vf_port)(struct net_device *dev, | 958 | int (*ndo_set_vf_port)(struct net_device *dev, |
959 | int vf, | 959 | int vf, |
960 | struct nlattr *port[]); | 960 | struct nlattr *port[]); |
961 | int (*ndo_get_vf_port)(struct net_device *dev, | 961 | int (*ndo_get_vf_port)(struct net_device *dev, |
962 | int vf, struct sk_buff *skb); | 962 | int vf, struct sk_buff *skb); |
963 | int (*ndo_setup_tc)(struct net_device *dev, u8 tc); | 963 | int (*ndo_setup_tc)(struct net_device *dev, u8 tc); |
964 | #if IS_ENABLED(CONFIG_FCOE) | 964 | #if IS_ENABLED(CONFIG_FCOE) |
965 | int (*ndo_fcoe_enable)(struct net_device *dev); | 965 | int (*ndo_fcoe_enable)(struct net_device *dev); |
966 | int (*ndo_fcoe_disable)(struct net_device *dev); | 966 | int (*ndo_fcoe_disable)(struct net_device *dev); |
967 | int (*ndo_fcoe_ddp_setup)(struct net_device *dev, | 967 | int (*ndo_fcoe_ddp_setup)(struct net_device *dev, |
968 | u16 xid, | 968 | u16 xid, |
969 | struct scatterlist *sgl, | 969 | struct scatterlist *sgl, |
970 | unsigned int sgc); | 970 | unsigned int sgc); |
971 | int (*ndo_fcoe_ddp_done)(struct net_device *dev, | 971 | int (*ndo_fcoe_ddp_done)(struct net_device *dev, |
972 | u16 xid); | 972 | u16 xid); |
973 | int (*ndo_fcoe_ddp_target)(struct net_device *dev, | 973 | int (*ndo_fcoe_ddp_target)(struct net_device *dev, |
974 | u16 xid, | 974 | u16 xid, |
975 | struct scatterlist *sgl, | 975 | struct scatterlist *sgl, |
976 | unsigned int sgc); | 976 | unsigned int sgc); |
977 | int (*ndo_fcoe_get_hbainfo)(struct net_device *dev, | 977 | int (*ndo_fcoe_get_hbainfo)(struct net_device *dev, |
978 | struct netdev_fcoe_hbainfo *hbainfo); | 978 | struct netdev_fcoe_hbainfo *hbainfo); |
979 | #endif | 979 | #endif |
980 | 980 | ||
981 | #if IS_ENABLED(CONFIG_LIBFCOE) | 981 | #if IS_ENABLED(CONFIG_LIBFCOE) |
982 | #define NETDEV_FCOE_WWNN 0 | 982 | #define NETDEV_FCOE_WWNN 0 |
983 | #define NETDEV_FCOE_WWPN 1 | 983 | #define NETDEV_FCOE_WWPN 1 |
984 | int (*ndo_fcoe_get_wwn)(struct net_device *dev, | 984 | int (*ndo_fcoe_get_wwn)(struct net_device *dev, |
985 | u64 *wwn, int type); | 985 | u64 *wwn, int type); |
986 | #endif | 986 | #endif |
987 | 987 | ||
988 | #ifdef CONFIG_RFS_ACCEL | 988 | #ifdef CONFIG_RFS_ACCEL |
989 | int (*ndo_rx_flow_steer)(struct net_device *dev, | 989 | int (*ndo_rx_flow_steer)(struct net_device *dev, |
990 | const struct sk_buff *skb, | 990 | const struct sk_buff *skb, |
991 | u16 rxq_index, | 991 | u16 rxq_index, |
992 | u32 flow_id); | 992 | u32 flow_id); |
993 | #endif | 993 | #endif |
994 | int (*ndo_add_slave)(struct net_device *dev, | 994 | int (*ndo_add_slave)(struct net_device *dev, |
995 | struct net_device *slave_dev); | 995 | struct net_device *slave_dev); |
996 | int (*ndo_del_slave)(struct net_device *dev, | 996 | int (*ndo_del_slave)(struct net_device *dev, |
997 | struct net_device *slave_dev); | 997 | struct net_device *slave_dev); |
998 | netdev_features_t (*ndo_fix_features)(struct net_device *dev, | 998 | netdev_features_t (*ndo_fix_features)(struct net_device *dev, |
999 | netdev_features_t features); | 999 | netdev_features_t features); |
1000 | int (*ndo_set_features)(struct net_device *dev, | 1000 | int (*ndo_set_features)(struct net_device *dev, |
1001 | netdev_features_t features); | 1001 | netdev_features_t features); |
1002 | int (*ndo_neigh_construct)(struct neighbour *n); | 1002 | int (*ndo_neigh_construct)(struct neighbour *n); |
1003 | void (*ndo_neigh_destroy)(struct neighbour *n); | 1003 | void (*ndo_neigh_destroy)(struct neighbour *n); |
1004 | 1004 | ||
1005 | int (*ndo_fdb_add)(struct ndmsg *ndm, | 1005 | int (*ndo_fdb_add)(struct ndmsg *ndm, |
1006 | struct nlattr *tb[], | 1006 | struct nlattr *tb[], |
1007 | struct net_device *dev, | 1007 | struct net_device *dev, |
1008 | const unsigned char *addr, | 1008 | const unsigned char *addr, |
1009 | u16 flags); | 1009 | u16 flags); |
1010 | int (*ndo_fdb_del)(struct ndmsg *ndm, | 1010 | int (*ndo_fdb_del)(struct ndmsg *ndm, |
1011 | struct net_device *dev, | 1011 | struct net_device *dev, |
1012 | const unsigned char *addr); | 1012 | const unsigned char *addr); |
1013 | int (*ndo_fdb_dump)(struct sk_buff *skb, | 1013 | int (*ndo_fdb_dump)(struct sk_buff *skb, |
1014 | struct netlink_callback *cb, | 1014 | struct netlink_callback *cb, |
1015 | struct net_device *dev, | 1015 | struct net_device *dev, |
1016 | int idx); | 1016 | int idx); |
1017 | 1017 | ||
1018 | int (*ndo_bridge_setlink)(struct net_device *dev, | 1018 | int (*ndo_bridge_setlink)(struct net_device *dev, |
1019 | struct nlmsghdr *nlh); | 1019 | struct nlmsghdr *nlh); |
1020 | int (*ndo_bridge_getlink)(struct sk_buff *skb, | 1020 | int (*ndo_bridge_getlink)(struct sk_buff *skb, |
1021 | u32 pid, u32 seq, | 1021 | u32 pid, u32 seq, |
1022 | struct net_device *dev); | 1022 | struct net_device *dev); |
1023 | int (*ndo_change_carrier)(struct net_device *dev, | 1023 | int (*ndo_change_carrier)(struct net_device *dev, |
1024 | bool new_carrier); | 1024 | bool new_carrier); |
1025 | }; | 1025 | }; |
1026 | 1026 | ||
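As a rough illustration of how a driver ties the callbacks documented above into this ops structure, the following hypothetical sketch shows a minimal table; the foo_* handlers are assumed driver-local functions and are not part of this header, while eth_mac_addr() and eth_validate_addr() are the generic Ethernet helpers:

static const struct net_device_ops foo_netdev_ops = {
	.ndo_open		= foo_open,		/* bring the interface up */
	.ndo_stop		= foo_stop,		/* quiesce and shut it down */
	.ndo_start_xmit		= foo_start_xmit,	/* hand one skb to the hardware */
	.ndo_set_rx_mode	= foo_set_rx_mode,	/* program address filters */
	.ndo_set_mac_address	= eth_mac_addr,		/* generic Ethernet helper */
	.ndo_validate_addr	= eth_validate_addr,	/* generic Ethernet helper */
	.ndo_tx_timeout		= foo_tx_timeout,	/* watchdog expired */
};

/* The probe path then only needs to attach the table before registration:
 *	dev->netdev_ops = &foo_netdev_ops;
 *	err = register_netdev(dev);
 */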
1027 | /* | 1027 | /* |
1028 | * The DEVICE structure. | 1028 | * The DEVICE structure. |
1029 | * Actually, this whole structure is a big mistake. It mixes I/O | 1029 | * Actually, this whole structure is a big mistake. It mixes I/O |
1030 | * data with strictly "high-level" data, and it has to know about | 1030 | * data with strictly "high-level" data, and it has to know about |
1031 | * almost every data structure used in the INET module. | 1031 | * almost every data structure used in the INET module. |
1032 | * | 1032 | * |
1033 | * FIXME: cleanup struct net_device such that network protocol info | 1033 | * FIXME: cleanup struct net_device such that network protocol info |
1034 | * moves out. | 1034 | * moves out. |
1035 | */ | 1035 | */ |
1036 | 1036 | ||
1037 | struct net_device { | 1037 | struct net_device { |
1038 | 1038 | ||
1039 | /* | 1039 | /* |
1040 | * This is the first field of the "visible" part of this structure | 1040 | * This is the first field of the "visible" part of this structure |
1041 | * (i.e. as seen by users in the "Space.c" file). It is the name | 1041 | * (i.e. as seen by users in the "Space.c" file). It is the name |
1042 | * of the interface. | 1042 | * of the interface. |
1043 | */ | 1043 | */ |
1044 | char name[IFNAMSIZ]; | 1044 | char name[IFNAMSIZ]; |
1045 | 1045 | ||
1046 | /* device name hash chain, please keep it close to name[] */ | 1046 | /* device name hash chain, please keep it close to name[] */ |
1047 | struct hlist_node name_hlist; | 1047 | struct hlist_node name_hlist; |
1048 | 1048 | ||
1049 | /* snmp alias */ | 1049 | /* snmp alias */ |
1050 | char *ifalias; | 1050 | char *ifalias; |
1051 | 1051 | ||
1052 | /* | 1052 | /* |
1053 | * I/O specific fields | 1053 | * I/O specific fields |
1054 | * FIXME: Merge these and struct ifmap into one | 1054 | * FIXME: Merge these and struct ifmap into one |
1055 | */ | 1055 | */ |
1056 | unsigned long mem_end; /* shared mem end */ | 1056 | unsigned long mem_end; /* shared mem end */ |
1057 | unsigned long mem_start; /* shared mem start */ | 1057 | unsigned long mem_start; /* shared mem start */ |
1058 | unsigned long base_addr; /* device I/O address */ | 1058 | unsigned long base_addr; /* device I/O address */ |
1059 | unsigned int irq; /* device IRQ number */ | 1059 | unsigned int irq; /* device IRQ number */ |
1060 | 1060 | ||
1061 | /* | 1061 | /* |
1062 | * Some hardware also needs these fields, but they are not | 1062 | * Some hardware also needs these fields, but they are not |
1063 | * part of the usual set specified in Space.c. | 1063 | * part of the usual set specified in Space.c. |
1064 | */ | 1064 | */ |
1065 | 1065 | ||
1066 | unsigned long state; | 1066 | unsigned long state; |
1067 | 1067 | ||
1068 | struct list_head dev_list; | 1068 | struct list_head dev_list; |
1069 | struct list_head napi_list; | 1069 | struct list_head napi_list; |
1070 | struct list_head unreg_list; | 1070 | struct list_head unreg_list; |
1071 | 1071 | ||
1072 | /* currently active device features */ | 1072 | /* currently active device features */ |
1073 | netdev_features_t features; | 1073 | netdev_features_t features; |
1074 | /* user-changeable features */ | 1074 | /* user-changeable features */ |
1075 | netdev_features_t hw_features; | 1075 | netdev_features_t hw_features; |
1076 | /* user-requested features */ | 1076 | /* user-requested features */ |
1077 | netdev_features_t wanted_features; | 1077 | netdev_features_t wanted_features; |
1078 | /* mask of features inheritable by VLAN devices */ | 1078 | /* mask of features inheritable by VLAN devices */ |
1079 | netdev_features_t vlan_features; | 1079 | netdev_features_t vlan_features; |
1080 | /* mask of features inherited by encapsulating devices | 1080 | /* mask of features inherited by encapsulating devices |
1081 | * This field indicates what encapsulation offloads | 1081 | * This field indicates what encapsulation offloads |
1082 | * the hardware is capable of doing, and drivers will | 1082 | * the hardware is capable of doing, and drivers will |
1083 | * need to set them appropriately. | 1083 | * need to set them appropriately. |
1084 | */ | 1084 | */ |
1085 | netdev_features_t hw_enc_features; | 1085 | netdev_features_t hw_enc_features; |
1086 | 1086 | ||
1087 | /* Interface index. Unique device identifier */ | 1087 | /* Interface index. Unique device identifier */ |
1088 | int ifindex; | 1088 | int ifindex; |
1089 | int iflink; | 1089 | int iflink; |
1090 | 1090 | ||
1091 | struct net_device_stats stats; | 1091 | struct net_device_stats stats; |
1092 | atomic_long_t rx_dropped; /* packets dropped by the core network stack; | 1092 | atomic_long_t rx_dropped; /* packets dropped by the core network stack; |
1093 | * Do not use this in drivers. | 1093 | * Do not use this in drivers. |
1094 | */ | 1094 | */ |
1095 | 1095 | ||
1096 | #ifdef CONFIG_WIRELESS_EXT | 1096 | #ifdef CONFIG_WIRELESS_EXT |
1097 | /* List of functions to handle Wireless Extensions (instead of ioctl). | 1097 | /* List of functions to handle Wireless Extensions (instead of ioctl). |
1098 | * See <net/iw_handler.h> for details. Jean II */ | 1098 | * See <net/iw_handler.h> for details. Jean II */ |
1099 | const struct iw_handler_def * wireless_handlers; | 1099 | const struct iw_handler_def * wireless_handlers; |
1100 | /* Instance data managed by the core of Wireless Extensions. */ | 1100 | /* Instance data managed by the core of Wireless Extensions. */ |
1101 | struct iw_public_data * wireless_data; | 1101 | struct iw_public_data * wireless_data; |
1102 | #endif | 1102 | #endif |
1103 | /* Management operations */ | 1103 | /* Management operations */ |
1104 | const struct net_device_ops *netdev_ops; | 1104 | const struct net_device_ops *netdev_ops; |
1105 | const struct ethtool_ops *ethtool_ops; | 1105 | const struct ethtool_ops *ethtool_ops; |
1106 | 1106 | ||
1107 | /* Hardware header description */ | 1107 | /* Hardware header description */ |
1108 | const struct header_ops *header_ops; | 1108 | const struct header_ops *header_ops; |
1109 | 1109 | ||
1110 | unsigned int flags; /* interface flags (a la BSD) */ | 1110 | unsigned int flags; /* interface flags (a la BSD) */ |
1111 | unsigned int priv_flags; /* Like 'flags' but invisible to userspace. | 1111 | unsigned int priv_flags; /* Like 'flags' but invisible to userspace. |
1112 | * See if.h for definitions. */ | 1112 | * See if.h for definitions. */ |
1113 | unsigned short gflags; | 1113 | unsigned short gflags; |
1114 | unsigned short padded; /* How much padding added by alloc_netdev() */ | 1114 | unsigned short padded; /* How much padding added by alloc_netdev() */ |
1115 | 1115 | ||
1116 | unsigned char operstate; /* RFC2863 operstate */ | 1116 | unsigned char operstate; /* RFC2863 operstate */ |
1117 | unsigned char link_mode; /* mapping policy to operstate */ | 1117 | unsigned char link_mode; /* mapping policy to operstate */ |
1118 | 1118 | ||
1119 | unsigned char if_port; /* Selectable AUI, TP,..*/ | 1119 | unsigned char if_port; /* Selectable AUI, TP,..*/ |
1120 | unsigned char dma; /* DMA channel */ | 1120 | unsigned char dma; /* DMA channel */ |
1121 | 1121 | ||
1122 | unsigned int mtu; /* interface MTU value */ | 1122 | unsigned int mtu; /* interface MTU value */ |
1123 | unsigned short type; /* interface hardware type */ | 1123 | unsigned short type; /* interface hardware type */ |
1124 | unsigned short hard_header_len; /* hardware hdr length */ | 1124 | unsigned short hard_header_len; /* hardware hdr length */ |
1125 | 1125 | ||
1126 | /* extra head- and tailroom the hardware may need, but not in all cases | 1126 | /* extra head- and tailroom the hardware may need, but not in all cases |
1127 | * can this be guaranteed, especially tailroom. Some cases also use | 1127 | * can this be guaranteed, especially tailroom. Some cases also use |
1128 | * LL_MAX_HEADER instead to allocate the skb. | 1128 | * LL_MAX_HEADER instead to allocate the skb. |
1129 | */ | 1129 | */ |
1130 | unsigned short needed_headroom; | 1130 | unsigned short needed_headroom; |
1131 | unsigned short needed_tailroom; | 1131 | unsigned short needed_tailroom; |
1132 | 1132 | ||
1133 | /* Interface address info. */ | 1133 | /* Interface address info. */ |
1134 | unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */ | 1134 | unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */ |
1135 | unsigned char addr_assign_type; /* hw address assignment type */ | 1135 | unsigned char addr_assign_type; /* hw address assignment type */ |
1136 | unsigned char addr_len; /* hardware address length */ | 1136 | unsigned char addr_len; /* hardware address length */ |
1137 | unsigned char neigh_priv_len; | 1137 | unsigned char neigh_priv_len; |
1138 | unsigned short dev_id; /* for shared network cards */ | 1138 | unsigned short dev_id; /* for shared network cards */ |
1139 | 1139 | ||
1140 | spinlock_t addr_list_lock; | 1140 | spinlock_t addr_list_lock; |
1141 | struct netdev_hw_addr_list uc; /* Unicast mac addresses */ | 1141 | struct netdev_hw_addr_list uc; /* Unicast mac addresses */ |
1142 | struct netdev_hw_addr_list mc; /* Multicast mac addresses */ | 1142 | struct netdev_hw_addr_list mc; /* Multicast mac addresses */ |
1143 | bool uc_promisc; | 1143 | bool uc_promisc; |
1144 | unsigned int promiscuity; | 1144 | unsigned int promiscuity; |
1145 | unsigned int allmulti; | 1145 | unsigned int allmulti; |
1146 | 1146 | ||
1147 | 1147 | ||
1148 | /* Protocol specific pointers */ | 1148 | /* Protocol specific pointers */ |
1149 | 1149 | ||
1150 | #if IS_ENABLED(CONFIG_VLAN_8021Q) | 1150 | #if IS_ENABLED(CONFIG_VLAN_8021Q) |
1151 | struct vlan_info __rcu *vlan_info; /* VLAN info */ | 1151 | struct vlan_info __rcu *vlan_info; /* VLAN info */ |
1152 | #endif | 1152 | #endif |
1153 | #if IS_ENABLED(CONFIG_NET_DSA) | 1153 | #if IS_ENABLED(CONFIG_NET_DSA) |
1154 | struct dsa_switch_tree *dsa_ptr; /* dsa specific data */ | 1154 | struct dsa_switch_tree *dsa_ptr; /* dsa specific data */ |
1155 | #endif | 1155 | #endif |
1156 | void *atalk_ptr; /* AppleTalk link */ | 1156 | void *atalk_ptr; /* AppleTalk link */ |
1157 | struct in_device __rcu *ip_ptr; /* IPv4 specific data */ | 1157 | struct in_device __rcu *ip_ptr; /* IPv4 specific data */ |
1158 | struct dn_dev __rcu *dn_ptr; /* DECnet specific data */ | 1158 | struct dn_dev __rcu *dn_ptr; /* DECnet specific data */ |
1159 | struct inet6_dev __rcu *ip6_ptr; /* IPv6 specific data */ | 1159 | struct inet6_dev __rcu *ip6_ptr; /* IPv6 specific data */ |
1160 | void *ax25_ptr; /* AX.25 specific data */ | 1160 | void *ax25_ptr; /* AX.25 specific data */ |
1161 | struct wireless_dev *ieee80211_ptr; /* IEEE 802.11 specific data, | 1161 | struct wireless_dev *ieee80211_ptr; /* IEEE 802.11 specific data, |
1162 | assign before registering */ | 1162 | assign before registering */ |
1163 | 1163 | ||
1164 | /* | 1164 | /* |
1165 | * Cache lines mostly used on receive path (including eth_type_trans()) | 1165 | * Cache lines mostly used on receive path (including eth_type_trans()) |
1166 | */ | 1166 | */ |
1167 | unsigned long last_rx; /* Time of last Rx | 1167 | unsigned long last_rx; /* Time of last Rx |
1168 | * This should not be set in | 1168 | * This should not be set in |
1169 | * drivers, unless really needed, | 1169 | * drivers, unless really needed, |
1170 | * because the network stack (bonding) | 1170 | * because the network stack (bonding) |
1171 | * uses it if/when necessary, to | 1171 | * uses it if/when necessary, to |
1172 | * avoid dirtying this cache line. | 1172 | * avoid dirtying this cache line. |
1173 | */ | 1173 | */ |
1174 | 1174 | ||
1175 | struct list_head upper_dev_list; /* List of upper devices */ | 1175 | struct list_head upper_dev_list; /* List of upper devices */ |
1176 | 1176 | ||
1177 | /* Interface address info used in eth_type_trans() */ | 1177 | /* Interface address info used in eth_type_trans() */ |
1178 | unsigned char *dev_addr; /* hw address, (before bcast | 1178 | unsigned char *dev_addr; /* hw address, (before bcast |
1179 | because most packets are | 1179 | because most packets are |
1180 | unicast) */ | 1180 | unicast) */ |
1181 | 1181 | ||
1182 | struct netdev_hw_addr_list dev_addrs; /* list of device | 1182 | struct netdev_hw_addr_list dev_addrs; /* list of device |
1183 | hw addresses */ | 1183 | hw addresses */ |
1184 | 1184 | ||
1185 | unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast addr */ | 1185 | unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast addr */ |
1186 | 1186 | ||
1187 | #ifdef CONFIG_SYSFS | 1187 | #ifdef CONFIG_SYSFS |
1188 | struct kset *queues_kset; | 1188 | struct kset *queues_kset; |
1189 | #endif | 1189 | #endif |
1190 | 1190 | ||
1191 | #ifdef CONFIG_RPS | 1191 | #ifdef CONFIG_RPS |
1192 | struct netdev_rx_queue *_rx; | 1192 | struct netdev_rx_queue *_rx; |
1193 | 1193 | ||
1194 | /* Number of RX queues allocated at register_netdev() time */ | 1194 | /* Number of RX queues allocated at register_netdev() time */ |
1195 | unsigned int num_rx_queues; | 1195 | unsigned int num_rx_queues; |
1196 | 1196 | ||
1197 | /* Number of RX queues currently active in device */ | 1197 | /* Number of RX queues currently active in device */ |
1198 | unsigned int real_num_rx_queues; | 1198 | unsigned int real_num_rx_queues; |
1199 | 1199 | ||
1200 | #ifdef CONFIG_RFS_ACCEL | 1200 | #ifdef CONFIG_RFS_ACCEL |
1201 | /* CPU reverse-mapping for RX completion interrupts, indexed | 1201 | /* CPU reverse-mapping for RX completion interrupts, indexed |
1202 | * by RX queue number. Assigned by driver. This must only be | 1202 | * by RX queue number. Assigned by driver. This must only be |
1203 | * set if the ndo_rx_flow_steer operation is defined. */ | 1203 | * set if the ndo_rx_flow_steer operation is defined. */ |
1204 | struct cpu_rmap *rx_cpu_rmap; | 1204 | struct cpu_rmap *rx_cpu_rmap; |
1205 | #endif | 1205 | #endif |
1206 | #endif | 1206 | #endif |
1207 | 1207 | ||
1208 | rx_handler_func_t __rcu *rx_handler; | 1208 | rx_handler_func_t __rcu *rx_handler; |
1209 | void __rcu *rx_handler_data; | 1209 | void __rcu *rx_handler_data; |
1210 | 1210 | ||
1211 | struct netdev_queue __rcu *ingress_queue; | 1211 | struct netdev_queue __rcu *ingress_queue; |
1212 | 1212 | ||
1213 | /* | 1213 | /* |
1214 | * Cache lines mostly used on transmit path | 1214 | * Cache lines mostly used on transmit path |
1215 | */ | 1215 | */ |
1216 | struct netdev_queue *_tx ____cacheline_aligned_in_smp; | 1216 | struct netdev_queue *_tx ____cacheline_aligned_in_smp; |
1217 | 1217 | ||
1218 | /* Number of TX queues allocated at alloc_netdev_mq() time */ | 1218 | /* Number of TX queues allocated at alloc_netdev_mq() time */ |
1219 | unsigned int num_tx_queues; | 1219 | unsigned int num_tx_queues; |
1220 | 1220 | ||
1221 | /* Number of TX queues currently active in device */ | 1221 | /* Number of TX queues currently active in device */ |
1222 | unsigned int real_num_tx_queues; | 1222 | unsigned int real_num_tx_queues; |
1223 | 1223 | ||
1224 | /* root qdisc from userspace point of view */ | 1224 | /* root qdisc from userspace point of view */ |
1225 | struct Qdisc *qdisc; | 1225 | struct Qdisc *qdisc; |
1226 | 1226 | ||
1227 | unsigned long tx_queue_len; /* Max frames per queue allowed */ | 1227 | unsigned long tx_queue_len; /* Max frames per queue allowed */ |
1228 | spinlock_t tx_global_lock; | 1228 | spinlock_t tx_global_lock; |
1229 | 1229 | ||
1230 | #ifdef CONFIG_XPS | 1230 | #ifdef CONFIG_XPS |
1231 | struct xps_dev_maps __rcu *xps_maps; | 1231 | struct xps_dev_maps __rcu *xps_maps; |
1232 | #endif | 1232 | #endif |
1233 | 1233 | ||
1234 | /* These may be needed for future network-power-down code. */ | 1234 | /* These may be needed for future network-power-down code. */ |
1235 | 1235 | ||
1236 | /* | 1236 | /* |
1237 | * trans_start here is expensive for high speed devices on SMP, | 1237 | * trans_start here is expensive for high speed devices on SMP, |
1238 | * please use netdev_queue->trans_start instead. | 1238 | * please use netdev_queue->trans_start instead. |
1239 | */ | 1239 | */ |
1240 | unsigned long trans_start; /* Time (in jiffies) of last Tx */ | 1240 | unsigned long trans_start; /* Time (in jiffies) of last Tx */ |
1241 | 1241 | ||
1242 | int watchdog_timeo; /* used by dev_watchdog() */ | 1242 | int watchdog_timeo; /* used by dev_watchdog() */ |
1243 | struct timer_list watchdog_timer; | 1243 | struct timer_list watchdog_timer; |
1244 | 1244 | ||
1245 | /* Number of references to this device */ | 1245 | /* Number of references to this device */ |
1246 | int __percpu *pcpu_refcnt; | 1246 | int __percpu *pcpu_refcnt; |
1247 | 1247 | ||
1248 | /* delayed register/unregister */ | 1248 | /* delayed register/unregister */ |
1249 | struct list_head todo_list; | 1249 | struct list_head todo_list; |
1250 | /* device index hash chain */ | 1250 | /* device index hash chain */ |
1251 | struct hlist_node index_hlist; | 1251 | struct hlist_node index_hlist; |
1252 | 1252 | ||
1253 | struct list_head link_watch_list; | 1253 | struct list_head link_watch_list; |
1254 | 1254 | ||
1255 | /* register/unregister state machine */ | 1255 | /* register/unregister state machine */ |
1256 | enum { NETREG_UNINITIALIZED=0, | 1256 | enum { NETREG_UNINITIALIZED=0, |
1257 | NETREG_REGISTERED, /* completed register_netdevice */ | 1257 | NETREG_REGISTERED, /* completed register_netdevice */ |
1258 | NETREG_UNREGISTERING, /* called unregister_netdevice */ | 1258 | NETREG_UNREGISTERING, /* called unregister_netdevice */ |
1259 | NETREG_UNREGISTERED, /* completed unregister todo */ | 1259 | NETREG_UNREGISTERED, /* completed unregister todo */ |
1260 | NETREG_RELEASED, /* called free_netdev */ | 1260 | NETREG_RELEASED, /* called free_netdev */ |
1261 | NETREG_DUMMY, /* dummy device for NAPI poll */ | 1261 | NETREG_DUMMY, /* dummy device for NAPI poll */ |
1262 | } reg_state:8; | 1262 | } reg_state:8; |
1263 | 1263 | ||
1264 | bool dismantle; /* device is going to be freed */ | 1264 | bool dismantle; /* device is going to be freed */ |
1265 | 1265 | ||
1266 | enum { | 1266 | enum { |
1267 | RTNL_LINK_INITIALIZED, | 1267 | RTNL_LINK_INITIALIZED, |
1268 | RTNL_LINK_INITIALIZING, | 1268 | RTNL_LINK_INITIALIZING, |
1269 | } rtnl_link_state:16; | 1269 | } rtnl_link_state:16; |
1270 | 1270 | ||
1271 | /* Called from unregister, can be used to call free_netdev */ | 1271 | /* Called from unregister, can be used to call free_netdev */ |
1272 | void (*destructor)(struct net_device *dev); | 1272 | void (*destructor)(struct net_device *dev); |
1273 | 1273 | ||
1274 | #ifdef CONFIG_NETPOLL | 1274 | #ifdef CONFIG_NETPOLL |
1275 | struct netpoll_info __rcu *npinfo; | 1275 | struct netpoll_info __rcu *npinfo; |
1276 | #endif | 1276 | #endif |
1277 | 1277 | ||
1278 | #ifdef CONFIG_NET_NS | 1278 | #ifdef CONFIG_NET_NS |
1279 | /* Network namespace this network device is inside */ | 1279 | /* Network namespace this network device is inside */ |
1280 | struct net *nd_net; | 1280 | struct net *nd_net; |
1281 | #endif | 1281 | #endif |
1282 | 1282 | ||
1283 | /* mid-layer private */ | 1283 | /* mid-layer private */ |
1284 | union { | 1284 | union { |
1285 | void *ml_priv; | 1285 | void *ml_priv; |
1286 | struct pcpu_lstats __percpu *lstats; /* loopback stats */ | 1286 | struct pcpu_lstats __percpu *lstats; /* loopback stats */ |
1287 | struct pcpu_tstats __percpu *tstats; /* tunnel stats */ | 1287 | struct pcpu_tstats __percpu *tstats; /* tunnel stats */ |
1288 | struct pcpu_dstats __percpu *dstats; /* dummy stats */ | 1288 | struct pcpu_dstats __percpu *dstats; /* dummy stats */ |
1289 | struct pcpu_vstats __percpu *vstats; /* veth stats */ | 1289 | struct pcpu_vstats __percpu *vstats; /* veth stats */ |
1290 | }; | 1290 | }; |
1291 | /* GARP */ | 1291 | /* GARP */ |
1292 | struct garp_port __rcu *garp_port; | 1292 | struct garp_port __rcu *garp_port; |
1293 | 1293 | ||
1294 | /* class/net/name entry */ | 1294 | /* class/net/name entry */ |
1295 | struct device dev; | 1295 | struct device dev; |
1296 | /* space for optional device, statistics, and wireless sysfs groups */ | 1296 | /* space for optional device, statistics, and wireless sysfs groups */ |
1297 | const struct attribute_group *sysfs_groups[4]; | 1297 | const struct attribute_group *sysfs_groups[4]; |
1298 | 1298 | ||
1299 | /* rtnetlink link ops */ | 1299 | /* rtnetlink link ops */ |
1300 | const struct rtnl_link_ops *rtnl_link_ops; | 1300 | const struct rtnl_link_ops *rtnl_link_ops; |
1301 | 1301 | ||
1302 | /* for setting kernel sock attribute on TCP connection setup */ | 1302 | /* for setting kernel sock attribute on TCP connection setup */ |
1303 | #define GSO_MAX_SIZE 65536 | 1303 | #define GSO_MAX_SIZE 65536 |
1304 | unsigned int gso_max_size; | 1304 | unsigned int gso_max_size; |
1305 | #define GSO_MAX_SEGS 65535 | 1305 | #define GSO_MAX_SEGS 65535 |
1306 | u16 gso_max_segs; | 1306 | u16 gso_max_segs; |
1307 | 1307 | ||
1308 | #ifdef CONFIG_DCB | 1308 | #ifdef CONFIG_DCB |
1309 | /* Data Center Bridging netlink ops */ | 1309 | /* Data Center Bridging netlink ops */ |
1310 | const struct dcbnl_rtnl_ops *dcbnl_ops; | 1310 | const struct dcbnl_rtnl_ops *dcbnl_ops; |
1311 | #endif | 1311 | #endif |
1312 | u8 num_tc; | 1312 | u8 num_tc; |
1313 | struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; | 1313 | struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; |
1314 | u8 prio_tc_map[TC_BITMASK + 1]; | 1314 | u8 prio_tc_map[TC_BITMASK + 1]; |
1315 | 1315 | ||
1316 | #if IS_ENABLED(CONFIG_FCOE) | 1316 | #if IS_ENABLED(CONFIG_FCOE) |
1317 | /* max exchange id for FCoE LRO by ddp */ | 1317 | /* max exchange id for FCoE LRO by ddp */ |
1318 | unsigned int fcoe_ddp_xid; | 1318 | unsigned int fcoe_ddp_xid; |
1319 | #endif | 1319 | #endif |
1320 | #if IS_ENABLED(CONFIG_NETPRIO_CGROUP) | 1320 | #if IS_ENABLED(CONFIG_NETPRIO_CGROUP) |
1321 | struct netprio_map __rcu *priomap; | 1321 | struct netprio_map __rcu *priomap; |
1322 | #endif | 1322 | #endif |
1323 | /* phy device may attach itself for hardware timestamping */ | 1323 | /* phy device may attach itself for hardware timestamping */ |
1324 | struct phy_device *phydev; | 1324 | struct phy_device *phydev; |
1325 | 1325 | ||
1326 | struct lock_class_key *qdisc_tx_busylock; | 1326 | struct lock_class_key *qdisc_tx_busylock; |
1327 | 1327 | ||
1328 | /* group the device belongs to */ | 1328 | /* group the device belongs to */ |
1329 | int group; | 1329 | int group; |
1330 | 1330 | ||
1331 | struct pm_qos_request pm_qos_req; | 1331 | struct pm_qos_request pm_qos_req; |
1332 | }; | 1332 | }; |
1333 | #define to_net_dev(d) container_of(d, struct net_device, dev) | 1333 | #define to_net_dev(d) container_of(d, struct net_device, dev) |
1334 | 1334 | ||
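For illustration, to_net_dev() is how sysfs attribute callbacks, which only receive the embedded struct device, recover the owning net_device; a minimal hypothetical sketch (the attribute registration itself is elided):

static ssize_t foo_show_mtu(struct device *d, struct device_attribute *attr,
			    char *buf)
{
	struct net_device *dev = to_net_dev(d);	/* back from struct device */

	return sprintf(buf, "%u\n", dev->mtu);
}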
1335 | #define NETDEV_ALIGN 32 | 1335 | #define NETDEV_ALIGN 32 |
1336 | 1336 | ||
1337 | static inline | 1337 | static inline |
1338 | int netdev_get_prio_tc_map(const struct net_device *dev, u32 prio) | 1338 | int netdev_get_prio_tc_map(const struct net_device *dev, u32 prio) |
1339 | { | 1339 | { |
1340 | return dev->prio_tc_map[prio & TC_BITMASK]; | 1340 | return dev->prio_tc_map[prio & TC_BITMASK]; |
1341 | } | 1341 | } |
1342 | 1342 | ||
1343 | static inline | 1343 | static inline |
1344 | int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc) | 1344 | int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc) |
1345 | { | 1345 | { |
1346 | if (tc >= dev->num_tc) | 1346 | if (tc >= dev->num_tc) |
1347 | return -EINVAL; | 1347 | return -EINVAL; |
1348 | 1348 | ||
1349 | dev->prio_tc_map[prio & TC_BITMASK] = tc & TC_BITMASK; | 1349 | dev->prio_tc_map[prio & TC_BITMASK] = tc & TC_BITMASK; |
1350 | return 0; | 1350 | return 0; |
1351 | } | 1351 | } |
1352 | 1352 | ||
1353 | static inline | 1353 | static inline |
1354 | void netdev_reset_tc(struct net_device *dev) | 1354 | void netdev_reset_tc(struct net_device *dev) |
1355 | { | 1355 | { |
1356 | dev->num_tc = 0; | 1356 | dev->num_tc = 0; |
1357 | memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq)); | 1357 | memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq)); |
1358 | memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map)); | 1358 | memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map)); |
1359 | } | 1359 | } |
1360 | 1360 | ||
1361 | static inline | 1361 | static inline |
1362 | int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset) | 1362 | int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset) |
1363 | { | 1363 | { |
1364 | if (tc >= dev->num_tc) | 1364 | if (tc >= dev->num_tc) |
1365 | return -EINVAL; | 1365 | return -EINVAL; |
1366 | 1366 | ||
1367 | dev->tc_to_txq[tc].count = count; | 1367 | dev->tc_to_txq[tc].count = count; |
1368 | dev->tc_to_txq[tc].offset = offset; | 1368 | dev->tc_to_txq[tc].offset = offset; |
1369 | return 0; | 1369 | return 0; |
1370 | } | 1370 | } |
1371 | 1371 | ||
1372 | static inline | 1372 | static inline |
1373 | int netdev_set_num_tc(struct net_device *dev, u8 num_tc) | 1373 | int netdev_set_num_tc(struct net_device *dev, u8 num_tc) |
1374 | { | 1374 | { |
1375 | if (num_tc > TC_MAX_QUEUE) | 1375 | if (num_tc > TC_MAX_QUEUE) |
1376 | return -EINVAL; | 1376 | return -EINVAL; |
1377 | 1377 | ||
1378 | dev->num_tc = num_tc; | 1378 | dev->num_tc = num_tc; |
1379 | return 0; | 1379 | return 0; |
1380 | } | 1380 | } |
1381 | 1381 | ||
1382 | static inline | 1382 | static inline |
1383 | int netdev_get_num_tc(struct net_device *dev) | 1383 | int netdev_get_num_tc(struct net_device *dev) |
1384 | { | 1384 | { |
1385 | return dev->num_tc; | 1385 | return dev->num_tc; |
1386 | } | 1386 | } |
1387 | 1387 | ||
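A driver's ndo_setup_tc() implementation would typically use the helpers above to carve its transmit queues into traffic classes; a hedged sketch, assuming a hypothetical device that spreads its queues evenly and maps priority n to class n:

static int foo_setup_tc(struct net_device *dev, u8 num_tc)
{
	u16 queues_per_tc;
	int err, tc;

	if (!num_tc) {
		netdev_reset_tc(dev);	/* back to a single, flat queue set */
		return 0;
	}

	err = netdev_set_num_tc(dev, num_tc);
	if (err)
		return err;

	queues_per_tc = dev->real_num_tx_queues / num_tc;
	for (tc = 0; tc < num_tc; tc++) {
		/* each class owns a contiguous block of TX queues */
		netdev_set_tc_queue(dev, tc, queues_per_tc,
				    tc * queues_per_tc);
		/* simple 1:1 priority-to-class mapping for the example */
		netdev_set_prio_tc_map(dev, tc, tc);
	}
	return 0;
}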
1388 | static inline | 1388 | static inline |
1389 | struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev, | 1389 | struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev, |
1390 | unsigned int index) | 1390 | unsigned int index) |
1391 | { | 1391 | { |
1392 | return &dev->_tx[index]; | 1392 | return &dev->_tx[index]; |
1393 | } | 1393 | } |
1394 | 1394 | ||
1395 | static inline void netdev_for_each_tx_queue(struct net_device *dev, | 1395 | static inline void netdev_for_each_tx_queue(struct net_device *dev, |
1396 | void (*f)(struct net_device *, | 1396 | void (*f)(struct net_device *, |
1397 | struct netdev_queue *, | 1397 | struct netdev_queue *, |
1398 | void *), | 1398 | void *), |
1399 | void *arg) | 1399 | void *arg) |
1400 | { | 1400 | { |
1401 | unsigned int i; | 1401 | unsigned int i; |
1402 | 1402 | ||
1403 | for (i = 0; i < dev->num_tx_queues; i++) | 1403 | for (i = 0; i < dev->num_tx_queues; i++) |
1404 | f(dev, &dev->_tx[i], arg); | 1404 | f(dev, &dev->_tx[i], arg); |
1405 | } | 1405 | } |
1406 | 1406 | ||
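The per-queue callback handed to netdev_for_each_tx_queue() sees the device, the queue and an opaque argument; a small hypothetical example that stops every ring while the hardware is being torn down (the stock netif_tx_stop_all_queues() helper covers this particular case):

static void foo_stop_one_queue(struct net_device *dev,
			       struct netdev_queue *txq, void *unused)
{
	netif_tx_stop_queue(txq);	/* mark this ring as unable to transmit */
}

/* e.g. from the driver's reset path:
 *	netdev_for_each_tx_queue(dev, foo_stop_one_queue, NULL);
 */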
1407 | extern struct netdev_queue *netdev_pick_tx(struct net_device *dev, | 1407 | extern struct netdev_queue *netdev_pick_tx(struct net_device *dev, |
1408 | struct sk_buff *skb); | 1408 | struct sk_buff *skb); |
1409 | extern u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb); | 1409 | extern u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb); |
1410 | 1410 | ||
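A driver that implements ndo_select_queue() can steer only the traffic it cares about and defer everything else to __netdev_pick_tx(); a hypothetical sketch that pins control-priority frames to queue 0:

static u16 foo_select_queue(struct net_device *dev, struct sk_buff *skb)
{
	/* Hypothetical policy: keep TC_PRIO_CONTROL traffic on queue 0 and
	 * let the stack's default hashing place all other frames.
	 */
	if (skb->priority == TC_PRIO_CONTROL)
		return 0;

	return __netdev_pick_tx(dev, skb);
}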
1411 | /* | 1411 | /* |
1412 | * Net namespace inlines | 1412 | * Net namespace inlines |
1413 | */ | 1413 | */ |
1414 | static inline | 1414 | static inline |
1415 | struct net *dev_net(const struct net_device *dev) | 1415 | struct net *dev_net(const struct net_device *dev) |
1416 | { | 1416 | { |
1417 | return read_pnet(&dev->nd_net); | 1417 | return read_pnet(&dev->nd_net); |
1418 | } | 1418 | } |
1419 | 1419 | ||
1420 | static inline | 1420 | static inline |
1421 | void dev_net_set(struct net_device *dev, struct net *net) | 1421 | void dev_net_set(struct net_device *dev, struct net *net) |
1422 | { | 1422 | { |
1423 | #ifdef CONFIG_NET_NS | 1423 | #ifdef CONFIG_NET_NS |
1424 | release_net(dev->nd_net); | 1424 | release_net(dev->nd_net); |
1425 | dev->nd_net = hold_net(net); | 1425 | dev->nd_net = hold_net(net); |
1426 | #endif | 1426 | #endif |
1427 | } | 1427 | } |
1428 | 1428 | ||
1429 | static inline bool netdev_uses_dsa_tags(struct net_device *dev) | 1429 | static inline bool netdev_uses_dsa_tags(struct net_device *dev) |
1430 | { | 1430 | { |
1431 | #ifdef CONFIG_NET_DSA_TAG_DSA | 1431 | #ifdef CONFIG_NET_DSA_TAG_DSA |
1432 | if (dev->dsa_ptr != NULL) | 1432 | if (dev->dsa_ptr != NULL) |
1433 | return dsa_uses_dsa_tags(dev->dsa_ptr); | 1433 | return dsa_uses_dsa_tags(dev->dsa_ptr); |
1434 | #endif | 1434 | #endif |
1435 | 1435 | ||
1436 | return 0; | 1436 | return 0; |
1437 | } | 1437 | } |
1438 | 1438 | ||
1439 | static inline bool netdev_uses_trailer_tags(struct net_device *dev) | 1439 | static inline bool netdev_uses_trailer_tags(struct net_device *dev) |
1440 | { | 1440 | { |
1441 | #ifdef CONFIG_NET_DSA_TAG_TRAILER | 1441 | #ifdef CONFIG_NET_DSA_TAG_TRAILER |
1442 | if (dev->dsa_ptr != NULL) | 1442 | if (dev->dsa_ptr != NULL) |
1443 | return dsa_uses_trailer_tags(dev->dsa_ptr); | 1443 | return dsa_uses_trailer_tags(dev->dsa_ptr); |
1444 | #endif | 1444 | #endif |
1445 | 1445 | ||
1446 | return 0; | 1446 | return 0; |
1447 | } | 1447 | } |
1448 | 1448 | ||
1449 | /** | 1449 | /** |
1450 | * netdev_priv - access network device private data | 1450 | * netdev_priv - access network device private data |
1451 | * @dev: network device | 1451 | * @dev: network device |
1452 | * | 1452 | * |
1453 | * Get network device private data | 1453 | * Get network device private data |
1454 | */ | 1454 | */ |
1455 | static inline void *netdev_priv(const struct net_device *dev) | 1455 | static inline void *netdev_priv(const struct net_device *dev) |
1456 | { | 1456 | { |
1457 | return (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN); | 1457 | return (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN); |
1458 | } | 1458 | } |
1459 | 1459 | ||
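The private area returned by netdev_priv() is simply the extra space requested at allocation time, laid out after the aligned struct net_device. A minimal, hypothetical probe sketch (bus-specific details elided; foo_priv is an assumed driver type):

struct foo_priv {
	void __iomem *regs;		/* assumed MMIO mapping */
	struct napi_struct napi;
};

static int foo_probe(void)
{
	struct net_device *dev;
	struct foo_priv *priv;
	int err;

	dev = alloc_etherdev(sizeof(struct foo_priv));
	if (!dev)
		return -ENOMEM;

	priv = netdev_priv(dev);	/* points just past struct net_device */
	priv->regs = NULL;		/* mapped later, once the bus is known */

	err = register_netdev(dev);
	if (err)
		free_netdev(dev);
	return err;
}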
1460 | /* Set the sysfs physical device reference for the network logical device; | 1460 | /* Set the sysfs physical device reference for the network logical device; |
1461 | * if set prior to registration, it will cause a symlink to be created during initialization. | 1461 | * if set prior to registration, it will cause a symlink to be created during initialization. |
1462 | */ | 1462 | */ |
1463 | #define SET_NETDEV_DEV(net, pdev) ((net)->dev.parent = (pdev)) | 1463 | #define SET_NETDEV_DEV(net, pdev) ((net)->dev.parent = (pdev)) |
1464 | 1464 | ||
1465 | /* Set the sysfs device type for the network logical device to allow | 1465 | /* Set the sysfs device type for the network logical device to allow |
1466 | * fine grained identification of different network device types. For | 1466 | * fine grained identification of different network device types. For |
1467 | * example Ethernet, Wireless LAN, Bluetooth, WiMAX etc. | 1467 | * example Ethernet, Wireless LAN, Bluetooth, WiMAX etc. |
1468 | */ | 1468 | */ |
1469 | #define SET_NETDEV_DEVTYPE(net, devtype) ((net)->dev.type = (devtype)) | 1469 | #define SET_NETDEV_DEVTYPE(net, devtype) ((net)->dev.type = (devtype)) |
1470 | 1470 | ||
1471 | /** | 1471 | /** |
1472 | * netif_napi_add - initialize a napi context | 1472 | * netif_napi_add - initialize a napi context |
1473 | * @dev: network device | 1473 | * @dev: network device |
1474 | * @napi: napi context | 1474 | * @napi: napi context |
1475 | * @poll: polling function | 1475 | * @poll: polling function |
1476 | * @weight: default weight | 1476 | * @weight: default weight |
1477 | * | 1477 | * |
1478 | * netif_napi_add() must be used to initialize a napi context prior to calling | 1478 | * netif_napi_add() must be used to initialize a napi context prior to calling |
1479 | * *any* of the other napi related functions. | 1479 | * *any* of the other napi related functions. |
1480 | */ | 1480 | */ |
1481 | void netif_napi_add(struct net_device *dev, struct napi_struct *napi, | 1481 | void netif_napi_add(struct net_device *dev, struct napi_struct *napi, |
1482 | int (*poll)(struct napi_struct *, int), int weight); | 1482 | int (*poll)(struct napi_struct *, int), int weight); |
1483 | 1483 | ||
1484 | /** | 1484 | /** |
1485 | * netif_napi_del - remove a napi context | 1485 | * netif_napi_del - remove a napi context |
1486 | * @napi: napi context | 1486 | * @napi: napi context |
1487 | * | 1487 | * |
1488 | * netif_napi_del() removes a napi context from the network device napi list | 1488 | * netif_napi_del() removes a napi context from the network device napi list |
1489 | */ | 1489 | */ |
1490 | void netif_napi_del(struct napi_struct *napi); | 1490 | void netif_napi_del(struct napi_struct *napi); |
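A typical driver embeds the napi_struct in its private data, calls netif_napi_add() once at setup, and completes NAPI from the poll callback when it used less than the budget. my_napi_priv, my_poll() and the weight of 64 are illustrative; the actual ring handling is elided.

struct my_napi_priv {
	struct napi_struct napi;
};

static int my_poll(struct napi_struct *napi, int budget)
{
	int work_done = 0;

	/* ... pull up to 'budget' packets from the RX ring and hand them
	 *     to napi_gro_receive()/netif_receive_skb() ... */

	if (work_done < budget)
		napi_complete(napi);	/* done: re-enable device RX interrupts here */

	return work_done;
}

static void my_setup_napi(struct net_device *dev, struct my_napi_priv *priv)
{
	netif_napi_add(dev, &priv->napi, my_poll, 64);
}

static void my_teardown_napi(struct my_napi_priv *priv)
{
	netif_napi_del(&priv->napi);	/* after napi_disable() in the stop path */
}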
1491 | 1491 | ||
1492 | struct napi_gro_cb { | 1492 | struct napi_gro_cb { |
1493 | /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */ | 1493 | /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */ |
1494 | void *frag0; | 1494 | void *frag0; |
1495 | 1495 | ||
1496 | /* Length of frag0. */ | 1496 | /* Length of frag0. */ |
1497 | unsigned int frag0_len; | 1497 | unsigned int frag0_len; |
1498 | 1498 | ||
1499 | /* This indicates where we are processing relative to skb->data. */ | 1499 | /* This indicates where we are processing relative to skb->data. */ |
1500 | int data_offset; | 1500 | int data_offset; |
1501 | 1501 | ||
1502 | /* This is non-zero if the packet cannot be merged with the new skb. */ | 1502 | /* This is non-zero if the packet cannot be merged with the new skb. */ |
1503 | int flush; | 1503 | int flush; |
1504 | 1504 | ||
1505 | /* Number of segments aggregated. */ | 1505 | /* Number of segments aggregated. */ |
1506 | u16 count; | 1506 | u16 count; |
1507 | 1507 | ||
1508 | /* This is non-zero if the packet may be of the same flow. */ | 1508 | /* This is non-zero if the packet may be of the same flow. */ |
1509 | u8 same_flow; | 1509 | u8 same_flow; |
1510 | 1510 | ||
1511 | /* Free the skb? */ | 1511 | /* Free the skb? */ |
1512 | u8 free; | 1512 | u8 free; |
1513 | #define NAPI_GRO_FREE 1 | 1513 | #define NAPI_GRO_FREE 1 |
1514 | #define NAPI_GRO_FREE_STOLEN_HEAD 2 | 1514 | #define NAPI_GRO_FREE_STOLEN_HEAD 2 |
1515 | 1515 | ||
1516 | /* jiffies when first packet was created/queued */ | 1516 | /* jiffies when first packet was created/queued */ |
1517 | unsigned long age; | 1517 | unsigned long age; |
1518 | 1518 | ||
1519 | /* Used in ipv6_gro_receive() */ | 1519 | /* Used in ipv6_gro_receive() */ |
1520 | int proto; | 1520 | int proto; |
1521 | 1521 | ||
1522 | /* used in skb_gro_receive() slow path */ | 1522 | /* used in skb_gro_receive() slow path */ |
1523 | struct sk_buff *last; | 1523 | struct sk_buff *last; |
1524 | }; | 1524 | }; |
1525 | 1525 | ||
1526 | #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) | 1526 | #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) |
1527 | 1527 | ||
1528 | struct packet_type { | 1528 | struct packet_type { |
1529 | __be16 type; /* This is really htons(ether_type). */ | 1529 | __be16 type; /* This is really htons(ether_type). */ |
1530 | struct net_device *dev; /* NULL is wildcarded here */ | 1530 | struct net_device *dev; /* NULL is wildcarded here */ |
1531 | int (*func) (struct sk_buff *, | 1531 | int (*func) (struct sk_buff *, |
1532 | struct net_device *, | 1532 | struct net_device *, |
1533 | struct packet_type *, | 1533 | struct packet_type *, |
1534 | struct net_device *); | 1534 | struct net_device *); |
1535 | bool (*id_match)(struct packet_type *ptype, | 1535 | bool (*id_match)(struct packet_type *ptype, |
1536 | struct sock *sk); | 1536 | struct sock *sk); |
1537 | void *af_packet_priv; | 1537 | void *af_packet_priv; |
1538 | struct list_head list; | 1538 | struct list_head list; |
1539 | }; | 1539 | }; |
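A protocol taps frames of a given ethertype by filling one of these and registering it with dev_add_pack() (declared further down in this header). A minimal hedged sketch; my_rcv and the ETH_P_ALL tap are illustrative:

static int my_rcv(struct sk_buff *skb, struct net_device *dev,
		  struct packet_type *pt, struct net_device *orig_dev)
{
	/* look at the frame, then release our copy */
	kfree_skb(skb);
	return 0;
}

static struct packet_type my_pt __read_mostly = {
	.type = cpu_to_be16(ETH_P_ALL),		/* all protocols */
	.dev  = NULL,				/* NULL = any device */
	.func = my_rcv,
};

/* dev_add_pack(&my_pt) at init, dev_remove_pack(&my_pt) at exit. */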
1540 | 1540 | ||
1541 | struct offload_callbacks { | 1541 | struct offload_callbacks { |
1542 | struct sk_buff *(*gso_segment)(struct sk_buff *skb, | 1542 | struct sk_buff *(*gso_segment)(struct sk_buff *skb, |
1543 | netdev_features_t features); | 1543 | netdev_features_t features); |
1544 | int (*gso_send_check)(struct sk_buff *skb); | 1544 | int (*gso_send_check)(struct sk_buff *skb); |
1545 | struct sk_buff **(*gro_receive)(struct sk_buff **head, | 1545 | struct sk_buff **(*gro_receive)(struct sk_buff **head, |
1546 | struct sk_buff *skb); | 1546 | struct sk_buff *skb); |
1547 | int (*gro_complete)(struct sk_buff *skb); | 1547 | int (*gro_complete)(struct sk_buff *skb); |
1548 | }; | 1548 | }; |
1549 | 1549 | ||
1550 | struct packet_offload { | 1550 | struct packet_offload { |
1551 | __be16 type; /* This is really htons(ether_type). */ | 1551 | __be16 type; /* This is really htons(ether_type). */ |
1552 | struct offload_callbacks callbacks; | 1552 | struct offload_callbacks callbacks; |
1553 | struct list_head list; | 1553 | struct list_head list; |
1554 | }; | 1554 | }; |
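This is the structure the GSO/GRO offload paths walk per ethertype; the IPv4/IPv6 code register theirs with dev_add_offload() (declared below). A hedged sketch of the shape of such a registration; the my_* callbacks are placeholders standing in for the real protocol implementations:

static struct packet_offload my_ip_offload __read_mostly = {
	.type = cpu_to_be16(ETH_P_IP),
	.callbacks = {
		.gso_send_check	= my_gso_send_check,	/* placeholder callbacks */
		.gso_segment	= my_gso_segment,
		.gro_receive	= my_gro_receive,
		.gro_complete	= my_gro_complete,
	},
};

static int __init my_offload_init(void)
{
	dev_add_offload(&my_ip_offload);
	return 0;
}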
1555 | 1555 | ||
1556 | #include <linux/notifier.h> | 1556 | #include <linux/notifier.h> |
1557 | 1557 | ||
1558 | /* netdevice notifier chain. Please remember to update the rtnetlink | 1558 | /* netdevice notifier chain. Please remember to update the rtnetlink |
1559 | * notification exclusion list in rtnetlink_event() when adding new | 1559 | * notification exclusion list in rtnetlink_event() when adding new |
1560 | * types. | 1560 | * types. |
1561 | */ | 1561 | */ |
1562 | #define NETDEV_UP 0x0001 /* For now you can't veto a device up/down */ | 1562 | #define NETDEV_UP 0x0001 /* For now you can't veto a device up/down */ |
1563 | #define NETDEV_DOWN 0x0002 | 1563 | #define NETDEV_DOWN 0x0002 |
1564 | #define NETDEV_REBOOT 0x0003 /* Tell a protocol stack a network interface | 1564 | #define NETDEV_REBOOT 0x0003 /* Tell a protocol stack a network interface |
1565 | detected a hardware crash and restarted | 1565 | detected a hardware crash and restarted |
1566 | - we can use this eg to kick tcp sessions | 1566 | - we can use this eg to kick tcp sessions |
1567 | once done */ | 1567 | once done */ |
1568 | #define NETDEV_CHANGE 0x0004 /* Notify device state change */ | 1568 | #define NETDEV_CHANGE 0x0004 /* Notify device state change */ |
1569 | #define NETDEV_REGISTER 0x0005 | 1569 | #define NETDEV_REGISTER 0x0005 |
1570 | #define NETDEV_UNREGISTER 0x0006 | 1570 | #define NETDEV_UNREGISTER 0x0006 |
1571 | #define NETDEV_CHANGEMTU 0x0007 | 1571 | #define NETDEV_CHANGEMTU 0x0007 |
1572 | #define NETDEV_CHANGEADDR 0x0008 | 1572 | #define NETDEV_CHANGEADDR 0x0008 |
1573 | #define NETDEV_GOING_DOWN 0x0009 | 1573 | #define NETDEV_GOING_DOWN 0x0009 |
1574 | #define NETDEV_CHANGENAME 0x000A | 1574 | #define NETDEV_CHANGENAME 0x000A |
1575 | #define NETDEV_FEAT_CHANGE 0x000B | 1575 | #define NETDEV_FEAT_CHANGE 0x000B |
1576 | #define NETDEV_BONDING_FAILOVER 0x000C | 1576 | #define NETDEV_BONDING_FAILOVER 0x000C |
1577 | #define NETDEV_PRE_UP 0x000D | 1577 | #define NETDEV_PRE_UP 0x000D |
1578 | #define NETDEV_PRE_TYPE_CHANGE 0x000E | 1578 | #define NETDEV_PRE_TYPE_CHANGE 0x000E |
1579 | #define NETDEV_POST_TYPE_CHANGE 0x000F | 1579 | #define NETDEV_POST_TYPE_CHANGE 0x000F |
1580 | #define NETDEV_POST_INIT 0x0010 | 1580 | #define NETDEV_POST_INIT 0x0010 |
1581 | #define NETDEV_UNREGISTER_FINAL 0x0011 | 1581 | #define NETDEV_UNREGISTER_FINAL 0x0011 |
1582 | #define NETDEV_RELEASE 0x0012 | 1582 | #define NETDEV_RELEASE 0x0012 |
1583 | #define NETDEV_NOTIFY_PEERS 0x0013 | 1583 | #define NETDEV_NOTIFY_PEERS 0x0013 |
1584 | #define NETDEV_JOIN 0x0014 | 1584 | #define NETDEV_JOIN 0x0014 |
1585 | 1585 | ||
1586 | extern int register_netdevice_notifier(struct notifier_block *nb); | 1586 | extern int register_netdevice_notifier(struct notifier_block *nb); |
1587 | extern int unregister_netdevice_notifier(struct notifier_block *nb); | 1587 | extern int unregister_netdevice_notifier(struct notifier_block *nb); |
1588 | extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev); | 1588 | extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev); |
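A subsystem tracks interface events by registering a notifier_block; in this kernel the pointer passed to the callback is the struct net_device itself. Illustrative sketch:

static int my_netdev_event(struct notifier_block *nb,
			   unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;	/* here, ptr is the net_device */

	switch (event) {
	case NETDEV_UP:
		pr_info("%s: up\n", dev->name);
		break;
	case NETDEV_GOING_DOWN:
		pr_info("%s: going down\n", dev->name);
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block my_netdev_nb = {
	.notifier_call = my_netdev_event,
};

/* register_netdevice_notifier(&my_netdev_nb) at init,
 * unregister_netdevice_notifier(&my_netdev_nb) at exit. */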
1589 | 1589 | ||
1590 | 1590 | ||
1591 | extern rwlock_t dev_base_lock; /* Device list lock */ | 1591 | extern rwlock_t dev_base_lock; /* Device list lock */ |
1592 | 1592 | ||
1593 | extern seqcount_t devnet_rename_seq; /* Device rename seq */ | 1593 | extern seqcount_t devnet_rename_seq; /* Device rename seq */ |
1594 | 1594 | ||
1595 | 1595 | ||
1596 | #define for_each_netdev(net, d) \ | 1596 | #define for_each_netdev(net, d) \ |
1597 | list_for_each_entry(d, &(net)->dev_base_head, dev_list) | 1597 | list_for_each_entry(d, &(net)->dev_base_head, dev_list) |
1598 | #define for_each_netdev_reverse(net, d) \ | 1598 | #define for_each_netdev_reverse(net, d) \ |
1599 | list_for_each_entry_reverse(d, &(net)->dev_base_head, dev_list) | 1599 | list_for_each_entry_reverse(d, &(net)->dev_base_head, dev_list) |
1600 | #define for_each_netdev_rcu(net, d) \ | 1600 | #define for_each_netdev_rcu(net, d) \ |
1601 | list_for_each_entry_rcu(d, &(net)->dev_base_head, dev_list) | 1601 | list_for_each_entry_rcu(d, &(net)->dev_base_head, dev_list) |
1602 | #define for_each_netdev_safe(net, d, n) \ | 1602 | #define for_each_netdev_safe(net, d, n) \ |
1603 | list_for_each_entry_safe(d, n, &(net)->dev_base_head, dev_list) | 1603 | list_for_each_entry_safe(d, n, &(net)->dev_base_head, dev_list) |
1604 | #define for_each_netdev_continue(net, d) \ | 1604 | #define for_each_netdev_continue(net, d) \ |
1605 | list_for_each_entry_continue(d, &(net)->dev_base_head, dev_list) | 1605 | list_for_each_entry_continue(d, &(net)->dev_base_head, dev_list) |
1606 | #define for_each_netdev_continue_rcu(net, d) \ | 1606 | #define for_each_netdev_continue_rcu(net, d) \ |
1607 | list_for_each_entry_continue_rcu(d, &(net)->dev_base_head, dev_list) | 1607 | list_for_each_entry_continue_rcu(d, &(net)->dev_base_head, dev_list) |
1608 | #define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) | 1608 | #define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) |
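The _rcu variants may run under rcu_read_lock(); the plain ones require the RTNL or dev_base_lock. A small sketch walking init_net:

static void my_dump_devices(void)
{
	struct net_device *dev;

	rcu_read_lock();
	for_each_netdev_rcu(&init_net, dev)
		pr_info("%-16s ifindex=%d\n", dev->name, dev->ifindex);
	rcu_read_unlock();
}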
1609 | 1609 | ||
1610 | static inline struct net_device *next_net_device(struct net_device *dev) | 1610 | static inline struct net_device *next_net_device(struct net_device *dev) |
1611 | { | 1611 | { |
1612 | struct list_head *lh; | 1612 | struct list_head *lh; |
1613 | struct net *net; | 1613 | struct net *net; |
1614 | 1614 | ||
1615 | net = dev_net(dev); | 1615 | net = dev_net(dev); |
1616 | lh = dev->dev_list.next; | 1616 | lh = dev->dev_list.next; |
1617 | return lh == &net->dev_base_head ? NULL : net_device_entry(lh); | 1617 | return lh == &net->dev_base_head ? NULL : net_device_entry(lh); |
1618 | } | 1618 | } |
1619 | 1619 | ||
1620 | static inline struct net_device *next_net_device_rcu(struct net_device *dev) | 1620 | static inline struct net_device *next_net_device_rcu(struct net_device *dev) |
1621 | { | 1621 | { |
1622 | struct list_head *lh; | 1622 | struct list_head *lh; |
1623 | struct net *net; | 1623 | struct net *net; |
1624 | 1624 | ||
1625 | net = dev_net(dev); | 1625 | net = dev_net(dev); |
1626 | lh = rcu_dereference(list_next_rcu(&dev->dev_list)); | 1626 | lh = rcu_dereference(list_next_rcu(&dev->dev_list)); |
1627 | return lh == &net->dev_base_head ? NULL : net_device_entry(lh); | 1627 | return lh == &net->dev_base_head ? NULL : net_device_entry(lh); |
1628 | } | 1628 | } |
1629 | 1629 | ||
1630 | static inline struct net_device *first_net_device(struct net *net) | 1630 | static inline struct net_device *first_net_device(struct net *net) |
1631 | { | 1631 | { |
1632 | return list_empty(&net->dev_base_head) ? NULL : | 1632 | return list_empty(&net->dev_base_head) ? NULL : |
1633 | net_device_entry(net->dev_base_head.next); | 1633 | net_device_entry(net->dev_base_head.next); |
1634 | } | 1634 | } |
1635 | 1635 | ||
1636 | static inline struct net_device *first_net_device_rcu(struct net *net) | 1636 | static inline struct net_device *first_net_device_rcu(struct net *net) |
1637 | { | 1637 | { |
1638 | struct list_head *lh = rcu_dereference(list_next_rcu(&net->dev_base_head)); | 1638 | struct list_head *lh = rcu_dereference(list_next_rcu(&net->dev_base_head)); |
1639 | 1639 | ||
1640 | return lh == &net->dev_base_head ? NULL : net_device_entry(lh); | 1640 | return lh == &net->dev_base_head ? NULL : net_device_entry(lh); |
1641 | } | 1641 | } |
1642 | 1642 | ||
1643 | extern int netdev_boot_setup_check(struct net_device *dev); | 1643 | extern int netdev_boot_setup_check(struct net_device *dev); |
1644 | extern unsigned long netdev_boot_base(const char *prefix, int unit); | 1644 | extern unsigned long netdev_boot_base(const char *prefix, int unit); |
1645 | extern struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, | 1645 | extern struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, |
1646 | const char *hwaddr); | 1646 | const char *hwaddr); |
1647 | extern struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type); | 1647 | extern struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type); |
1648 | extern struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type); | 1648 | extern struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type); |
1649 | extern void dev_add_pack(struct packet_type *pt); | 1649 | extern void dev_add_pack(struct packet_type *pt); |
1650 | extern void dev_remove_pack(struct packet_type *pt); | 1650 | extern void dev_remove_pack(struct packet_type *pt); |
1651 | extern void __dev_remove_pack(struct packet_type *pt); | 1651 | extern void __dev_remove_pack(struct packet_type *pt); |
1652 | extern void dev_add_offload(struct packet_offload *po); | 1652 | extern void dev_add_offload(struct packet_offload *po); |
1653 | extern void dev_remove_offload(struct packet_offload *po); | 1653 | extern void dev_remove_offload(struct packet_offload *po); |
1654 | extern void __dev_remove_offload(struct packet_offload *po); | 1654 | extern void __dev_remove_offload(struct packet_offload *po); |
1655 | 1655 | ||
1656 | extern struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short flags, | 1656 | extern struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short flags, |
1657 | unsigned short mask); | 1657 | unsigned short mask); |
1658 | extern struct net_device *dev_get_by_name(struct net *net, const char *name); | 1658 | extern struct net_device *dev_get_by_name(struct net *net, const char *name); |
1659 | extern struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); | 1659 | extern struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); |
1660 | extern struct net_device *__dev_get_by_name(struct net *net, const char *name); | 1660 | extern struct net_device *__dev_get_by_name(struct net *net, const char *name); |
1661 | extern int dev_alloc_name(struct net_device *dev, const char *name); | 1661 | extern int dev_alloc_name(struct net_device *dev, const char *name); |
1662 | extern int dev_open(struct net_device *dev); | 1662 | extern int dev_open(struct net_device *dev); |
1663 | extern int dev_close(struct net_device *dev); | 1663 | extern int dev_close(struct net_device *dev); |
1664 | extern void dev_disable_lro(struct net_device *dev); | 1664 | extern void dev_disable_lro(struct net_device *dev); |
1665 | extern int dev_loopback_xmit(struct sk_buff *newskb); | 1665 | extern int dev_loopback_xmit(struct sk_buff *newskb); |
1666 | extern int dev_queue_xmit(struct sk_buff *skb); | 1666 | extern int dev_queue_xmit(struct sk_buff *skb); |
1667 | extern int register_netdevice(struct net_device *dev); | 1667 | extern int register_netdevice(struct net_device *dev); |
1668 | extern void unregister_netdevice_queue(struct net_device *dev, | 1668 | extern void unregister_netdevice_queue(struct net_device *dev, |
1669 | struct list_head *head); | 1669 | struct list_head *head); |
1670 | extern void unregister_netdevice_many(struct list_head *head); | 1670 | extern void unregister_netdevice_many(struct list_head *head); |
1671 | static inline void unregister_netdevice(struct net_device *dev) | 1671 | static inline void unregister_netdevice(struct net_device *dev) |
1672 | { | 1672 | { |
1673 | unregister_netdevice_queue(dev, NULL); | 1673 | unregister_netdevice_queue(dev, NULL); |
1674 | } | 1674 | } |
1675 | 1675 | ||
1676 | extern int netdev_refcnt_read(const struct net_device *dev); | 1676 | extern int netdev_refcnt_read(const struct net_device *dev); |
1677 | extern void free_netdev(struct net_device *dev); | 1677 | extern void free_netdev(struct net_device *dev); |
1678 | extern void synchronize_net(void); | 1678 | extern void synchronize_net(void); |
1679 | extern int init_dummy_netdev(struct net_device *dev); | 1679 | extern int init_dummy_netdev(struct net_device *dev); |
1680 | extern void netdev_resync_ops(struct net_device *dev); | 1680 | extern void netdev_resync_ops(struct net_device *dev); |
1681 | 1681 | ||
1682 | extern struct net_device *dev_get_by_index(struct net *net, int ifindex); | 1682 | extern struct net_device *dev_get_by_index(struct net *net, int ifindex); |
1683 | extern struct net_device *__dev_get_by_index(struct net *net, int ifindex); | 1683 | extern struct net_device *__dev_get_by_index(struct net *net, int ifindex); |
1684 | extern struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); | 1684 | extern struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); |
1685 | extern int dev_restart(struct net_device *dev); | 1685 | extern int dev_restart(struct net_device *dev); |
1686 | #ifdef CONFIG_NETPOLL_TRAP | 1686 | #ifdef CONFIG_NETPOLL_TRAP |
1687 | extern int netpoll_trap(void); | 1687 | extern int netpoll_trap(void); |
1688 | #endif | 1688 | #endif |
1689 | extern int skb_gro_receive(struct sk_buff **head, | 1689 | extern int skb_gro_receive(struct sk_buff **head, |
1690 | struct sk_buff *skb); | 1690 | struct sk_buff *skb); |
1691 | 1691 | ||
1692 | static inline unsigned int skb_gro_offset(const struct sk_buff *skb) | 1692 | static inline unsigned int skb_gro_offset(const struct sk_buff *skb) |
1693 | { | 1693 | { |
1694 | return NAPI_GRO_CB(skb)->data_offset; | 1694 | return NAPI_GRO_CB(skb)->data_offset; |
1695 | } | 1695 | } |
1696 | 1696 | ||
1697 | static inline unsigned int skb_gro_len(const struct sk_buff *skb) | 1697 | static inline unsigned int skb_gro_len(const struct sk_buff *skb) |
1698 | { | 1698 | { |
1699 | return skb->len - NAPI_GRO_CB(skb)->data_offset; | 1699 | return skb->len - NAPI_GRO_CB(skb)->data_offset; |
1700 | } | 1700 | } |
1701 | 1701 | ||
1702 | static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len) | 1702 | static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len) |
1703 | { | 1703 | { |
1704 | NAPI_GRO_CB(skb)->data_offset += len; | 1704 | NAPI_GRO_CB(skb)->data_offset += len; |
1705 | } | 1705 | } |
1706 | 1706 | ||
1707 | static inline void *skb_gro_header_fast(struct sk_buff *skb, | 1707 | static inline void *skb_gro_header_fast(struct sk_buff *skb, |
1708 | unsigned int offset) | 1708 | unsigned int offset) |
1709 | { | 1709 | { |
1710 | return NAPI_GRO_CB(skb)->frag0 + offset; | 1710 | return NAPI_GRO_CB(skb)->frag0 + offset; |
1711 | } | 1711 | } |
1712 | 1712 | ||
1713 | static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen) | 1713 | static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen) |
1714 | { | 1714 | { |
1715 | return NAPI_GRO_CB(skb)->frag0_len < hlen; | 1715 | return NAPI_GRO_CB(skb)->frag0_len < hlen; |
1716 | } | 1716 | } |
1717 | 1717 | ||
1718 | static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, | 1718 | static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, |
1719 | unsigned int offset) | 1719 | unsigned int offset) |
1720 | { | 1720 | { |
1721 | if (!pskb_may_pull(skb, hlen)) | 1721 | if (!pskb_may_pull(skb, hlen)) |
1722 | return NULL; | 1722 | return NULL; |
1723 | 1723 | ||
1724 | NAPI_GRO_CB(skb)->frag0 = NULL; | 1724 | NAPI_GRO_CB(skb)->frag0 = NULL; |
1725 | NAPI_GRO_CB(skb)->frag0_len = 0; | 1725 | NAPI_GRO_CB(skb)->frag0_len = 0; |
1726 | return skb->data + offset; | 1726 | return skb->data + offset; |
1727 | } | 1727 | } |
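The usual pattern in a gro_receive handler is to try the frag0 fast path first and fall back to the pskb_may_pull()-based slow helper only when the needed header is not fully inside frag0 (this mirrors what tcp_gro_receive() does). A hedged sketch assuming <linux/tcp.h>; the flow-matching body is elided:

static struct sk_buff **my_gro_receive(struct sk_buff **head,
				       struct sk_buff *skb)
{
	unsigned int off = skb_gro_offset(skb);
	unsigned int hlen = off + sizeof(struct tcphdr);
	struct tcphdr *th = skb_gro_header_fast(skb, off);

	if (skb_gro_header_hard(skb, hlen)) {
		th = skb_gro_header_slow(skb, hlen, off);
		if (unlikely(!th))
			goto flush;	/* header cannot be pulled: skip GRO */
	}

	/* ... compare 'th' against the flows already queued on 'head' ... */
	return NULL;

flush:
	NAPI_GRO_CB(skb)->flush = 1;	/* force this skb out unmerged */
	return NULL;
}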
1728 | 1728 | ||
1729 | static inline void *skb_gro_mac_header(struct sk_buff *skb) | 1729 | static inline void *skb_gro_mac_header(struct sk_buff *skb) |
1730 | { | 1730 | { |
1731 | return NAPI_GRO_CB(skb)->frag0 ?: skb_mac_header(skb); | 1731 | return NAPI_GRO_CB(skb)->frag0 ?: skb_mac_header(skb); |
1732 | } | 1732 | } |
1733 | 1733 | ||
1734 | static inline void *skb_gro_network_header(struct sk_buff *skb) | 1734 | static inline void *skb_gro_network_header(struct sk_buff *skb) |
1735 | { | 1735 | { |
1736 | return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) + | 1736 | return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) + |
1737 | skb_network_offset(skb); | 1737 | skb_network_offset(skb); |
1738 | } | 1738 | } |
1739 | 1739 | ||
1740 | static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, | 1740 | static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, |
1741 | unsigned short type, | 1741 | unsigned short type, |
1742 | const void *daddr, const void *saddr, | 1742 | const void *daddr, const void *saddr, |
1743 | unsigned int len) | 1743 | unsigned int len) |
1744 | { | 1744 | { |
1745 | if (!dev->header_ops || !dev->header_ops->create) | 1745 | if (!dev->header_ops || !dev->header_ops->create) |
1746 | return 0; | 1746 | return 0; |
1747 | 1747 | ||
1748 | return dev->header_ops->create(skb, dev, type, daddr, saddr, len); | 1748 | return dev->header_ops->create(skb, dev, type, daddr, saddr, len); |
1749 | } | 1749 | } |
1750 | 1750 | ||
1751 | static inline int dev_parse_header(const struct sk_buff *skb, | 1751 | static inline int dev_parse_header(const struct sk_buff *skb, |
1752 | unsigned char *haddr) | 1752 | unsigned char *haddr) |
1753 | { | 1753 | { |
1754 | const struct net_device *dev = skb->dev; | 1754 | const struct net_device *dev = skb->dev; |
1755 | 1755 | ||
1756 | if (!dev->header_ops || !dev->header_ops->parse) | 1756 | if (!dev->header_ops || !dev->header_ops->parse) |
1757 | return 0; | 1757 | return 0; |
1758 | return dev->header_ops->parse(skb, haddr); | 1758 | return dev->header_ops->parse(skb, haddr); |
1759 | } | 1759 | } |
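dev_hard_header() asks the device's header_ops to prepend the link-layer header (eth_header() for Ethernet). A hedged sketch of building a frame toward a known destination MAC; 'dest' is supplied by the caller and <linux/if_ether.h> is assumed:

static int my_push_l2_header(struct sk_buff *skb, struct net_device *dev,
			     const unsigned char *dest)
{
	/* caller must have reserved dev->hard_header_len bytes of headroom */
	if (dev_hard_header(skb, dev, ETH_P_IP, dest, dev->dev_addr,
			    skb->len) < 0)
		return -EINVAL;
	return 0;
}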
1760 | 1760 | ||
1761 | typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len); | 1761 | typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len); |
1762 | extern int register_gifconf(unsigned int family, gifconf_func_t * gifconf); | 1762 | extern int register_gifconf(unsigned int family, gifconf_func_t * gifconf); |
1763 | static inline int unregister_gifconf(unsigned int family) | 1763 | static inline int unregister_gifconf(unsigned int family) |
1764 | { | 1764 | { |
1765 | return register_gifconf(family, NULL); | 1765 | return register_gifconf(family, NULL); |
1766 | } | 1766 | } |
1767 | 1767 | ||
1768 | /* | 1768 | /* |
1769 | * Incoming packets are placed on per-cpu queues | 1769 | * Incoming packets are placed on per-cpu queues |
1770 | */ | 1770 | */ |
1771 | struct softnet_data { | 1771 | struct softnet_data { |
1772 | struct Qdisc *output_queue; | 1772 | struct Qdisc *output_queue; |
1773 | struct Qdisc **output_queue_tailp; | 1773 | struct Qdisc **output_queue_tailp; |
1774 | struct list_head poll_list; | 1774 | struct list_head poll_list; |
1775 | struct sk_buff *completion_queue; | 1775 | struct sk_buff *completion_queue; |
1776 | struct sk_buff_head process_queue; | 1776 | struct sk_buff_head process_queue; |
1777 | 1777 | ||
1778 | /* stats */ | 1778 | /* stats */ |
1779 | unsigned int processed; | 1779 | unsigned int processed; |
1780 | unsigned int time_squeeze; | 1780 | unsigned int time_squeeze; |
1781 | unsigned int cpu_collision; | 1781 | unsigned int cpu_collision; |
1782 | unsigned int received_rps; | 1782 | unsigned int received_rps; |
1783 | 1783 | ||
1784 | #ifdef CONFIG_RPS | 1784 | #ifdef CONFIG_RPS |
1785 | struct softnet_data *rps_ipi_list; | 1785 | struct softnet_data *rps_ipi_list; |
1786 | 1786 | ||
1787 | /* Elements below can be accessed between CPUs for RPS */ | 1787 | /* Elements below can be accessed between CPUs for RPS */ |
1788 | struct call_single_data csd ____cacheline_aligned_in_smp; | 1788 | struct call_single_data csd ____cacheline_aligned_in_smp; |
1789 | struct softnet_data *rps_ipi_next; | 1789 | struct softnet_data *rps_ipi_next; |
1790 | unsigned int cpu; | 1790 | unsigned int cpu; |
1791 | unsigned int input_queue_head; | 1791 | unsigned int input_queue_head; |
1792 | unsigned int input_queue_tail; | 1792 | unsigned int input_queue_tail; |
1793 | #endif | 1793 | #endif |
1794 | unsigned int dropped; | 1794 | unsigned int dropped; |
1795 | struct sk_buff_head input_pkt_queue; | 1795 | struct sk_buff_head input_pkt_queue; |
1796 | struct napi_struct backlog; | 1796 | struct napi_struct backlog; |
1797 | }; | 1797 | }; |
1798 | 1798 | ||
1799 | static inline void input_queue_head_incr(struct softnet_data *sd) | 1799 | static inline void input_queue_head_incr(struct softnet_data *sd) |
1800 | { | 1800 | { |
1801 | #ifdef CONFIG_RPS | 1801 | #ifdef CONFIG_RPS |
1802 | sd->input_queue_head++; | 1802 | sd->input_queue_head++; |
1803 | #endif | 1803 | #endif |
1804 | } | 1804 | } |
1805 | 1805 | ||
1806 | static inline void input_queue_tail_incr_save(struct softnet_data *sd, | 1806 | static inline void input_queue_tail_incr_save(struct softnet_data *sd, |
1807 | unsigned int *qtail) | 1807 | unsigned int *qtail) |
1808 | { | 1808 | { |
1809 | #ifdef CONFIG_RPS | 1809 | #ifdef CONFIG_RPS |
1810 | *qtail = ++sd->input_queue_tail; | 1810 | *qtail = ++sd->input_queue_tail; |
1811 | #endif | 1811 | #endif |
1812 | } | 1812 | } |
1813 | 1813 | ||
1814 | DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); | 1814 | DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); |
1815 | 1815 | ||
1816 | extern void __netif_schedule(struct Qdisc *q); | 1816 | extern void __netif_schedule(struct Qdisc *q); |
1817 | 1817 | ||
1818 | static inline void netif_schedule_queue(struct netdev_queue *txq) | 1818 | static inline void netif_schedule_queue(struct netdev_queue *txq) |
1819 | { | 1819 | { |
1820 | if (!(txq->state & QUEUE_STATE_ANY_XOFF)) | 1820 | if (!(txq->state & QUEUE_STATE_ANY_XOFF)) |
1821 | __netif_schedule(txq->qdisc); | 1821 | __netif_schedule(txq->qdisc); |
1822 | } | 1822 | } |
1823 | 1823 | ||
1824 | static inline void netif_tx_schedule_all(struct net_device *dev) | 1824 | static inline void netif_tx_schedule_all(struct net_device *dev) |
1825 | { | 1825 | { |
1826 | unsigned int i; | 1826 | unsigned int i; |
1827 | 1827 | ||
1828 | for (i = 0; i < dev->num_tx_queues; i++) | 1828 | for (i = 0; i < dev->num_tx_queues; i++) |
1829 | netif_schedule_queue(netdev_get_tx_queue(dev, i)); | 1829 | netif_schedule_queue(netdev_get_tx_queue(dev, i)); |
1830 | } | 1830 | } |
1831 | 1831 | ||
1832 | static inline void netif_tx_start_queue(struct netdev_queue *dev_queue) | 1832 | static inline void netif_tx_start_queue(struct netdev_queue *dev_queue) |
1833 | { | 1833 | { |
1834 | clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); | 1834 | clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); |
1835 | } | 1835 | } |
1836 | 1836 | ||
1837 | /** | 1837 | /** |
1838 | * netif_start_queue - allow transmit | 1838 | * netif_start_queue - allow transmit |
1839 | * @dev: network device | 1839 | * @dev: network device |
1840 | * | 1840 | * |
1841 | * Allow upper layers to call the device hard_start_xmit routine. | 1841 | * Allow upper layers to call the device hard_start_xmit routine. |
1842 | */ | 1842 | */ |
1843 | static inline void netif_start_queue(struct net_device *dev) | 1843 | static inline void netif_start_queue(struct net_device *dev) |
1844 | { | 1844 | { |
1845 | netif_tx_start_queue(netdev_get_tx_queue(dev, 0)); | 1845 | netif_tx_start_queue(netdev_get_tx_queue(dev, 0)); |
1846 | } | 1846 | } |
1847 | 1847 | ||
1848 | static inline void netif_tx_start_all_queues(struct net_device *dev) | 1848 | static inline void netif_tx_start_all_queues(struct net_device *dev) |
1849 | { | 1849 | { |
1850 | unsigned int i; | 1850 | unsigned int i; |
1851 | 1851 | ||
1852 | for (i = 0; i < dev->num_tx_queues; i++) { | 1852 | for (i = 0; i < dev->num_tx_queues; i++) { |
1853 | struct netdev_queue *txq = netdev_get_tx_queue(dev, i); | 1853 | struct netdev_queue *txq = netdev_get_tx_queue(dev, i); |
1854 | netif_tx_start_queue(txq); | 1854 | netif_tx_start_queue(txq); |
1855 | } | 1855 | } |
1856 | } | 1856 | } |
1857 | 1857 | ||
1858 | static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue) | 1858 | static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue) |
1859 | { | 1859 | { |
1860 | #ifdef CONFIG_NETPOLL_TRAP | 1860 | #ifdef CONFIG_NETPOLL_TRAP |
1861 | if (netpoll_trap()) { | 1861 | if (netpoll_trap()) { |
1862 | netif_tx_start_queue(dev_queue); | 1862 | netif_tx_start_queue(dev_queue); |
1863 | return; | 1863 | return; |
1864 | } | 1864 | } |
1865 | #endif | 1865 | #endif |
1866 | if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) | 1866 | if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) |
1867 | __netif_schedule(dev_queue->qdisc); | 1867 | __netif_schedule(dev_queue->qdisc); |
1868 | } | 1868 | } |
1869 | 1869 | ||
1870 | /** | 1870 | /** |
1871 | * netif_wake_queue - restart transmit | 1871 | * netif_wake_queue - restart transmit |
1872 | * @dev: network device | 1872 | * @dev: network device |
1873 | * | 1873 | * |
1874 | * Allow upper layers to call the device hard_start_xmit routine. | 1874 | * Allow upper layers to call the device hard_start_xmit routine. |
1875 | * Used for flow control when transmit resources are available. | 1875 | * Used for flow control when transmit resources are available. |
1876 | */ | 1876 | */ |
1877 | static inline void netif_wake_queue(struct net_device *dev) | 1877 | static inline void netif_wake_queue(struct net_device *dev) |
1878 | { | 1878 | { |
1879 | netif_tx_wake_queue(netdev_get_tx_queue(dev, 0)); | 1879 | netif_tx_wake_queue(netdev_get_tx_queue(dev, 0)); |
1880 | } | 1880 | } |
1881 | 1881 | ||
1882 | static inline void netif_tx_wake_all_queues(struct net_device *dev) | 1882 | static inline void netif_tx_wake_all_queues(struct net_device *dev) |
1883 | { | 1883 | { |
1884 | unsigned int i; | 1884 | unsigned int i; |
1885 | 1885 | ||
1886 | for (i = 0; i < dev->num_tx_queues; i++) { | 1886 | for (i = 0; i < dev->num_tx_queues; i++) { |
1887 | struct netdev_queue *txq = netdev_get_tx_queue(dev, i); | 1887 | struct netdev_queue *txq = netdev_get_tx_queue(dev, i); |
1888 | netif_tx_wake_queue(txq); | 1888 | netif_tx_wake_queue(txq); |
1889 | } | 1889 | } |
1890 | } | 1890 | } |
1891 | 1891 | ||
1892 | static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) | 1892 | static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) |
1893 | { | 1893 | { |
1894 | if (WARN_ON(!dev_queue)) { | 1894 | if (WARN_ON(!dev_queue)) { |
1895 | pr_info("netif_stop_queue() cannot be called before register_netdev()\n"); | 1895 | pr_info("netif_stop_queue() cannot be called before register_netdev()\n"); |
1896 | return; | 1896 | return; |
1897 | } | 1897 | } |
1898 | set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); | 1898 | set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); |
1899 | } | 1899 | } |
1900 | 1900 | ||
1901 | /** | 1901 | /** |
1902 | * netif_stop_queue - stop transmitted packets | 1902 | * netif_stop_queue - stop transmitted packets |
1903 | * @dev: network device | 1903 | * @dev: network device |
1904 | * | 1904 | * |
1905 | * Stop upper layers calling the device hard_start_xmit routine. | 1905 | * Stop upper layers calling the device hard_start_xmit routine. |
1906 | * Used for flow control when transmit resources are unavailable. | 1906 | * Used for flow control when transmit resources are unavailable. |
1907 | */ | 1907 | */ |
1908 | static inline void netif_stop_queue(struct net_device *dev) | 1908 | static inline void netif_stop_queue(struct net_device *dev) |
1909 | { | 1909 | { |
1910 | netif_tx_stop_queue(netdev_get_tx_queue(dev, 0)); | 1910 | netif_tx_stop_queue(netdev_get_tx_queue(dev, 0)); |
1911 | } | 1911 | } |
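Together these implement the classic single-queue flow-control contract: ndo_start_xmit stops the queue when the TX ring cannot take another frame, and the TX completion path wakes it once descriptors have been reclaimed. Sketch; my_tx_ring_full()/my_tx_reclaim() are placeholders for driver ring state:

static netdev_tx_t my_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	/* ... place skb on the hardware TX ring ... */

	if (my_tx_ring_full(dev))
		netif_stop_queue(dev);		/* no room for the next frame */
	return NETDEV_TX_OK;
}

static void my_tx_complete(struct net_device *dev)
{
	my_tx_reclaim(dev);			/* free completed descriptors */

	if (netif_queue_stopped(dev) && !my_tx_ring_full(dev))
		netif_wake_queue(dev);		/* let the stack transmit again */
}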
1912 | 1912 | ||
1913 | static inline void netif_tx_stop_all_queues(struct net_device *dev) | 1913 | static inline void netif_tx_stop_all_queues(struct net_device *dev) |
1914 | { | 1914 | { |
1915 | unsigned int i; | 1915 | unsigned int i; |
1916 | 1916 | ||
1917 | for (i = 0; i < dev->num_tx_queues; i++) { | 1917 | for (i = 0; i < dev->num_tx_queues; i++) { |
1918 | struct netdev_queue *txq = netdev_get_tx_queue(dev, i); | 1918 | struct netdev_queue *txq = netdev_get_tx_queue(dev, i); |
1919 | netif_tx_stop_queue(txq); | 1919 | netif_tx_stop_queue(txq); |
1920 | } | 1920 | } |
1921 | } | 1921 | } |
1922 | 1922 | ||
1923 | static inline bool netif_tx_queue_stopped(const struct netdev_queue *dev_queue) | 1923 | static inline bool netif_tx_queue_stopped(const struct netdev_queue *dev_queue) |
1924 | { | 1924 | { |
1925 | return test_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); | 1925 | return test_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); |
1926 | } | 1926 | } |
1927 | 1927 | ||
1928 | /** | 1928 | /** |
1929 | * netif_queue_stopped - test if transmit queue is flowblocked | 1929 | * netif_queue_stopped - test if transmit queue is flowblocked |
1930 | * @dev: network device | 1930 | * @dev: network device |
1931 | * | 1931 | * |
1932 | * Test if transmit queue on device is currently unable to send. | 1932 | * Test if transmit queue on device is currently unable to send. |
1933 | */ | 1933 | */ |
1934 | static inline bool netif_queue_stopped(const struct net_device *dev) | 1934 | static inline bool netif_queue_stopped(const struct net_device *dev) |
1935 | { | 1935 | { |
1936 | return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0)); | 1936 | return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0)); |
1937 | } | 1937 | } |
1938 | 1938 | ||
1939 | static inline bool netif_xmit_stopped(const struct netdev_queue *dev_queue) | 1939 | static inline bool netif_xmit_stopped(const struct netdev_queue *dev_queue) |
1940 | { | 1940 | { |
1941 | return dev_queue->state & QUEUE_STATE_ANY_XOFF; | 1941 | return dev_queue->state & QUEUE_STATE_ANY_XOFF; |
1942 | } | 1942 | } |
1943 | 1943 | ||
1944 | static inline bool netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_queue) | 1944 | static inline bool netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_queue) |
1945 | { | 1945 | { |
1946 | return dev_queue->state & QUEUE_STATE_ANY_XOFF_OR_FROZEN; | 1946 | return dev_queue->state & QUEUE_STATE_ANY_XOFF_OR_FROZEN; |
1947 | } | 1947 | } |
1948 | 1948 | ||
1949 | static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue, | 1949 | static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue, |
1950 | unsigned int bytes) | 1950 | unsigned int bytes) |
1951 | { | 1951 | { |
1952 | #ifdef CONFIG_BQL | 1952 | #ifdef CONFIG_BQL |
1953 | dql_queued(&dev_queue->dql, bytes); | 1953 | dql_queued(&dev_queue->dql, bytes); |
1954 | 1954 | ||
1955 | if (likely(dql_avail(&dev_queue->dql) >= 0)) | 1955 | if (likely(dql_avail(&dev_queue->dql) >= 0)) |
1956 | return; | 1956 | return; |
1957 | 1957 | ||
1958 | set_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state); | 1958 | set_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state); |
1959 | 1959 | ||
1960 | /* | 1960 | /* |
1961 | * The XOFF flag must be set before checking the dql_avail below, | 1961 | * The XOFF flag must be set before checking the dql_avail below, |
1962 | * because in netdev_tx_completed_queue we update the dql_completed | 1962 | * because in netdev_tx_completed_queue we update the dql_completed |
1963 | * before checking the XOFF flag. | 1963 | * before checking the XOFF flag. |
1964 | */ | 1964 | */ |
1965 | smp_mb(); | 1965 | smp_mb(); |
1966 | 1966 | ||
1967 | /* check again in case another CPU has just made room avail */ | 1967 | /* check again in case another CPU has just made room avail */ |
1968 | if (unlikely(dql_avail(&dev_queue->dql) >= 0)) | 1968 | if (unlikely(dql_avail(&dev_queue->dql) >= 0)) |
1969 | clear_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state); | 1969 | clear_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state); |
1970 | #endif | 1970 | #endif |
1971 | } | 1971 | } |
1972 | 1972 | ||
1973 | static inline void netdev_sent_queue(struct net_device *dev, unsigned int bytes) | 1973 | static inline void netdev_sent_queue(struct net_device *dev, unsigned int bytes) |
1974 | { | 1974 | { |
1975 | netdev_tx_sent_queue(netdev_get_tx_queue(dev, 0), bytes); | 1975 | netdev_tx_sent_queue(netdev_get_tx_queue(dev, 0), bytes); |
1976 | } | 1976 | } |
1977 | 1977 | ||
1978 | static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue, | 1978 | static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue, |
1979 | unsigned int pkts, unsigned int bytes) | 1979 | unsigned int pkts, unsigned int bytes) |
1980 | { | 1980 | { |
1981 | #ifdef CONFIG_BQL | 1981 | #ifdef CONFIG_BQL |
1982 | if (unlikely(!bytes)) | 1982 | if (unlikely(!bytes)) |
1983 | return; | 1983 | return; |
1984 | 1984 | ||
1985 | dql_completed(&dev_queue->dql, bytes); | 1985 | dql_completed(&dev_queue->dql, bytes); |
1986 | 1986 | ||
1987 | /* | 1987 | /* |
1988 | * Without the memory barrier there is a small possibility that | 1988 | * Without the memory barrier there is a small possibility that |
1989 | * netdev_tx_sent_queue will miss the update and cause the queue to | 1989 | * netdev_tx_sent_queue will miss the update and cause the queue to |
1990 | * be stopped forever | 1990 | * be stopped forever |
1991 | */ | 1991 | */ |
1992 | smp_mb(); | 1992 | smp_mb(); |
1993 | 1993 | ||
1994 | if (dql_avail(&dev_queue->dql) < 0) | 1994 | if (dql_avail(&dev_queue->dql) < 0) |
1995 | return; | 1995 | return; |
1996 | 1996 | ||
1997 | if (test_and_clear_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state)) | 1997 | if (test_and_clear_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state)) |
1998 | netif_schedule_queue(dev_queue); | 1998 | netif_schedule_queue(dev_queue); |
1999 | #endif | 1999 | #endif |
2000 | } | 2000 | } |
2001 | 2001 | ||
2002 | static inline void netdev_completed_queue(struct net_device *dev, | 2002 | static inline void netdev_completed_queue(struct net_device *dev, |
2003 | unsigned int pkts, unsigned int bytes) | 2003 | unsigned int pkts, unsigned int bytes) |
2004 | { | 2004 | { |
2005 | netdev_tx_completed_queue(netdev_get_tx_queue(dev, 0), pkts, bytes); | 2005 | netdev_tx_completed_queue(netdev_get_tx_queue(dev, 0), pkts, bytes); |
2006 | } | 2006 | } |
2007 | 2007 | ||
2008 | static inline void netdev_tx_reset_queue(struct netdev_queue *q) | 2008 | static inline void netdev_tx_reset_queue(struct netdev_queue *q) |
2009 | { | 2009 | { |
2010 | #ifdef CONFIG_BQL | 2010 | #ifdef CONFIG_BQL |
2011 | clear_bit(__QUEUE_STATE_STACK_XOFF, &q->state); | 2011 | clear_bit(__QUEUE_STATE_STACK_XOFF, &q->state); |
2012 | dql_reset(&q->dql); | 2012 | dql_reset(&q->dql); |
2013 | #endif | 2013 | #endif |
2014 | } | 2014 | } |
2015 | 2015 | ||
2016 | static inline void netdev_reset_queue(struct net_device *dev_queue) | 2016 | static inline void netdev_reset_queue(struct net_device *dev_queue) |
2017 | { | 2017 | { |
2018 | netdev_tx_reset_queue(netdev_get_tx_queue(dev_queue, 0)); | 2018 | netdev_tx_reset_queue(netdev_get_tx_queue(dev_queue, 0)); |
2019 | } | 2019 | } |
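Byte queue limits tie these helpers together: report bytes queued from the xmit path, bytes completed from the TX-clean path, and reset the accounting whenever the ring is flushed; the dql state behind them is what sets and clears __QUEUE_STATE_STACK_XOFF. A hedged single-queue sketch:

static netdev_tx_t my_bql_xmit(struct sk_buff *skb, struct net_device *dev)
{
	unsigned int len = skb->len;

	/* ... put the frame on the hardware ring ... */
	netdev_sent_queue(dev, len);		/* account bytes handed to HW */
	return NETDEV_TX_OK;
}

static void my_bql_tx_clean(struct net_device *dev,
			    unsigned int pkts, unsigned int bytes)
{
	/* ... reclaim 'pkts'/'bytes' worth of completed descriptors ... */
	netdev_completed_queue(dev, pkts, bytes);	/* may re-wake the queue */
}

static int my_bql_stop(struct net_device *dev)
{
	netdev_reset_queue(dev);	/* ring flushed: drop in-flight accounting */
	return 0;
}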
2020 | 2020 | ||
2021 | /** | 2021 | /** |
2022 | * netif_running - test if up | 2022 | * netif_running - test if up |
2023 | * @dev: network device | 2023 | * @dev: network device |
2024 | * | 2024 | * |
2025 | * Test if the device has been brought up. | 2025 | * Test if the device has been brought up. |
2026 | */ | 2026 | */ |
2027 | static inline bool netif_running(const struct net_device *dev) | 2027 | static inline bool netif_running(const struct net_device *dev) |
2028 | { | 2028 | { |
2029 | return test_bit(__LINK_STATE_START, &dev->state); | 2029 | return test_bit(__LINK_STATE_START, &dev->state); |
2030 | } | 2030 | } |
2031 | 2031 | ||
2032 | /* | 2032 | /* |
2033 | * Routines to manage the subqueues on a device. We only need start | 2033 | * Routines to manage the subqueues on a device. We only need start |
2034 | * stop, and a check if it's stopped. All other device management is | 2034 | * stop, and a check if it's stopped. All other device management is |
2035 | * done at the overall netdevice level. | 2035 | * done at the overall netdevice level. |
2036 | * Also test the device if we're multiqueue. | 2036 | * Also test the device if we're multiqueue. |
2037 | */ | 2037 | */ |
2038 | 2038 | ||
2039 | /** | 2039 | /** |
2040 | * netif_start_subqueue - allow sending packets on subqueue | 2040 | * netif_start_subqueue - allow sending packets on subqueue |
2041 | * @dev: network device | 2041 | * @dev: network device |
2042 | * @queue_index: sub queue index | 2042 | * @queue_index: sub queue index |
2043 | * | 2043 | * |
2044 | * Start individual transmit queue of a device with multiple transmit queues. | 2044 | * Start individual transmit queue of a device with multiple transmit queues. |
2045 | */ | 2045 | */ |
2046 | static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index) | 2046 | static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index) |
2047 | { | 2047 | { |
2048 | struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); | 2048 | struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); |
2049 | 2049 | ||
2050 | netif_tx_start_queue(txq); | 2050 | netif_tx_start_queue(txq); |
2051 | } | 2051 | } |
2052 | 2052 | ||
2053 | /** | 2053 | /** |
2054 | * netif_stop_subqueue - stop sending packets on subqueue | 2054 | * netif_stop_subqueue - stop sending packets on subqueue |
2055 | * @dev: network device | 2055 | * @dev: network device |
2056 | * @queue_index: sub queue index | 2056 | * @queue_index: sub queue index |
2057 | * | 2057 | * |
2058 | * Stop individual transmit queue of a device with multiple transmit queues. | 2058 | * Stop individual transmit queue of a device with multiple transmit queues. |
2059 | */ | 2059 | */ |
2060 | static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index) | 2060 | static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index) |
2061 | { | 2061 | { |
2062 | struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); | 2062 | struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); |
2063 | #ifdef CONFIG_NETPOLL_TRAP | 2063 | #ifdef CONFIG_NETPOLL_TRAP |
2064 | if (netpoll_trap()) | 2064 | if (netpoll_trap()) |
2065 | return; | 2065 | return; |
2066 | #endif | 2066 | #endif |
2067 | netif_tx_stop_queue(txq); | 2067 | netif_tx_stop_queue(txq); |
2068 | } | 2068 | } |
2069 | 2069 | ||
2070 | /** | 2070 | /** |
2071 | * netif_subqueue_stopped - test status of subqueue | 2071 | * netif_subqueue_stopped - test status of subqueue |
2072 | * @dev: network device | 2072 | * @dev: network device |
2073 | * @queue_index: sub queue index | 2073 | * @queue_index: sub queue index |
2074 | * | 2074 | * |
2075 | * Check individual transmit queue of a device with multiple transmit queues. | 2075 | * Check individual transmit queue of a device with multiple transmit queues. |
2076 | */ | 2076 | */ |
2077 | static inline bool __netif_subqueue_stopped(const struct net_device *dev, | 2077 | static inline bool __netif_subqueue_stopped(const struct net_device *dev, |
2078 | u16 queue_index) | 2078 | u16 queue_index) |
2079 | { | 2079 | { |
2080 | struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); | 2080 | struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); |
2081 | 2081 | ||
2082 | return netif_tx_queue_stopped(txq); | 2082 | return netif_tx_queue_stopped(txq); |
2083 | } | 2083 | } |
2084 | 2084 | ||
2085 | static inline bool netif_subqueue_stopped(const struct net_device *dev, | 2085 | static inline bool netif_subqueue_stopped(const struct net_device *dev, |
2086 | struct sk_buff *skb) | 2086 | struct sk_buff *skb) |
2087 | { | 2087 | { |
2088 | return __netif_subqueue_stopped(dev, skb_get_queue_mapping(skb)); | 2088 | return __netif_subqueue_stopped(dev, skb_get_queue_mapping(skb)); |
2089 | } | 2089 | } |
2090 | 2090 | ||
2091 | /** | 2091 | /** |
2092 | * netif_wake_subqueue - allow sending packets on subqueue | 2092 | * netif_wake_subqueue - allow sending packets on subqueue |
2093 | * @dev: network device | 2093 | * @dev: network device |
2094 | * @queue_index: sub queue index | 2094 | * @queue_index: sub queue index |
2095 | * | 2095 | * |
2096 | * Resume individual transmit queue of a device with multiple transmit queues. | 2096 | * Resume individual transmit queue of a device with multiple transmit queues. |
2097 | */ | 2097 | */ |
2098 | static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) | 2098 | static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) |
2099 | { | 2099 | { |
2100 | struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); | 2100 | struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); |
2101 | #ifdef CONFIG_NETPOLL_TRAP | 2101 | #ifdef CONFIG_NETPOLL_TRAP |
2102 | if (netpoll_trap()) | 2102 | if (netpoll_trap()) |
2103 | return; | 2103 | return; |
2104 | #endif | 2104 | #endif |
2105 | if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state)) | 2105 | if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state)) |
2106 | __netif_schedule(txq->qdisc); | 2106 | __netif_schedule(txq->qdisc); |
2107 | } | 2107 | } |
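Multiqueue drivers apply the same stop/wake discipline per subqueue, keyed by the skb's queue mapping. Sketch; my_ring_full() is a placeholder:

static netdev_tx_t my_mq_xmit(struct sk_buff *skb, struct net_device *dev)
{
	u16 q = skb_get_queue_mapping(skb);

	/* ... queue skb on TX ring 'q' ... */

	if (my_ring_full(dev, q))
		netif_stop_subqueue(dev, q);
	return NETDEV_TX_OK;
}

static void my_mq_tx_complete(struct net_device *dev, u16 q)
{
	/* ... reclaim descriptors on ring 'q' ... */

	if (__netif_subqueue_stopped(dev, q) && !my_ring_full(dev, q))
		netif_wake_subqueue(dev, q);
}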
2108 | 2108 | ||
2109 | #ifdef CONFIG_XPS | 2109 | #ifdef CONFIG_XPS |
2110 | extern int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, | 2110 | extern int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, |
2111 | u16 index); | 2111 | u16 index); |
2112 | #else | 2112 | #else |
2113 | static inline int netif_set_xps_queue(struct net_device *dev, | 2113 | static inline int netif_set_xps_queue(struct net_device *dev, |
2114 | struct cpumask *mask, | 2114 | struct cpumask *mask, |
2115 | u16 index) | 2115 | u16 index) |
2116 | { | 2116 | { |
2117 | return 0; | 2117 | return 0; |
2118 | } | 2118 | } |
2119 | #endif | 2119 | #endif |
2120 | 2120 | ||
2121 | /* | 2121 | /* |
2122 | * Returns a Tx hash for the given packet when dev->real_num_tx_queues is used | 2122 | * Returns a Tx hash for the given packet when dev->real_num_tx_queues is used |
2123 | * as a distribution range limit for the returned value. | 2123 | * as a distribution range limit for the returned value. |
2124 | */ | 2124 | */ |
2125 | static inline u16 skb_tx_hash(const struct net_device *dev, | 2125 | static inline u16 skb_tx_hash(const struct net_device *dev, |
2126 | const struct sk_buff *skb) | 2126 | const struct sk_buff *skb) |
2127 | { | 2127 | { |
2128 | return __skb_tx_hash(dev, skb, dev->real_num_tx_queues); | 2128 | return __skb_tx_hash(dev, skb, dev->real_num_tx_queues); |
2129 | } | 2129 | } |
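A driver that wants the stack's default flow hashing in its queue selection hook can simply return this value; with this kernel's two-argument ndo_select_queue the sketch looks like:

static u16 my_select_queue(struct net_device *dev, struct sk_buff *skb)
{
	/* hash the flow into [0, dev->real_num_tx_queues) */
	return skb_tx_hash(dev, skb);
}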
2130 | 2130 | ||
2131 | /** | 2131 | /** |
2132 | * netif_is_multiqueue - test if device has multiple transmit queues | 2132 | * netif_is_multiqueue - test if device has multiple transmit queues |
2133 | * @dev: network device | 2133 | * @dev: network device |
2134 | * | 2134 | * |
2135 | * Check if device has multiple transmit queues | 2135 | * Check if device has multiple transmit queues |
2136 | */ | 2136 | */ |
2137 | static inline bool netif_is_multiqueue(const struct net_device *dev) | 2137 | static inline bool netif_is_multiqueue(const struct net_device *dev) |
2138 | { | 2138 | { |
2139 | return dev->num_tx_queues > 1; | 2139 | return dev->num_tx_queues > 1; |
2140 | } | 2140 | } |
2141 | 2141 | ||
2142 | extern int netif_set_real_num_tx_queues(struct net_device *dev, | 2142 | extern int netif_set_real_num_tx_queues(struct net_device *dev, |
2143 | unsigned int txq); | 2143 | unsigned int txq); |
2144 | 2144 | ||
2145 | #ifdef CONFIG_RPS | 2145 | #ifdef CONFIG_RPS |
2146 | extern int netif_set_real_num_rx_queues(struct net_device *dev, | 2146 | extern int netif_set_real_num_rx_queues(struct net_device *dev, |
2147 | unsigned int rxq); | 2147 | unsigned int rxq); |
2148 | #else | 2148 | #else |
2149 | static inline int netif_set_real_num_rx_queues(struct net_device *dev, | 2149 | static inline int netif_set_real_num_rx_queues(struct net_device *dev, |
2150 | unsigned int rxq) | 2150 | unsigned int rxq) |
2151 | { | 2151 | { |
2152 | return 0; | 2152 | return 0; |
2153 | } | 2153 | } |
2154 | #endif | 2154 | #endif |
2155 | 2155 | ||
2156 | static inline int netif_copy_real_num_queues(struct net_device *to_dev, | 2156 | static inline int netif_copy_real_num_queues(struct net_device *to_dev, |
2157 | const struct net_device *from_dev) | 2157 | const struct net_device *from_dev) |
2158 | { | 2158 | { |
2159 | int err; | 2159 | int err; |
2160 | 2160 | ||
2161 | err = netif_set_real_num_tx_queues(to_dev, | 2161 | err = netif_set_real_num_tx_queues(to_dev, |
2162 | from_dev->real_num_tx_queues); | 2162 | from_dev->real_num_tx_queues); |
2163 | if (err) | 2163 | if (err) |
2164 | return err; | 2164 | return err; |
2165 | #ifdef CONFIG_RPS | 2165 | #ifdef CONFIG_RPS |
2166 | return netif_set_real_num_rx_queues(to_dev, | 2166 | return netif_set_real_num_rx_queues(to_dev, |
2167 | from_dev->real_num_rx_queues); | 2167 | from_dev->real_num_rx_queues); |
2168 | #else | 2168 | #else |
2169 | return 0; | 2169 | return 0; |
2170 | #endif | 2170 | #endif |
2171 | } | 2171 | } |
2172 | 2172 | ||
2173 | #define DEFAULT_MAX_NUM_RSS_QUEUES (8) | 2173 | #define DEFAULT_MAX_NUM_RSS_QUEUES (8) |
2174 | extern int netif_get_num_default_rss_queues(void); | 2174 | extern int netif_get_num_default_rss_queues(void); |
2175 | 2175 | ||
2176 | /* Use this variant when it is known for sure that it | 2176 | /* Use this variant when it is known for sure that it |
2177 | * is executing from hardware interrupt context or with hardware interrupts | 2177 | * is executing from hardware interrupt context or with hardware interrupts |
2178 | * disabled. | 2178 | * disabled. |
2179 | */ | 2179 | */ |
2180 | extern void dev_kfree_skb_irq(struct sk_buff *skb); | 2180 | extern void dev_kfree_skb_irq(struct sk_buff *skb); |
2181 | 2181 | ||
2182 | /* Use this variant in places where it could be invoked | 2182 | /* Use this variant in places where it could be invoked |
2183 | * from either hardware interrupt or other context, with hardware interrupts | 2183 | * from either hardware interrupt or other context, with hardware interrupts |
2184 | * either disabled or enabled. | 2184 | * either disabled or enabled. |
2185 | */ | 2185 | */ |
2186 | extern void dev_kfree_skb_any(struct sk_buff *skb); | 2186 | extern void dev_kfree_skb_any(struct sk_buff *skb); |
2187 | 2187 | ||
2188 | extern int netif_rx(struct sk_buff *skb); | 2188 | extern int netif_rx(struct sk_buff *skb); |
2189 | extern int netif_rx_ni(struct sk_buff *skb); | 2189 | extern int netif_rx_ni(struct sk_buff *skb); |
2190 | extern int netif_receive_skb(struct sk_buff *skb); | 2190 | extern int netif_receive_skb(struct sk_buff *skb); |
2191 | extern gro_result_t napi_gro_receive(struct napi_struct *napi, | 2191 | extern gro_result_t napi_gro_receive(struct napi_struct *napi, |
2192 | struct sk_buff *skb); | 2192 | struct sk_buff *skb); |
2193 | extern void napi_gro_flush(struct napi_struct *napi, bool flush_old); | 2193 | extern void napi_gro_flush(struct napi_struct *napi, bool flush_old); |
2194 | extern struct sk_buff * napi_get_frags(struct napi_struct *napi); | 2194 | extern struct sk_buff * napi_get_frags(struct napi_struct *napi); |
2195 | extern gro_result_t napi_gro_frags(struct napi_struct *napi); | 2195 | extern gro_result_t napi_gro_frags(struct napi_struct *napi); |
2196 | 2196 | ||
2197 | static inline void napi_free_frags(struct napi_struct *napi) | 2197 | static inline void napi_free_frags(struct napi_struct *napi) |
2198 | { | 2198 | { |
2199 | kfree_skb(napi->skb); | 2199 | kfree_skb(napi->skb); |
2200 | napi->skb = NULL; | 2200 | napi->skb = NULL; |
2201 | } | 2201 | } |
2202 | 2202 | ||
2203 | extern int netdev_rx_handler_register(struct net_device *dev, | 2203 | extern int netdev_rx_handler_register(struct net_device *dev, |
2204 | rx_handler_func_t *rx_handler, | 2204 | rx_handler_func_t *rx_handler, |
2205 | void *rx_handler_data); | 2205 | void *rx_handler_data); |
2206 | extern void netdev_rx_handler_unregister(struct net_device *dev); | 2206 | extern void netdev_rx_handler_unregister(struct net_device *dev); |
2207 | 2207 | ||
2208 | extern bool dev_valid_name(const char *name); | 2208 | extern bool dev_valid_name(const char *name); |
2209 | extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); | 2209 | extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); |
2210 | extern int dev_ethtool(struct net *net, struct ifreq *); | 2210 | extern int dev_ethtool(struct net *net, struct ifreq *); |
2211 | extern unsigned int dev_get_flags(const struct net_device *); | 2211 | extern unsigned int dev_get_flags(const struct net_device *); |
2212 | extern int __dev_change_flags(struct net_device *, unsigned int flags); | 2212 | extern int __dev_change_flags(struct net_device *, unsigned int flags); |
2213 | extern int dev_change_flags(struct net_device *, unsigned int); | 2213 | extern int dev_change_flags(struct net_device *, unsigned int); |
2214 | extern void __dev_notify_flags(struct net_device *, unsigned int old_flags); | 2214 | extern void __dev_notify_flags(struct net_device *, unsigned int old_flags); |
2215 | extern int dev_change_name(struct net_device *, const char *); | 2215 | extern int dev_change_name(struct net_device *, const char *); |
2216 | extern int dev_set_alias(struct net_device *, const char *, size_t); | 2216 | extern int dev_set_alias(struct net_device *, const char *, size_t); |
2217 | extern int dev_change_net_namespace(struct net_device *, | 2217 | extern int dev_change_net_namespace(struct net_device *, |
2218 | struct net *, const char *); | 2218 | struct net *, const char *); |
2219 | extern int dev_set_mtu(struct net_device *, int); | 2219 | extern int dev_set_mtu(struct net_device *, int); |
2220 | extern void dev_set_group(struct net_device *, int); | 2220 | extern void dev_set_group(struct net_device *, int); |
2221 | extern int dev_set_mac_address(struct net_device *, | 2221 | extern int dev_set_mac_address(struct net_device *, |
2222 | struct sockaddr *); | 2222 | struct sockaddr *); |
2223 | extern int dev_change_carrier(struct net_device *, | 2223 | extern int dev_change_carrier(struct net_device *, |
2224 | bool new_carrier); | 2224 | bool new_carrier); |
2225 | extern int dev_hard_start_xmit(struct sk_buff *skb, | 2225 | extern int dev_hard_start_xmit(struct sk_buff *skb, |
2226 | struct net_device *dev, | 2226 | struct net_device *dev, |
2227 | struct netdev_queue *txq); | 2227 | struct netdev_queue *txq); |
2228 | extern int dev_forward_skb(struct net_device *dev, | 2228 | extern int dev_forward_skb(struct net_device *dev, |
2229 | struct sk_buff *skb); | 2229 | struct sk_buff *skb); |
2230 | 2230 | ||
2231 | extern int netdev_budget; | 2231 | extern int netdev_budget; |
2232 | 2232 | ||
2233 | /* Called by rtnetlink.c:rtnl_unlock() */ | 2233 | /* Called by rtnetlink.c:rtnl_unlock() */ |
2234 | extern void netdev_run_todo(void); | 2234 | extern void netdev_run_todo(void); |
2235 | 2235 | ||
2236 | /** | 2236 | /** |
2237 | * dev_put - release reference to device | 2237 | * dev_put - release reference to device |
2238 | * @dev: network device | 2238 | * @dev: network device |
2239 | * | 2239 | * |
2240 | * Release reference to device to allow it to be freed. | 2240 | * Release reference to device to allow it to be freed. |
2241 | */ | 2241 | */ |
2242 | static inline void dev_put(struct net_device *dev) | 2242 | static inline void dev_put(struct net_device *dev) |
2243 | { | 2243 | { |
2244 | this_cpu_dec(*dev->pcpu_refcnt); | 2244 | this_cpu_dec(*dev->pcpu_refcnt); |
2245 | } | 2245 | } |
2246 | 2246 | ||
2247 | /** | 2247 | /** |
2248 | * dev_hold - get reference to device | 2248 | * dev_hold - get reference to device |
2249 | * @dev: network device | 2249 | * @dev: network device |
2250 | * | 2250 | * |
2251 | * Hold reference to device to keep it from being freed. | 2251 | * Hold reference to device to keep it from being freed. |
2252 | */ | 2252 | */ |
2253 | static inline void dev_hold(struct net_device *dev) | 2253 | static inline void dev_hold(struct net_device *dev) |
2254 | { | 2254 | { |
2255 | this_cpu_inc(*dev->pcpu_refcnt); | 2255 | this_cpu_inc(*dev->pcpu_refcnt); |
2256 | } | 2256 | } |
2257 | 2257 | ||
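A note on the pair above: every dev_hold() must eventually be balanced by dev_put(), otherwise unregister_netdevice() waits forever on the per-cpu refcount. A minimal sketch of the usual pattern; init_net, "eth0" and do_something_with() are placeholders for illustration, not part of this header:

/* Illustrative only: pin a device across a sleepable section. */
static void example_use_device(void)
{
        struct net_device *dev;

        rcu_read_lock();
        dev = dev_get_by_name_rcu(&init_net, "eth0");
        if (dev)
                dev_hold(dev);          /* pin it before leaving the RCU section */
        rcu_read_unlock();

        if (!dev)
                return;

        do_something_with(dev);         /* hypothetical helper doing real work */
        dev_put(dev);                   /* balance the dev_hold() above */
}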
2258 | /* Carrier loss detection, dial on demand. The functions netif_carrier_on | 2258 | /* Carrier loss detection, dial on demand. The functions netif_carrier_on |
2259 | * and _off may be called from IRQ context, but it is the caller | 2259 | * and _off may be called from IRQ context, but it is the caller |
2260 | * who is responsible for serializing these calls. | 2260 | * who is responsible for serializing these calls. |
2261 | * | 2261 | * |
2262 | * The name carrier is inappropriate; these functions should really be | 2262 | * The name carrier is inappropriate; these functions should really be |
2263 | * called netif_lowerlayer_*() because they represent the state of any | 2263 | * called netif_lowerlayer_*() because they represent the state of any |
2264 | * kind of lower layer not just hardware media. | 2264 | * kind of lower layer not just hardware media. |
2265 | */ | 2265 | */ |
2266 | 2266 | ||
2267 | extern void linkwatch_init_dev(struct net_device *dev); | 2267 | extern void linkwatch_init_dev(struct net_device *dev); |
2268 | extern void linkwatch_fire_event(struct net_device *dev); | 2268 | extern void linkwatch_fire_event(struct net_device *dev); |
2269 | extern void linkwatch_forget_dev(struct net_device *dev); | 2269 | extern void linkwatch_forget_dev(struct net_device *dev); |
2270 | 2270 | ||
2271 | /** | 2271 | /** |
2272 | * netif_carrier_ok - test if carrier present | 2272 | * netif_carrier_ok - test if carrier present |
2273 | * @dev: network device | 2273 | * @dev: network device |
2274 | * | 2274 | * |
2275 | * Check if carrier is present on device | 2275 | * Check if carrier is present on device |
2276 | */ | 2276 | */ |
2277 | static inline bool netif_carrier_ok(const struct net_device *dev) | 2277 | static inline bool netif_carrier_ok(const struct net_device *dev) |
2278 | { | 2278 | { |
2279 | return !test_bit(__LINK_STATE_NOCARRIER, &dev->state); | 2279 | return !test_bit(__LINK_STATE_NOCARRIER, &dev->state); |
2280 | } | 2280 | } |
2281 | 2281 | ||
2282 | extern unsigned long dev_trans_start(struct net_device *dev); | 2282 | extern unsigned long dev_trans_start(struct net_device *dev); |
2283 | 2283 | ||
2284 | extern void __netdev_watchdog_up(struct net_device *dev); | 2284 | extern void __netdev_watchdog_up(struct net_device *dev); |
2285 | 2285 | ||
2286 | extern void netif_carrier_on(struct net_device *dev); | 2286 | extern void netif_carrier_on(struct net_device *dev); |
2287 | 2287 | ||
2288 | extern void netif_carrier_off(struct net_device *dev); | 2288 | extern void netif_carrier_off(struct net_device *dev); |
2289 | 2289 | ||
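Drivers typically drive netif_carrier_on()/netif_carrier_off() from a link-change interrupt or polling routine. A hedged sketch; mydrv_read_phy_link() is a hypothetical stand-in for whatever link indication the hardware provides:

/* Hypothetical link-status handler in a driver. */
static void mydrv_check_link(struct net_device *dev)
{
        bool up = mydrv_read_phy_link(dev);     /* stand-in for the real PHY read */

        if (up && !netif_carrier_ok(dev))
                netif_carrier_on(dev);          /* wakes the watchdog and TX path */
        else if (!up && netif_carrier_ok(dev))
                netif_carrier_off(dev);
}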
2290 | /** | 2290 | /** |
2291 | * netif_dormant_on - mark device as dormant. | 2291 | * netif_dormant_on - mark device as dormant. |
2292 | * @dev: network device | 2292 | * @dev: network device |
2293 | * | 2293 | * |
2294 | * Mark device as dormant (as per RFC2863). | 2294 | * Mark device as dormant (as per RFC2863). |
2295 | * | 2295 | * |
2296 | * The dormant state indicates that the relevant interface is not | 2296 | * The dormant state indicates that the relevant interface is not |
2297 | * actually in a condition to pass packets (i.e., it is not 'up') but is | 2297 | * actually in a condition to pass packets (i.e., it is not 'up') but is |
2298 | * in a "pending" state, waiting for some external event. For "on- | 2298 | * in a "pending" state, waiting for some external event. For "on- |
2299 | * demand" interfaces, this new state identifies the situation where the | 2299 | * demand" interfaces, this new state identifies the situation where the |
2300 | * interface is waiting for events to place it in the up state. | 2300 | * interface is waiting for events to place it in the up state. |
2301 | * | 2301 | * |
2302 | */ | 2302 | */ |
2303 | static inline void netif_dormant_on(struct net_device *dev) | 2303 | static inline void netif_dormant_on(struct net_device *dev) |
2304 | { | 2304 | { |
2305 | if (!test_and_set_bit(__LINK_STATE_DORMANT, &dev->state)) | 2305 | if (!test_and_set_bit(__LINK_STATE_DORMANT, &dev->state)) |
2306 | linkwatch_fire_event(dev); | 2306 | linkwatch_fire_event(dev); |
2307 | } | 2307 | } |
2308 | 2308 | ||
2309 | /** | 2309 | /** |
2310 | * netif_dormant_off - set device as not dormant. | 2310 | * netif_dormant_off - set device as not dormant. |
2311 | * @dev: network device | 2311 | * @dev: network device |
2312 | * | 2312 | * |
2313 | * Mark device as no longer dormant. | 2313 | * Mark device as no longer dormant. |
2314 | */ | 2314 | */ |
2315 | static inline void netif_dormant_off(struct net_device *dev) | 2315 | static inline void netif_dormant_off(struct net_device *dev) |
2316 | { | 2316 | { |
2317 | if (test_and_clear_bit(__LINK_STATE_DORMANT, &dev->state)) | 2317 | if (test_and_clear_bit(__LINK_STATE_DORMANT, &dev->state)) |
2318 | linkwatch_fire_event(dev); | 2318 | linkwatch_fire_event(dev); |
2319 | } | 2319 | } |
2320 | 2320 | ||
2321 | /** | 2321 | /** |
2322 | * netif_dormant - test if device is dormant | 2322 | * netif_dormant - test if device is dormant |
2323 | * @dev: network device | 2323 | * @dev: network device |
2324 | * | 2324 | * |
2325 | * Check if the device is in the RFC2863 dormant state | 2325 | * Check if the device is in the RFC2863 dormant state |
2326 | */ | 2326 | */ |
2327 | static inline bool netif_dormant(const struct net_device *dev) | 2327 | static inline bool netif_dormant(const struct net_device *dev) |
2328 | { | 2328 | { |
2329 | return test_bit(__LINK_STATE_DORMANT, &dev->state); | 2329 | return test_bit(__LINK_STATE_DORMANT, &dev->state); |
2330 | } | 2330 | } |
2331 | 2331 | ||
2332 | 2332 | ||
2333 | /** | 2333 | /** |
2334 | * netif_oper_up - test if device is operational | 2334 | * netif_oper_up - test if device is operational |
2335 | * @dev: network device | 2335 | * @dev: network device |
2336 | * | 2336 | * |
2337 | * Check if the device's operational state is up | 2337 | * Check if the device's operational state is up |
2338 | */ | 2338 | */ |
2339 | static inline bool netif_oper_up(const struct net_device *dev) | 2339 | static inline bool netif_oper_up(const struct net_device *dev) |
2340 | { | 2340 | { |
2341 | return (dev->operstate == IF_OPER_UP || | 2341 | return (dev->operstate == IF_OPER_UP || |
2342 | dev->operstate == IF_OPER_UNKNOWN /* backward compat */); | 2342 | dev->operstate == IF_OPER_UNKNOWN /* backward compat */); |
2343 | } | 2343 | } |
2344 | 2344 | ||
2345 | /** | 2345 | /** |
2346 | * netif_device_present - is device available or removed | 2346 | * netif_device_present - is device available or removed |
2347 | * @dev: network device | 2347 | * @dev: network device |
2348 | * | 2348 | * |
2349 | * Check if device has not been removed from system. | 2349 | * Check if device has not been removed from system. |
2350 | */ | 2350 | */ |
2351 | static inline bool netif_device_present(struct net_device *dev) | 2351 | static inline bool netif_device_present(struct net_device *dev) |
2352 | { | 2352 | { |
2353 | return test_bit(__LINK_STATE_PRESENT, &dev->state); | 2353 | return test_bit(__LINK_STATE_PRESENT, &dev->state); |
2354 | } | 2354 | } |
2355 | 2355 | ||
2356 | extern void netif_device_detach(struct net_device *dev); | 2356 | extern void netif_device_detach(struct net_device *dev); |
2357 | 2357 | ||
2358 | extern void netif_device_attach(struct net_device *dev); | 2358 | extern void netif_device_attach(struct net_device *dev); |
2359 | 2359 | ||
2360 | /* | 2360 | /* |
2361 | * Network interface message level settings | 2361 | * Network interface message level settings |
2362 | */ | 2362 | */ |
2363 | 2363 | ||
2364 | enum { | 2364 | enum { |
2365 | NETIF_MSG_DRV = 0x0001, | 2365 | NETIF_MSG_DRV = 0x0001, |
2366 | NETIF_MSG_PROBE = 0x0002, | 2366 | NETIF_MSG_PROBE = 0x0002, |
2367 | NETIF_MSG_LINK = 0x0004, | 2367 | NETIF_MSG_LINK = 0x0004, |
2368 | NETIF_MSG_TIMER = 0x0008, | 2368 | NETIF_MSG_TIMER = 0x0008, |
2369 | NETIF_MSG_IFDOWN = 0x0010, | 2369 | NETIF_MSG_IFDOWN = 0x0010, |
2370 | NETIF_MSG_IFUP = 0x0020, | 2370 | NETIF_MSG_IFUP = 0x0020, |
2371 | NETIF_MSG_RX_ERR = 0x0040, | 2371 | NETIF_MSG_RX_ERR = 0x0040, |
2372 | NETIF_MSG_TX_ERR = 0x0080, | 2372 | NETIF_MSG_TX_ERR = 0x0080, |
2373 | NETIF_MSG_TX_QUEUED = 0x0100, | 2373 | NETIF_MSG_TX_QUEUED = 0x0100, |
2374 | NETIF_MSG_INTR = 0x0200, | 2374 | NETIF_MSG_INTR = 0x0200, |
2375 | NETIF_MSG_TX_DONE = 0x0400, | 2375 | NETIF_MSG_TX_DONE = 0x0400, |
2376 | NETIF_MSG_RX_STATUS = 0x0800, | 2376 | NETIF_MSG_RX_STATUS = 0x0800, |
2377 | NETIF_MSG_PKTDATA = 0x1000, | 2377 | NETIF_MSG_PKTDATA = 0x1000, |
2378 | NETIF_MSG_HW = 0x2000, | 2378 | NETIF_MSG_HW = 0x2000, |
2379 | NETIF_MSG_WOL = 0x4000, | 2379 | NETIF_MSG_WOL = 0x4000, |
2380 | }; | 2380 | }; |
2381 | 2381 | ||
2382 | #define netif_msg_drv(p) ((p)->msg_enable & NETIF_MSG_DRV) | 2382 | #define netif_msg_drv(p) ((p)->msg_enable & NETIF_MSG_DRV) |
2383 | #define netif_msg_probe(p) ((p)->msg_enable & NETIF_MSG_PROBE) | 2383 | #define netif_msg_probe(p) ((p)->msg_enable & NETIF_MSG_PROBE) |
2384 | #define netif_msg_link(p) ((p)->msg_enable & NETIF_MSG_LINK) | 2384 | #define netif_msg_link(p) ((p)->msg_enable & NETIF_MSG_LINK) |
2385 | #define netif_msg_timer(p) ((p)->msg_enable & NETIF_MSG_TIMER) | 2385 | #define netif_msg_timer(p) ((p)->msg_enable & NETIF_MSG_TIMER) |
2386 | #define netif_msg_ifdown(p) ((p)->msg_enable & NETIF_MSG_IFDOWN) | 2386 | #define netif_msg_ifdown(p) ((p)->msg_enable & NETIF_MSG_IFDOWN) |
2387 | #define netif_msg_ifup(p) ((p)->msg_enable & NETIF_MSG_IFUP) | 2387 | #define netif_msg_ifup(p) ((p)->msg_enable & NETIF_MSG_IFUP) |
2388 | #define netif_msg_rx_err(p) ((p)->msg_enable & NETIF_MSG_RX_ERR) | 2388 | #define netif_msg_rx_err(p) ((p)->msg_enable & NETIF_MSG_RX_ERR) |
2389 | #define netif_msg_tx_err(p) ((p)->msg_enable & NETIF_MSG_TX_ERR) | 2389 | #define netif_msg_tx_err(p) ((p)->msg_enable & NETIF_MSG_TX_ERR) |
2390 | #define netif_msg_tx_queued(p) ((p)->msg_enable & NETIF_MSG_TX_QUEUED) | 2390 | #define netif_msg_tx_queued(p) ((p)->msg_enable & NETIF_MSG_TX_QUEUED) |
2391 | #define netif_msg_intr(p) ((p)->msg_enable & NETIF_MSG_INTR) | 2391 | #define netif_msg_intr(p) ((p)->msg_enable & NETIF_MSG_INTR) |
2392 | #define netif_msg_tx_done(p) ((p)->msg_enable & NETIF_MSG_TX_DONE) | 2392 | #define netif_msg_tx_done(p) ((p)->msg_enable & NETIF_MSG_TX_DONE) |
2393 | #define netif_msg_rx_status(p) ((p)->msg_enable & NETIF_MSG_RX_STATUS) | 2393 | #define netif_msg_rx_status(p) ((p)->msg_enable & NETIF_MSG_RX_STATUS) |
2394 | #define netif_msg_pktdata(p) ((p)->msg_enable & NETIF_MSG_PKTDATA) | 2394 | #define netif_msg_pktdata(p) ((p)->msg_enable & NETIF_MSG_PKTDATA) |
2395 | #define netif_msg_hw(p) ((p)->msg_enable & NETIF_MSG_HW) | 2395 | #define netif_msg_hw(p) ((p)->msg_enable & NETIF_MSG_HW) |
2396 | #define netif_msg_wol(p) ((p)->msg_enable & NETIF_MSG_WOL) | 2396 | #define netif_msg_wol(p) ((p)->msg_enable & NETIF_MSG_WOL) |
2397 | 2397 | ||
2398 | static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) | 2398 | static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) |
2399 | { | 2399 | { |
2400 | /* use default */ | 2400 | /* use default */ |
2401 | if (debug_value < 0 || debug_value >= (sizeof(u32) * 8)) | 2401 | if (debug_value < 0 || debug_value >= (sizeof(u32) * 8)) |
2402 | return default_msg_enable_bits; | 2402 | return default_msg_enable_bits; |
2403 | if (debug_value == 0) /* no output */ | 2403 | if (debug_value == 0) /* no output */ |
2404 | return 0; | 2404 | return 0; |
2405 | /* set low N bits */ | 2405 | /* set low N bits */ |
2406 | return (1 << debug_value) - 1; | 2406 | return (1 << debug_value) - 1; |
2407 | } | 2407 | } |
2408 | 2408 | ||
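netif_msg_init() is the conventional bridge from a driver's 'debug' module parameter to its msg_enable bitmap: out-of-range values fall back to the driver defaults, and a value of N enables the lowest N message classes. A sketch assuming a hypothetical mydrv driver whose private struct carries a msg_enable field:

static int debug = -1;                  /* -1: out of range, use the default bits */
module_param(debug, int, 0);
MODULE_PARM_DESC(debug, "Message level bitmap (see NETIF_MSG_*)");

#define MYDRV_DEFAULT_MSG (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)

static int mydrv_probe_one(struct net_device *dev)
{
        struct mydrv_priv *priv = netdev_priv(dev);     /* hypothetical priv struct */

        priv->msg_enable = netif_msg_init(debug, MYDRV_DEFAULT_MSG);
        return 0;
}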
2409 | static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) | 2409 | static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) |
2410 | { | 2410 | { |
2411 | spin_lock(&txq->_xmit_lock); | 2411 | spin_lock(&txq->_xmit_lock); |
2412 | txq->xmit_lock_owner = cpu; | 2412 | txq->xmit_lock_owner = cpu; |
2413 | } | 2413 | } |
2414 | 2414 | ||
2415 | static inline void __netif_tx_lock_bh(struct netdev_queue *txq) | 2415 | static inline void __netif_tx_lock_bh(struct netdev_queue *txq) |
2416 | { | 2416 | { |
2417 | spin_lock_bh(&txq->_xmit_lock); | 2417 | spin_lock_bh(&txq->_xmit_lock); |
2418 | txq->xmit_lock_owner = smp_processor_id(); | 2418 | txq->xmit_lock_owner = smp_processor_id(); |
2419 | } | 2419 | } |
2420 | 2420 | ||
2421 | static inline bool __netif_tx_trylock(struct netdev_queue *txq) | 2421 | static inline bool __netif_tx_trylock(struct netdev_queue *txq) |
2422 | { | 2422 | { |
2423 | bool ok = spin_trylock(&txq->_xmit_lock); | 2423 | bool ok = spin_trylock(&txq->_xmit_lock); |
2424 | if (likely(ok)) | 2424 | if (likely(ok)) |
2425 | txq->xmit_lock_owner = smp_processor_id(); | 2425 | txq->xmit_lock_owner = smp_processor_id(); |
2426 | return ok; | 2426 | return ok; |
2427 | } | 2427 | } |
2428 | 2428 | ||
2429 | static inline void __netif_tx_unlock(struct netdev_queue *txq) | 2429 | static inline void __netif_tx_unlock(struct netdev_queue *txq) |
2430 | { | 2430 | { |
2431 | txq->xmit_lock_owner = -1; | 2431 | txq->xmit_lock_owner = -1; |
2432 | spin_unlock(&txq->_xmit_lock); | 2432 | spin_unlock(&txq->_xmit_lock); |
2433 | } | 2433 | } |
2434 | 2434 | ||
2435 | static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) | 2435 | static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) |
2436 | { | 2436 | { |
2437 | txq->xmit_lock_owner = -1; | 2437 | txq->xmit_lock_owner = -1; |
2438 | spin_unlock_bh(&txq->_xmit_lock); | 2438 | spin_unlock_bh(&txq->_xmit_lock); |
2439 | } | 2439 | } |
2440 | 2440 | ||
2441 | static inline void txq_trans_update(struct netdev_queue *txq) | 2441 | static inline void txq_trans_update(struct netdev_queue *txq) |
2442 | { | 2442 | { |
2443 | if (txq->xmit_lock_owner != -1) | 2443 | if (txq->xmit_lock_owner != -1) |
2444 | txq->trans_start = jiffies; | 2444 | txq->trans_start = jiffies; |
2445 | } | 2445 | } |
2446 | 2446 | ||
2447 | /** | 2447 | /** |
2448 | * netif_tx_lock - grab network device transmit lock | 2448 | * netif_tx_lock - grab network device transmit lock |
2449 | * @dev: network device | 2449 | * @dev: network device |
2450 | * | 2450 | * |
2451 | * Get network device transmit lock | 2451 | * Get network device transmit lock |
2452 | */ | 2452 | */ |
2453 | static inline void netif_tx_lock(struct net_device *dev) | 2453 | static inline void netif_tx_lock(struct net_device *dev) |
2454 | { | 2454 | { |
2455 | unsigned int i; | 2455 | unsigned int i; |
2456 | int cpu; | 2456 | int cpu; |
2457 | 2457 | ||
2458 | spin_lock(&dev->tx_global_lock); | 2458 | spin_lock(&dev->tx_global_lock); |
2459 | cpu = smp_processor_id(); | 2459 | cpu = smp_processor_id(); |
2460 | for (i = 0; i < dev->num_tx_queues; i++) { | 2460 | for (i = 0; i < dev->num_tx_queues; i++) { |
2461 | struct netdev_queue *txq = netdev_get_tx_queue(dev, i); | 2461 | struct netdev_queue *txq = netdev_get_tx_queue(dev, i); |
2462 | 2462 | ||
2463 | /* We are the only thread of execution doing a | 2463 | /* We are the only thread of execution doing a |
2464 | * freeze, but we have to grab the _xmit_lock in | 2464 | * freeze, but we have to grab the _xmit_lock in |
2465 | * order to synchronize with threads which are in | 2465 | * order to synchronize with threads which are in |
2466 | * the ->hard_start_xmit() handler and already | 2466 | * the ->hard_start_xmit() handler and already |
2467 | * checked the frozen bit. | 2467 | * checked the frozen bit. |
2468 | */ | 2468 | */ |
2469 | __netif_tx_lock(txq, cpu); | 2469 | __netif_tx_lock(txq, cpu); |
2470 | set_bit(__QUEUE_STATE_FROZEN, &txq->state); | 2470 | set_bit(__QUEUE_STATE_FROZEN, &txq->state); |
2471 | __netif_tx_unlock(txq); | 2471 | __netif_tx_unlock(txq); |
2472 | } | 2472 | } |
2473 | } | 2473 | } |
2474 | 2474 | ||
2475 | static inline void netif_tx_lock_bh(struct net_device *dev) | 2475 | static inline void netif_tx_lock_bh(struct net_device *dev) |
2476 | { | 2476 | { |
2477 | local_bh_disable(); | 2477 | local_bh_disable(); |
2478 | netif_tx_lock(dev); | 2478 | netif_tx_lock(dev); |
2479 | } | 2479 | } |
2480 | 2480 | ||
2481 | static inline void netif_tx_unlock(struct net_device *dev) | 2481 | static inline void netif_tx_unlock(struct net_device *dev) |
2482 | { | 2482 | { |
2483 | unsigned int i; | 2483 | unsigned int i; |
2484 | 2484 | ||
2485 | for (i = 0; i < dev->num_tx_queues; i++) { | 2485 | for (i = 0; i < dev->num_tx_queues; i++) { |
2486 | struct netdev_queue *txq = netdev_get_tx_queue(dev, i); | 2486 | struct netdev_queue *txq = netdev_get_tx_queue(dev, i); |
2487 | 2487 | ||
2488 | /* No need to grab the _xmit_lock here. If the | 2488 | /* No need to grab the _xmit_lock here. If the |
2489 | * queue is not stopped for another reason, we | 2489 | * queue is not stopped for another reason, we |
2490 | * force a schedule. | 2490 | * force a schedule. |
2491 | */ | 2491 | */ |
2492 | clear_bit(__QUEUE_STATE_FROZEN, &txq->state); | 2492 | clear_bit(__QUEUE_STATE_FROZEN, &txq->state); |
2493 | netif_schedule_queue(txq); | 2493 | netif_schedule_queue(txq); |
2494 | } | 2494 | } |
2495 | spin_unlock(&dev->tx_global_lock); | 2495 | spin_unlock(&dev->tx_global_lock); |
2496 | } | 2496 | } |
2497 | 2497 | ||
2498 | static inline void netif_tx_unlock_bh(struct net_device *dev) | 2498 | static inline void netif_tx_unlock_bh(struct net_device *dev) |
2499 | { | 2499 | { |
2500 | netif_tx_unlock(dev); | 2500 | netif_tx_unlock(dev); |
2501 | local_bh_enable(); | 2501 | local_bh_enable(); |
2502 | } | 2502 | } |
2503 | 2503 | ||
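netif_tx_lock()/netif_tx_unlock() (and the _bh variants) freeze every TX queue of the device, so they suit operations that must not race with any in-flight ->ndo_start_xmit(). A rough sketch; the ring helpers are hypothetical:

/* Illustrative reset path: keep all xmit handlers out while rings are rebuilt. */
static void mydrv_reinit_tx(struct net_device *dev)
{
        netif_tx_lock_bh(dev);          /* freeze every TX queue, BHs disabled */
        mydrv_free_tx_rings(dev);       /* hypothetical helpers */
        mydrv_alloc_tx_rings(dev);
        netif_tx_unlock_bh(dev);        /* unfreeze and reschedule the queues */
}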
2504 | #define HARD_TX_LOCK(dev, txq, cpu) { \ | 2504 | #define HARD_TX_LOCK(dev, txq, cpu) { \ |
2505 | if ((dev->features & NETIF_F_LLTX) == 0) { \ | 2505 | if ((dev->features & NETIF_F_LLTX) == 0) { \ |
2506 | __netif_tx_lock(txq, cpu); \ | 2506 | __netif_tx_lock(txq, cpu); \ |
2507 | } \ | 2507 | } \ |
2508 | } | 2508 | } |
2509 | 2509 | ||
2510 | #define HARD_TX_UNLOCK(dev, txq) { \ | 2510 | #define HARD_TX_UNLOCK(dev, txq) { \ |
2511 | if ((dev->features & NETIF_F_LLTX) == 0) { \ | 2511 | if ((dev->features & NETIF_F_LLTX) == 0) { \ |
2512 | __netif_tx_unlock(txq); \ | 2512 | __netif_tx_unlock(txq); \ |
2513 | } \ | 2513 | } \ |
2514 | } | 2514 | } |
2515 | 2515 | ||
2516 | static inline void netif_tx_disable(struct net_device *dev) | 2516 | static inline void netif_tx_disable(struct net_device *dev) |
2517 | { | 2517 | { |
2518 | unsigned int i; | 2518 | unsigned int i; |
2519 | int cpu; | 2519 | int cpu; |
2520 | 2520 | ||
2521 | local_bh_disable(); | 2521 | local_bh_disable(); |
2522 | cpu = smp_processor_id(); | 2522 | cpu = smp_processor_id(); |
2523 | for (i = 0; i < dev->num_tx_queues; i++) { | 2523 | for (i = 0; i < dev->num_tx_queues; i++) { |
2524 | struct netdev_queue *txq = netdev_get_tx_queue(dev, i); | 2524 | struct netdev_queue *txq = netdev_get_tx_queue(dev, i); |
2525 | 2525 | ||
2526 | __netif_tx_lock(txq, cpu); | 2526 | __netif_tx_lock(txq, cpu); |
2527 | netif_tx_stop_queue(txq); | 2527 | netif_tx_stop_queue(txq); |
2528 | __netif_tx_unlock(txq); | 2528 | __netif_tx_unlock(txq); |
2529 | } | 2529 | } |
2530 | local_bh_enable(); | 2530 | local_bh_enable(); |
2531 | } | 2531 | } |
2532 | 2532 | ||
2533 | static inline void netif_addr_lock(struct net_device *dev) | 2533 | static inline void netif_addr_lock(struct net_device *dev) |
2534 | { | 2534 | { |
2535 | spin_lock(&dev->addr_list_lock); | 2535 | spin_lock(&dev->addr_list_lock); |
2536 | } | 2536 | } |
2537 | 2537 | ||
2538 | static inline void netif_addr_lock_nested(struct net_device *dev) | 2538 | static inline void netif_addr_lock_nested(struct net_device *dev) |
2539 | { | 2539 | { |
2540 | spin_lock_nested(&dev->addr_list_lock, SINGLE_DEPTH_NESTING); | 2540 | spin_lock_nested(&dev->addr_list_lock, SINGLE_DEPTH_NESTING); |
2541 | } | 2541 | } |
2542 | 2542 | ||
2543 | static inline void netif_addr_lock_bh(struct net_device *dev) | 2543 | static inline void netif_addr_lock_bh(struct net_device *dev) |
2544 | { | 2544 | { |
2545 | spin_lock_bh(&dev->addr_list_lock); | 2545 | spin_lock_bh(&dev->addr_list_lock); |
2546 | } | 2546 | } |
2547 | 2547 | ||
2548 | static inline void netif_addr_unlock(struct net_device *dev) | 2548 | static inline void netif_addr_unlock(struct net_device *dev) |
2549 | { | 2549 | { |
2550 | spin_unlock(&dev->addr_list_lock); | 2550 | spin_unlock(&dev->addr_list_lock); |
2551 | } | 2551 | } |
2552 | 2552 | ||
2553 | static inline void netif_addr_unlock_bh(struct net_device *dev) | 2553 | static inline void netif_addr_unlock_bh(struct net_device *dev) |
2554 | { | 2554 | { |
2555 | spin_unlock_bh(&dev->addr_list_lock); | 2555 | spin_unlock_bh(&dev->addr_list_lock); |
2556 | } | 2556 | } |
2557 | 2557 | ||
2558 | /* | 2558 | /* |
2559 | * dev_addrs walker. Should be used only for read access. Call with | 2559 | * dev_addrs walker. Should be used only for read access. Call with |
2560 | * rcu_read_lock held. | 2560 | * rcu_read_lock held. |
2561 | */ | 2561 | */ |
2562 | #define for_each_dev_addr(dev, ha) \ | 2562 | #define for_each_dev_addr(dev, ha) \ |
2563 | list_for_each_entry_rcu(ha, &dev->dev_addrs.list, list) | 2563 | list_for_each_entry_rcu(ha, &dev->dev_addrs.list, list) |
2564 | 2564 | ||
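Following the comment above, a reader of the address list runs under rcu_read_lock(); for instance, a driver refreshing a hypothetical hardware filter might do:

/* Hypothetical refresh of a hardware address filter. */
static void mydrv_program_filters(struct net_device *dev)
{
        struct netdev_hw_addr *ha;

        rcu_read_lock();
        for_each_dev_addr(dev, ha)
                mydrv_add_filter_entry(dev, ha->addr);  /* hypothetical hw write */
        rcu_read_unlock();
}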
2565 | /* These functions live elsewhere (drivers/net/net_init.c, but related) */ | 2565 | /* These functions live elsewhere (drivers/net/net_init.c, but related) */ |
2566 | 2566 | ||
2567 | extern void ether_setup(struct net_device *dev); | 2567 | extern void ether_setup(struct net_device *dev); |
2568 | 2568 | ||
2569 | /* Support for loadable net-drivers */ | 2569 | /* Support for loadable net-drivers */ |
2570 | extern struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, | 2570 | extern struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, |
2571 | void (*setup)(struct net_device *), | 2571 | void (*setup)(struct net_device *), |
2572 | unsigned int txqs, unsigned int rxqs); | 2572 | unsigned int txqs, unsigned int rxqs); |
2573 | #define alloc_netdev(sizeof_priv, name, setup) \ | 2573 | #define alloc_netdev(sizeof_priv, name, setup) \ |
2574 | alloc_netdev_mqs(sizeof_priv, name, setup, 1, 1) | 2574 | alloc_netdev_mqs(sizeof_priv, name, setup, 1, 1) |
2575 | 2575 | ||
2576 | #define alloc_netdev_mq(sizeof_priv, name, setup, count) \ | 2576 | #define alloc_netdev_mq(sizeof_priv, name, setup, count) \ |
2577 | alloc_netdev_mqs(sizeof_priv, name, setup, count, count) | 2577 | alloc_netdev_mqs(sizeof_priv, name, setup, count, count) |
2578 | 2578 | ||
2579 | extern int register_netdev(struct net_device *dev); | 2579 | extern int register_netdev(struct net_device *dev); |
2580 | extern void unregister_netdev(struct net_device *dev); | 2580 | extern void unregister_netdev(struct net_device *dev); |
2581 | 2581 | ||
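Taken together, the usual driver-side bring-up built on these helpers looks roughly like the sketch below; struct mydrv_priv and the "mydrv%d" name template are purely illustrative:

struct mydrv_priv {
        u32 msg_enable;
        /* ... further device state ... */
};

static int mydrv_create(void)
{
        struct net_device *dev;
        int err;

        dev = alloc_netdev(sizeof(struct mydrv_priv), "mydrv%d", ether_setup);
        if (!dev)
                return -ENOMEM;

        err = register_netdev(dev);
        if (err)
                free_netdev(dev);       /* registration failed, drop the allocation */
        return err;
}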
2582 | /* General hardware address lists handling functions */ | 2582 | /* General hardware address lists handling functions */ |
2583 | extern int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, | 2583 | extern int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, |
2584 | struct netdev_hw_addr_list *from_list, | 2584 | struct netdev_hw_addr_list *from_list, |
2585 | int addr_len, unsigned char addr_type); | 2585 | int addr_len, unsigned char addr_type); |
2586 | extern void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, | 2586 | extern void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, |
2587 | struct netdev_hw_addr_list *from_list, | 2587 | struct netdev_hw_addr_list *from_list, |
2588 | int addr_len, unsigned char addr_type); | 2588 | int addr_len, unsigned char addr_type); |
2589 | extern int __hw_addr_sync(struct netdev_hw_addr_list *to_list, | 2589 | extern int __hw_addr_sync(struct netdev_hw_addr_list *to_list, |
2590 | struct netdev_hw_addr_list *from_list, | 2590 | struct netdev_hw_addr_list *from_list, |
2591 | int addr_len); | 2591 | int addr_len); |
2592 | extern void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, | 2592 | extern void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, |
2593 | struct netdev_hw_addr_list *from_list, | 2593 | struct netdev_hw_addr_list *from_list, |
2594 | int addr_len); | 2594 | int addr_len); |
2595 | extern void __hw_addr_flush(struct netdev_hw_addr_list *list); | 2595 | extern void __hw_addr_flush(struct netdev_hw_addr_list *list); |
2596 | extern void __hw_addr_init(struct netdev_hw_addr_list *list); | 2596 | extern void __hw_addr_init(struct netdev_hw_addr_list *list); |
2597 | 2597 | ||
2598 | /* Functions used for device addresses handling */ | 2598 | /* Functions used for device addresses handling */ |
2599 | extern int dev_addr_add(struct net_device *dev, const unsigned char *addr, | 2599 | extern int dev_addr_add(struct net_device *dev, const unsigned char *addr, |
2600 | unsigned char addr_type); | 2600 | unsigned char addr_type); |
2601 | extern int dev_addr_del(struct net_device *dev, const unsigned char *addr, | 2601 | extern int dev_addr_del(struct net_device *dev, const unsigned char *addr, |
2602 | unsigned char addr_type); | 2602 | unsigned char addr_type); |
2603 | extern int dev_addr_add_multiple(struct net_device *to_dev, | 2603 | extern int dev_addr_add_multiple(struct net_device *to_dev, |
2604 | struct net_device *from_dev, | 2604 | struct net_device *from_dev, |
2605 | unsigned char addr_type); | 2605 | unsigned char addr_type); |
2606 | extern int dev_addr_del_multiple(struct net_device *to_dev, | 2606 | extern int dev_addr_del_multiple(struct net_device *to_dev, |
2607 | struct net_device *from_dev, | 2607 | struct net_device *from_dev, |
2608 | unsigned char addr_type); | 2608 | unsigned char addr_type); |
2609 | extern void dev_addr_flush(struct net_device *dev); | 2609 | extern void dev_addr_flush(struct net_device *dev); |
2610 | extern int dev_addr_init(struct net_device *dev); | 2610 | extern int dev_addr_init(struct net_device *dev); |
2611 | 2611 | ||
2612 | /* Functions used for unicast addresses handling */ | 2612 | /* Functions used for unicast addresses handling */ |
2613 | extern int dev_uc_add(struct net_device *dev, const unsigned char *addr); | 2613 | extern int dev_uc_add(struct net_device *dev, const unsigned char *addr); |
2614 | extern int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr); | 2614 | extern int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr); |
2615 | extern int dev_uc_del(struct net_device *dev, const unsigned char *addr); | 2615 | extern int dev_uc_del(struct net_device *dev, const unsigned char *addr); |
2616 | extern int dev_uc_sync(struct net_device *to, struct net_device *from); | 2616 | extern int dev_uc_sync(struct net_device *to, struct net_device *from); |
2617 | extern void dev_uc_unsync(struct net_device *to, struct net_device *from); | 2617 | extern void dev_uc_unsync(struct net_device *to, struct net_device *from); |
2618 | extern void dev_uc_flush(struct net_device *dev); | 2618 | extern void dev_uc_flush(struct net_device *dev); |
2619 | extern void dev_uc_init(struct net_device *dev); | 2619 | extern void dev_uc_init(struct net_device *dev); |
2620 | 2620 | ||
2621 | /* Functions used for multicast addresses handling */ | 2621 | /* Functions used for multicast addresses handling */ |
2622 | extern int dev_mc_add(struct net_device *dev, const unsigned char *addr); | 2622 | extern int dev_mc_add(struct net_device *dev, const unsigned char *addr); |
2623 | extern int dev_mc_add_global(struct net_device *dev, const unsigned char *addr); | 2623 | extern int dev_mc_add_global(struct net_device *dev, const unsigned char *addr); |
2624 | extern int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr); | 2624 | extern int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr); |
2625 | extern int dev_mc_del(struct net_device *dev, const unsigned char *addr); | 2625 | extern int dev_mc_del(struct net_device *dev, const unsigned char *addr); |
2626 | extern int dev_mc_del_global(struct net_device *dev, const unsigned char *addr); | 2626 | extern int dev_mc_del_global(struct net_device *dev, const unsigned char *addr); |
2627 | extern int dev_mc_sync(struct net_device *to, struct net_device *from); | 2627 | extern int dev_mc_sync(struct net_device *to, struct net_device *from); |
2628 | extern void dev_mc_unsync(struct net_device *to, struct net_device *from); | 2628 | extern void dev_mc_unsync(struct net_device *to, struct net_device *from); |
2629 | extern void dev_mc_flush(struct net_device *dev); | 2629 | extern void dev_mc_flush(struct net_device *dev); |
2630 | extern void dev_mc_init(struct net_device *dev); | 2630 | extern void dev_mc_init(struct net_device *dev); |
2631 | 2631 | ||
2632 | /* Functions used for secondary unicast and multicast support */ | 2632 | /* Functions used for secondary unicast and multicast support */ |
2633 | extern void dev_set_rx_mode(struct net_device *dev); | 2633 | extern void dev_set_rx_mode(struct net_device *dev); |
2634 | extern void __dev_set_rx_mode(struct net_device *dev); | 2634 | extern void __dev_set_rx_mode(struct net_device *dev); |
2635 | extern int dev_set_promiscuity(struct net_device *dev, int inc); | 2635 | extern int dev_set_promiscuity(struct net_device *dev, int inc); |
2636 | extern int dev_set_allmulti(struct net_device *dev, int inc); | 2636 | extern int dev_set_allmulti(struct net_device *dev, int inc); |
2637 | extern void netdev_state_change(struct net_device *dev); | 2637 | extern void netdev_state_change(struct net_device *dev); |
2638 | extern void netdev_notify_peers(struct net_device *dev); | 2638 | extern void netdev_notify_peers(struct net_device *dev); |
2639 | extern void netdev_features_change(struct net_device *dev); | 2639 | extern void netdev_features_change(struct net_device *dev); |
2640 | /* Load a device via the kmod */ | 2640 | /* Load a device via the kmod */ |
2641 | extern void dev_load(struct net *net, const char *name); | 2641 | extern void dev_load(struct net *net, const char *name); |
2642 | extern void dev_mcast_init(void); | 2642 | extern void dev_mcast_init(void); |
2643 | extern struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, | 2643 | extern struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, |
2644 | struct rtnl_link_stats64 *storage); | 2644 | struct rtnl_link_stats64 *storage); |
2645 | extern void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, | 2645 | extern void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, |
2646 | const struct net_device_stats *netdev_stats); | 2646 | const struct net_device_stats *netdev_stats); |
2647 | 2647 | ||
2648 | extern int netdev_max_backlog; | 2648 | extern int netdev_max_backlog; |
2649 | extern int netdev_tstamp_prequeue; | 2649 | extern int netdev_tstamp_prequeue; |
2650 | extern int weight_p; | 2650 | extern int weight_p; |
2651 | extern int bpf_jit_enable; | 2651 | extern int bpf_jit_enable; |
2652 | 2652 | ||
2653 | extern bool netdev_has_upper_dev(struct net_device *dev, | 2653 | extern bool netdev_has_upper_dev(struct net_device *dev, |
2654 | struct net_device *upper_dev); | 2654 | struct net_device *upper_dev); |
2655 | extern bool netdev_has_any_upper_dev(struct net_device *dev); | 2655 | extern bool netdev_has_any_upper_dev(struct net_device *dev); |
2656 | extern struct net_device *netdev_master_upper_dev_get(struct net_device *dev); | 2656 | extern struct net_device *netdev_master_upper_dev_get(struct net_device *dev); |
2657 | extern struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev); | 2657 | extern struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev); |
2658 | extern int netdev_upper_dev_link(struct net_device *dev, | 2658 | extern int netdev_upper_dev_link(struct net_device *dev, |
2659 | struct net_device *upper_dev); | 2659 | struct net_device *upper_dev); |
2660 | extern int netdev_master_upper_dev_link(struct net_device *dev, | 2660 | extern int netdev_master_upper_dev_link(struct net_device *dev, |
2661 | struct net_device *upper_dev); | 2661 | struct net_device *upper_dev); |
2662 | extern void netdev_upper_dev_unlink(struct net_device *dev, | 2662 | extern void netdev_upper_dev_unlink(struct net_device *dev, |
2663 | struct net_device *upper_dev); | 2663 | struct net_device *upper_dev); |
2664 | extern int skb_checksum_help(struct sk_buff *skb); | 2664 | extern int skb_checksum_help(struct sk_buff *skb); |
2665 | extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, | 2665 | extern struct sk_buff *__skb_gso_segment(struct sk_buff *skb, |
2666 | netdev_features_t features); | 2666 | netdev_features_t features, bool tx_path); |
2667 | |||
2668 | static inline | ||
2669 | struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) | ||
2670 | { | ||
2671 | return __skb_gso_segment(skb, features, true); | ||
2672 | } | ||
2673 | |||
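This hunk is the core of the change: skb_gso_segment() keeps its old signature and behaviour by forwarding tx_path = true, while a receive-path caller can now use __skb_gso_segment(..., false) so that the tx-oriented ->ip_summed check is not applied. A hedged sketch of such an rx-side call; NETIF_F_SG is only a placeholder feature mask here, not necessarily what any in-tree caller passes:

/* Rough shape of a receive-path user after this change; error handling trimmed. */
static int example_rx_segment(struct sk_buff *skb)
{
        struct sk_buff *segs;

        segs = __skb_gso_segment(skb, NETIF_F_SG, false);       /* false: rx path */
        if (IS_ERR(segs))
                return PTR_ERR(segs);

        /* walk the segs->next chain, e.g. queue each segment to user space */
        return 0;
}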
2667 | #ifdef CONFIG_BUG | 2674 | #ifdef CONFIG_BUG |
2668 | extern void netdev_rx_csum_fault(struct net_device *dev); | 2675 | extern void netdev_rx_csum_fault(struct net_device *dev); |
2669 | #else | 2676 | #else |
2670 | static inline void netdev_rx_csum_fault(struct net_device *dev) | 2677 | static inline void netdev_rx_csum_fault(struct net_device *dev) |
2671 | { | 2678 | { |
2672 | } | 2679 | } |
2673 | #endif | 2680 | #endif |
2674 | /* rx skb timestamps */ | 2681 | /* rx skb timestamps */ |
2675 | extern void net_enable_timestamp(void); | 2682 | extern void net_enable_timestamp(void); |
2676 | extern void net_disable_timestamp(void); | 2683 | extern void net_disable_timestamp(void); |
2677 | 2684 | ||
2678 | #ifdef CONFIG_PROC_FS | 2685 | #ifdef CONFIG_PROC_FS |
2679 | extern void *dev_seq_start(struct seq_file *seq, loff_t *pos); | 2686 | extern void *dev_seq_start(struct seq_file *seq, loff_t *pos); |
2680 | extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos); | 2687 | extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos); |
2681 | extern void dev_seq_stop(struct seq_file *seq, void *v); | 2688 | extern void dev_seq_stop(struct seq_file *seq, void *v); |
2682 | #endif | 2689 | #endif |
2683 | 2690 | ||
2684 | extern int netdev_class_create_file(struct class_attribute *class_attr); | 2691 | extern int netdev_class_create_file(struct class_attribute *class_attr); |
2685 | extern void netdev_class_remove_file(struct class_attribute *class_attr); | 2692 | extern void netdev_class_remove_file(struct class_attribute *class_attr); |
2686 | 2693 | ||
2687 | extern struct kobj_ns_type_operations net_ns_type_operations; | 2694 | extern struct kobj_ns_type_operations net_ns_type_operations; |
2688 | 2695 | ||
2689 | extern const char *netdev_drivername(const struct net_device *dev); | 2696 | extern const char *netdev_drivername(const struct net_device *dev); |
2690 | 2697 | ||
2691 | extern void linkwatch_run_queue(void); | 2698 | extern void linkwatch_run_queue(void); |
2692 | 2699 | ||
2693 | static inline netdev_features_t netdev_get_wanted_features( | 2700 | static inline netdev_features_t netdev_get_wanted_features( |
2694 | struct net_device *dev) | 2701 | struct net_device *dev) |
2695 | { | 2702 | { |
2696 | return (dev->features & ~dev->hw_features) | dev->wanted_features; | 2703 | return (dev->features & ~dev->hw_features) | dev->wanted_features; |
2697 | } | 2704 | } |
2698 | netdev_features_t netdev_increment_features(netdev_features_t all, | 2705 | netdev_features_t netdev_increment_features(netdev_features_t all, |
2699 | netdev_features_t one, netdev_features_t mask); | 2706 | netdev_features_t one, netdev_features_t mask); |
2700 | int __netdev_update_features(struct net_device *dev); | 2707 | int __netdev_update_features(struct net_device *dev); |
2701 | void netdev_update_features(struct net_device *dev); | 2708 | void netdev_update_features(struct net_device *dev); |
2702 | void netdev_change_features(struct net_device *dev); | 2709 | void netdev_change_features(struct net_device *dev); |
2703 | 2710 | ||
2704 | void netif_stacked_transfer_operstate(const struct net_device *rootdev, | 2711 | void netif_stacked_transfer_operstate(const struct net_device *rootdev, |
2705 | struct net_device *dev); | 2712 | struct net_device *dev); |
2706 | 2713 | ||
2707 | netdev_features_t netif_skb_features(struct sk_buff *skb); | 2714 | netdev_features_t netif_skb_features(struct sk_buff *skb); |
2708 | 2715 | ||
2709 | static inline bool net_gso_ok(netdev_features_t features, int gso_type) | 2716 | static inline bool net_gso_ok(netdev_features_t features, int gso_type) |
2710 | { | 2717 | { |
2711 | netdev_features_t feature = gso_type << NETIF_F_GSO_SHIFT; | 2718 | netdev_features_t feature = gso_type << NETIF_F_GSO_SHIFT; |
2712 | 2719 | ||
2713 | /* check flags correspondence */ | 2720 | /* check flags correspondence */ |
2714 | BUILD_BUG_ON(SKB_GSO_TCPV4 != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT)); | 2721 | BUILD_BUG_ON(SKB_GSO_TCPV4 != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT)); |
2715 | BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_UFO >> NETIF_F_GSO_SHIFT)); | 2722 | BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_UFO >> NETIF_F_GSO_SHIFT)); |
2716 | BUILD_BUG_ON(SKB_GSO_DODGY != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT)); | 2723 | BUILD_BUG_ON(SKB_GSO_DODGY != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT)); |
2717 | BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT)); | 2724 | BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT)); |
2718 | BUILD_BUG_ON(SKB_GSO_TCPV6 != (NETIF_F_TSO6 >> NETIF_F_GSO_SHIFT)); | 2725 | BUILD_BUG_ON(SKB_GSO_TCPV6 != (NETIF_F_TSO6 >> NETIF_F_GSO_SHIFT)); |
2719 | BUILD_BUG_ON(SKB_GSO_FCOE != (NETIF_F_FSO >> NETIF_F_GSO_SHIFT)); | 2726 | BUILD_BUG_ON(SKB_GSO_FCOE != (NETIF_F_FSO >> NETIF_F_GSO_SHIFT)); |
2720 | 2727 | ||
2721 | return (features & feature) == feature; | 2728 | return (features & feature) == feature; |
2722 | } | 2729 | } |
2723 | 2730 | ||
2724 | static inline bool skb_gso_ok(struct sk_buff *skb, netdev_features_t features) | 2731 | static inline bool skb_gso_ok(struct sk_buff *skb, netdev_features_t features) |
2725 | { | 2732 | { |
2726 | return net_gso_ok(features, skb_shinfo(skb)->gso_type) && | 2733 | return net_gso_ok(features, skb_shinfo(skb)->gso_type) && |
2727 | (!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST)); | 2734 | (!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST)); |
2728 | } | 2735 | } |
2729 | 2736 | ||
2730 | static inline bool netif_needs_gso(struct sk_buff *skb, | 2737 | static inline bool netif_needs_gso(struct sk_buff *skb, |
2731 | netdev_features_t features) | 2738 | netdev_features_t features) |
2732 | { | 2739 | { |
2733 | return skb_is_gso(skb) && (!skb_gso_ok(skb, features) || | 2740 | return skb_is_gso(skb) && (!skb_gso_ok(skb, features) || |
2734 | unlikely((skb->ip_summed != CHECKSUM_PARTIAL) && | 2741 | unlikely((skb->ip_summed != CHECKSUM_PARTIAL) && |
2735 | (skb->ip_summed != CHECKSUM_UNNECESSARY))); | 2742 | (skb->ip_summed != CHECKSUM_UNNECESSARY))); |
2736 | } | 2743 | } |
2737 | 2744 | ||
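These helpers are how the transmit path decides whether software GSO is needed before handing an skb to the driver. A simplified sketch of that decision, not the exact core code path:

/* Simplified sketch of the tx-path decision; not the exact core code. */
static int example_xmit_maybe_segment(struct sk_buff *skb)
{
        netdev_features_t features = netif_skb_features(skb);

        if (netif_needs_gso(skb, features)) {
                struct sk_buff *segs = skb_gso_segment(skb, features);

                if (IS_ERR(segs))
                        return PTR_ERR(segs);
                /* transmit each segment in turn instead of the original skb */
        }
        return 0;
}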
2738 | static inline void netif_set_gso_max_size(struct net_device *dev, | 2745 | static inline void netif_set_gso_max_size(struct net_device *dev, |
2739 | unsigned int size) | 2746 | unsigned int size) |
2740 | { | 2747 | { |
2741 | dev->gso_max_size = size; | 2748 | dev->gso_max_size = size; |
2742 | } | 2749 | } |
2743 | 2750 | ||
2744 | static inline bool netif_is_bond_slave(struct net_device *dev) | 2751 | static inline bool netif_is_bond_slave(struct net_device *dev) |
2745 | { | 2752 | { |
2746 | return dev->flags & IFF_SLAVE && dev->priv_flags & IFF_BONDING; | 2753 | return dev->flags & IFF_SLAVE && dev->priv_flags & IFF_BONDING; |
2747 | } | 2754 | } |
2748 | 2755 | ||
2749 | static inline bool netif_supports_nofcs(struct net_device *dev) | 2756 | static inline bool netif_supports_nofcs(struct net_device *dev) |
2750 | { | 2757 | { |
2751 | return dev->priv_flags & IFF_SUPP_NOFCS; | 2758 | return dev->priv_flags & IFF_SUPP_NOFCS; |
2752 | } | 2759 | } |
2753 | 2760 | ||
2754 | extern struct pernet_operations __net_initdata loopback_net_ops; | 2761 | extern struct pernet_operations __net_initdata loopback_net_ops; |
2755 | 2762 | ||
2756 | /* Logging, debugging and troubleshooting/diagnostic helpers. */ | 2763 | /* Logging, debugging and troubleshooting/diagnostic helpers. */ |
2757 | 2764 | ||
2758 | /* netdev_printk helpers, similar to dev_printk */ | 2765 | /* netdev_printk helpers, similar to dev_printk */ |
2759 | 2766 | ||
2760 | static inline const char *netdev_name(const struct net_device *dev) | 2767 | static inline const char *netdev_name(const struct net_device *dev) |
2761 | { | 2768 | { |
2762 | if (dev->reg_state != NETREG_REGISTERED) | 2769 | if (dev->reg_state != NETREG_REGISTERED) |
2763 | return "(unregistered net_device)"; | 2770 | return "(unregistered net_device)"; |
2764 | return dev->name; | 2771 | return dev->name; |
2765 | } | 2772 | } |
2766 | 2773 | ||
2767 | extern __printf(3, 4) | 2774 | extern __printf(3, 4) |
2768 | int netdev_printk(const char *level, const struct net_device *dev, | 2775 | int netdev_printk(const char *level, const struct net_device *dev, |
2769 | const char *format, ...); | 2776 | const char *format, ...); |
2770 | extern __printf(2, 3) | 2777 | extern __printf(2, 3) |
2771 | int netdev_emerg(const struct net_device *dev, const char *format, ...); | 2778 | int netdev_emerg(const struct net_device *dev, const char *format, ...); |
2772 | extern __printf(2, 3) | 2779 | extern __printf(2, 3) |
2773 | int netdev_alert(const struct net_device *dev, const char *format, ...); | 2780 | int netdev_alert(const struct net_device *dev, const char *format, ...); |
2774 | extern __printf(2, 3) | 2781 | extern __printf(2, 3) |
2775 | int netdev_crit(const struct net_device *dev, const char *format, ...); | 2782 | int netdev_crit(const struct net_device *dev, const char *format, ...); |
2776 | extern __printf(2, 3) | 2783 | extern __printf(2, 3) |
2777 | int netdev_err(const struct net_device *dev, const char *format, ...); | 2784 | int netdev_err(const struct net_device *dev, const char *format, ...); |
2778 | extern __printf(2, 3) | 2785 | extern __printf(2, 3) |
2779 | int netdev_warn(const struct net_device *dev, const char *format, ...); | 2786 | int netdev_warn(const struct net_device *dev, const char *format, ...); |
2780 | extern __printf(2, 3) | 2787 | extern __printf(2, 3) |
2781 | int netdev_notice(const struct net_device *dev, const char *format, ...); | 2788 | int netdev_notice(const struct net_device *dev, const char *format, ...); |
2782 | extern __printf(2, 3) | 2789 | extern __printf(2, 3) |
2783 | int netdev_info(const struct net_device *dev, const char *format, ...); | 2790 | int netdev_info(const struct net_device *dev, const char *format, ...); |
2784 | 2791 | ||
2785 | #define MODULE_ALIAS_NETDEV(device) \ | 2792 | #define MODULE_ALIAS_NETDEV(device) \ |
2786 | MODULE_ALIAS("netdev-" device) | 2793 | MODULE_ALIAS("netdev-" device) |
2787 | 2794 | ||
2788 | #if defined(CONFIG_DYNAMIC_DEBUG) | 2795 | #if defined(CONFIG_DYNAMIC_DEBUG) |
2789 | #define netdev_dbg(__dev, format, args...) \ | 2796 | #define netdev_dbg(__dev, format, args...) \ |
2790 | do { \ | 2797 | do { \ |
2791 | dynamic_netdev_dbg(__dev, format, ##args); \ | 2798 | dynamic_netdev_dbg(__dev, format, ##args); \ |
2792 | } while (0) | 2799 | } while (0) |
2793 | #elif defined(DEBUG) | 2800 | #elif defined(DEBUG) |
2794 | #define netdev_dbg(__dev, format, args...) \ | 2801 | #define netdev_dbg(__dev, format, args...) \ |
2795 | netdev_printk(KERN_DEBUG, __dev, format, ##args) | 2802 | netdev_printk(KERN_DEBUG, __dev, format, ##args) |
2796 | #else | 2803 | #else |
2797 | #define netdev_dbg(__dev, format, args...) \ | 2804 | #define netdev_dbg(__dev, format, args...) \ |
2798 | ({ \ | 2805 | ({ \ |
2799 | if (0) \ | 2806 | if (0) \ |
2800 | netdev_printk(KERN_DEBUG, __dev, format, ##args); \ | 2807 | netdev_printk(KERN_DEBUG, __dev, format, ##args); \ |
2801 | 0; \ | 2808 | 0; \ |
2802 | }) | 2809 | }) |
2803 | #endif | 2810 | #endif |
2804 | 2811 | ||
2805 | #if defined(VERBOSE_DEBUG) | 2812 | #if defined(VERBOSE_DEBUG) |
2806 | #define netdev_vdbg netdev_dbg | 2813 | #define netdev_vdbg netdev_dbg |
2807 | #else | 2814 | #else |
2808 | 2815 | ||
2809 | #define netdev_vdbg(dev, format, args...) \ | 2816 | #define netdev_vdbg(dev, format, args...) \ |
2810 | ({ \ | 2817 | ({ \ |
2811 | if (0) \ | 2818 | if (0) \ |
2812 | netdev_printk(KERN_DEBUG, dev, format, ##args); \ | 2819 | netdev_printk(KERN_DEBUG, dev, format, ##args); \ |
2813 | 0; \ | 2820 | 0; \ |
2814 | }) | 2821 | }) |
2815 | #endif | 2822 | #endif |
2816 | 2823 | ||
2817 | /* | 2824 | /* |
2818 | * netdev_WARN() acts like dev_printk(), but with the key difference | 2825 | * netdev_WARN() acts like dev_printk(), but with the key difference |
2819 | * of using a WARN/WARN_ON to get the message out, including the | 2826 | * of using a WARN/WARN_ON to get the message out, including the |
2820 | * file/line information and a backtrace. | 2827 | * file/line information and a backtrace. |
2821 | */ | 2828 | */ |
2822 | #define netdev_WARN(dev, format, args...) \ | 2829 | #define netdev_WARN(dev, format, args...) \ |
2823 | WARN(1, "netdevice: %s\n" format, netdev_name(dev), ##args); | 2830 | WARN(1, "netdevice: %s\n" format, netdev_name(dev), ##args); |
2824 | 2831 | ||
2825 | /* netif printk helpers, similar to netdev_printk */ | 2832 | /* netif printk helpers, similar to netdev_printk */ |
2826 | 2833 | ||
2827 | #define netif_printk(priv, type, level, dev, fmt, args...) \ | 2834 | #define netif_printk(priv, type, level, dev, fmt, args...) \ |
2828 | do { \ | 2835 | do { \ |
2829 | if (netif_msg_##type(priv)) \ | 2836 | if (netif_msg_##type(priv)) \ |
2830 | netdev_printk(level, (dev), fmt, ##args); \ | 2837 | netdev_printk(level, (dev), fmt, ##args); \ |
2831 | } while (0) | 2838 | } while (0) |
2832 | 2839 | ||
2833 | #define netif_level(level, priv, type, dev, fmt, args...) \ | 2840 | #define netif_level(level, priv, type, dev, fmt, args...) \ |
2834 | do { \ | 2841 | do { \ |
2835 | if (netif_msg_##type(priv)) \ | 2842 | if (netif_msg_##type(priv)) \ |
2836 | netdev_##level(dev, fmt, ##args); \ | 2843 | netdev_##level(dev, fmt, ##args); \ |
2837 | } while (0) | 2844 | } while (0) |
2838 | 2845 | ||
2839 | #define netif_emerg(priv, type, dev, fmt, args...) \ | 2846 | #define netif_emerg(priv, type, dev, fmt, args...) \ |
2840 | netif_level(emerg, priv, type, dev, fmt, ##args) | 2847 | netif_level(emerg, priv, type, dev, fmt, ##args) |
2841 | #define netif_alert(priv, type, dev, fmt, args...) \ | 2848 | #define netif_alert(priv, type, dev, fmt, args...) \ |
2842 | netif_level(alert, priv, type, dev, fmt, ##args) | 2849 | netif_level(alert, priv, type, dev, fmt, ##args) |
2843 | #define netif_crit(priv, type, dev, fmt, args...) \ | 2850 | #define netif_crit(priv, type, dev, fmt, args...) \ |
2844 | netif_level(crit, priv, type, dev, fmt, ##args) | 2851 | netif_level(crit, priv, type, dev, fmt, ##args) |
2845 | #define netif_err(priv, type, dev, fmt, args...) \ | 2852 | #define netif_err(priv, type, dev, fmt, args...) \ |
2846 | netif_level(err, priv, type, dev, fmt, ##args) | 2853 | netif_level(err, priv, type, dev, fmt, ##args) |
2847 | #define netif_warn(priv, type, dev, fmt, args...) \ | 2854 | #define netif_warn(priv, type, dev, fmt, args...) \ |
2848 | netif_level(warn, priv, type, dev, fmt, ##args) | 2855 | netif_level(warn, priv, type, dev, fmt, ##args) |
2849 | #define netif_notice(priv, type, dev, fmt, args...) \ | 2856 | #define netif_notice(priv, type, dev, fmt, args...) \ |
2850 | netif_level(notice, priv, type, dev, fmt, ##args) | 2857 | netif_level(notice, priv, type, dev, fmt, ##args) |
2851 | #define netif_info(priv, type, dev, fmt, args...) \ | 2858 | #define netif_info(priv, type, dev, fmt, args...) \ |
2852 | netif_level(info, priv, type, dev, fmt, ##args) | 2859 | netif_level(info, priv, type, dev, fmt, ##args) |
2853 | 2860 | ||
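Combined with the NETIF_MSG_* bits and netif_msg_init() earlier in this header, these wrappers let a driver gate its diagnostics on priv->msg_enable; for example (priv, speed, duplex and ring_idx are hypothetical driver state):

/* Printed only when NETIF_MSG_LINK is set in priv->msg_enable. */
netif_info(priv, link, dev, "link up, %u Mbps, %s duplex\n",
           speed, duplex ? "full" : "half");

/* Printed only when NETIF_MSG_RX_ERR is set. */
netif_err(priv, rx_err, dev, "RX ring %d overflow\n", ring_idx);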
2854 | #if defined(CONFIG_DYNAMIC_DEBUG) | 2861 | #if defined(CONFIG_DYNAMIC_DEBUG) |
2855 | #define netif_dbg(priv, type, netdev, format, args...) \ | 2862 | #define netif_dbg(priv, type, netdev, format, args...) \ |
2856 | do { \ | 2863 | do { \ |
2857 | if (netif_msg_##type(priv)) \ | 2864 | if (netif_msg_##type(priv)) \ |
2858 | dynamic_netdev_dbg(netdev, format, ##args); \ | 2865 | dynamic_netdev_dbg(netdev, format, ##args); \ |
2859 | } while (0) | 2866 | } while (0) |
2860 | #elif defined(DEBUG) | 2867 | #elif defined(DEBUG) |
2861 | #define netif_dbg(priv, type, dev, format, args...) \ | 2868 | #define netif_dbg(priv, type, dev, format, args...) \ |
2862 | netif_printk(priv, type, KERN_DEBUG, dev, format, ##args) | 2869 | netif_printk(priv, type, KERN_DEBUG, dev, format, ##args) |
2863 | #else | 2870 | #else |
2864 | #define netif_dbg(priv, type, dev, format, args...) \ | 2871 | #define netif_dbg(priv, type, dev, format, args...) \ |
2865 | ({ \ | 2872 | ({ \ |
2866 | if (0) \ | 2873 | if (0) \ |
2867 | netif_printk(priv, type, KERN_DEBUG, dev, format, ##args); \ | 2874 | netif_printk(priv, type, KERN_DEBUG, dev, format, ##args); \ |
2868 | 0; \ | 2875 | 0; \ |
2869 | }) | 2876 | }) |
2870 | #endif | 2877 | #endif |
2871 | 2878 | ||
2872 | #if defined(VERBOSE_DEBUG) | 2879 | #if defined(VERBOSE_DEBUG) |
2873 | #define netif_vdbg netif_dbg | 2880 | #define netif_vdbg netif_dbg |
2874 | #else | 2881 | #else |
2875 | #define netif_vdbg(priv, type, dev, format, args...) \ | 2882 | #define netif_vdbg(priv, type, dev, format, args...) \ |
2876 | ({ \ | 2883 | ({ \ |
2877 | if (0) \ | 2884 | if (0) \ |
2878 | netif_printk(priv, type, KERN_DEBUG, dev, format, ##args); \ | 2885 | netif_printk(priv, type, KERN_DEBUG, dev, format, ##args); \ |
2879 | 0; \ | 2886 | 0; \ |
2880 | }) | 2887 | }) |
2881 | #endif | 2888 | #endif |
2882 | 2889 | ||
2883 | #endif /* _LINUX_NETDEVICE_H */ | 2890 | #endif /* _LINUX_NETDEVICE_H */ |
2884 | 2891 |
net/core/dev.c
1 | /* | 1 | /* |
2 | * NET3 Protocol independent device support routines. | 2 | * NET3 Protocol independent device support routines. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or | 4 | * This program is free software; you can redistribute it and/or |
5 | * modify it under the terms of the GNU General Public License | 5 | * modify it under the terms of the GNU General Public License |
6 | * as published by the Free Software Foundation; either version | 6 | * as published by the Free Software Foundation; either version |
7 | * 2 of the License, or (at your option) any later version. | 7 | * 2 of the License, or (at your option) any later version. |
8 | * | 8 | * |
9 | * Derived from the non IP parts of dev.c 1.0.19 | 9 | * Derived from the non IP parts of dev.c 1.0.19 |
10 | * Authors: Ross Biro | 10 | * Authors: Ross Biro |
11 | * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> | 11 | * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> |
12 | * Mark Evans, <evansmp@uhura.aston.ac.uk> | 12 | * Mark Evans, <evansmp@uhura.aston.ac.uk> |
13 | * | 13 | * |
14 | * Additional Authors: | 14 | * Additional Authors: |
15 | * Florian la Roche <rzsfl@rz.uni-sb.de> | 15 | * Florian la Roche <rzsfl@rz.uni-sb.de> |
16 | * Alan Cox <gw4pts@gw4pts.ampr.org> | 16 | * Alan Cox <gw4pts@gw4pts.ampr.org> |
17 | * David Hinds <dahinds@users.sourceforge.net> | 17 | * David Hinds <dahinds@users.sourceforge.net> |
18 | * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> | 18 | * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> |
19 | * Adam Sulmicki <adam@cfar.umd.edu> | 19 | * Adam Sulmicki <adam@cfar.umd.edu> |
20 | * Pekka Riikonen <priikone@poesidon.pspt.fi> | 20 | * Pekka Riikonen <priikone@poesidon.pspt.fi> |
21 | * | 21 | * |
22 | * Changes: | 22 | * Changes: |
23 | * D.J. Barrow : Fixed bug where dev->refcnt gets set | 23 | * D.J. Barrow : Fixed bug where dev->refcnt gets set |
24 | * to 2 if register_netdev gets called | 24 | * to 2 if register_netdev gets called |
25 | * before net_dev_init & also removed a | 25 | * before net_dev_init & also removed a |
26 | * few lines of code in the process. | 26 | * few lines of code in the process. |
27 | * Alan Cox : device private ioctl copies fields back. | 27 | * Alan Cox : device private ioctl copies fields back. |
28 | * Alan Cox : Transmit queue code does relevant | 28 | * Alan Cox : Transmit queue code does relevant |
29 | * stunts to keep the queue safe. | 29 | * stunts to keep the queue safe. |
30 | * Alan Cox : Fixed double lock. | 30 | * Alan Cox : Fixed double lock. |
31 | * Alan Cox : Fixed promisc NULL pointer trap | 31 | * Alan Cox : Fixed promisc NULL pointer trap |
32 | * ???????? : Support the full private ioctl range | 32 | * ???????? : Support the full private ioctl range |
33 | * Alan Cox : Moved ioctl permission check into | 33 | * Alan Cox : Moved ioctl permission check into |
34 | * drivers | 34 | * drivers |
35 | * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI | 35 | * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI |
36 | * Alan Cox : 100 backlog just doesn't cut it when | 36 | * Alan Cox : 100 backlog just doesn't cut it when |
37 | * you start doing multicast video 8) | 37 | * you start doing multicast video 8) |
38 | * Alan Cox : Rewrote net_bh and list manager. | 38 | * Alan Cox : Rewrote net_bh and list manager. |
39 | * Alan Cox : Fix ETH_P_ALL echoback lengths. | 39 | * Alan Cox : Fix ETH_P_ALL echoback lengths. |
40 | * Alan Cox : Took out transmit every packet pass | 40 | * Alan Cox : Took out transmit every packet pass |
41 | * Saved a few bytes in the ioctl handler | 41 | * Saved a few bytes in the ioctl handler |
42 | * Alan Cox : Network driver sets packet type before | 42 | * Alan Cox : Network driver sets packet type before |
43 | * calling netif_rx. Saves a function | 43 | * calling netif_rx. Saves a function |
44 | * call a packet. | 44 | * call a packet. |
45 | * Alan Cox : Hashed net_bh() | 45 | * Alan Cox : Hashed net_bh() |
46 | * Richard Kooijman: Timestamp fixes. | 46 | * Richard Kooijman: Timestamp fixes. |
47 | * Alan Cox : Wrong field in SIOCGIFDSTADDR | 47 | * Alan Cox : Wrong field in SIOCGIFDSTADDR |
48 | * Alan Cox : Device lock protection. | 48 | * Alan Cox : Device lock protection. |
49 | * Alan Cox : Fixed nasty side effect of device close | 49 | * Alan Cox : Fixed nasty side effect of device close |
50 | * changes. | 50 | * changes. |
51 | * Rudi Cilibrasi : Pass the right thing to | 51 | * Rudi Cilibrasi : Pass the right thing to |
52 | * set_mac_address() | 52 | * set_mac_address() |
53 | * Dave Miller : 32bit quantity for the device lock to | 53 | * Dave Miller : 32bit quantity for the device lock to |
54 | * make it work out on a Sparc. | 54 | * make it work out on a Sparc. |
55 | * Bjorn Ekwall : Added KERNELD hack. | 55 | * Bjorn Ekwall : Added KERNELD hack. |
56 | * Alan Cox : Cleaned up the backlog initialise. | 56 | * Alan Cox : Cleaned up the backlog initialise. |
57 | * Craig Metz : SIOCGIFCONF fix if space for under | 57 | * Craig Metz : SIOCGIFCONF fix if space for under |
58 | * 1 device. | 58 | * 1 device. |
59 | * Thomas Bogendoerfer : Return ENODEV for dev_open, if there | 59 | * Thomas Bogendoerfer : Return ENODEV for dev_open, if there |
60 | * is no device open function. | 60 | * is no device open function. |
61 | * Andi Kleen : Fix error reporting for SIOCGIFCONF | 61 | * Andi Kleen : Fix error reporting for SIOCGIFCONF |
62 | * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF | 62 | * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF |
63 | * Cyrus Durgin : Cleaned for KMOD | 63 | * Cyrus Durgin : Cleaned for KMOD |
64 | * Adam Sulmicki : Bug Fix : Network Device Unload | 64 | * Adam Sulmicki : Bug Fix : Network Device Unload |
65 | * A network device unload needs to purge | 65 | * A network device unload needs to purge |
66 | * the backlog queue. | 66 | * the backlog queue. |
67 | * Paul Rusty Russell : SIOCSIFNAME | 67 | * Paul Rusty Russell : SIOCSIFNAME |
68 | * Pekka Riikonen : Netdev boot-time settings code | 68 | * Pekka Riikonen : Netdev boot-time settings code |
69 | * Andrew Morton : Make unregister_netdevice wait | 69 | * Andrew Morton : Make unregister_netdevice wait |
70 | * indefinitely on dev->refcnt | 70 | * indefinitely on dev->refcnt |
71 | * J Hadi Salim : - Backlog queue sampling | 71 | * J Hadi Salim : - Backlog queue sampling |
72 | * - netif_rx() feedback | 72 | * - netif_rx() feedback |
73 | */ | 73 | */ |
74 | 74 | ||
75 | #include <asm/uaccess.h> | 75 | #include <asm/uaccess.h> |
76 | #include <linux/bitops.h> | 76 | #include <linux/bitops.h> |
77 | #include <linux/capability.h> | 77 | #include <linux/capability.h> |
78 | #include <linux/cpu.h> | 78 | #include <linux/cpu.h> |
79 | #include <linux/types.h> | 79 | #include <linux/types.h> |
80 | #include <linux/kernel.h> | 80 | #include <linux/kernel.h> |
81 | #include <linux/hash.h> | 81 | #include <linux/hash.h> |
82 | #include <linux/slab.h> | 82 | #include <linux/slab.h> |
83 | #include <linux/sched.h> | 83 | #include <linux/sched.h> |
84 | #include <linux/mutex.h> | 84 | #include <linux/mutex.h> |
85 | #include <linux/string.h> | 85 | #include <linux/string.h> |
86 | #include <linux/mm.h> | 86 | #include <linux/mm.h> |
87 | #include <linux/socket.h> | 87 | #include <linux/socket.h> |
88 | #include <linux/sockios.h> | 88 | #include <linux/sockios.h> |
89 | #include <linux/errno.h> | 89 | #include <linux/errno.h> |
90 | #include <linux/interrupt.h> | 90 | #include <linux/interrupt.h> |
91 | #include <linux/if_ether.h> | 91 | #include <linux/if_ether.h> |
92 | #include <linux/netdevice.h> | 92 | #include <linux/netdevice.h> |
93 | #include <linux/etherdevice.h> | 93 | #include <linux/etherdevice.h> |
94 | #include <linux/ethtool.h> | 94 | #include <linux/ethtool.h> |
95 | #include <linux/notifier.h> | 95 | #include <linux/notifier.h> |
96 | #include <linux/skbuff.h> | 96 | #include <linux/skbuff.h> |
97 | #include <net/net_namespace.h> | 97 | #include <net/net_namespace.h> |
98 | #include <net/sock.h> | 98 | #include <net/sock.h> |
99 | #include <linux/rtnetlink.h> | 99 | #include <linux/rtnetlink.h> |
100 | #include <linux/proc_fs.h> | 100 | #include <linux/proc_fs.h> |
101 | #include <linux/seq_file.h> | 101 | #include <linux/seq_file.h> |
102 | #include <linux/stat.h> | 102 | #include <linux/stat.h> |
103 | #include <net/dst.h> | 103 | #include <net/dst.h> |
104 | #include <net/pkt_sched.h> | 104 | #include <net/pkt_sched.h> |
105 | #include <net/checksum.h> | 105 | #include <net/checksum.h> |
106 | #include <net/xfrm.h> | 106 | #include <net/xfrm.h> |
107 | #include <linux/highmem.h> | 107 | #include <linux/highmem.h> |
108 | #include <linux/init.h> | 108 | #include <linux/init.h> |
109 | #include <linux/kmod.h> | 109 | #include <linux/kmod.h> |
110 | #include <linux/module.h> | 110 | #include <linux/module.h> |
111 | #include <linux/netpoll.h> | 111 | #include <linux/netpoll.h> |
112 | #include <linux/rcupdate.h> | 112 | #include <linux/rcupdate.h> |
113 | #include <linux/delay.h> | 113 | #include <linux/delay.h> |
114 | #include <net/wext.h> | 114 | #include <net/wext.h> |
115 | #include <net/iw_handler.h> | 115 | #include <net/iw_handler.h> |
116 | #include <asm/current.h> | 116 | #include <asm/current.h> |
117 | #include <linux/audit.h> | 117 | #include <linux/audit.h> |
118 | #include <linux/dmaengine.h> | 118 | #include <linux/dmaengine.h> |
119 | #include <linux/err.h> | 119 | #include <linux/err.h> |
120 | #include <linux/ctype.h> | 120 | #include <linux/ctype.h> |
121 | #include <linux/if_arp.h> | 121 | #include <linux/if_arp.h> |
122 | #include <linux/if_vlan.h> | 122 | #include <linux/if_vlan.h> |
123 | #include <linux/ip.h> | 123 | #include <linux/ip.h> |
124 | #include <net/ip.h> | 124 | #include <net/ip.h> |
125 | #include <linux/ipv6.h> | 125 | #include <linux/ipv6.h> |
126 | #include <linux/in.h> | 126 | #include <linux/in.h> |
127 | #include <linux/jhash.h> | 127 | #include <linux/jhash.h> |
128 | #include <linux/random.h> | 128 | #include <linux/random.h> |
129 | #include <trace/events/napi.h> | 129 | #include <trace/events/napi.h> |
130 | #include <trace/events/net.h> | 130 | #include <trace/events/net.h> |
131 | #include <trace/events/skb.h> | 131 | #include <trace/events/skb.h> |
132 | #include <linux/pci.h> | 132 | #include <linux/pci.h> |
133 | #include <linux/inetdevice.h> | 133 | #include <linux/inetdevice.h> |
134 | #include <linux/cpu_rmap.h> | 134 | #include <linux/cpu_rmap.h> |
135 | #include <linux/net_tstamp.h> | 135 | #include <linux/net_tstamp.h> |
136 | #include <linux/static_key.h> | 136 | #include <linux/static_key.h> |
137 | 137 | ||
138 | #include "net-sysfs.h" | 138 | #include "net-sysfs.h" |
139 | 139 | ||
140 | /* Instead of increasing this, you should create a hash table. */ | 140 | /* Instead of increasing this, you should create a hash table. */ |
141 | #define MAX_GRO_SKBS 8 | 141 | #define MAX_GRO_SKBS 8 |
142 | 142 | ||
143 | /* This should be increased if a protocol with a bigger head is added. */ | 143 | /* This should be increased if a protocol with a bigger head is added. */ |
144 | #define GRO_MAX_HEAD (MAX_HEADER + 128) | 144 | #define GRO_MAX_HEAD (MAX_HEADER + 128) |
145 | 145 | ||
146 | /* | 146 | /* |
147 | * The list of packet types we will receive (as opposed to discard) | 147 | * The list of packet types we will receive (as opposed to discard) |
148 | * and the routines to invoke. | 148 | * and the routines to invoke. |
149 | * | 149 | * |
150 | * Why 16. Because with 16 the only overlap we get on a hash of the | 150 | * Why 16. Because with 16 the only overlap we get on a hash of the |
151 | * low nibble of the protocol value is RARP/SNAP/X.25. | 151 | * low nibble of the protocol value is RARP/SNAP/X.25. |
152 | * | 152 | * |
153 | * NOTE: That is no longer true with the addition of VLAN tags. Not | 153 | * NOTE: That is no longer true with the addition of VLAN tags. Not |
154 | * sure which should go first, but I bet it won't make much | 154 | * sure which should go first, but I bet it won't make much |
155 | * difference if we are running VLANs. The good news is that | 155 | * difference if we are running VLANs. The good news is that |
156 | * this protocol won't be in the list unless compiled in, so | 156 | * this protocol won't be in the list unless compiled in, so |
157 | * the average user (w/out VLANs) will not be adversely affected. | 157 | * the average user (w/out VLANs) will not be adversely affected. |
158 | * --BLG | 158 | * --BLG |
159 | * | 159 | * |
160 | * 0800 IP | 160 | * 0800 IP |
161 | * 8100 802.1Q VLAN | 161 | * 8100 802.1Q VLAN |
162 | * 0001 802.3 | 162 | * 0001 802.3 |
163 | * 0002 AX.25 | 163 | * 0002 AX.25 |
164 | * 0004 802.2 | 164 | * 0004 802.2 |
165 | * 8035 RARP | 165 | * 8035 RARP |
166 | * 0005 SNAP | 166 | * 0005 SNAP |
167 | * 0805 X.25 | 167 | * 0805 X.25 |
168 | * 0806 ARP | 168 | * 0806 ARP |
169 | * 8137 IPX | 169 | * 8137 IPX |
170 | * 0009 Localtalk | 170 | * 0009 Localtalk |
171 | * 86DD IPv6 | 171 | * 86DD IPv6 |
172 | */ | 172 | */ |
173 | 173 | ||
174 | #define PTYPE_HASH_SIZE (16) | 174 | #define PTYPE_HASH_SIZE (16) |
175 | #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) | 175 | #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) |
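The comment above describes the low-nibble hashing that ptype_head() (further down) implements. A minimal userspace sketch, assuming only the ETH_P_* constants from <linux/if_ether.h>, showing which bucket a few common ethertypes land in:

#include <stdio.h>
#include <linux/if_ether.h>

#define PTYPE_HASH_SIZE (16)
#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)

int main(void)
{
	/* In the kernel, pt->type is big-endian, so ptype_head() applies
	 * ntohs() before masking; the host-order constants used here
	 * yield the same bucket numbers. */
	unsigned short protos[] = { ETH_P_IP, ETH_P_ARP, ETH_P_RARP, ETH_P_IPV6 };
	unsigned int i;

	for (i = 0; i < sizeof(protos) / sizeof(protos[0]); i++)
		printf("0x%04x -> bucket %u\n", protos[i],
		       (unsigned int)(protos[i] & PTYPE_HASH_MASK));
	return 0;
}

ETH_P_IP (0x0800) and ETH_P_IPV6 (0x86DD) fall into buckets 0 and 13, while RARP (0x8035), SNAP (0x0005) and X.25 (0x0805) all share bucket 5, which is exactly the overlap the comment mentions.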
176 | 176 | ||
177 | static DEFINE_SPINLOCK(ptype_lock); | 177 | static DEFINE_SPINLOCK(ptype_lock); |
178 | static DEFINE_SPINLOCK(offload_lock); | 178 | static DEFINE_SPINLOCK(offload_lock); |
179 | static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; | 179 | static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; |
180 | static struct list_head ptype_all __read_mostly; /* Taps */ | 180 | static struct list_head ptype_all __read_mostly; /* Taps */ |
181 | static struct list_head offload_base __read_mostly; | 181 | static struct list_head offload_base __read_mostly; |
182 | 182 | ||
183 | /* | 183 | /* |
184 | * The @dev_base_head list is protected by @dev_base_lock and the rtnl | 184 | * The @dev_base_head list is protected by @dev_base_lock and the rtnl |
185 | * semaphore. | 185 | * semaphore. |
186 | * | 186 | * |
187 | * Pure readers hold dev_base_lock for reading, or rcu_read_lock() | 187 | * Pure readers hold dev_base_lock for reading, or rcu_read_lock() |
188 | * | 188 | * |
189 | * Writers must hold the rtnl semaphore while they loop through the | 189 | * Writers must hold the rtnl semaphore while they loop through the |
190 | * dev_base_head list, and hold dev_base_lock for writing when they do the | 190 | * dev_base_head list, and hold dev_base_lock for writing when they do the |
191 | * actual updates. This allows pure readers to access the list even | 191 | * actual updates. This allows pure readers to access the list even |
192 | * while a writer is preparing to update it. | 192 | * while a writer is preparing to update it. |
193 | * | 193 | * |
194 | * To put it another way, dev_base_lock is held for writing only to | 194 | * To put it another way, dev_base_lock is held for writing only to |
195 | * protect against pure readers; the rtnl semaphore provides the | 195 | * protect against pure readers; the rtnl semaphore provides the |
196 | * protection against other writers. | 196 | * protection against other writers. |
197 | * | 197 | * |
198 | * See, for example usages, register_netdevice() and | 198 | * See, for example usages, register_netdevice() and |
199 | * unregister_netdevice(), which must be called with the rtnl | 199 | * unregister_netdevice(), which must be called with the rtnl |
200 | * semaphore held. | 200 | * semaphore held. |
201 | */ | 201 | */ |
202 | DEFINE_RWLOCK(dev_base_lock); | 202 | DEFINE_RWLOCK(dev_base_lock); |
203 | EXPORT_SYMBOL(dev_base_lock); | 203 | EXPORT_SYMBOL(dev_base_lock); |
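A minimal sketch (not part of this file) of the two access patterns the comment above spells out: pure readers take dev_base_lock for reading or use RCU, while writers hold the rtnl semaphore and additionally take dev_base_lock for writing around the actual update, exactly as list_netdevice() below does:

/* Sketch only: reader vs. writer under the dev_base_head locking rules. */
static void example_reader(struct net *net)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);		/* or rcu_read_lock() */
	for_each_netdev(net, dev)
		pr_info("%s\n", dev->name);
	read_unlock(&dev_base_lock);
}

static void example_writer(struct net *net, struct net_device *dev)
{
	ASSERT_RTNL();				/* writers hold the rtnl semaphore... */
	write_lock_bh(&dev_base_lock);		/* ...and dev_base_lock for the update */
	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
	write_unlock_bh(&dev_base_lock);
}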
204 | 204 | ||
205 | seqcount_t devnet_rename_seq; | 205 | seqcount_t devnet_rename_seq; |
206 | 206 | ||
207 | static inline void dev_base_seq_inc(struct net *net) | 207 | static inline void dev_base_seq_inc(struct net *net) |
208 | { | 208 | { |
209 | while (++net->dev_base_seq == 0); | 209 | while (++net->dev_base_seq == 0); |
210 | } | 210 | } |
211 | 211 | ||
212 | static inline struct hlist_head *dev_name_hash(struct net *net, const char *name) | 212 | static inline struct hlist_head *dev_name_hash(struct net *net, const char *name) |
213 | { | 213 | { |
214 | unsigned int hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); | 214 | unsigned int hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); |
215 | 215 | ||
216 | return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)]; | 216 | return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)]; |
217 | } | 217 | } |
218 | 218 | ||
219 | static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) | 219 | static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) |
220 | { | 220 | { |
221 | return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)]; | 221 | return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)]; |
222 | } | 222 | } |
223 | 223 | ||
224 | static inline void rps_lock(struct softnet_data *sd) | 224 | static inline void rps_lock(struct softnet_data *sd) |
225 | { | 225 | { |
226 | #ifdef CONFIG_RPS | 226 | #ifdef CONFIG_RPS |
227 | spin_lock(&sd->input_pkt_queue.lock); | 227 | spin_lock(&sd->input_pkt_queue.lock); |
228 | #endif | 228 | #endif |
229 | } | 229 | } |
230 | 230 | ||
231 | static inline void rps_unlock(struct softnet_data *sd) | 231 | static inline void rps_unlock(struct softnet_data *sd) |
232 | { | 232 | { |
233 | #ifdef CONFIG_RPS | 233 | #ifdef CONFIG_RPS |
234 | spin_unlock(&sd->input_pkt_queue.lock); | 234 | spin_unlock(&sd->input_pkt_queue.lock); |
235 | #endif | 235 | #endif |
236 | } | 236 | } |
237 | 237 | ||
238 | /* Device list insertion */ | 238 | /* Device list insertion */ |
239 | static int list_netdevice(struct net_device *dev) | 239 | static int list_netdevice(struct net_device *dev) |
240 | { | 240 | { |
241 | struct net *net = dev_net(dev); | 241 | struct net *net = dev_net(dev); |
242 | 242 | ||
243 | ASSERT_RTNL(); | 243 | ASSERT_RTNL(); |
244 | 244 | ||
245 | write_lock_bh(&dev_base_lock); | 245 | write_lock_bh(&dev_base_lock); |
246 | list_add_tail_rcu(&dev->dev_list, &net->dev_base_head); | 246 | list_add_tail_rcu(&dev->dev_list, &net->dev_base_head); |
247 | hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name)); | 247 | hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name)); |
248 | hlist_add_head_rcu(&dev->index_hlist, | 248 | hlist_add_head_rcu(&dev->index_hlist, |
249 | dev_index_hash(net, dev->ifindex)); | 249 | dev_index_hash(net, dev->ifindex)); |
250 | write_unlock_bh(&dev_base_lock); | 250 | write_unlock_bh(&dev_base_lock); |
251 | 251 | ||
252 | dev_base_seq_inc(net); | 252 | dev_base_seq_inc(net); |
253 | 253 | ||
254 | return 0; | 254 | return 0; |
255 | } | 255 | } |
256 | 256 | ||
257 | /* Device list removal | 257 | /* Device list removal |
258 | * caller must respect a RCU grace period before freeing/reusing dev | 258 | * caller must respect a RCU grace period before freeing/reusing dev |
259 | */ | 259 | */ |
260 | static void unlist_netdevice(struct net_device *dev) | 260 | static void unlist_netdevice(struct net_device *dev) |
261 | { | 261 | { |
262 | ASSERT_RTNL(); | 262 | ASSERT_RTNL(); |
263 | 263 | ||
264 | /* Unlink dev from the device chain */ | 264 | /* Unlink dev from the device chain */ |
265 | write_lock_bh(&dev_base_lock); | 265 | write_lock_bh(&dev_base_lock); |
266 | list_del_rcu(&dev->dev_list); | 266 | list_del_rcu(&dev->dev_list); |
267 | hlist_del_rcu(&dev->name_hlist); | 267 | hlist_del_rcu(&dev->name_hlist); |
268 | hlist_del_rcu(&dev->index_hlist); | 268 | hlist_del_rcu(&dev->index_hlist); |
269 | write_unlock_bh(&dev_base_lock); | 269 | write_unlock_bh(&dev_base_lock); |
270 | 270 | ||
271 | dev_base_seq_inc(dev_net(dev)); | 271 | dev_base_seq_inc(dev_net(dev)); |
272 | } | 272 | } |
273 | 273 | ||
274 | /* | 274 | /* |
275 | * Our notifier list | 275 | * Our notifier list |
276 | */ | 276 | */ |
277 | 277 | ||
278 | static RAW_NOTIFIER_HEAD(netdev_chain); | 278 | static RAW_NOTIFIER_HEAD(netdev_chain); |
279 | 279 | ||
280 | /* | 280 | /* |
281 | * Device drivers call our routines to queue packets here. We empty the | 281 | * Device drivers call our routines to queue packets here. We empty the |
282 | * queue in the local softnet handler. | 282 | * queue in the local softnet handler. |
283 | */ | 283 | */ |
284 | 284 | ||
285 | DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); | 285 | DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); |
286 | EXPORT_PER_CPU_SYMBOL(softnet_data); | 286 | EXPORT_PER_CPU_SYMBOL(softnet_data); |
287 | 287 | ||
288 | #ifdef CONFIG_LOCKDEP | 288 | #ifdef CONFIG_LOCKDEP |
289 | /* | 289 | /* |
290 | * register_netdevice() inits txq->_xmit_lock and sets lockdep class | 290 | * register_netdevice() inits txq->_xmit_lock and sets lockdep class |
291 | * according to dev->type | 291 | * according to dev->type |
292 | */ | 292 | */ |
293 | static const unsigned short netdev_lock_type[] = | 293 | static const unsigned short netdev_lock_type[] = |
294 | {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25, | 294 | {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25, |
295 | ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET, | 295 | ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET, |
296 | ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM, | 296 | ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM, |
297 | ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP, | 297 | ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP, |
298 | ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD, | 298 | ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD, |
299 | ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25, | 299 | ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25, |
300 | ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP, | 300 | ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP, |
301 | ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD, | 301 | ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD, |
302 | ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI, | 302 | ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI, |
303 | ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE, | 303 | ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE, |
304 | ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET, | 304 | ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET, |
305 | ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, | 305 | ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, |
306 | ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM, | 306 | ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM, |
307 | ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE, | 307 | ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE, |
308 | ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE}; | 308 | ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE}; |
309 | 309 | ||
310 | static const char *const netdev_lock_name[] = | 310 | static const char *const netdev_lock_name[] = |
311 | {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", | 311 | {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", |
312 | "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET", | 312 | "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET", |
313 | "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM", | 313 | "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM", |
314 | "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP", | 314 | "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP", |
315 | "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD", | 315 | "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD", |
316 | "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25", | 316 | "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25", |
317 | "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP", | 317 | "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP", |
318 | "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD", | 318 | "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD", |
319 | "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI", | 319 | "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI", |
320 | "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE", | 320 | "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE", |
321 | "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET", | 321 | "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET", |
322 | "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", | 322 | "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", |
323 | "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM", | 323 | "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM", |
324 | "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE", | 324 | "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE", |
325 | "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"}; | 325 | "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"}; |
326 | 326 | ||
327 | static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; | 327 | static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; |
328 | static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; | 328 | static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; |
329 | 329 | ||
330 | static inline unsigned short netdev_lock_pos(unsigned short dev_type) | 330 | static inline unsigned short netdev_lock_pos(unsigned short dev_type) |
331 | { | 331 | { |
332 | int i; | 332 | int i; |
333 | 333 | ||
334 | for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++) | 334 | for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++) |
335 | if (netdev_lock_type[i] == dev_type) | 335 | if (netdev_lock_type[i] == dev_type) |
336 | return i; | 336 | return i; |
337 | /* the last key is used by default */ | 337 | /* the last key is used by default */ |
338 | return ARRAY_SIZE(netdev_lock_type) - 1; | 338 | return ARRAY_SIZE(netdev_lock_type) - 1; |
339 | } | 339 | } |
340 | 340 | ||
341 | static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, | 341 | static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, |
342 | unsigned short dev_type) | 342 | unsigned short dev_type) |
343 | { | 343 | { |
344 | int i; | 344 | int i; |
345 | 345 | ||
346 | i = netdev_lock_pos(dev_type); | 346 | i = netdev_lock_pos(dev_type); |
347 | lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i], | 347 | lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i], |
348 | netdev_lock_name[i]); | 348 | netdev_lock_name[i]); |
349 | } | 349 | } |
350 | 350 | ||
351 | static inline void netdev_set_addr_lockdep_class(struct net_device *dev) | 351 | static inline void netdev_set_addr_lockdep_class(struct net_device *dev) |
352 | { | 352 | { |
353 | int i; | 353 | int i; |
354 | 354 | ||
355 | i = netdev_lock_pos(dev->type); | 355 | i = netdev_lock_pos(dev->type); |
356 | lockdep_set_class_and_name(&dev->addr_list_lock, | 356 | lockdep_set_class_and_name(&dev->addr_list_lock, |
357 | &netdev_addr_lock_key[i], | 357 | &netdev_addr_lock_key[i], |
358 | netdev_lock_name[i]); | 358 | netdev_lock_name[i]); |
359 | } | 359 | } |
360 | #else | 360 | #else |
361 | static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, | 361 | static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, |
362 | unsigned short dev_type) | 362 | unsigned short dev_type) |
363 | { | 363 | { |
364 | } | 364 | } |
365 | static inline void netdev_set_addr_lockdep_class(struct net_device *dev) | 365 | static inline void netdev_set_addr_lockdep_class(struct net_device *dev) |
366 | { | 366 | { |
367 | } | 367 | } |
368 | #endif | 368 | #endif |
369 | 369 | ||
370 | /******************************************************************************* | 370 | /******************************************************************************* |
371 | 371 | ||
372 | Protocol management and registration routines | 372 | Protocol management and registration routines |
373 | 373 | ||
374 | *******************************************************************************/ | 374 | *******************************************************************************/ |
375 | 375 | ||
376 | /* | 376 | /* |
377 | * Add a protocol ID to the list. Now that the input handler is | 377 | * Add a protocol ID to the list. Now that the input handler is |
378 | * smarter we can dispense with all the messy stuff that used to be | 378 | * smarter we can dispense with all the messy stuff that used to be |
379 | * here. | 379 | * here. |
380 | * | 380 | * |
381 | * BEWARE!!! Protocol handlers, mangling input packets, | 381 | * BEWARE!!! Protocol handlers, mangling input packets, |
382 | * MUST BE last in hash buckets and checking protocol handlers | 382 | * MUST BE last in hash buckets and checking protocol handlers |
383 | * MUST start from promiscuous ptype_all chain in net_bh. | 383 | * MUST start from promiscuous ptype_all chain in net_bh. |
384 | * It is true now, do not change it. | 384 | * It is true now, do not change it. |
385 | * Explanation follows: if protocol handler, mangling packet, will | 385 | * Explanation follows: if protocol handler, mangling packet, will |
386 | * be the first on list, it is not able to sense, that packet | 386 | * be the first on list, it is not able to sense, that packet |
387 | * is cloned and should be copied-on-write, so that it will | 387 | * is cloned and should be copied-on-write, so that it will |
388 | * change it and subsequent readers will get broken packet. | 388 | * change it and subsequent readers will get broken packet. |
389 | * --ANK (980803) | 389 | * --ANK (980803) |
390 | */ | 390 | */ |
391 | 391 | ||
392 | static inline struct list_head *ptype_head(const struct packet_type *pt) | 392 | static inline struct list_head *ptype_head(const struct packet_type *pt) |
393 | { | 393 | { |
394 | if (pt->type == htons(ETH_P_ALL)) | 394 | if (pt->type == htons(ETH_P_ALL)) |
395 | return &ptype_all; | 395 | return &ptype_all; |
396 | else | 396 | else |
397 | return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; | 397 | return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; |
398 | } | 398 | } |
399 | 399 | ||
400 | /** | 400 | /** |
401 | * dev_add_pack - add packet handler | 401 | * dev_add_pack - add packet handler |
402 | * @pt: packet type declaration | 402 | * @pt: packet type declaration |
403 | * | 403 | * |
404 | * Add a protocol handler to the networking stack. The passed &packet_type | 404 | * Add a protocol handler to the networking stack. The passed &packet_type |
405 | * is linked into kernel lists and may not be freed until it has been | 405 | * is linked into kernel lists and may not be freed until it has been |
406 | * removed from the kernel lists. | 406 | * removed from the kernel lists. |
407 | * | 407 | * |
408 | * This call does not sleep therefore it can not | 408 | * This call does not sleep therefore it can not |
409 | * guarantee all CPU's that are in middle of receiving packets | 409 | * guarantee all CPU's that are in middle of receiving packets |
410 | * will see the new packet type (until the next received packet). | 410 | * will see the new packet type (until the next received packet). |
411 | */ | 411 | */ |
412 | 412 | ||
413 | void dev_add_pack(struct packet_type *pt) | 413 | void dev_add_pack(struct packet_type *pt) |
414 | { | 414 | { |
415 | struct list_head *head = ptype_head(pt); | 415 | struct list_head *head = ptype_head(pt); |
416 | 416 | ||
417 | spin_lock(&ptype_lock); | 417 | spin_lock(&ptype_lock); |
418 | list_add_rcu(&pt->list, head); | 418 | list_add_rcu(&pt->list, head); |
419 | spin_unlock(&ptype_lock); | 419 | spin_unlock(&ptype_lock); |
420 | } | 420 | } |
421 | EXPORT_SYMBOL(dev_add_pack); | 421 | EXPORT_SYMBOL(dev_add_pack); |
422 | 422 | ||
423 | /** | 423 | /** |
424 | * __dev_remove_pack - remove packet handler | 424 | * __dev_remove_pack - remove packet handler |
425 | * @pt: packet type declaration | 425 | * @pt: packet type declaration |
426 | * | 426 | * |
427 | * Remove a protocol handler that was previously added to the kernel | 427 | * Remove a protocol handler that was previously added to the kernel |
428 | * protocol handlers by dev_add_pack(). The passed &packet_type is removed | 428 | * protocol handlers by dev_add_pack(). The passed &packet_type is removed |
429 | * from the kernel lists and can be freed or reused once this function | 429 | * from the kernel lists and can be freed or reused once this function |
430 | * returns. | 430 | * returns. |
431 | * | 431 | * |
432 | * The packet type might still be in use by receivers | 432 | * The packet type might still be in use by receivers |
433 | * and must not be freed until after all the CPU's have gone | 433 | * and must not be freed until after all the CPU's have gone |
434 | * through a quiescent state. | 434 | * through a quiescent state. |
435 | */ | 435 | */ |
436 | void __dev_remove_pack(struct packet_type *pt) | 436 | void __dev_remove_pack(struct packet_type *pt) |
437 | { | 437 | { |
438 | struct list_head *head = ptype_head(pt); | 438 | struct list_head *head = ptype_head(pt); |
439 | struct packet_type *pt1; | 439 | struct packet_type *pt1; |
440 | 440 | ||
441 | spin_lock(&ptype_lock); | 441 | spin_lock(&ptype_lock); |
442 | 442 | ||
443 | list_for_each_entry(pt1, head, list) { | 443 | list_for_each_entry(pt1, head, list) { |
444 | if (pt == pt1) { | 444 | if (pt == pt1) { |
445 | list_del_rcu(&pt->list); | 445 | list_del_rcu(&pt->list); |
446 | goto out; | 446 | goto out; |
447 | } | 447 | } |
448 | } | 448 | } |
449 | 449 | ||
450 | pr_warn("dev_remove_pack: %p not found\n", pt); | 450 | pr_warn("dev_remove_pack: %p not found\n", pt); |
451 | out: | 451 | out: |
452 | spin_unlock(&ptype_lock); | 452 | spin_unlock(&ptype_lock); |
453 | } | 453 | } |
454 | EXPORT_SYMBOL(__dev_remove_pack); | 454 | EXPORT_SYMBOL(__dev_remove_pack); |
455 | 455 | ||
456 | /** | 456 | /** |
457 | * dev_remove_pack - remove packet handler | 457 | * dev_remove_pack - remove packet handler |
458 | * @pt: packet type declaration | 458 | * @pt: packet type declaration |
459 | * | 459 | * |
460 | * Remove a protocol handler that was previously added to the kernel | 460 | * Remove a protocol handler that was previously added to the kernel |
461 | * protocol handlers by dev_add_pack(). The passed &packet_type is removed | 461 | * protocol handlers by dev_add_pack(). The passed &packet_type is removed |
462 | * from the kernel lists and can be freed or reused once this function | 462 | * from the kernel lists and can be freed or reused once this function |
463 | * returns. | 463 | * returns. |
464 | * | 464 | * |
465 | * This call sleeps to guarantee that no CPU is looking at the packet | 465 | * This call sleeps to guarantee that no CPU is looking at the packet |
466 | * type after return. | 466 | * type after return. |
467 | */ | 467 | */ |
468 | void dev_remove_pack(struct packet_type *pt) | 468 | void dev_remove_pack(struct packet_type *pt) |
469 | { | 469 | { |
470 | __dev_remove_pack(pt); | 470 | __dev_remove_pack(pt); |
471 | 471 | ||
472 | synchronize_net(); | 472 | synchronize_net(); |
473 | } | 473 | } |
474 | EXPORT_SYMBOL(dev_remove_pack); | 474 | EXPORT_SYMBOL(dev_remove_pack); |
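A minimal sketch of how a protocol module typically pairs dev_add_pack() with dev_remove_pack(); the ethertype (0x88B5, IEEE local experimental) and all demo_* names are illustrative and not part of this file:

#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>

static int demo_rcv(struct sk_buff *skb, struct net_device *dev,
		    struct packet_type *pt, struct net_device *orig_dev)
{
	pr_info("demo: %u byte frame on %s\n", skb->len, dev->name);
	kfree_skb(skb);			/* handler owns this reference to the skb */
	return NET_RX_SUCCESS;
}

static struct packet_type demo_ptype __read_mostly = {
	.type = cpu_to_be16(0x88B5),	/* IEEE 802 local experimental ethertype */
	.func = demo_rcv,
};

static int __init demo_init(void)
{
	dev_add_pack(&demo_ptype);	/* does not sleep; seen from the next packet on */
	return 0;
}

static void __exit demo_exit(void)
{
	dev_remove_pack(&demo_ptype);	/* sleeps until no CPU can still see the handler */
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");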
475 | 475 | ||
476 | 476 | ||
477 | /** | 477 | /** |
478 | * dev_add_offload - register offload handlers | 478 | * dev_add_offload - register offload handlers |
479 | * @po: protocol offload declaration | 479 | * @po: protocol offload declaration |
480 | * | 480 | * |
481 | * Add protocol offload handlers to the networking stack. The passed | 481 | * Add protocol offload handlers to the networking stack. The passed |
482 | * &proto_offload is linked into kernel lists and may not be freed until | 482 | * &proto_offload is linked into kernel lists and may not be freed until |
483 | * it has been removed from the kernel lists. | 483 | * it has been removed from the kernel lists. |
484 | * | 484 | * |
485 | * This call does not sleep therefore it can not | 485 | * This call does not sleep therefore it can not |
486 | * guarantee all CPU's that are in middle of receiving packets | 486 | * guarantee all CPU's that are in middle of receiving packets |
487 | * will see the new offload handlers (until the next received packet). | 487 | * will see the new offload handlers (until the next received packet). |
488 | */ | 488 | */ |
489 | void dev_add_offload(struct packet_offload *po) | 489 | void dev_add_offload(struct packet_offload *po) |
490 | { | 490 | { |
491 | struct list_head *head = &offload_base; | 491 | struct list_head *head = &offload_base; |
492 | 492 | ||
493 | spin_lock(&offload_lock); | 493 | spin_lock(&offload_lock); |
494 | list_add_rcu(&po->list, head); | 494 | list_add_rcu(&po->list, head); |
495 | spin_unlock(&offload_lock); | 495 | spin_unlock(&offload_lock); |
496 | } | 496 | } |
497 | EXPORT_SYMBOL(dev_add_offload); | 497 | EXPORT_SYMBOL(dev_add_offload); |
498 | 498 | ||
499 | /** | 499 | /** |
500 | * __dev_remove_offload - remove offload handler | 500 | * __dev_remove_offload - remove offload handler |
501 | * @po: packet offload declaration | 501 | * @po: packet offload declaration |
502 | * | 502 | * |
503 | * Remove a protocol offload handler that was previously added to the | 503 | * Remove a protocol offload handler that was previously added to the |
504 | * kernel offload handlers by dev_add_offload(). The passed &offload_type | 504 | * kernel offload handlers by dev_add_offload(). The passed &offload_type |
505 | * is removed from the kernel lists and can be freed or reused once this | 505 | * is removed from the kernel lists and can be freed or reused once this |
506 | * function returns. | 506 | * function returns. |
507 | * | 507 | * |
508 | * The packet type might still be in use by receivers | 508 | * The packet type might still be in use by receivers |
509 | * and must not be freed until after all the CPU's have gone | 509 | * and must not be freed until after all the CPU's have gone |
510 | * through a quiescent state. | 510 | * through a quiescent state. |
511 | */ | 511 | */ |
512 | void __dev_remove_offload(struct packet_offload *po) | 512 | void __dev_remove_offload(struct packet_offload *po) |
513 | { | 513 | { |
514 | struct list_head *head = &offload_base; | 514 | struct list_head *head = &offload_base; |
515 | struct packet_offload *po1; | 515 | struct packet_offload *po1; |
516 | 516 | ||
517 | spin_lock(&offload_lock); | 517 | spin_lock(&offload_lock); |
518 | 518 | ||
519 | list_for_each_entry(po1, head, list) { | 519 | list_for_each_entry(po1, head, list) { |
520 | if (po == po1) { | 520 | if (po == po1) { |
521 | list_del_rcu(&po->list); | 521 | list_del_rcu(&po->list); |
522 | goto out; | 522 | goto out; |
523 | } | 523 | } |
524 | } | 524 | } |
525 | 525 | ||
526 | pr_warn("dev_remove_offload: %p not found\n", po); | 526 | pr_warn("dev_remove_offload: %p not found\n", po); |
527 | out: | 527 | out: |
528 | spin_unlock(&offload_lock); | 528 | spin_unlock(&offload_lock); |
529 | } | 529 | } |
530 | EXPORT_SYMBOL(__dev_remove_offload); | 530 | EXPORT_SYMBOL(__dev_remove_offload); |
531 | 531 | ||
532 | /** | 532 | /** |
533 | * dev_remove_offload - remove packet offload handler | 533 | * dev_remove_offload - remove packet offload handler |
534 | * @po: packet offload declaration | 534 | * @po: packet offload declaration |
535 | * | 535 | * |
536 | * Remove a packet offload handler that was previously added to the kernel | 536 | * Remove a packet offload handler that was previously added to the kernel |
537 | * offload handlers by dev_add_offload(). The passed &offload_type is | 537 | * offload handlers by dev_add_offload(). The passed &offload_type is |
538 | * removed from the kernel lists and can be freed or reused once this | 538 | * removed from the kernel lists and can be freed or reused once this |
539 | * function returns. | 539 | * function returns. |
540 | * | 540 | * |
541 | * This call sleeps to guarantee that no CPU is looking at the packet | 541 | * This call sleeps to guarantee that no CPU is looking at the packet |
542 | * type after return. | 542 | * type after return. |
543 | */ | 543 | */ |
544 | void dev_remove_offload(struct packet_offload *po) | 544 | void dev_remove_offload(struct packet_offload *po) |
545 | { | 545 | { |
546 | __dev_remove_offload(po); | 546 | __dev_remove_offload(po); |
547 | 547 | ||
548 | synchronize_net(); | 548 | synchronize_net(); |
549 | } | 549 | } |
550 | EXPORT_SYMBOL(dev_remove_offload); | 550 | EXPORT_SYMBOL(dev_remove_offload); |
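The offload path is registered the same way. A rough sketch modelled on the IPv4 registration in af_inet.c; the ethertype and the stub segmentation routine are assumptions for illustration only:

static struct sk_buff *demo_gso_segment(struct sk_buff *skb,
					netdev_features_t features)
{
	/* A real implementation would split skb into MSS-sized segments. */
	return ERR_PTR(-EPROTONOSUPPORT);
}

static struct packet_offload demo_offload __read_mostly = {
	.type = cpu_to_be16(0x88B6),		/* illustrative ethertype */
	.callbacks = {
		.gso_segment = demo_gso_segment,
	},
};

static int __init demo_offload_init(void)
{
	dev_add_offload(&demo_offload);
	return 0;
}

static void __exit demo_offload_exit(void)
{
	dev_remove_offload(&demo_offload);	/* sleeps, like dev_remove_pack() */
}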
551 | 551 | ||
552 | /****************************************************************************** | 552 | /****************************************************************************** |
553 | 553 | ||
554 | Device Boot-time Settings Routines | 554 | Device Boot-time Settings Routines |
555 | 555 | ||
556 | *******************************************************************************/ | 556 | *******************************************************************************/ |
557 | 557 | ||
558 | /* Boot time configuration table */ | 558 | /* Boot time configuration table */ |
559 | static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX]; | 559 | static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX]; |
560 | 560 | ||
561 | /** | 561 | /** |
562 | * netdev_boot_setup_add - add new setup entry | 562 | * netdev_boot_setup_add - add new setup entry |
563 | * @name: name of the device | 563 | * @name: name of the device |
564 | * @map: configured settings for the device | 564 | * @map: configured settings for the device |
565 | * | 565 | * |
566 | * Adds new setup entry to the dev_boot_setup list. The function | 566 | * Adds new setup entry to the dev_boot_setup list. The function |
567 | * returns 0 on error and 1 on success. This is a generic routine to | 567 | * returns 0 on error and 1 on success. This is a generic routine to |
568 | * all netdevices. | 568 | * all netdevices. |
569 | */ | 569 | */ |
570 | static int netdev_boot_setup_add(char *name, struct ifmap *map) | 570 | static int netdev_boot_setup_add(char *name, struct ifmap *map) |
571 | { | 571 | { |
572 | struct netdev_boot_setup *s; | 572 | struct netdev_boot_setup *s; |
573 | int i; | 573 | int i; |
574 | 574 | ||
575 | s = dev_boot_setup; | 575 | s = dev_boot_setup; |
576 | for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { | 576 | for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { |
577 | if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { | 577 | if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { |
578 | memset(s[i].name, 0, sizeof(s[i].name)); | 578 | memset(s[i].name, 0, sizeof(s[i].name)); |
579 | strlcpy(s[i].name, name, IFNAMSIZ); | 579 | strlcpy(s[i].name, name, IFNAMSIZ); |
580 | memcpy(&s[i].map, map, sizeof(s[i].map)); | 580 | memcpy(&s[i].map, map, sizeof(s[i].map)); |
581 | break; | 581 | break; |
582 | } | 582 | } |
583 | } | 583 | } |
584 | 584 | ||
585 | return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1; | 585 | return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1; |
586 | } | 586 | } |
587 | 587 | ||
588 | /** | 588 | /** |
589 | * netdev_boot_setup_check - check boot time settings | 589 | * netdev_boot_setup_check - check boot time settings |
590 | * @dev: the netdevice | 590 | * @dev: the netdevice |
591 | * | 591 | * |
592 | * Check boot time settings for the device. | 592 | * Check boot time settings for the device. |
593 | * The found settings are set for the device to be used | 593 | * The found settings are set for the device to be used |
594 | * later in the device probing. | 594 | * later in the device probing. |
595 | * Returns 0 if no settings found, 1 if they are. | 595 | * Returns 0 if no settings found, 1 if they are. |
596 | */ | 596 | */ |
597 | int netdev_boot_setup_check(struct net_device *dev) | 597 | int netdev_boot_setup_check(struct net_device *dev) |
598 | { | 598 | { |
599 | struct netdev_boot_setup *s = dev_boot_setup; | 599 | struct netdev_boot_setup *s = dev_boot_setup; |
600 | int i; | 600 | int i; |
601 | 601 | ||
602 | for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { | 602 | for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { |
603 | if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && | 603 | if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && |
604 | !strcmp(dev->name, s[i].name)) { | 604 | !strcmp(dev->name, s[i].name)) { |
605 | dev->irq = s[i].map.irq; | 605 | dev->irq = s[i].map.irq; |
606 | dev->base_addr = s[i].map.base_addr; | 606 | dev->base_addr = s[i].map.base_addr; |
607 | dev->mem_start = s[i].map.mem_start; | 607 | dev->mem_start = s[i].map.mem_start; |
608 | dev->mem_end = s[i].map.mem_end; | 608 | dev->mem_end = s[i].map.mem_end; |
609 | return 1; | 609 | return 1; |
610 | } | 610 | } |
611 | } | 611 | } |
612 | return 0; | 612 | return 0; |
613 | } | 613 | } |
614 | EXPORT_SYMBOL(netdev_boot_setup_check); | 614 | EXPORT_SYMBOL(netdev_boot_setup_check); |
615 | 615 | ||
616 | 616 | ||
617 | /** | 617 | /** |
618 | * netdev_boot_base - get address from boot time settings | 618 | * netdev_boot_base - get address from boot time settings |
619 | * @prefix: prefix for network device | 619 | * @prefix: prefix for network device |
620 | * @unit: id for network device | 620 | * @unit: id for network device |
621 | * | 621 | * |
622 | * Check boot time settings for the base address of device. | 622 | * Check boot time settings for the base address of device. |
623 | * The found settings are set for the device to be used | 623 | * The found settings are set for the device to be used |
624 | * later in the device probing. | 624 | * later in the device probing. |
625 | * Returns 0 if no settings found. | 625 | * Returns 0 if no settings found. |
626 | */ | 626 | */ |
627 | unsigned long netdev_boot_base(const char *prefix, int unit) | 627 | unsigned long netdev_boot_base(const char *prefix, int unit) |
628 | { | 628 | { |
629 | const struct netdev_boot_setup *s = dev_boot_setup; | 629 | const struct netdev_boot_setup *s = dev_boot_setup; |
630 | char name[IFNAMSIZ]; | 630 | char name[IFNAMSIZ]; |
631 | int i; | 631 | int i; |
632 | 632 | ||
633 | sprintf(name, "%s%d", prefix, unit); | 633 | sprintf(name, "%s%d", prefix, unit); |
634 | 634 | ||
635 | /* | 635 | /* |
636 | * If device already registered then return base of 1 | 636 | * If device already registered then return base of 1 |
637 | * to indicate not to probe for this interface | 637 | * to indicate not to probe for this interface |
638 | */ | 638 | */ |
639 | if (__dev_get_by_name(&init_net, name)) | 639 | if (__dev_get_by_name(&init_net, name)) |
640 | return 1; | 640 | return 1; |
641 | 641 | ||
642 | for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) | 642 | for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) |
643 | if (!strcmp(name, s[i].name)) | 643 | if (!strcmp(name, s[i].name)) |
644 | return s[i].map.base_addr; | 644 | return s[i].map.base_addr; |
645 | return 0; | 645 | return 0; |
646 | } | 646 | } |
647 | 647 | ||
648 | /* | 648 | /* |
649 | * Saves at boot time configured settings for any netdevice. | 649 | * Saves at boot time configured settings for any netdevice. |
650 | */ | 650 | */ |
651 | int __init netdev_boot_setup(char *str) | 651 | int __init netdev_boot_setup(char *str) |
652 | { | 652 | { |
653 | int ints[5]; | 653 | int ints[5]; |
654 | struct ifmap map; | 654 | struct ifmap map; |
655 | 655 | ||
656 | str = get_options(str, ARRAY_SIZE(ints), ints); | 656 | str = get_options(str, ARRAY_SIZE(ints), ints); |
657 | if (!str || !*str) | 657 | if (!str || !*str) |
658 | return 0; | 658 | return 0; |
659 | 659 | ||
660 | /* Save settings */ | 660 | /* Save settings */ |
661 | memset(&map, 0, sizeof(map)); | 661 | memset(&map, 0, sizeof(map)); |
662 | if (ints[0] > 0) | 662 | if (ints[0] > 0) |
663 | map.irq = ints[1]; | 663 | map.irq = ints[1]; |
664 | if (ints[0] > 1) | 664 | if (ints[0] > 1) |
665 | map.base_addr = ints[2]; | 665 | map.base_addr = ints[2]; |
666 | if (ints[0] > 2) | 666 | if (ints[0] > 2) |
667 | map.mem_start = ints[3]; | 667 | map.mem_start = ints[3]; |
668 | if (ints[0] > 3) | 668 | if (ints[0] > 3) |
669 | map.mem_end = ints[4]; | 669 | map.mem_end = ints[4]; |
670 | 670 | ||
671 | /* Add new entry to the list */ | 671 | /* Add new entry to the list */ |
672 | return netdev_boot_setup_add(str, &map); | 672 | return netdev_boot_setup_add(str, &map); |
673 | } | 673 | } |
674 | 674 | ||
675 | __setup("netdev=", netdev_boot_setup); | 675 | __setup("netdev=", netdev_boot_setup); |
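So a command line such as netdev=9,0x300,0xd0000,0xd4000,eth0 (values purely illustrative) records irq 9, I/O base 0x300 and the given memory window for the device that later probes as eth0: get_options() consumes the leading integers in the order irq, base_addr, mem_start, mem_end, and whatever remains of the string is taken as the device name.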
676 | 676 | ||
677 | /******************************************************************************* | 677 | /******************************************************************************* |
678 | 678 | ||
679 | Device Interface Subroutines | 679 | Device Interface Subroutines |
680 | 680 | ||
681 | *******************************************************************************/ | 681 | *******************************************************************************/ |
682 | 682 | ||
683 | /** | 683 | /** |
684 | * __dev_get_by_name - find a device by its name | 684 | * __dev_get_by_name - find a device by its name |
685 | * @net: the applicable net namespace | 685 | * @net: the applicable net namespace |
686 | * @name: name to find | 686 | * @name: name to find |
687 | * | 687 | * |
688 | * Find an interface by name. Must be called under RTNL semaphore | 688 | * Find an interface by name. Must be called under RTNL semaphore |
689 | * or @dev_base_lock. If the name is found a pointer to the device | 689 | * or @dev_base_lock. If the name is found a pointer to the device |
690 | * is returned. If the name is not found then %NULL is returned. The | 690 | * is returned. If the name is not found then %NULL is returned. The |
691 | * reference counters are not incremented so the caller must be | 691 | * reference counters are not incremented so the caller must be |
692 | * careful with locks. | 692 | * careful with locks. |
693 | */ | 693 | */ |
694 | 694 | ||
695 | struct net_device *__dev_get_by_name(struct net *net, const char *name) | 695 | struct net_device *__dev_get_by_name(struct net *net, const char *name) |
696 | { | 696 | { |
697 | struct hlist_node *p; | 697 | struct hlist_node *p; |
698 | struct net_device *dev; | 698 | struct net_device *dev; |
699 | struct hlist_head *head = dev_name_hash(net, name); | 699 | struct hlist_head *head = dev_name_hash(net, name); |
700 | 700 | ||
701 | hlist_for_each_entry(dev, p, head, name_hlist) | 701 | hlist_for_each_entry(dev, p, head, name_hlist) |
702 | if (!strncmp(dev->name, name, IFNAMSIZ)) | 702 | if (!strncmp(dev->name, name, IFNAMSIZ)) |
703 | return dev; | 703 | return dev; |
704 | 704 | ||
705 | return NULL; | 705 | return NULL; |
706 | } | 706 | } |
707 | EXPORT_SYMBOL(__dev_get_by_name); | 707 | EXPORT_SYMBOL(__dev_get_by_name); |
708 | 708 | ||
709 | /** | 709 | /** |
710 | * dev_get_by_name_rcu - find a device by its name | 710 | * dev_get_by_name_rcu - find a device by its name |
711 | * @net: the applicable net namespace | 711 | * @net: the applicable net namespace |
712 | * @name: name to find | 712 | * @name: name to find |
713 | * | 713 | * |
714 | * Find an interface by name. | 714 | * Find an interface by name. |
715 | * If the name is found a pointer to the device is returned. | 715 | * If the name is found a pointer to the device is returned. |
716 | * If the name is not found then %NULL is returned. | 716 | * If the name is not found then %NULL is returned. |
717 | * The reference counters are not incremented so the caller must be | 717 | * The reference counters are not incremented so the caller must be |
718 | * careful with locks. The caller must hold RCU lock. | 718 | * careful with locks. The caller must hold RCU lock. |
719 | */ | 719 | */ |
720 | 720 | ||
721 | struct net_device *dev_get_by_name_rcu(struct net *net, const char *name) | 721 | struct net_device *dev_get_by_name_rcu(struct net *net, const char *name) |
722 | { | 722 | { |
723 | struct hlist_node *p; | 723 | struct hlist_node *p; |
724 | struct net_device *dev; | 724 | struct net_device *dev; |
725 | struct hlist_head *head = dev_name_hash(net, name); | 725 | struct hlist_head *head = dev_name_hash(net, name); |
726 | 726 | ||
727 | hlist_for_each_entry_rcu(dev, p, head, name_hlist) | 727 | hlist_for_each_entry_rcu(dev, p, head, name_hlist) |
728 | if (!strncmp(dev->name, name, IFNAMSIZ)) | 728 | if (!strncmp(dev->name, name, IFNAMSIZ)) |
729 | return dev; | 729 | return dev; |
730 | 730 | ||
731 | return NULL; | 731 | return NULL; |
732 | } | 732 | } |
733 | EXPORT_SYMBOL(dev_get_by_name_rcu); | 733 | EXPORT_SYMBOL(dev_get_by_name_rcu); |
734 | 734 | ||
735 | /** | 735 | /** |
736 | * dev_get_by_name - find a device by its name | 736 | * dev_get_by_name - find a device by its name |
737 | * @net: the applicable net namespace | 737 | * @net: the applicable net namespace |
738 | * @name: name to find | 738 | * @name: name to find |
739 | * | 739 | * |
740 | * Find an interface by name. This can be called from any | 740 | * Find an interface by name. This can be called from any |
741 | * context and does its own locking. The returned handle has | 741 | * context and does its own locking. The returned handle has |
742 | * the usage count incremented and the caller must use dev_put() to | 742 | * the usage count incremented and the caller must use dev_put() to |
743 | * release it when it is no longer needed. %NULL is returned if no | 743 | * release it when it is no longer needed. %NULL is returned if no |
744 | * matching device is found. | 744 | * matching device is found. |
745 | */ | 745 | */ |
746 | 746 | ||
747 | struct net_device *dev_get_by_name(struct net *net, const char *name) | 747 | struct net_device *dev_get_by_name(struct net *net, const char *name) |
748 | { | 748 | { |
749 | struct net_device *dev; | 749 | struct net_device *dev; |
750 | 750 | ||
751 | rcu_read_lock(); | 751 | rcu_read_lock(); |
752 | dev = dev_get_by_name_rcu(net, name); | 752 | dev = dev_get_by_name_rcu(net, name); |
753 | if (dev) | 753 | if (dev) |
754 | dev_hold(dev); | 754 | dev_hold(dev); |
755 | rcu_read_unlock(); | 755 | rcu_read_unlock(); |
756 | return dev; | 756 | return dev; |
757 | } | 757 | } |
758 | EXPORT_SYMBOL(dev_get_by_name); | 758 | EXPORT_SYMBOL(dev_get_by_name); |
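A minimal sketch contrasting the lookup flavours above (the plain __dev_get_by_name() variant additionally requires RTNL or dev_base_lock); the device name "eth0" is just an example:

static void demo_lookups(struct net *net)
{
	struct net_device *dev;

	/* Process context, no locks held: the returned device is pinned
	 * and must be released with dev_put(). */
	dev = dev_get_by_name(net, "eth0");
	if (dev) {
		pr_info("%s is ifindex %d\n", dev->name, dev->ifindex);
		dev_put(dev);
	}

	/* Under RCU: no reference is taken, so the pointer is only valid
	 * inside the read-side critical section. */
	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, "eth0");
	if (dev)
		pr_info("mtu %u\n", dev->mtu);
	rcu_read_unlock();
}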
759 | 759 | ||
760 | /** | 760 | /** |
761 | * __dev_get_by_index - find a device by its ifindex | 761 | * __dev_get_by_index - find a device by its ifindex |
762 | * @net: the applicable net namespace | 762 | * @net: the applicable net namespace |
763 | * @ifindex: index of device | 763 | * @ifindex: index of device |
764 | * | 764 | * |
765 | * Search for an interface by index. Returns %NULL if the device | 765 | * Search for an interface by index. Returns %NULL if the device |
766 | * is not found or a pointer to the device. The device has not | 766 | * is not found or a pointer to the device. The device has not |
767 | * had its reference counter increased so the caller must be careful | 767 | * had its reference counter increased so the caller must be careful |
768 | * about locking. The caller must hold either the RTNL semaphore | 768 | * about locking. The caller must hold either the RTNL semaphore |
769 | * or @dev_base_lock. | 769 | * or @dev_base_lock. |
770 | */ | 770 | */ |
771 | 771 | ||
772 | struct net_device *__dev_get_by_index(struct net *net, int ifindex) | 772 | struct net_device *__dev_get_by_index(struct net *net, int ifindex) |
773 | { | 773 | { |
774 | struct hlist_node *p; | 774 | struct hlist_node *p; |
775 | struct net_device *dev; | 775 | struct net_device *dev; |
776 | struct hlist_head *head = dev_index_hash(net, ifindex); | 776 | struct hlist_head *head = dev_index_hash(net, ifindex); |
777 | 777 | ||
778 | hlist_for_each_entry(dev, p, head, index_hlist) | 778 | hlist_for_each_entry(dev, p, head, index_hlist) |
779 | if (dev->ifindex == ifindex) | 779 | if (dev->ifindex == ifindex) |
780 | return dev; | 780 | return dev; |
781 | 781 | ||
782 | return NULL; | 782 | return NULL; |
783 | } | 783 | } |
784 | EXPORT_SYMBOL(__dev_get_by_index); | 784 | EXPORT_SYMBOL(__dev_get_by_index); |
785 | 785 | ||
786 | /** | 786 | /** |
787 | * dev_get_by_index_rcu - find a device by its ifindex | 787 | * dev_get_by_index_rcu - find a device by its ifindex |
788 | * @net: the applicable net namespace | 788 | * @net: the applicable net namespace |
789 | * @ifindex: index of device | 789 | * @ifindex: index of device |
790 | * | 790 | * |
791 | * Search for an interface by index. Returns %NULL if the device | 791 | * Search for an interface by index. Returns %NULL if the device |
792 | * is not found or a pointer to the device. The device has not | 792 | * is not found or a pointer to the device. The device has not |
793 | * had its reference counter increased so the caller must be careful | 793 | * had its reference counter increased so the caller must be careful |
794 | * about locking. The caller must hold RCU lock. | 794 | * about locking. The caller must hold RCU lock. |
795 | */ | 795 | */ |
796 | 796 | ||
797 | struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex) | 797 | struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex) |
798 | { | 798 | { |
799 | struct hlist_node *p; | 799 | struct hlist_node *p; |
800 | struct net_device *dev; | 800 | struct net_device *dev; |
801 | struct hlist_head *head = dev_index_hash(net, ifindex); | 801 | struct hlist_head *head = dev_index_hash(net, ifindex); |
802 | 802 | ||
803 | hlist_for_each_entry_rcu(dev, p, head, index_hlist) | 803 | hlist_for_each_entry_rcu(dev, p, head, index_hlist) |
804 | if (dev->ifindex == ifindex) | 804 | if (dev->ifindex == ifindex) |
805 | return dev; | 805 | return dev; |
806 | 806 | ||
807 | return NULL; | 807 | return NULL; |
808 | } | 808 | } |
809 | EXPORT_SYMBOL(dev_get_by_index_rcu); | 809 | EXPORT_SYMBOL(dev_get_by_index_rcu); |
810 | 810 | ||
811 | 811 | ||
812 | /** | 812 | /** |
813 | * dev_get_by_index - find a device by its ifindex | 813 | * dev_get_by_index - find a device by its ifindex |
814 | * @net: the applicable net namespace | 814 | * @net: the applicable net namespace |
815 | * @ifindex: index of device | 815 | * @ifindex: index of device |
816 | * | 816 | * |
817 | * Search for an interface by index. Returns NULL if the device | 817 | * Search for an interface by index. Returns NULL if the device |
818 | * is not found or a pointer to the device. The device returned has | 818 | * is not found or a pointer to the device. The device returned has |
819 | * had a reference added and the pointer is safe until the user calls | 819 | * had a reference added and the pointer is safe until the user calls |
820 | * dev_put to indicate they have finished with it. | 820 | * dev_put to indicate they have finished with it. |
821 | */ | 821 | */ |
822 | 822 | ||
823 | struct net_device *dev_get_by_index(struct net *net, int ifindex) | 823 | struct net_device *dev_get_by_index(struct net *net, int ifindex) |
824 | { | 824 | { |
825 | struct net_device *dev; | 825 | struct net_device *dev; |
826 | 826 | ||
827 | rcu_read_lock(); | 827 | rcu_read_lock(); |
828 | dev = dev_get_by_index_rcu(net, ifindex); | 828 | dev = dev_get_by_index_rcu(net, ifindex); |
829 | if (dev) | 829 | if (dev) |
830 | dev_hold(dev); | 830 | dev_hold(dev); |
831 | rcu_read_unlock(); | 831 | rcu_read_unlock(); |
832 | return dev; | 832 | return dev; |
833 | } | 833 | } |
834 | EXPORT_SYMBOL(dev_get_by_index); | 834 | EXPORT_SYMBOL(dev_get_by_index); |
835 | 835 | ||
836 | /** | 836 | /** |
837 | * dev_getbyhwaddr_rcu - find a device by its hardware address | 837 | * dev_getbyhwaddr_rcu - find a device by its hardware address |
838 | * @net: the applicable net namespace | 838 | * @net: the applicable net namespace |
839 | * @type: media type of device | 839 | * @type: media type of device |
840 | * @ha: hardware address | 840 | * @ha: hardware address |
841 | * | 841 | * |
842 | * Search for an interface by MAC address. Returns NULL if the device | 842 | * Search for an interface by MAC address. Returns NULL if the device |
843 | * is not found or a pointer to the device. | 843 | * is not found or a pointer to the device. |
844 | * The caller must hold RCU or RTNL. | 844 | * The caller must hold RCU or RTNL. |
845 | * The returned device has not had its ref count increased | 845 | * The returned device has not had its ref count increased |
846 | * and the caller must therefore be careful about locking | 846 | * and the caller must therefore be careful about locking |
847 | * | 847 | * |
848 | */ | 848 | */ |
849 | 849 | ||
850 | struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, | 850 | struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, |
851 | const char *ha) | 851 | const char *ha) |
852 | { | 852 | { |
853 | struct net_device *dev; | 853 | struct net_device *dev; |
854 | 854 | ||
855 | for_each_netdev_rcu(net, dev) | 855 | for_each_netdev_rcu(net, dev) |
856 | if (dev->type == type && | 856 | if (dev->type == type && |
857 | !memcmp(dev->dev_addr, ha, dev->addr_len)) | 857 | !memcmp(dev->dev_addr, ha, dev->addr_len)) |
858 | return dev; | 858 | return dev; |
859 | 859 | ||
860 | return NULL; | 860 | return NULL; |
861 | } | 861 | } |
862 | EXPORT_SYMBOL(dev_getbyhwaddr_rcu); | 862 | EXPORT_SYMBOL(dev_getbyhwaddr_rcu); |
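A sketch of the hardware-address lookup; because no reference is taken inside the RCU section, a caller that wants to keep the device must dev_hold() it before dropping the lock. The MAC address is made up:

static struct net_device *demo_find_by_mac(struct net *net)
{
	static const char mac[ETH_ALEN] = { 0x00, 0x16, 0x3e, 0x00, 0x00, 0x01 };
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_getbyhwaddr_rcu(net, ARPHRD_ETHER, mac);
	if (dev)
		dev_hold(dev);		/* pin it before leaving the RCU section */
	rcu_read_unlock();
	return dev;			/* caller must dev_put() when done */
}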
863 | 863 | ||
864 | struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) | 864 | struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) |
865 | { | 865 | { |
866 | struct net_device *dev; | 866 | struct net_device *dev; |
867 | 867 | ||
868 | ASSERT_RTNL(); | 868 | ASSERT_RTNL(); |
869 | for_each_netdev(net, dev) | 869 | for_each_netdev(net, dev) |
870 | if (dev->type == type) | 870 | if (dev->type == type) |
871 | return dev; | 871 | return dev; |
872 | 872 | ||
873 | return NULL; | 873 | return NULL; |
874 | } | 874 | } |
875 | EXPORT_SYMBOL(__dev_getfirstbyhwtype); | 875 | EXPORT_SYMBOL(__dev_getfirstbyhwtype); |
876 | 876 | ||
struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev, *ret = NULL;

	rcu_read_lock();
	for_each_netdev_rcu(net, dev)
		if (dev->type == type) {
			dev_hold(dev);
			ret = dev;
			break;
		}
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL(dev_getfirstbyhwtype);
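
/* Example (illustrative sketch): unlike the _rcu lookups above,
 * dev_getfirstbyhwtype() takes a reference on the device it returns, so a
 * hypothetical caller must drop it with dev_put() when done:
 *
 *	struct net_device *dev = dev_getfirstbyhwtype(net, ARPHRD_LOOPBACK);
 *
 *	if (dev) {
 *		;	// use dev; the reference keeps it alive
 *		dev_put(dev);
 *	}
 */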

/**
 * dev_get_by_flags_rcu - find any device with given flags
 * @net: the applicable net namespace
 * @if_flags: IFF_* values
 * @mask: bitmask of bits in if_flags to check
 *
 * Search for any interface with the given flags. Returns a pointer to the
 * first matching device, or NULL if none is found. Must be called inside
 * rcu_read_lock(), and the result's refcount is unchanged.
 */

struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
					unsigned short mask)
{
	struct net_device *dev, *ret;

	ret = NULL;
	for_each_netdev_rcu(net, dev) {
		if (((dev->flags ^ if_flags) & mask) == 0) {
			ret = dev;
			break;
		}
	}
	return ret;
}
EXPORT_SYMBOL(dev_get_by_flags_rcu);
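
/* Example (illustrative sketch): a hypothetical caller looking for any
 * interface that is currently up masks on IFF_UP; the result is only valid
 * while the RCU read-side section is held:
 *
 *	rcu_read_lock();
 *	dev = dev_get_by_flags_rcu(net, IFF_UP, IFF_UP);
 *	if (dev)
 *		;	// use dev without sleeping
 *	rcu_read_unlock();
 */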

/**
 * dev_valid_name - check if name is okay for network device
 * @name: name string
 *
 * Network device names need to be valid file names to
 * allow sysfs to work. We also disallow any kind of
 * whitespace.
 */
bool dev_valid_name(const char *name)
{
	if (*name == '\0')
		return false;
	if (strlen(name) >= IFNAMSIZ)
		return false;
	if (!strcmp(name, ".") || !strcmp(name, ".."))
		return false;

	while (*name) {
		if (*name == '/' || isspace(*name))
			return false;
		name++;
	}
	return true;
}
EXPORT_SYMBOL(dev_valid_name);
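
/* Example (illustrative sketch): with the rules above, a few assumed
 * inputs behave as follows:
 *
 *	dev_valid_name("eth0")		-> true
 *	dev_valid_name("my uplink")	-> false  (contains whitespace)
 *	dev_valid_name("a/b")		-> false  (would break sysfs paths)
 *	dev_valid_name("..")		-> false
 *	dev_valid_name("")		-> false
 *	any name of IFNAMSIZ or more characters also returns false
 */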

/**
 * __dev_alloc_name - allocate a name for a device
 * @net: network namespace to allocate the device name in
 * @name: name format string
 * @buf:  scratch buffer and result name string
 *
 * Passed a format string - eg "lt%d" - it will try and find a suitable
 * id. It scans the list of devices to build up a free map, then chooses
 * the first empty slot. The caller must hold the dev_base or rtnl lock
 * while allocating the name and adding the device in order to avoid
 * duplicates.
 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 * Returns the number of the unit assigned or a negative errno code.
 */

static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
	int i = 0;
	const char *p;
	const int max_netdevices = 8*PAGE_SIZE;
	unsigned long *inuse;
	struct net_device *d;

	p = strnchr(name, IFNAMSIZ-1, '%');
	if (p) {
		/*
		 * Verify the string as this thing may have come from
		 * the user. There must be either one "%d" and no other "%"
		 * characters.
		 */
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		/* Use one page as a bit array of possible slots */
		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
		if (!inuse)
			return -ENOMEM;

		for_each_netdev(net, d) {
			if (!sscanf(d->name, name, &i))
				continue;
			if (i < 0 || i >= max_netdevices)
				continue;

			/* avoid cases where sscanf is not exact inverse of printf */
			snprintf(buf, IFNAMSIZ, name, i);
			if (!strncmp(buf, d->name, IFNAMSIZ))
				set_bit(i, inuse);
		}

		i = find_first_zero_bit(inuse, max_netdevices);
		free_page((unsigned long) inuse);
	}

	if (buf != name)
		snprintf(buf, IFNAMSIZ, name, i);
	if (!__dev_get_by_name(net, buf))
		return i;

	/* It is possible to run out of possible slots
	 * when the name is long and there isn't enough space left
	 * for the digits, or if all bits are used.
	 */
	return -ENFILE;
}

/**
 * dev_alloc_name - allocate a name for a device
 * @dev: device
 * @name: name format string
 *
 * Passed a format string - eg "lt%d" - it will try and find a suitable
 * id. It scans the list of devices to build up a free map, then chooses
 * the first empty slot. The caller must hold the dev_base or rtnl lock
 * while allocating the name and adding the device in order to avoid
 * duplicates.
 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 * Returns the number of the unit assigned or a negative errno code.
 */

int dev_alloc_name(struct net_device *dev, const char *name)
{
	char buf[IFNAMSIZ];
	struct net *net;
	int ret;

	BUG_ON(!dev_net(dev));
	net = dev_net(dev);
	ret = __dev_alloc_name(net, name, buf);
	if (ret >= 0)
		strlcpy(dev->name, buf, IFNAMSIZ);
	return ret;
}
EXPORT_SYMBOL(dev_alloc_name);
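
/* Example (illustrative sketch; "mydev%d" is an assumed format string): a
 * hypothetical driver that wants kernel-chosen numbering before calling
 * register_netdevice() under rtnl_lock might do:
 *
 *	err = dev_alloc_name(dev, "mydev%d");
 *	if (err < 0)
 *		goto out_free;
 *	// on success dev->name now holds e.g. "mydev0" and err is the
 *	// unit number that was assigned
 */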

static int dev_alloc_name_ns(struct net *net,
			     struct net_device *dev,
			     const char *name)
{
	char buf[IFNAMSIZ];
	int ret;

	ret = __dev_alloc_name(net, name, buf);
	if (ret >= 0)
		strlcpy(dev->name, buf, IFNAMSIZ);
	return ret;
}

static int dev_get_valid_name(struct net *net,
			      struct net_device *dev,
			      const char *name)
{
	BUG_ON(!net);

	if (!dev_valid_name(name))
		return -EINVAL;

	if (strchr(name, '%'))
		return dev_alloc_name_ns(net, dev, name);
	else if (__dev_get_by_name(net, name))
		return -EEXIST;
	else if (dev->name != name)
		strlcpy(dev->name, name, IFNAMSIZ);

	return 0;
}

/**
 * dev_change_name - change name of a device
 * @dev: device
 * @newname: name (or format string) must be at least IFNAMSIZ
 *
 * Change the name of a device. A format string such as "eth%d" can be
 * passed for wildcarding.
 */
int dev_change_name(struct net_device *dev, const char *newname)
{
	char oldname[IFNAMSIZ];
	int err = 0;
	int ret;
	struct net *net;

	ASSERT_RTNL();
	BUG_ON(!dev_net(dev));

	net = dev_net(dev);
	if (dev->flags & IFF_UP)
		return -EBUSY;

	write_seqcount_begin(&devnet_rename_seq);

	if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
		write_seqcount_end(&devnet_rename_seq);
		return 0;
	}

	memcpy(oldname, dev->name, IFNAMSIZ);

	err = dev_get_valid_name(net, dev, newname);
	if (err < 0) {
		write_seqcount_end(&devnet_rename_seq);
		return err;
	}

rollback:
	ret = device_rename(&dev->dev, dev->name);
	if (ret) {
		memcpy(dev->name, oldname, IFNAMSIZ);
		write_seqcount_end(&devnet_rename_seq);
		return ret;
	}

	write_seqcount_end(&devnet_rename_seq);

	write_lock_bh(&dev_base_lock);
	hlist_del_rcu(&dev->name_hlist);
	write_unlock_bh(&dev_base_lock);

	synchronize_rcu();

	write_lock_bh(&dev_base_lock);
	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
	write_unlock_bh(&dev_base_lock);

	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
	ret = notifier_to_errno(ret);

	if (ret) {
		/* err >= 0 after dev_alloc_name() or stores the first errno */
		if (err >= 0) {
			err = ret;
			write_seqcount_begin(&devnet_rename_seq);
			memcpy(dev->name, oldname, IFNAMSIZ);
			goto rollback;
		} else {
			pr_err("%s: name change rollback failed: %d\n",
			       dev->name, ret);
		}
	}

	return err;
}
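
/* Example (illustrative sketch; "backhaul%d" is an assumed new name):
 * dev_change_name() asserts RTNL and refuses to rename a running device,
 * so a hypothetical caller brings the interface down first:
 *
 *	rtnl_lock();
 *	dev_close(dev);				// IFF_UP devices get -EBUSY
 *	err = dev_change_name(dev, "backhaul%d");
 *	rtnl_unlock();
 */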

/**
 * dev_set_alias - change ifalias of a device
 * @dev: device
 * @alias: name up to IFALIASZ
 * @len: limit of bytes to copy from @alias
 *
 * Set the ifalias for a device.
 */
int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
{
	char *new_ifalias;

	ASSERT_RTNL();

	if (len >= IFALIASZ)
		return -EINVAL;

	if (!len) {
		kfree(dev->ifalias);
		dev->ifalias = NULL;
		return 0;
	}

	new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
	if (!new_ifalias)
		return -ENOMEM;
	dev->ifalias = new_ifalias;

	strlcpy(dev->ifalias, alias, len+1);
	return len;
}
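
/* Example (illustrative sketch; the label text is assumed): under
 * rtnl_lock a hypothetical caller labels an interface and later clears the
 * alias by passing a zero length:
 *
 *	ret = dev_set_alias(dev, "uplink to core switch", 21);
 *	...
 *	ret = dev_set_alias(dev, NULL, 0);	// frees and clears the alias
 */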


/**
 * netdev_features_change - device changes features
 * @dev: device to cause notification
 *
 * Called to indicate a device has changed features.
 */
void netdev_features_change(struct net_device *dev)
{
	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);

/**
 * netdev_state_change - device changes state
 * @dev: device to cause notification
 *
 * Called to indicate a device has changed state. This function calls
 * the notifier chains for netdev_chain and sends a NEWLINK message
 * to the routing socket.
 */
void netdev_state_change(struct net_device *dev)
{
	if (dev->flags & IFF_UP) {
		call_netdevice_notifiers(NETDEV_CHANGE, dev);
		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
	}
}
EXPORT_SYMBOL(netdev_state_change);

/**
 * netdev_notify_peers - notify network peers about existence of @dev
 * @dev: network device
 *
 * Generate traffic such that interested network peers are aware of
 * @dev, such as by generating a gratuitous ARP. This may be used when
 * a device wants to inform the rest of the network about some sort of
 * reconfiguration such as a failover event or virtual machine
 * migration.
 */
void netdev_notify_peers(struct net_device *dev)
{
	rtnl_lock();
	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
	rtnl_unlock();
}
EXPORT_SYMBOL(netdev_notify_peers);
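
/* Example (illustrative sketch; struct example_priv and its members are
 * assumed): because netdev_notify_peers() takes rtnl_lock() itself, a
 * hypothetical bonding-style driver calls it from sleepable context
 * without RTNL held, e.g. from a workqueue after failing over:
 *
 *	static void example_failover_work(struct work_struct *work)
 *	{
 *		struct example_priv *priv =
 *			container_of(work, struct example_priv, failover_work);
 *
 *		netdev_notify_peers(priv->dev);	// e.g. gratuitous ARP
 *	}
 */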

/**
 * dev_load - load a network module
 * @net: the applicable net namespace
 * @name: name of interface
 *
 * If a network interface is not present and the process has suitable
 * privileges, this function loads the module. If module loading is not
 * available in this kernel then it becomes a nop.
 */

void dev_load(struct net *net, const char *name)
{
	struct net_device *dev;
	int no_module;

	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, name);
	rcu_read_unlock();

	no_module = !dev;
	if (no_module && capable(CAP_NET_ADMIN))
		no_module = request_module("netdev-%s", name);
	if (no_module && capable(CAP_SYS_MODULE)) {
		if (!request_module("%s", name))
			pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n",
				name);
	}
}
EXPORT_SYMBOL(dev_load);
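
/* Example (illustrative sketch; "myif0" and the owning module are
 * assumed): the preferred CAP_NET_ADMIN path above resolves the module
 * through a "netdev-" module alias, which a driver can advertise roughly
 * like this:
 *
 *	MODULE_ALIAS("netdev-myif0");
 *
 * so that dev_load()'s request_module("netdev-%s", name) can find it
 * without the caller needing CAP_SYS_MODULE.
 */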

static int __dev_open(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int ret;

	ASSERT_RTNL();

	if (!netif_device_present(dev))
		return -ENODEV;

	/* Block netpoll from trying to do any rx path servicing.
	 * If we don't do this there is a chance ndo_poll_controller
	 * or ndo_poll may be running while we open the device
	 */
	ret = netpoll_rx_disable(dev);
	if (ret)
		return ret;

	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
	ret = notifier_to_errno(ret);
	if (ret)
		return ret;

	set_bit(__LINK_STATE_START, &dev->state);

	if (ops->ndo_validate_addr)
		ret = ops->ndo_validate_addr(dev);

	if (!ret && ops->ndo_open)
		ret = ops->ndo_open(dev);

	netpoll_rx_enable(dev);

	if (ret)
		clear_bit(__LINK_STATE_START, &dev->state);
	else {
		dev->flags |= IFF_UP;
		net_dmaengine_get();
		dev_set_rx_mode(dev);
		dev_activate(dev);
		add_device_randomness(dev->dev_addr, dev->addr_len);
	}

	return ret;
}

/**
 * dev_open - prepare an interface for use.
 * @dev: device to open
 *
 * Takes a device from down to up state. The device's private open
 * function is invoked and then the multicast lists are loaded. Finally
 * the device is moved into the up state and a %NETDEV_UP message is
 * sent to the netdev notifier chain.
 *
 * Calling this function on an active interface is a nop. On a failure
 * a negative errno code is returned.
 */
int dev_open(struct net_device *dev)
{
	int ret;

	if (dev->flags & IFF_UP)
		return 0;

	ret = __dev_open(dev);
	if (ret < 0)
		return ret;

	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
	call_netdevice_notifiers(NETDEV_UP, dev);

	return ret;
}
EXPORT_SYMBOL(dev_open);
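
/* Example (illustrative sketch): __dev_open() asserts RTNL, so a
 * hypothetical in-kernel caller bringing an interface up brackets the
 * call with rtnl_lock()/rtnl_unlock():
 *
 *	rtnl_lock();
 *	err = dev_open(dev);		// no-op if the device is already up
 *	if (err)
 *		pr_warn("could not open %s: %d\n", dev->name, err);
 *	rtnl_unlock();
 */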

static int __dev_close_many(struct list_head *head)
{
	struct net_device *dev;

	ASSERT_RTNL();
	might_sleep();

	list_for_each_entry(dev, head, unreg_list) {
		call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

		clear_bit(__LINK_STATE_START, &dev->state);

		/* Synchronize to scheduled poll. We cannot touch poll list, it
		 * can be even on different cpu. So just clear netif_running().
		 *
		 * dev->stop() will invoke napi_disable() on all of its
		 * napi_struct instances on this device.
		 */
		smp_mb__after_clear_bit(); /* Commit netif_running(). */
	}

	dev_deactivate_many(head);

	list_for_each_entry(dev, head, unreg_list) {
		const struct net_device_ops *ops = dev->netdev_ops;

		/*
		 * Call the device specific close. This cannot fail.
		 * Only called if device is UP.
		 *
		 * We allow it to be called even after a DETACH hot-plug
		 * event.
		 */
		if (ops->ndo_stop)
			ops->ndo_stop(dev);

		dev->flags &= ~IFF_UP;
		net_dmaengine_put();
	}

	return 0;
}

static int __dev_close(struct net_device *dev)
{
	int retval;
	LIST_HEAD(single);

	/* Temporarily disable netpoll until the interface is down */
	retval = netpoll_rx_disable(dev);
	if (retval)
		return retval;

	list_add(&dev->unreg_list, &single);
	retval = __dev_close_many(&single);
	list_del(&single);

	netpoll_rx_enable(dev);
	return retval;
}

static int dev_close_many(struct list_head *head)
{
	struct net_device *dev, *tmp;
	LIST_HEAD(tmp_list);

	list_for_each_entry_safe(dev, tmp, head, unreg_list)
		if (!(dev->flags & IFF_UP))
			list_move(&dev->unreg_list, &tmp_list);

	__dev_close_many(head);

	list_for_each_entry(dev, head, unreg_list) {
		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
		call_netdevice_notifiers(NETDEV_DOWN, dev);
	}

	/* rollback_registered_many needs the complete original list */
	list_splice(&tmp_list, head);
	return 0;
}

/**
 * dev_close - shutdown an interface.
 * @dev: device to shutdown
 *
 * This function moves an active device into down state. A
 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 * chain.
 */
int dev_close(struct net_device *dev)
{
	int ret = 0;
	if (dev->flags & IFF_UP) {
		LIST_HEAD(single);

		/* Block netpoll rx while the interface is going down */
		ret = netpoll_rx_disable(dev);
		if (ret)
			return ret;

		list_add(&dev->unreg_list, &single);
		dev_close_many(&single);
		list_del(&single);

		netpoll_rx_enable(dev);
	}
	return ret;
}
EXPORT_SYMBOL(dev_close);


/**
 * dev_disable_lro - disable Large Receive Offload on a device
 * @dev: device
 *
 * Disable Large Receive Offload (LRO) on a net device. Must be
 * called under RTNL. This is needed if received packets may be
 * forwarded to another interface.
 */
void dev_disable_lro(struct net_device *dev)
{
	/*
	 * If we're trying to disable lro on a vlan device
	 * use the underlying physical device instead
	 */
	if (is_vlan_dev(dev))
		dev = vlan_dev_real_dev(dev);

	dev->wanted_features &= ~NETIF_F_LRO;
	netdev_update_features(dev);

	if (unlikely(dev->features & NETIF_F_LRO))
		netdev_WARN(dev, "failed to disable LRO!\n");
}
EXPORT_SYMBOL(dev_disable_lro);
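
/* Example (illustrative sketch; port_dev is an assumed lower device): a
 * hypothetical forwarding setup, such as enslaving a port to a bridge-like
 * device, disables LRO on the lower device while holding RTNL, since
 * LRO-merged frames must not be forwarded:
 *
 *	ASSERT_RTNL();
 *	dev_disable_lro(port_dev);
 */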


static int dev_boot_phase = 1;

/**
 * register_netdevice_notifier - register a network notifier block
 * @nb: notifier
 *
 * Register a notifier to be called when network device events occur.
 * The notifier passed is linked into the kernel structures and must
 * not be reused until it has been unregistered. A negative errno code
 * is returned on a failure.
 *
 * When registered, all registration and up events are replayed
 * to the new notifier to allow it to have a race-free
 * view of the network device list.
 */

int register_netdevice_notifier(struct notifier_block *nb)
{
	struct net_device *dev;
	struct net_device *last;
	struct net *net;
	int err;

	rtnl_lock();
	err = raw_notifier_chain_register(&netdev_chain, nb);
	if (err)
		goto unlock;
	if (dev_boot_phase)
		goto unlock;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
			err = notifier_to_errno(err);
			if (err)
				goto rollback;

			if (!(dev->flags & IFF_UP))
				continue;

			nb->notifier_call(nb, NETDEV_UP, dev);
		}
	}

unlock:
	rtnl_unlock();
	return err;

rollback:
	last = dev;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			if (dev == last)
				goto outroll;

			if (dev->flags & IFF_UP) {
				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
				nb->notifier_call(nb, NETDEV_DOWN, dev);
			}
			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
		}
	}

outroll:
	raw_notifier_chain_unregister(&netdev_chain, nb);
	goto unlock;
}
EXPORT_SYMBOL(register_netdevice_notifier);
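
/* Example (illustrative sketch; the example_* names are assumed): a
 * hypothetical subsystem watching for interfaces coming and going could
 * register a notifier block like this. Note that in this tree the
 * notifier's data pointer is the struct net_device itself:
 *
 *	static int example_netdev_event(struct notifier_block *nb,
 *					unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		switch (event) {
 *		case NETDEV_UP:
 *			pr_info("%s is up\n", dev->name);
 *			break;
 *		case NETDEV_UNREGISTER:
 *			pr_info("%s is going away\n", dev->name);
 *			break;
 *		}
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block example_netdev_nb = {
 *		.notifier_call = example_netdev_event,
 *	};
 *
 *	...
 *	err = register_netdevice_notifier(&example_netdev_nb);
 */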

/**
 * unregister_netdevice_notifier - unregister a network notifier block
 * @nb: notifier
 *
 * Unregister a notifier previously registered by
 * register_netdevice_notifier(). The notifier is unlinked from the
 * kernel structures and may then be reused. A negative errno code
 * is returned on a failure.
 *
 * After unregistering, unregister and down device events are synthesized
 * for all devices on the device list to the removed notifier to remove
 * the need for special case cleanup code.
 */

int unregister_netdevice_notifier(struct notifier_block *nb)
{
	struct net_device *dev;
	struct net *net;
	int err;

	rtnl_lock();
	err = raw_notifier_chain_unregister(&netdev_chain, nb);
	if (err)
		goto unlock;

	for_each_net(net) {
		for_each_netdev(net, dev) {
			if (dev->flags & IFF_UP) {
				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
				nb->notifier_call(nb, NETDEV_DOWN, dev);
			}
			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
		}
	}
unlock:
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(unregister_netdevice_notifier);

/**
 * call_netdevice_notifiers - call all network notifier blocks
 * @val: value passed unmodified to notifier function
 * @dev: net_device pointer passed unmodified to notifier function
 *
 * Call all network notifier blocks. Parameters and return value
 * are as for raw_notifier_call_chain().
 */

int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
	ASSERT_RTNL();
	return raw_notifier_call_chain(&netdev_chain, val, dev);
}
EXPORT_SYMBOL(call_netdevice_notifiers);

static struct static_key netstamp_needed __read_mostly;
#ifdef HAVE_JUMP_LABEL
/* We are not allowed to call static_key_slow_dec() from irq context
 * If net_disable_timestamp() is called from irq context, defer the
 * static_key_slow_dec() calls.
 */
static atomic_t netstamp_needed_deferred;
#endif

void net_enable_timestamp(void)
{
#ifdef HAVE_JUMP_LABEL
	int deferred = atomic_xchg(&netstamp_needed_deferred, 0);

	if (deferred) {
		while (--deferred)
			static_key_slow_dec(&netstamp_needed);
		return;
	}
#endif
	WARN_ON(in_interrupt());
	static_key_slow_inc(&netstamp_needed);
}
EXPORT_SYMBOL(net_enable_timestamp);

void net_disable_timestamp(void)
{
#ifdef HAVE_JUMP_LABEL
	if (in_interrupt()) {
		atomic_inc(&netstamp_needed_deferred);
		return;
	}
#endif
	static_key_slow_dec(&netstamp_needed);
}
EXPORT_SYMBOL(net_disable_timestamp);
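
/* Example (illustrative sketch): the static key above is effectively a
 * reference count, so a hypothetical consumer that needs skb receive
 * timestamps (the way packet sockets do) pairs the enable and disable
 * calls:
 *
 *	net_enable_timestamp();		// from process context, not irq
 *	...
 *	net_disable_timestamp();	// safe even from irq: the deferred
 *					// counter handles it under HAVE_JUMP_LABEL
 */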

static inline void net_timestamp_set(struct sk_buff *skb)
{
	skb->tstamp.tv64 = 0;
	if (static_key_false(&netstamp_needed))
		__net_timestamp(skb);
}

#define net_timestamp_check(COND, SKB)				\
	if (static_key_false(&netstamp_needed)) {		\
		if ((COND) && !(SKB)->tstamp.tv64)		\
			__net_timestamp(SKB);			\
	}							\

static int net_hwtstamp_validate(struct ifreq *ifr)
{
	struct hwtstamp_config cfg;
	enum hwtstamp_tx_types tx_type;
	enum hwtstamp_rx_filters rx_filter;
	int tx_type_valid = 0;
	int rx_filter_valid = 0;

	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
		return -EFAULT;

	if (cfg.flags) /* reserved for future extensions */
		return -EINVAL;

	tx_type = cfg.tx_type;
	rx_filter = cfg.rx_filter;

	switch (tx_type) {
	case HWTSTAMP_TX_OFF:
	case HWTSTAMP_TX_ON:
	case HWTSTAMP_TX_ONESTEP_SYNC:
		tx_type_valid = 1;
		break;
	}

	switch (rx_filter) {
	case HWTSTAMP_FILTER_NONE:
	case HWTSTAMP_FILTER_ALL:
	case HWTSTAMP_FILTER_SOME:
	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
	case HWTSTAMP_FILTER_PTP_V2_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
		rx_filter_valid = 1;
		break;
	}

	if (!tx_type_valid || !rx_filter_valid)
		return -ERANGE;

	return 0;
}
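
/* Example (illustrative sketch; "eth0" and the socket fd are assumed): the
 * struct hwtstamp_config validated above arrives via the SIOCSHWTSTAMP
 * ioctl. Roughly, a userspace program enabling hardware timestamping would
 * fill it in like this:
 *
 *	struct hwtstamp_config cfg = {
 *		.tx_type   = HWTSTAMP_TX_ON,
 *		.rx_filter = HWTSTAMP_FILTER_ALL,
 *	};
 *	struct ifreq ifr = { };
 *
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ifr.ifr_data = (void *)&cfg;
 *	if (ioctl(fd, SIOCSHWTSTAMP, &ifr) < 0)
 *		perror("SIOCSHWTSTAMP");
 */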

static inline bool is_skb_forwardable(struct net_device *dev,
				      struct sk_buff *skb)
{
	unsigned int len;

	if (!(dev->flags & IFF_UP))
		return false;

	len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
	if (skb->len <= len)
		return true;

	/* if TSO is enabled, we don't care about the length as the packet
	 * could be forwarded without being segmented before
	 */
	if (skb_is_gso(skb))
		return true;

	return false;
}

/**
 * dev_forward_skb - loopback an skb to another netif
 *
 * @dev: destination network device
 * @skb: buffer to forward
 *
 * return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_DROP	(packet was dropped, but freed)
 *
 * dev_forward_skb can be used for injecting an skb from the
 * start_xmit function of one device into the receive queue
 * of another device.
 *
 * The receiving device may be in another namespace, so
 * we have to clear all information in the skb that could
 * impact namespace isolation.
 */
int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
{
	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
		if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
			atomic_long_inc(&dev->rx_dropped);
			kfree_skb(skb);
			return NET_RX_DROP;
		}
	}

	skb_orphan(skb);
	nf_reset(skb);

	if (unlikely(!is_skb_forwardable(dev, skb))) {
		atomic_long_inc(&dev->rx_dropped);
		kfree_skb(skb);
		return NET_RX_DROP;
	}
	skb->skb_iif = 0;
	skb->dev = dev;
	skb_dst_drop(skb);
	skb->tstamp.tv64 = 0;
	skb->pkt_type = PACKET_HOST;
	skb->protocol = eth_type_trans(skb, dev);
	skb->mark = 0;
	secpath_reset(skb);
	nf_reset(skb);
	return netif_rx(skb);
}
EXPORT_SYMBOL_GPL(dev_forward_skb);
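
/* Example (illustrative sketch; example_xmit and example_get_peer are
 * assumed names): a veth-style pair is the typical user, handing skbs from
 * one device's ndo_start_xmit to its peer's receive path:
 *
 *	static netdev_tx_t example_xmit(struct sk_buff *skb,
 *					struct net_device *dev)
 *	{
 *		struct net_device *peer = example_get_peer(dev);
 *
 *		if (dev_forward_skb(peer, skb) == NET_RX_SUCCESS)
 *			dev->stats.tx_packets++;
 *		else
 *			dev->stats.tx_dropped++;	// skb already freed
 *		return NETDEV_TX_OK;
 *	}
 */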

static inline int deliver_skb(struct sk_buff *skb,
			      struct packet_type *pt_prev,
			      struct net_device *orig_dev)
{
	if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
		return -ENOMEM;
	atomic_inc(&skb->users);
	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}

static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
{
	if (!ptype->af_packet_priv || !skb->sk)
		return false;

	if (ptype->id_match)
		return ptype->id_match(ptype, skb->sk);
	else if ((struct sock *)ptype->af_packet_priv == skb->sk)
		return true;

	return false;
}
1791 | 1791 | ||
1792 | /* | 1792 | /* |
1793 | * Support routine. Sends outgoing frames to any network | 1793 | * Support routine. Sends outgoing frames to any network |
1794 | * taps currently in use. | 1794 | * taps currently in use. |
1795 | */ | 1795 | */ |
1796 | 1796 | ||
1797 | static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) | 1797 | static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) |
1798 | { | 1798 | { |
1799 | struct packet_type *ptype; | 1799 | struct packet_type *ptype; |
1800 | struct sk_buff *skb2 = NULL; | 1800 | struct sk_buff *skb2 = NULL; |
1801 | struct packet_type *pt_prev = NULL; | 1801 | struct packet_type *pt_prev = NULL; |
1802 | 1802 | ||
1803 | rcu_read_lock(); | 1803 | rcu_read_lock(); |
1804 | list_for_each_entry_rcu(ptype, &ptype_all, list) { | 1804 | list_for_each_entry_rcu(ptype, &ptype_all, list) { |
1805 | /* Never send packets back to the socket | 1805 | /* Never send packets back to the socket |
1806 | * they originated from - MvS (miquels@drinkel.ow.org) | 1806 | * they originated from - MvS (miquels@drinkel.ow.org) |
1807 | */ | 1807 | */ |
1808 | if ((ptype->dev == dev || !ptype->dev) && | 1808 | if ((ptype->dev == dev || !ptype->dev) && |
1809 | (!skb_loop_sk(ptype, skb))) { | 1809 | (!skb_loop_sk(ptype, skb))) { |
1810 | if (pt_prev) { | 1810 | if (pt_prev) { |
1811 | deliver_skb(skb2, pt_prev, skb->dev); | 1811 | deliver_skb(skb2, pt_prev, skb->dev); |
1812 | pt_prev = ptype; | 1812 | pt_prev = ptype; |
1813 | continue; | 1813 | continue; |
1814 | } | 1814 | } |
1815 | 1815 | ||
1816 | skb2 = skb_clone(skb, GFP_ATOMIC); | 1816 | skb2 = skb_clone(skb, GFP_ATOMIC); |
1817 | if (!skb2) | 1817 | if (!skb2) |
1818 | break; | 1818 | break; |
1819 | 1819 | ||
1820 | net_timestamp_set(skb2); | 1820 | net_timestamp_set(skb2); |
1821 | 1821 | ||
1822 | /* skb->nh should be correctly | 1822 | /* skb->nh should be correctly |
1823 | set by the sender, so that the second statement is | 1823 | set by the sender, so that the second statement is |
1824 | just protection against buggy protocols. | 1824 | just protection against buggy protocols. |
1825 | */ | 1825 | */ |
1826 | skb_reset_mac_header(skb2); | 1826 | skb_reset_mac_header(skb2); |
1827 | 1827 | ||
1828 | if (skb_network_header(skb2) < skb2->data || | 1828 | if (skb_network_header(skb2) < skb2->data || |
1829 | skb2->network_header > skb2->tail) { | 1829 | skb2->network_header > skb2->tail) { |
1830 | net_crit_ratelimited("protocol %04x is buggy, dev %s\n", | 1830 | net_crit_ratelimited("protocol %04x is buggy, dev %s\n", |
1831 | ntohs(skb2->protocol), | 1831 | ntohs(skb2->protocol), |
1832 | dev->name); | 1832 | dev->name); |
1833 | skb_reset_network_header(skb2); | 1833 | skb_reset_network_header(skb2); |
1834 | } | 1834 | } |
1835 | 1835 | ||
1836 | skb2->transport_header = skb2->network_header; | 1836 | skb2->transport_header = skb2->network_header; |
1837 | skb2->pkt_type = PACKET_OUTGOING; | 1837 | skb2->pkt_type = PACKET_OUTGOING; |
1838 | pt_prev = ptype; | 1838 | pt_prev = ptype; |
1839 | } | 1839 | } |
1840 | } | 1840 | } |
1841 | if (pt_prev) | 1841 | if (pt_prev) |
1842 | pt_prev->func(skb2, skb->dev, pt_prev, skb->dev); | 1842 | pt_prev->func(skb2, skb->dev, pt_prev, skb->dev); |
1843 | rcu_read_unlock(); | 1843 | rcu_read_unlock(); |
1844 | } | 1844 | } |
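dev_queue_xmit_nit() clones the frame once, lazily, and defers each delivery by one iteration so the final matching tap consumes the clone's base reference instead of taking an extra one. Below is a small user-space sketch of that defer-by-one idiom; the refcounted buf type, the tap names and tap_deliver() are all invented for illustration.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical refcounted buffer standing in for a cloned sk_buff. */
struct buf { int refs; char data[32]; };

static void buf_get(struct buf *b) { b->refs++; }
static void buf_put(struct buf *b) { if (--b->refs == 0) free(b); }

/* Each tap consumes one reference, as packet handlers do in the kernel. */
static void tap_deliver(struct buf *b, const char *name)
{
    printf("%s saw \"%s\"\n", name, b->data);
    buf_put(b);
}

int main(void)
{
    const char *taps[] = { "tcpdump", "lldpd", "monitor" };
    size_t ntaps = sizeof(taps) / sizeof(taps[0]);
    struct buf *clone = NULL;
    const char *prev = NULL;               /* plays the role of pt_prev */

    for (size_t i = 0; i < ntaps; i++) {
        if (prev) {
            buf_get(clone);                /* like deliver_skb(): +1 ref */
            tap_deliver(clone, prev);
        } else {
            clone = malloc(sizeof(*clone)); /* clone made lazily, once */
            if (!clone)
                return 1;
            clone->refs = 1;
            strcpy(clone->data, "outgoing frame");
        }
        prev = taps[i];
    }
    if (prev)
        tap_deliver(clone, prev);          /* last tap consumes the base reference */
    return 0;
}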
1845 | 1845 | ||
1846 | /** | 1846 | /** |
1847 | * netif_setup_tc - Handle tc mappings on real_num_tx_queues change | 1847 | * netif_setup_tc - Handle tc mappings on real_num_tx_queues change |
1848 | * @dev: Network device | 1848 | * @dev: Network device |
1849 | * @txq: number of queues available | 1849 | * @txq: number of queues available |
1850 | * | 1850 | * |
1851 | * If real_num_tx_queues is changed the tc mappings may no longer be | 1851 | * If real_num_tx_queues is changed the tc mappings may no longer be |
1852 | * valid. To resolve this, verify the tc mapping remains valid and, if | 1852 | * valid. To resolve this, verify the tc mapping remains valid and, if |
1853 | * not, NULL the mapping. With no priorities mapping to this | 1853 | * not, NULL the mapping. With no priorities mapping to this |
1854 | * offset/count pair it will no longer be used. In the worst case, if TC0 | 1854 | * offset/count pair it will no longer be used. In the worst case, if TC0 |
1855 | * is invalid, nothing can be done, so disable priority mappings. It is | 1855 | * is invalid, nothing can be done, so disable priority mappings. It is |
1856 | * expected that drivers will fix this mapping if they can before | 1856 | * expected that drivers will fix this mapping if they can before |
1857 | * calling netif_set_real_num_tx_queues. | 1857 | * calling netif_set_real_num_tx_queues. |
1858 | */ | 1858 | */ |
1859 | static void netif_setup_tc(struct net_device *dev, unsigned int txq) | 1859 | static void netif_setup_tc(struct net_device *dev, unsigned int txq) |
1860 | { | 1860 | { |
1861 | int i; | 1861 | int i; |
1862 | struct netdev_tc_txq *tc = &dev->tc_to_txq[0]; | 1862 | struct netdev_tc_txq *tc = &dev->tc_to_txq[0]; |
1863 | 1863 | ||
1864 | /* If TC0 is invalidated disable TC mapping */ | 1864 | /* If TC0 is invalidated disable TC mapping */ |
1865 | if (tc->offset + tc->count > txq) { | 1865 | if (tc->offset + tc->count > txq) { |
1866 | pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n"); | 1866 | pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n"); |
1867 | dev->num_tc = 0; | 1867 | dev->num_tc = 0; |
1868 | return; | 1868 | return; |
1869 | } | 1869 | } |
1870 | 1870 | ||
1871 | /* Invalidated prio to tc mappings set to TC0 */ | 1871 | /* Invalidated prio to tc mappings set to TC0 */ |
1872 | for (i = 1; i < TC_BITMASK + 1; i++) { | 1872 | for (i = 1; i < TC_BITMASK + 1; i++) { |
1873 | int q = netdev_get_prio_tc_map(dev, i); | 1873 | int q = netdev_get_prio_tc_map(dev, i); |
1874 | 1874 | ||
1875 | tc = &dev->tc_to_txq[q]; | 1875 | tc = &dev->tc_to_txq[q]; |
1876 | if (tc->offset + tc->count > txq) { | 1876 | if (tc->offset + tc->count > txq) { |
1877 | pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n", | 1877 | pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n", |
1878 | i, q); | 1878 | i, q); |
1879 | netdev_set_prio_tc_map(dev, i, 0); | 1879 | netdev_set_prio_tc_map(dev, i, 0); |
1880 | } | 1880 | } |
1881 | } | 1881 | } |
1882 | } | 1882 | } |
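The policy the comment above describes fits in a few lines of ordinary C. The sketch below is an analogue under stated assumptions, not kernel code: NUM_TC, NUM_PRIO, the static tables and the return value are invented for the example, and the kernel mutates dev->num_tc rather than returning a count.

#include <stdio.h>

#define NUM_TC   4
#define NUM_PRIO 16

struct tc_txq { unsigned int offset, count; };

/* Hypothetical device state mirroring dev->tc_to_txq / the prio->tc map. */
static struct tc_txq tc_to_txq[NUM_TC] = {
    { 0, 4 }, { 4, 4 }, { 8, 4 }, { 12, 4 },
};
static unsigned int prio_tc_map[NUM_PRIO];

/* Same policy as netif_setup_tc(): if TC0 no longer fits in txq queues,
 * drop tc support entirely; otherwise remap priorities whose tc became
 * invalid back to TC0. Returns the number of traffic classes still usable. */
static int setup_tc(unsigned int txq)
{
    if (tc_to_txq[0].offset + tc_to_txq[0].count > txq)
        return 0;                          /* disable priority mappings */

    for (int i = 0; i < NUM_PRIO; i++) {
        struct tc_txq *tc = &tc_to_txq[prio_tc_map[i]];
        if (tc->offset + tc->count > txq)
            prio_tc_map[i] = 0;            /* invalid mapping, fall back to TC0 */
    }
    return NUM_TC;
}

int main(void)
{
    for (int i = 0; i < NUM_PRIO; i++)
        prio_tc_map[i] = i % NUM_TC;

    printf("txq=16 -> num_tc=%d\n", setup_tc(16)); /* all mappings stay valid */
    printf("txq=8  -> num_tc=%d\n", setup_tc(8));  /* TC2/TC3 users fall back to TC0 */
    printf("txq=2  -> num_tc=%d\n", setup_tc(2));  /* even TC0 invalid: tc disabled */
    return 0;
}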
1883 | 1883 | ||
1884 | #ifdef CONFIG_XPS | 1884 | #ifdef CONFIG_XPS |
1885 | static DEFINE_MUTEX(xps_map_mutex); | 1885 | static DEFINE_MUTEX(xps_map_mutex); |
1886 | #define xmap_dereference(P) \ | 1886 | #define xmap_dereference(P) \ |
1887 | rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) | 1887 | rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) |
1888 | 1888 | ||
1889 | static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps, | 1889 | static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps, |
1890 | int cpu, u16 index) | 1890 | int cpu, u16 index) |
1891 | { | 1891 | { |
1892 | struct xps_map *map = NULL; | 1892 | struct xps_map *map = NULL; |
1893 | int pos; | 1893 | int pos; |
1894 | 1894 | ||
1895 | if (dev_maps) | 1895 | if (dev_maps) |
1896 | map = xmap_dereference(dev_maps->cpu_map[cpu]); | 1896 | map = xmap_dereference(dev_maps->cpu_map[cpu]); |
1897 | 1897 | ||
1898 | for (pos = 0; map && pos < map->len; pos++) { | 1898 | for (pos = 0; map && pos < map->len; pos++) { |
1899 | if (map->queues[pos] == index) { | 1899 | if (map->queues[pos] == index) { |
1900 | if (map->len > 1) { | 1900 | if (map->len > 1) { |
1901 | map->queues[pos] = map->queues[--map->len]; | 1901 | map->queues[pos] = map->queues[--map->len]; |
1902 | } else { | 1902 | } else { |
1903 | RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL); | 1903 | RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL); |
1904 | kfree_rcu(map, rcu); | 1904 | kfree_rcu(map, rcu); |
1905 | map = NULL; | 1905 | map = NULL; |
1906 | } | 1906 | } |
1907 | break; | 1907 | break; |
1908 | } | 1908 | } |
1909 | } | 1909 | } |
1910 | 1910 | ||
1911 | return map; | 1911 | return map; |
1912 | } | 1912 | } |
1913 | 1913 | ||
1914 | static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index) | 1914 | static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index) |
1915 | { | 1915 | { |
1916 | struct xps_dev_maps *dev_maps; | 1916 | struct xps_dev_maps *dev_maps; |
1917 | int cpu, i; | 1917 | int cpu, i; |
1918 | bool active = false; | 1918 | bool active = false; |
1919 | 1919 | ||
1920 | mutex_lock(&xps_map_mutex); | 1920 | mutex_lock(&xps_map_mutex); |
1921 | dev_maps = xmap_dereference(dev->xps_maps); | 1921 | dev_maps = xmap_dereference(dev->xps_maps); |
1922 | 1922 | ||
1923 | if (!dev_maps) | 1923 | if (!dev_maps) |
1924 | goto out_no_maps; | 1924 | goto out_no_maps; |
1925 | 1925 | ||
1926 | for_each_possible_cpu(cpu) { | 1926 | for_each_possible_cpu(cpu) { |
1927 | for (i = index; i < dev->num_tx_queues; i++) { | 1927 | for (i = index; i < dev->num_tx_queues; i++) { |
1928 | if (!remove_xps_queue(dev_maps, cpu, i)) | 1928 | if (!remove_xps_queue(dev_maps, cpu, i)) |
1929 | break; | 1929 | break; |
1930 | } | 1930 | } |
1931 | if (i == dev->num_tx_queues) | 1931 | if (i == dev->num_tx_queues) |
1932 | active = true; | 1932 | active = true; |
1933 | } | 1933 | } |
1934 | 1934 | ||
1935 | if (!active) { | 1935 | if (!active) { |
1936 | RCU_INIT_POINTER(dev->xps_maps, NULL); | 1936 | RCU_INIT_POINTER(dev->xps_maps, NULL); |
1937 | kfree_rcu(dev_maps, rcu); | 1937 | kfree_rcu(dev_maps, rcu); |
1938 | } | 1938 | } |
1939 | 1939 | ||
1940 | for (i = index; i < dev->num_tx_queues; i++) | 1940 | for (i = index; i < dev->num_tx_queues; i++) |
1941 | netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i), | 1941 | netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i), |
1942 | NUMA_NO_NODE); | 1942 | NUMA_NO_NODE); |
1943 | 1943 | ||
1944 | out_no_maps: | 1944 | out_no_maps: |
1945 | mutex_unlock(&xps_map_mutex); | 1945 | mutex_unlock(&xps_map_mutex); |
1946 | } | 1946 | } |
1947 | 1947 | ||
1948 | static struct xps_map *expand_xps_map(struct xps_map *map, | 1948 | static struct xps_map *expand_xps_map(struct xps_map *map, |
1949 | int cpu, u16 index) | 1949 | int cpu, u16 index) |
1950 | { | 1950 | { |
1951 | struct xps_map *new_map; | 1951 | struct xps_map *new_map; |
1952 | int alloc_len = XPS_MIN_MAP_ALLOC; | 1952 | int alloc_len = XPS_MIN_MAP_ALLOC; |
1953 | int i, pos; | 1953 | int i, pos; |
1954 | 1954 | ||
1955 | for (pos = 0; map && pos < map->len; pos++) { | 1955 | for (pos = 0; map && pos < map->len; pos++) { |
1956 | if (map->queues[pos] != index) | 1956 | if (map->queues[pos] != index) |
1957 | continue; | 1957 | continue; |
1958 | return map; | 1958 | return map; |
1959 | } | 1959 | } |
1960 | 1960 | ||
1961 | /* Need to add queue to this CPU's existing map */ | 1961 | /* Need to add queue to this CPU's existing map */ |
1962 | if (map) { | 1962 | if (map) { |
1963 | if (pos < map->alloc_len) | 1963 | if (pos < map->alloc_len) |
1964 | return map; | 1964 | return map; |
1965 | 1965 | ||
1966 | alloc_len = map->alloc_len * 2; | 1966 | alloc_len = map->alloc_len * 2; |
1967 | } | 1967 | } |
1968 | 1968 | ||
1969 | /* Need to allocate new map to store queue on this CPU's map */ | 1969 | /* Need to allocate new map to store queue on this CPU's map */ |
1970 | new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL, | 1970 | new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL, |
1971 | cpu_to_node(cpu)); | 1971 | cpu_to_node(cpu)); |
1972 | if (!new_map) | 1972 | if (!new_map) |
1973 | return NULL; | 1973 | return NULL; |
1974 | 1974 | ||
1975 | for (i = 0; i < pos; i++) | 1975 | for (i = 0; i < pos; i++) |
1976 | new_map->queues[i] = map->queues[i]; | 1976 | new_map->queues[i] = map->queues[i]; |
1977 | new_map->alloc_len = alloc_len; | 1977 | new_map->alloc_len = alloc_len; |
1978 | new_map->len = pos; | 1978 | new_map->len = pos; |
1979 | 1979 | ||
1980 | return new_map; | 1980 | return new_map; |
1981 | } | 1981 | } |
1982 | 1982 | ||
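Taken together, expand_xps_map() and remove_xps_queue() implement a small growable array of queue indices: growth doubles the allocation, and removal swaps in the last element for O(1) unordered deletion. The stand-alone sketch below folds the caller's insert step into the grow helper and uses an invented XPS_MIN_MAP_ALLOC of 1, so it is an approximation of the idea rather than the kernel routines.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define XPS_MIN_MAP_ALLOC 1   /* assumption: smallest allocation, as a stand-in */

/* User-space analogue of struct xps_map: a growable array of tx queue
 * indices attached to one CPU. */
struct qmap {
    unsigned int len, alloc_len;
    unsigned short queues[];
};

/* Like expand_xps_map() plus the insert: return the map unchanged if the
 * queue is already present, append if there is room, otherwise reallocate
 * with doubled capacity before appending. */
static struct qmap *qmap_add(struct qmap *map, unsigned short index)
{
    unsigned int alloc = XPS_MIN_MAP_ALLOC;
    struct qmap *new_map;

    if (map) {
        for (unsigned int i = 0; i < map->len; i++)
            if (map->queues[i] == index)
                return map;
        if (map->len < map->alloc_len) {
            map->queues[map->len++] = index;
            return map;
        }
        alloc = map->alloc_len * 2;
    }

    new_map = malloc(sizeof(*new_map) + alloc * sizeof(new_map->queues[0]));
    if (!new_map)
        return NULL;
    new_map->alloc_len = alloc;
    new_map->len = map ? map->len : 0;
    if (map)
        memcpy(new_map->queues, map->queues, map->len * sizeof(map->queues[0]));
    new_map->queues[new_map->len++] = index;
    free(map);
    return new_map;
}

/* Like remove_xps_queue(): O(1) unordered removal by swapping in the last
 * element; a real caller frees the map once it becomes empty. */
static void qmap_remove(struct qmap *map, unsigned short index)
{
    for (unsigned int i = 0; i < map->len; i++) {
        if (map->queues[i] == index) {
            map->queues[i] = map->queues[--map->len];
            return;
        }
    }
}

int main(void)
{
    struct qmap *map = NULL;

    for (unsigned short q = 0; q < 5; q++)
        map = qmap_add(map, q);
    qmap_remove(map, 2);

    printf("len=%u alloc=%u:", map->len, map->alloc_len);
    for (unsigned int i = 0; i < map->len; i++)
        printf(" %u", map->queues[i]);
    printf("\n");
    free(map);
    return 0;
}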
1983 | int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index) | 1983 | int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index) |
1984 | { | 1984 | { |
1985 | struct xps_dev_maps *dev_maps, *new_dev_maps = NULL; | 1985 | struct xps_dev_maps *dev_maps, *new_dev_maps = NULL; |
1986 | struct xps_map *map, *new_map; | 1986 | struct xps_map *map, *new_map; |
1987 | int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES); | 1987 | int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES); |
1988 | int cpu, numa_node_id = -2; | 1988 | int cpu, numa_node_id = -2; |
1989 | bool active = false; | 1989 | bool active = false; |
1990 | 1990 | ||
1991 | mutex_lock(&xps_map_mutex); | 1991 | mutex_lock(&xps_map_mutex); |
1992 | 1992 | ||
1993 | dev_maps = xmap_dereference(dev->xps_maps); | 1993 | dev_maps = xmap_dereference(dev->xps_maps); |
1994 | 1994 | ||
1995 | /* allocate memory for queue storage */ | 1995 | /* allocate memory for queue storage */ |
1996 | for_each_online_cpu(cpu) { | 1996 | for_each_online_cpu(cpu) { |
1997 | if (!cpumask_test_cpu(cpu, mask)) | 1997 | if (!cpumask_test_cpu(cpu, mask)) |
1998 | continue; | 1998 | continue; |
1999 | 1999 | ||
2000 | if (!new_dev_maps) | 2000 | if (!new_dev_maps) |
2001 | new_dev_maps = kzalloc(maps_sz, GFP_KERNEL); | 2001 | new_dev_maps = kzalloc(maps_sz, GFP_KERNEL); |
2002 | if (!new_dev_maps) | 2002 | if (!new_dev_maps) |
2003 | return -ENOMEM; | 2003 | return -ENOMEM; |
2004 | 2004 | ||
2005 | map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : | 2005 | map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : |
2006 | NULL; | 2006 | NULL; |
2007 | 2007 | ||
2008 | map = expand_xps_map(map, cpu, index); | 2008 | map = expand_xps_map(map, cpu, index); |
2009 | if (!map) | 2009 | if (!map) |
2010 | goto error; | 2010 | goto error; |
2011 | 2011 | ||
2012 | RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); | 2012 | RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); |
2013 | } | 2013 | } |
2014 | 2014 | ||
2015 | if (!new_dev_maps) | 2015 | if (!new_dev_maps) |
2016 | goto out_no_new_maps; | 2016 | goto out_no_new_maps; |
2017 | 2017 | ||
2018 | for_each_possible_cpu(cpu) { | 2018 | for_each_possible_cpu(cpu) { |
2019 | if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) { | 2019 | if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) { |
2020 | /* add queue to CPU maps */ | 2020 | /* add queue to CPU maps */ |
2021 | int pos = 0; | 2021 | int pos = 0; |
2022 | 2022 | ||
2023 | map = xmap_dereference(new_dev_maps->cpu_map[cpu]); | 2023 | map = xmap_dereference(new_dev_maps->cpu_map[cpu]); |
2024 | while ((pos < map->len) && (map->queues[pos] != index)) | 2024 | while ((pos < map->len) && (map->queues[pos] != index)) |
2025 | pos++; | 2025 | pos++; |
2026 | 2026 | ||
2027 | if (pos == map->len) | 2027 | if (pos == map->len) |
2028 | map->queues[map->len++] = index; | 2028 | map->queues[map->len++] = index; |
2029 | #ifdef CONFIG_NUMA | 2029 | #ifdef CONFIG_NUMA |
2030 | if (numa_node_id == -2) | 2030 | if (numa_node_id == -2) |
2031 | numa_node_id = cpu_to_node(cpu); | 2031 | numa_node_id = cpu_to_node(cpu); |
2032 | else if (numa_node_id != cpu_to_node(cpu)) | 2032 | else if (numa_node_id != cpu_to_node(cpu)) |
2033 | numa_node_id = -1; | 2033 | numa_node_id = -1; |
2034 | #endif | 2034 | #endif |
2035 | } else if (dev_maps) { | 2035 | } else if (dev_maps) { |
2036 | /* fill in the new device map from the old device map */ | 2036 | /* fill in the new device map from the old device map */ |
2037 | map = xmap_dereference(dev_maps->cpu_map[cpu]); | 2037 | map = xmap_dereference(dev_maps->cpu_map[cpu]); |
2038 | RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); | 2038 | RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); |
2039 | } | 2039 | } |
2040 | 2040 | ||
2041 | } | 2041 | } |
2042 | 2042 | ||
2043 | rcu_assign_pointer(dev->xps_maps, new_dev_maps); | 2043 | rcu_assign_pointer(dev->xps_maps, new_dev_maps); |
2044 | 2044 | ||
2045 | /* Cleanup old maps */ | 2045 | /* Cleanup old maps */ |
2046 | if (dev_maps) { | 2046 | if (dev_maps) { |
2047 | for_each_possible_cpu(cpu) { | 2047 | for_each_possible_cpu(cpu) { |
2048 | new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); | 2048 | new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); |
2049 | map = xmap_dereference(dev_maps->cpu_map[cpu]); | 2049 | map = xmap_dereference(dev_maps->cpu_map[cpu]); |
2050 | if (map && map != new_map) | 2050 | if (map && map != new_map) |
2051 | kfree_rcu(map, rcu); | 2051 | kfree_rcu(map, rcu); |
2052 | } | 2052 | } |
2053 | 2053 | ||
2054 | kfree_rcu(dev_maps, rcu); | 2054 | kfree_rcu(dev_maps, rcu); |
2055 | } | 2055 | } |
2056 | 2056 | ||
2057 | dev_maps = new_dev_maps; | 2057 | dev_maps = new_dev_maps; |
2058 | active = true; | 2058 | active = true; |
2059 | 2059 | ||
2060 | out_no_new_maps: | 2060 | out_no_new_maps: |
2061 | /* update Tx queue numa node */ | 2061 | /* update Tx queue numa node */ |
2062 | netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index), | 2062 | netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index), |
2063 | (numa_node_id >= 0) ? numa_node_id : | 2063 | (numa_node_id >= 0) ? numa_node_id : |
2064 | NUMA_NO_NODE); | 2064 | NUMA_NO_NODE); |
2065 | 2065 | ||
2066 | if (!dev_maps) | 2066 | if (!dev_maps) |
2067 | goto out_no_maps; | 2067 | goto out_no_maps; |
2068 | 2068 | ||
2069 | /* removes queue from unused CPUs */ | 2069 | /* removes queue from unused CPUs */ |
2070 | for_each_possible_cpu(cpu) { | 2070 | for_each_possible_cpu(cpu) { |
2071 | if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) | 2071 | if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) |
2072 | continue; | 2072 | continue; |
2073 | 2073 | ||
2074 | if (remove_xps_queue(dev_maps, cpu, index)) | 2074 | if (remove_xps_queue(dev_maps, cpu, index)) |
2075 | active = true; | 2075 | active = true; |
2076 | } | 2076 | } |
2077 | 2077 | ||
2078 | /* free map if not active */ | 2078 | /* free map if not active */ |
2079 | if (!active) { | 2079 | if (!active) { |
2080 | RCU_INIT_POINTER(dev->xps_maps, NULL); | 2080 | RCU_INIT_POINTER(dev->xps_maps, NULL); |
2081 | kfree_rcu(dev_maps, rcu); | 2081 | kfree_rcu(dev_maps, rcu); |
2082 | } | 2082 | } |
2083 | 2083 | ||
2084 | out_no_maps: | 2084 | out_no_maps: |
2085 | mutex_unlock(&xps_map_mutex); | 2085 | mutex_unlock(&xps_map_mutex); |
2086 | 2086 | ||
2087 | return 0; | 2087 | return 0; |
2088 | error: | 2088 | error: |
2089 | /* remove any maps that we added */ | 2089 | /* remove any maps that we added */ |
2090 | for_each_possible_cpu(cpu) { | 2090 | for_each_possible_cpu(cpu) { |
2091 | new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); | 2091 | new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); |
2092 | map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : | 2092 | map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : |
2093 | NULL; | 2093 | NULL; |
2094 | if (new_map && new_map != map) | 2094 | if (new_map && new_map != map) |
2095 | kfree(new_map); | 2095 | kfree(new_map); |
2096 | } | 2096 | } |
2097 | 2097 | ||
2098 | mutex_unlock(&xps_map_mutex); | 2098 | mutex_unlock(&xps_map_mutex); |
2099 | 2099 | ||
2100 | kfree(new_dev_maps); | 2100 | kfree(new_dev_maps); |
2101 | return -ENOMEM; | 2101 | return -ENOMEM; |
2102 | } | 2102 | } |
2103 | EXPORT_SYMBOL(netif_set_xps_queue); | 2103 | EXPORT_SYMBOL(netif_set_xps_queue); |
2104 | 2104 | ||
2105 | #endif | 2105 | #endif |
2106 | /* | 2106 | /* |
2107 | * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues | 2107 | * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues |
2108 | * greater than real_num_tx_queues, stale skbs on the qdisc must be flushed. | 2108 | * greater than real_num_tx_queues, stale skbs on the qdisc must be flushed. |
2109 | */ | 2109 | */ |
2110 | int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) | 2110 | int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) |
2111 | { | 2111 | { |
2112 | int rc; | 2112 | int rc; |
2113 | 2113 | ||
2114 | if (txq < 1 || txq > dev->num_tx_queues) | 2114 | if (txq < 1 || txq > dev->num_tx_queues) |
2115 | return -EINVAL; | 2115 | return -EINVAL; |
2116 | 2116 | ||
2117 | if (dev->reg_state == NETREG_REGISTERED || | 2117 | if (dev->reg_state == NETREG_REGISTERED || |
2118 | dev->reg_state == NETREG_UNREGISTERING) { | 2118 | dev->reg_state == NETREG_UNREGISTERING) { |
2119 | ASSERT_RTNL(); | 2119 | ASSERT_RTNL(); |
2120 | 2120 | ||
2121 | rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, | 2121 | rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, |
2122 | txq); | 2122 | txq); |
2123 | if (rc) | 2123 | if (rc) |
2124 | return rc; | 2124 | return rc; |
2125 | 2125 | ||
2126 | if (dev->num_tc) | 2126 | if (dev->num_tc) |
2127 | netif_setup_tc(dev, txq); | 2127 | netif_setup_tc(dev, txq); |
2128 | 2128 | ||
2129 | if (txq < dev->real_num_tx_queues) { | 2129 | if (txq < dev->real_num_tx_queues) { |
2130 | qdisc_reset_all_tx_gt(dev, txq); | 2130 | qdisc_reset_all_tx_gt(dev, txq); |
2131 | #ifdef CONFIG_XPS | 2131 | #ifdef CONFIG_XPS |
2132 | netif_reset_xps_queues_gt(dev, txq); | 2132 | netif_reset_xps_queues_gt(dev, txq); |
2133 | #endif | 2133 | #endif |
2134 | } | 2134 | } |
2135 | } | 2135 | } |
2136 | 2136 | ||
2137 | dev->real_num_tx_queues = txq; | 2137 | dev->real_num_tx_queues = txq; |
2138 | return 0; | 2138 | return 0; |
2139 | } | 2139 | } |
2140 | EXPORT_SYMBOL(netif_set_real_num_tx_queues); | 2140 | EXPORT_SYMBOL(netif_set_real_num_tx_queues); |
2141 | 2141 | ||
2142 | #ifdef CONFIG_RPS | 2142 | #ifdef CONFIG_RPS |
2143 | /** | 2143 | /** |
2144 | * netif_set_real_num_rx_queues - set actual number of RX queues used | 2144 | * netif_set_real_num_rx_queues - set actual number of RX queues used |
2145 | * @dev: Network device | 2145 | * @dev: Network device |
2146 | * @rxq: Actual number of RX queues | 2146 | * @rxq: Actual number of RX queues |
2147 | * | 2147 | * |
2148 | * This must be called either with the rtnl_lock held or before | 2148 | * This must be called either with the rtnl_lock held or before |
2149 | * registration of the net device. Returns 0 on success, or a | 2149 | * registration of the net device. Returns 0 on success, or a |
2150 | * negative error code. If called before registration, it always | 2150 | * negative error code. If called before registration, it always |
2151 | * succeeds. | 2151 | * succeeds. |
2152 | */ | 2152 | */ |
2153 | int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq) | 2153 | int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq) |
2154 | { | 2154 | { |
2155 | int rc; | 2155 | int rc; |
2156 | 2156 | ||
2157 | if (rxq < 1 || rxq > dev->num_rx_queues) | 2157 | if (rxq < 1 || rxq > dev->num_rx_queues) |
2158 | return -EINVAL; | 2158 | return -EINVAL; |
2159 | 2159 | ||
2160 | if (dev->reg_state == NETREG_REGISTERED) { | 2160 | if (dev->reg_state == NETREG_REGISTERED) { |
2161 | ASSERT_RTNL(); | 2161 | ASSERT_RTNL(); |
2162 | 2162 | ||
2163 | rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues, | 2163 | rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues, |
2164 | rxq); | 2164 | rxq); |
2165 | if (rc) | 2165 | if (rc) |
2166 | return rc; | 2166 | return rc; |
2167 | } | 2167 | } |
2168 | 2168 | ||
2169 | dev->real_num_rx_queues = rxq; | 2169 | dev->real_num_rx_queues = rxq; |
2170 | return 0; | 2170 | return 0; |
2171 | } | 2171 | } |
2172 | EXPORT_SYMBOL(netif_set_real_num_rx_queues); | 2172 | EXPORT_SYMBOL(netif_set_real_num_rx_queues); |
2173 | #endif | 2173 | #endif |
2174 | 2174 | ||
2175 | /** | 2175 | /** |
2176 | * netif_get_num_default_rss_queues - default number of RSS queues | 2176 | * netif_get_num_default_rss_queues - default number of RSS queues |
2177 | * | 2177 | * |
2178 | * This routine should set an upper limit on the number of RSS queues | 2178 | * This routine should set an upper limit on the number of RSS queues |
2179 | * used by default by multiqueue devices. | 2179 | * used by default by multiqueue devices. |
2180 | */ | 2180 | */ |
2181 | int netif_get_num_default_rss_queues(void) | 2181 | int netif_get_num_default_rss_queues(void) |
2182 | { | 2182 | { |
2183 | return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus()); | 2183 | return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus()); |
2184 | } | 2184 | } |
2185 | EXPORT_SYMBOL(netif_get_num_default_rss_queues); | 2185 | EXPORT_SYMBOL(netif_get_num_default_rss_queues); |
2186 | 2186 | ||
2187 | static inline void __netif_reschedule(struct Qdisc *q) | 2187 | static inline void __netif_reschedule(struct Qdisc *q) |
2188 | { | 2188 | { |
2189 | struct softnet_data *sd; | 2189 | struct softnet_data *sd; |
2190 | unsigned long flags; | 2190 | unsigned long flags; |
2191 | 2191 | ||
2192 | local_irq_save(flags); | 2192 | local_irq_save(flags); |
2193 | sd = &__get_cpu_var(softnet_data); | 2193 | sd = &__get_cpu_var(softnet_data); |
2194 | q->next_sched = NULL; | 2194 | q->next_sched = NULL; |
2195 | *sd->output_queue_tailp = q; | 2195 | *sd->output_queue_tailp = q; |
2196 | sd->output_queue_tailp = &q->next_sched; | 2196 | sd->output_queue_tailp = &q->next_sched; |
2197 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | 2197 | raise_softirq_irqoff(NET_TX_SOFTIRQ); |
2198 | local_irq_restore(flags); | 2198 | local_irq_restore(flags); |
2199 | } | 2199 | } |
2200 | 2200 | ||
2201 | void __netif_schedule(struct Qdisc *q) | 2201 | void __netif_schedule(struct Qdisc *q) |
2202 | { | 2202 | { |
2203 | if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) | 2203 | if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) |
2204 | __netif_reschedule(q); | 2204 | __netif_reschedule(q); |
2205 | } | 2205 | } |
2206 | EXPORT_SYMBOL(__netif_schedule); | 2206 | EXPORT_SYMBOL(__netif_schedule); |
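__netif_reschedule() appends the qdisc to the per-CPU output queue with the pointer-to-pointer tail idiom: *tailp = q; then tailp = &q->next_sched. The stand-alone sketch below shows the same idiom with invented node/queue types; it is not the softnet_data machinery itself.

#include <stdio.h>

/* Stand-ins for a Qdisc and for softnet_data's output_queue fields. */
struct node { int id; struct node *next; };
struct queue { struct node *head; struct node **tailp; };

/* O(1) tail append via a pointer-to-pointer, the same idiom used with
 * output_queue_tailp in __netif_reschedule(). */
static void enqueue(struct queue *q, struct node *n)
{
    n->next = NULL;
    *q->tailp = n;
    q->tailp = &n->next;
}

int main(void)
{
    struct queue q;
    struct node nodes[3] = { { 1, NULL }, { 2, NULL }, { 3, NULL } };

    q.head = NULL;
    q.tailp = &q.head;              /* empty queue: tail pointer refers to head */

    for (int i = 0; i < 3; i++)
        enqueue(&q, &nodes[i]);

    for (struct node *n = q.head; n; n = n->next)
        printf("node %d\n", n->id);
    return 0;
}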
2207 | 2207 | ||
2208 | void dev_kfree_skb_irq(struct sk_buff *skb) | 2208 | void dev_kfree_skb_irq(struct sk_buff *skb) |
2209 | { | 2209 | { |
2210 | if (atomic_dec_and_test(&skb->users)) { | 2210 | if (atomic_dec_and_test(&skb->users)) { |
2211 | struct softnet_data *sd; | 2211 | struct softnet_data *sd; |
2212 | unsigned long flags; | 2212 | unsigned long flags; |
2213 | 2213 | ||
2214 | local_irq_save(flags); | 2214 | local_irq_save(flags); |
2215 | sd = &__get_cpu_var(softnet_data); | 2215 | sd = &__get_cpu_var(softnet_data); |
2216 | skb->next = sd->completion_queue; | 2216 | skb->next = sd->completion_queue; |
2217 | sd->completion_queue = skb; | 2217 | sd->completion_queue = skb; |
2218 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | 2218 | raise_softirq_irqoff(NET_TX_SOFTIRQ); |
2219 | local_irq_restore(flags); | 2219 | local_irq_restore(flags); |
2220 | } | 2220 | } |
2221 | } | 2221 | } |
2222 | EXPORT_SYMBOL(dev_kfree_skb_irq); | 2222 | EXPORT_SYMBOL(dev_kfree_skb_irq); |
2223 | 2223 | ||
2224 | void dev_kfree_skb_any(struct sk_buff *skb) | 2224 | void dev_kfree_skb_any(struct sk_buff *skb) |
2225 | { | 2225 | { |
2226 | if (in_irq() || irqs_disabled()) | 2226 | if (in_irq() || irqs_disabled()) |
2227 | dev_kfree_skb_irq(skb); | 2227 | dev_kfree_skb_irq(skb); |
2228 | else | 2228 | else |
2229 | dev_kfree_skb(skb); | 2229 | dev_kfree_skb(skb); |
2230 | } | 2230 | } |
2231 | EXPORT_SYMBOL(dev_kfree_skb_any); | 2231 | EXPORT_SYMBOL(dev_kfree_skb_any); |
2232 | 2232 | ||
2233 | 2233 | ||
2234 | /** | 2234 | /** |
2235 | * netif_device_detach - mark device as removed | 2235 | * netif_device_detach - mark device as removed |
2236 | * @dev: network device | 2236 | * @dev: network device |
2237 | * | 2237 | * |
2238 | * Mark device as removed from the system and therefore no longer available. | 2238 | * Mark device as removed from the system and therefore no longer available. |
2239 | */ | 2239 | */ |
2240 | void netif_device_detach(struct net_device *dev) | 2240 | void netif_device_detach(struct net_device *dev) |
2241 | { | 2241 | { |
2242 | if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && | 2242 | if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && |
2243 | netif_running(dev)) { | 2243 | netif_running(dev)) { |
2244 | netif_tx_stop_all_queues(dev); | 2244 | netif_tx_stop_all_queues(dev); |
2245 | } | 2245 | } |
2246 | } | 2246 | } |
2247 | EXPORT_SYMBOL(netif_device_detach); | 2247 | EXPORT_SYMBOL(netif_device_detach); |
2248 | 2248 | ||
2249 | /** | 2249 | /** |
2250 | * netif_device_attach - mark device as attached | 2250 | * netif_device_attach - mark device as attached |
2251 | * @dev: network device | 2251 | * @dev: network device |
2252 | * | 2252 | * |
2253 | * Mark device as attached to the system and restart if needed. | 2253 | * Mark device as attached to the system and restart if needed. |
2254 | */ | 2254 | */ |
2255 | void netif_device_attach(struct net_device *dev) | 2255 | void netif_device_attach(struct net_device *dev) |
2256 | { | 2256 | { |
2257 | if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && | 2257 | if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && |
2258 | netif_running(dev)) { | 2258 | netif_running(dev)) { |
2259 | netif_tx_wake_all_queues(dev); | 2259 | netif_tx_wake_all_queues(dev); |
2260 | __netdev_watchdog_up(dev); | 2260 | __netdev_watchdog_up(dev); |
2261 | } | 2261 | } |
2262 | } | 2262 | } |
2263 | EXPORT_SYMBOL(netif_device_attach); | 2263 | EXPORT_SYMBOL(netif_device_attach); |
2264 | 2264 | ||
2265 | static void skb_warn_bad_offload(const struct sk_buff *skb) | 2265 | static void skb_warn_bad_offload(const struct sk_buff *skb) |
2266 | { | 2266 | { |
2267 | static const netdev_features_t null_features = 0; | 2267 | static const netdev_features_t null_features = 0; |
2268 | struct net_device *dev = skb->dev; | 2268 | struct net_device *dev = skb->dev; |
2269 | const char *driver = ""; | 2269 | const char *driver = ""; |
2270 | 2270 | ||
2271 | if (dev && dev->dev.parent) | 2271 | if (dev && dev->dev.parent) |
2272 | driver = dev_driver_string(dev->dev.parent); | 2272 | driver = dev_driver_string(dev->dev.parent); |
2273 | 2273 | ||
2274 | WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d " | 2274 | WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d " |
2275 | "gso_type=%d ip_summed=%d\n", | 2275 | "gso_type=%d ip_summed=%d\n", |
2276 | driver, dev ? &dev->features : &null_features, | 2276 | driver, dev ? &dev->features : &null_features, |
2277 | skb->sk ? &skb->sk->sk_route_caps : &null_features, | 2277 | skb->sk ? &skb->sk->sk_route_caps : &null_features, |
2278 | skb->len, skb->data_len, skb_shinfo(skb)->gso_size, | 2278 | skb->len, skb->data_len, skb_shinfo(skb)->gso_size, |
2279 | skb_shinfo(skb)->gso_type, skb->ip_summed); | 2279 | skb_shinfo(skb)->gso_type, skb->ip_summed); |
2280 | } | 2280 | } |
2281 | 2281 | ||
2282 | /* | 2282 | /* |
2283 | * Invalidate hardware checksum when packet is to be mangled, and | 2283 | * Invalidate hardware checksum when packet is to be mangled, and |
2284 | * complete checksum manually on outgoing path. | 2284 | * complete checksum manually on outgoing path. |
2285 | */ | 2285 | */ |
2286 | int skb_checksum_help(struct sk_buff *skb) | 2286 | int skb_checksum_help(struct sk_buff *skb) |
2287 | { | 2287 | { |
2288 | __wsum csum; | 2288 | __wsum csum; |
2289 | int ret = 0, offset; | 2289 | int ret = 0, offset; |
2290 | 2290 | ||
2291 | if (skb->ip_summed == CHECKSUM_COMPLETE) | 2291 | if (skb->ip_summed == CHECKSUM_COMPLETE) |
2292 | goto out_set_summed; | 2292 | goto out_set_summed; |
2293 | 2293 | ||
2294 | if (unlikely(skb_shinfo(skb)->gso_size)) { | 2294 | if (unlikely(skb_shinfo(skb)->gso_size)) { |
2295 | skb_warn_bad_offload(skb); | 2295 | skb_warn_bad_offload(skb); |
2296 | return -EINVAL; | 2296 | return -EINVAL; |
2297 | } | 2297 | } |
2298 | 2298 | ||
2299 | /* Before computing a checksum, we should make sure no frag could | 2299 | /* Before computing a checksum, we should make sure no frag could |
2300 | * be modified by an external entity: checksum could be wrong. | 2300 | * be modified by an external entity: checksum could be wrong. |
2301 | */ | 2301 | */ |
2302 | if (skb_has_shared_frag(skb)) { | 2302 | if (skb_has_shared_frag(skb)) { |
2303 | ret = __skb_linearize(skb); | 2303 | ret = __skb_linearize(skb); |
2304 | if (ret) | 2304 | if (ret) |
2305 | goto out; | 2305 | goto out; |
2306 | } | 2306 | } |
2307 | 2307 | ||
2308 | offset = skb_checksum_start_offset(skb); | 2308 | offset = skb_checksum_start_offset(skb); |
2309 | BUG_ON(offset >= skb_headlen(skb)); | 2309 | BUG_ON(offset >= skb_headlen(skb)); |
2310 | csum = skb_checksum(skb, offset, skb->len - offset, 0); | 2310 | csum = skb_checksum(skb, offset, skb->len - offset, 0); |
2311 | 2311 | ||
2312 | offset += skb->csum_offset; | 2312 | offset += skb->csum_offset; |
2313 | BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb)); | 2313 | BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb)); |
2314 | 2314 | ||
2315 | if (skb_cloned(skb) && | 2315 | if (skb_cloned(skb) && |
2316 | !skb_clone_writable(skb, offset + sizeof(__sum16))) { | 2316 | !skb_clone_writable(skb, offset + sizeof(__sum16))) { |
2317 | ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); | 2317 | ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); |
2318 | if (ret) | 2318 | if (ret) |
2319 | goto out; | 2319 | goto out; |
2320 | } | 2320 | } |
2321 | 2321 | ||
2322 | *(__sum16 *)(skb->data + offset) = csum_fold(csum); | 2322 | *(__sum16 *)(skb->data + offset) = csum_fold(csum); |
2323 | out_set_summed: | 2323 | out_set_summed: |
2324 | skb->ip_summed = CHECKSUM_NONE; | 2324 | skb->ip_summed = CHECKSUM_NONE; |
2325 | out: | 2325 | out: |
2326 | return ret; | 2326 | return ret; |
2327 | } | 2327 | } |
2328 | EXPORT_SYMBOL(skb_checksum_help); | 2328 | EXPORT_SYMBOL(skb_checksum_help); |
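What skb_checksum() followed by csum_fold() computes is the RFC 1071 Internet checksum over the bytes starting at the checksum start offset, with the folded result written back at csum_offset. The sketch below is user-space arithmetic only, under the assumption of a plain contiguous buffer; it ignores the fragments, __wsum typing and byte-order details the kernel helpers take care of.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Fold a 32-bit one's-complement sum to 16 bits and complement it,
 * like csum_fold(). Two folds are always enough to absorb the carries. */
static uint16_t csum_fold32(uint32_t sum)
{
    sum = (sum & 0xffff) + (sum >> 16);
    sum = (sum & 0xffff) + (sum >> 16);
    return (uint16_t)~sum;
}

/* RFC 1071 Internet checksum over a byte buffer. */
static uint16_t inet_checksum(const uint8_t *data, size_t len)
{
    uint32_t sum = 0;

    while (len > 1) {
        sum += (uint32_t)data[0] << 8 | data[1];
        data += 2;
        len -= 2;
    }
    if (len)                             /* odd trailing byte */
        sum += (uint32_t)data[0] << 8;
    return csum_fold32(sum);
}

int main(void)
{
    /* Illustrative payload; a real caller checksums from
     * skb_checksum_start_offset() to the end of the packet. */
    uint8_t payload[] = { 0x45, 0x00, 0x00, 0x1c, 0xde, 0xad, 0xbe, 0xef };

    printf("checksum = 0x%04x\n", inet_checksum(payload, sizeof(payload)));
    return 0;
}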
2329 | 2329 | ||
2330 | /* openvswitch calls this on rx path, so we need a different check. | ||
2331 | */ | ||
2332 | static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) | ||
2333 | { | ||
2334 | if (tx_path) | ||
2335 | return skb->ip_summed != CHECKSUM_PARTIAL; | ||
2336 | else | ||
2337 | return skb->ip_summed == CHECKSUM_NONE; | ||
2338 | } | ||
2339 | |||
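This helper is the substance of the commit: on transmit, anything other than CHECKSUM_PARTIAL still needs checksum attention before segmenting, whereas on the openvswitch receive path only CHECKSUM_NONE does, because UNNECESSARY and COMPLETE describe what the NIC already verified or summed. A small user-space mirror of the predicate (the enum values match skbuff.h of this era) makes the difference visible; it is a sketch for illustration, not the kernel function.

#include <stdbool.h>
#include <stdio.h>

/* ip_summed values as defined in skbuff.h. */
enum { CHECKSUM_NONE, CHECKSUM_UNNECESSARY, CHECKSUM_COMPLETE, CHECKSUM_PARTIAL };

/* Mirror of skb_needs_check():
 *  - tx path: anything not CHECKSUM_PARTIAL still needs its checksum
 *    completed/verified before segmentation.
 *  - rx path: only CHECKSUM_NONE means the checksum has not been looked
 *    at; UNNECESSARY/COMPLETE are fine as received. */
static bool needs_check(int ip_summed, bool tx_path)
{
    if (tx_path)
        return ip_summed != CHECKSUM_PARTIAL;
    return ip_summed == CHECKSUM_NONE;
}

int main(void)
{
    printf("tx, COMPLETE -> %d (would warn, same as before this patch)\n",
           needs_check(CHECKSUM_COMPLETE, true));
    printf("rx, COMPLETE -> %d (no more bogus warning on the ovs rx path)\n",
           needs_check(CHECKSUM_COMPLETE, false));
    printf("rx, NONE     -> %d\n", needs_check(CHECKSUM_NONE, false));
    return 0;
}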
2330 | /** | 2340 | /** |
2331 | * skb_gso_segment - Perform segmentation on skb. | 2341 | * __skb_gso_segment - Perform segmentation on skb. |
2332 | * @skb: buffer to segment | 2342 | * @skb: buffer to segment |
2333 | * @features: features for the output path (see dev->features) | 2343 | * @features: features for the output path (see dev->features) |
2344 | * @tx_path: whether it is called in TX path | ||
2334 | * | 2345 | * |
2335 | * This function segments the given skb and returns a list of segments. | 2346 | * This function segments the given skb and returns a list of segments. |
2336 | * | 2347 | * |
2337 | * It may return NULL if the skb requires no segmentation. This is | 2348 | * It may return NULL if the skb requires no segmentation. This is |
2338 | * only possible when GSO is used for verifying header integrity. | 2349 | * only possible when GSO is used for verifying header integrity. |
2339 | */ | 2350 | */ |
2340 | struct sk_buff *skb_gso_segment(struct sk_buff *skb, | 2351 | struct sk_buff *__skb_gso_segment(struct sk_buff *skb, |
2341 | netdev_features_t features) | 2352 | netdev_features_t features, bool tx_path) |
2342 | { | 2353 | { |
2343 | struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); | 2354 | struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); |
2344 | struct packet_offload *ptype; | 2355 | struct packet_offload *ptype; |
2345 | __be16 type = skb->protocol; | 2356 | __be16 type = skb->protocol; |
2346 | int vlan_depth = ETH_HLEN; | 2357 | int vlan_depth = ETH_HLEN; |
2347 | int err; | 2358 | int err; |
2348 | 2359 | ||
2349 | while (type == htons(ETH_P_8021Q)) { | 2360 | while (type == htons(ETH_P_8021Q)) { |
2350 | struct vlan_hdr *vh; | 2361 | struct vlan_hdr *vh; |
2351 | 2362 | ||
2352 | if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) | 2363 | if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) |
2353 | return ERR_PTR(-EINVAL); | 2364 | return ERR_PTR(-EINVAL); |
2354 | 2365 | ||
2355 | vh = (struct vlan_hdr *)(skb->data + vlan_depth); | 2366 | vh = (struct vlan_hdr *)(skb->data + vlan_depth); |
2356 | type = vh->h_vlan_encapsulated_proto; | 2367 | type = vh->h_vlan_encapsulated_proto; |
2357 | vlan_depth += VLAN_HLEN; | 2368 | vlan_depth += VLAN_HLEN; |
2358 | } | 2369 | } |
2359 | 2370 | ||
2360 | skb_reset_mac_header(skb); | 2371 | skb_reset_mac_header(skb); |
2361 | skb->mac_len = skb->network_header - skb->mac_header; | 2372 | skb->mac_len = skb->network_header - skb->mac_header; |
2362 | __skb_pull(skb, skb->mac_len); | 2373 | __skb_pull(skb, skb->mac_len); |
2363 | 2374 | ||
2364 | if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { | 2375 | if (unlikely(skb_needs_check(skb, tx_path))) { |
2365 | skb_warn_bad_offload(skb); | 2376 | skb_warn_bad_offload(skb); |
2366 | 2377 | ||
2367 | if (skb_header_cloned(skb) && | 2378 | if (skb_header_cloned(skb) && |
2368 | (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) | 2379 | (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) |
2369 | return ERR_PTR(err); | 2380 | return ERR_PTR(err); |
2370 | } | 2381 | } |
2371 | 2382 | ||
2372 | rcu_read_lock(); | 2383 | rcu_read_lock(); |
2373 | list_for_each_entry_rcu(ptype, &offload_base, list) { | 2384 | list_for_each_entry_rcu(ptype, &offload_base, list) { |
2374 | if (ptype->type == type && ptype->callbacks.gso_segment) { | 2385 | if (ptype->type == type && ptype->callbacks.gso_segment) { |
2375 | if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { | 2386 | if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { |
2376 | err = ptype->callbacks.gso_send_check(skb); | 2387 | err = ptype->callbacks.gso_send_check(skb); |
2377 | segs = ERR_PTR(err); | 2388 | segs = ERR_PTR(err); |
2378 | if (err || skb_gso_ok(skb, features)) | 2389 | if (err || skb_gso_ok(skb, features)) |
2379 | break; | 2390 | break; |
2380 | __skb_push(skb, (skb->data - | 2391 | __skb_push(skb, (skb->data - |
2381 | skb_network_header(skb))); | 2392 | skb_network_header(skb))); |
2382 | } | 2393 | } |
2383 | segs = ptype->callbacks.gso_segment(skb, features); | 2394 | segs = ptype->callbacks.gso_segment(skb, features); |
2384 | break; | 2395 | break; |
2385 | } | 2396 | } |
2386 | } | 2397 | } |
2387 | rcu_read_unlock(); | 2398 | rcu_read_unlock(); |
2388 | 2399 | ||
2389 | __skb_push(skb, skb->data - skb_mac_header(skb)); | 2400 | __skb_push(skb, skb->data - skb_mac_header(skb)); |
2390 | 2401 | ||
2391 | return segs; | 2402 | return segs; |
2392 | } | 2403 | } |
2393 | EXPORT_SYMBOL(skb_gso_segment); | 2404 | EXPORT_SYMBOL(__skb_gso_segment); |
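The loop at the top of __skb_gso_segment() walks nested 802.1Q headers to find the real ethertype and how deep the network header starts. Below is a portable user-space sketch of that walk; get_be16(), the frame layout and the zero return value for short frames are assumptions made for the example.

#include <stdint.h>
#include <stdio.h>

#define ETH_HLEN     14
#define VLAN_HLEN    4
#define ETH_P_8021Q  0x8100

static uint16_t get_be16(const uint8_t *p)
{
    return (uint16_t)(p[0] << 8 | p[1]);
}

/* Walk nested 802.1Q tags, as the loop in __skb_gso_segment() does,
 * returning the inner ethertype and, via *depth, where the network
 * header begins. Returns 0 on a truncated frame, standing in for the
 * pskb_may_pull() failure case. */
static uint16_t inner_ethertype(const uint8_t *frame, unsigned int len,
                                unsigned int *depth)
{
    unsigned int off = ETH_HLEN;
    uint16_t type;

    if (len < ETH_HLEN)
        return 0;
    type = get_be16(frame + 12);            /* outer ethertype */

    while (type == ETH_P_8021Q) {
        if (len < off + VLAN_HLEN)
            return 0;
        type = get_be16(frame + off + 2);   /* TCI (2 bytes), then proto */
        off += VLAN_HLEN;
    }
    *depth = off;
    return type;
}

int main(void)
{
    /* dst MAC, src MAC, 802.1Q tag carrying VLAN 5, inner IPv4 ethertype */
    uint8_t frame[ETH_HLEN + VLAN_HLEN] = {
        0, 0, 0, 0, 0, 1,   0, 0, 0, 0, 0, 2,
        0x81, 0x00,  0x00, 0x05,  0x08, 0x00,
    };
    unsigned int depth = 0;
    uint16_t type = inner_ethertype(frame, sizeof(frame), &depth);

    printf("inner type 0x%04x, network header at offset %u\n", type, depth);
    return 0;
}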
2394 | 2405 | ||
2395 | /* Take action when hardware reception checksum errors are detected. */ | 2406 | /* Take action when hardware reception checksum errors are detected. */ |
2396 | #ifdef CONFIG_BUG | 2407 | #ifdef CONFIG_BUG |
2397 | void netdev_rx_csum_fault(struct net_device *dev) | 2408 | void netdev_rx_csum_fault(struct net_device *dev) |
2398 | { | 2409 | { |
2399 | if (net_ratelimit()) { | 2410 | if (net_ratelimit()) { |
2400 | pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>"); | 2411 | pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>"); |
2401 | dump_stack(); | 2412 | dump_stack(); |
2402 | } | 2413 | } |
2403 | } | 2414 | } |
2404 | EXPORT_SYMBOL(netdev_rx_csum_fault); | 2415 | EXPORT_SYMBOL(netdev_rx_csum_fault); |
2405 | #endif | 2416 | #endif |
2406 | 2417 | ||
2407 | /* Actually, we should eliminate this check as soon as we know that: | 2418 | /* Actually, we should eliminate this check as soon as we know that: |
2408 | * 1. IOMMU is present and can map all the memory. | 2419 | * 1. IOMMU is present and can map all the memory. |
2409 | * 2. No high memory really exists on this machine. | 2420 | * 2. No high memory really exists on this machine. |
2410 | */ | 2421 | */ |
2411 | 2422 | ||
2412 | static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) | 2423 | static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) |
2413 | { | 2424 | { |
2414 | #ifdef CONFIG_HIGHMEM | 2425 | #ifdef CONFIG_HIGHMEM |
2415 | int i; | 2426 | int i; |
2416 | if (!(dev->features & NETIF_F_HIGHDMA)) { | 2427 | if (!(dev->features & NETIF_F_HIGHDMA)) { |
2417 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { | 2428 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { |
2418 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; | 2429 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; |
2419 | if (PageHighMem(skb_frag_page(frag))) | 2430 | if (PageHighMem(skb_frag_page(frag))) |
2420 | return 1; | 2431 | return 1; |
2421 | } | 2432 | } |
2422 | } | 2433 | } |
2423 | 2434 | ||
2424 | if (PCI_DMA_BUS_IS_PHYS) { | 2435 | if (PCI_DMA_BUS_IS_PHYS) { |
2425 | struct device *pdev = dev->dev.parent; | 2436 | struct device *pdev = dev->dev.parent; |
2426 | 2437 | ||
2427 | if (!pdev) | 2438 | if (!pdev) |
2428 | return 0; | 2439 | return 0; |
2429 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { | 2440 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { |
2430 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; | 2441 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; |
2431 | dma_addr_t addr = page_to_phys(skb_frag_page(frag)); | 2442 | dma_addr_t addr = page_to_phys(skb_frag_page(frag)); |
2432 | if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask) | 2443 | if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask) |
2433 | return 1; | 2444 | return 1; |
2434 | } | 2445 | } |
2435 | } | 2446 | } |
2436 | #endif | 2447 | #endif |
2437 | return 0; | 2448 | return 0; |
2438 | } | 2449 | } |
2439 | 2450 | ||
2440 | struct dev_gso_cb { | 2451 | struct dev_gso_cb { |
2441 | void (*destructor)(struct sk_buff *skb); | 2452 | void (*destructor)(struct sk_buff *skb); |
2442 | }; | 2453 | }; |
2443 | 2454 | ||
2444 | #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb) | 2455 | #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb) |
2445 | 2456 | ||
2446 | static void dev_gso_skb_destructor(struct sk_buff *skb) | 2457 | static void dev_gso_skb_destructor(struct sk_buff *skb) |
2447 | { | 2458 | { |
2448 | struct dev_gso_cb *cb; | 2459 | struct dev_gso_cb *cb; |
2449 | 2460 | ||
2450 | do { | 2461 | do { |
2451 | struct sk_buff *nskb = skb->next; | 2462 | struct sk_buff *nskb = skb->next; |
2452 | 2463 | ||
2453 | skb->next = nskb->next; | 2464 | skb->next = nskb->next; |
2454 | nskb->next = NULL; | 2465 | nskb->next = NULL; |
2455 | kfree_skb(nskb); | 2466 | kfree_skb(nskb); |
2456 | } while (skb->next); | 2467 | } while (skb->next); |
2457 | 2468 | ||
2458 | cb = DEV_GSO_CB(skb); | 2469 | cb = DEV_GSO_CB(skb); |
2459 | if (cb->destructor) | 2470 | if (cb->destructor) |
2460 | cb->destructor(skb); | 2471 | cb->destructor(skb); |
2461 | } | 2472 | } |
2462 | 2473 | ||
2463 | /** | 2474 | /** |
2464 | * dev_gso_segment - Perform emulated hardware segmentation on skb. | 2475 | * dev_gso_segment - Perform emulated hardware segmentation on skb. |
2465 | * @skb: buffer to segment | 2476 | * @skb: buffer to segment |
2466 | * @features: device features as applicable to this skb | 2477 | * @features: device features as applicable to this skb |
2467 | * | 2478 | * |
2468 | * This function segments the given skb and stores the list of segments | 2479 | * This function segments the given skb and stores the list of segments |
2469 | * in skb->next. | 2480 | * in skb->next. |
2470 | */ | 2481 | */ |
2471 | static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) | 2482 | static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) |
2472 | { | 2483 | { |
2473 | struct sk_buff *segs; | 2484 | struct sk_buff *segs; |
2474 | 2485 | ||
2475 | segs = skb_gso_segment(skb, features); | 2486 | segs = skb_gso_segment(skb, features); |
2476 | 2487 | ||
2477 | /* Verifying header integrity only. */ | 2488 | /* Verifying header integrity only. */ |
2478 | if (!segs) | 2489 | if (!segs) |
2479 | return 0; | 2490 | return 0; |
2480 | 2491 | ||
2481 | if (IS_ERR(segs)) | 2492 | if (IS_ERR(segs)) |
2482 | return PTR_ERR(segs); | 2493 | return PTR_ERR(segs); |
2483 | 2494 | ||
2484 | skb->next = segs; | 2495 | skb->next = segs; |
2485 | DEV_GSO_CB(skb)->destructor = skb->destructor; | 2496 | DEV_GSO_CB(skb)->destructor = skb->destructor; |
2486 | skb->destructor = dev_gso_skb_destructor; | 2497 | skb->destructor = dev_gso_skb_destructor; |
2487 | 2498 | ||
2488 | return 0; | 2499 | return 0; |
2489 | } | 2500 | } |
2490 | 2501 | ||
2491 | static bool can_checksum_protocol(netdev_features_t features, __be16 protocol) | 2502 | static bool can_checksum_protocol(netdev_features_t features, __be16 protocol) |
2492 | { | 2503 | { |
2493 | return ((features & NETIF_F_GEN_CSUM) || | 2504 | return ((features & NETIF_F_GEN_CSUM) || |
2494 | ((features & NETIF_F_V4_CSUM) && | 2505 | ((features & NETIF_F_V4_CSUM) && |
2495 | protocol == htons(ETH_P_IP)) || | 2506 | protocol == htons(ETH_P_IP)) || |
2496 | ((features & NETIF_F_V6_CSUM) && | 2507 | ((features & NETIF_F_V6_CSUM) && |
2497 | protocol == htons(ETH_P_IPV6)) || | 2508 | protocol == htons(ETH_P_IPV6)) || |
2498 | ((features & NETIF_F_FCOE_CRC) && | 2509 | ((features & NETIF_F_FCOE_CRC) && |
2499 | protocol == htons(ETH_P_FCOE))); | 2510 | protocol == htons(ETH_P_FCOE))); |
2500 | } | 2511 | } |
2501 | 2512 | ||
2502 | static netdev_features_t harmonize_features(struct sk_buff *skb, | 2513 | static netdev_features_t harmonize_features(struct sk_buff *skb, |
2503 | __be16 protocol, netdev_features_t features) | 2514 | __be16 protocol, netdev_features_t features) |
2504 | { | 2515 | { |
2505 | if (skb->ip_summed != CHECKSUM_NONE && | 2516 | if (skb->ip_summed != CHECKSUM_NONE && |
2506 | !can_checksum_protocol(features, protocol)) { | 2517 | !can_checksum_protocol(features, protocol)) { |
2507 | features &= ~NETIF_F_ALL_CSUM; | 2518 | features &= ~NETIF_F_ALL_CSUM; |
2508 | features &= ~NETIF_F_SG; | 2519 | features &= ~NETIF_F_SG; |
2509 | } else if (illegal_highdma(skb->dev, skb)) { | 2520 | } else if (illegal_highdma(skb->dev, skb)) { |
2510 | features &= ~NETIF_F_SG; | 2521 | features &= ~NETIF_F_SG; |
2511 | } | 2522 | } |
2512 | 2523 | ||
2513 | return features; | 2524 | return features; |
2514 | } | 2525 | } |
2515 | 2526 | ||
2516 | netdev_features_t netif_skb_features(struct sk_buff *skb) | 2527 | netdev_features_t netif_skb_features(struct sk_buff *skb) |
2517 | { | 2528 | { |
2518 | __be16 protocol = skb->protocol; | 2529 | __be16 protocol = skb->protocol; |
2519 | netdev_features_t features = skb->dev->features; | 2530 | netdev_features_t features = skb->dev->features; |
2520 | 2531 | ||
2521 | if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) | 2532 | if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) |
2522 | features &= ~NETIF_F_GSO_MASK; | 2533 | features &= ~NETIF_F_GSO_MASK; |
2523 | 2534 | ||
2524 | if (protocol == htons(ETH_P_8021Q)) { | 2535 | if (protocol == htons(ETH_P_8021Q)) { |
2525 | struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; | 2536 | struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; |
2526 | protocol = veh->h_vlan_encapsulated_proto; | 2537 | protocol = veh->h_vlan_encapsulated_proto; |
2527 | } else if (!vlan_tx_tag_present(skb)) { | 2538 | } else if (!vlan_tx_tag_present(skb)) { |
2528 | return harmonize_features(skb, protocol, features); | 2539 | return harmonize_features(skb, protocol, features); |
2529 | } | 2540 | } |
2530 | 2541 | ||
2531 | features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX); | 2542 | features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX); |
2532 | 2543 | ||
2533 | if (protocol != htons(ETH_P_8021Q)) { | 2544 | if (protocol != htons(ETH_P_8021Q)) { |
2534 | return harmonize_features(skb, protocol, features); | 2545 | return harmonize_features(skb, protocol, features); |
2535 | } else { | 2546 | } else { |
2536 | features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | | 2547 | features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | |
2537 | NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX; | 2548 | NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX; |
2538 | return harmonize_features(skb, protocol, features); | 2549 | return harmonize_features(skb, protocol, features); |
2539 | } | 2550 | } |
2540 | } | 2551 | } |
2541 | EXPORT_SYMBOL(netif_skb_features); | 2552 | EXPORT_SYMBOL(netif_skb_features); |
2542 | 2553 | ||
2543 | /* | 2554 | /* |
2544 | * Returns true if either: | 2555 | * Returns true if either: |
2545 | * 1. skb has frag_list and the device doesn't support FRAGLIST, or | 2556 | * 1. skb has frag_list and the device doesn't support FRAGLIST, or |
2546 | * 2. skb is fragmented and the device does not support SG. | 2557 | * 2. skb is fragmented and the device does not support SG. |
2547 | */ | 2558 | */ |
2548 | static inline int skb_needs_linearize(struct sk_buff *skb, | 2559 | static inline int skb_needs_linearize(struct sk_buff *skb, |
2549 | int features) | 2560 | int features) |
2550 | { | 2561 | { |
2551 | return skb_is_nonlinear(skb) && | 2562 | return skb_is_nonlinear(skb) && |
2552 | ((skb_has_frag_list(skb) && | 2563 | ((skb_has_frag_list(skb) && |
2553 | !(features & NETIF_F_FRAGLIST)) || | 2564 | !(features & NETIF_F_FRAGLIST)) || |
2554 | (skb_shinfo(skb)->nr_frags && | 2565 | (skb_shinfo(skb)->nr_frags && |
2555 | !(features & NETIF_F_SG))); | 2566 | !(features & NETIF_F_SG))); |
2556 | } | 2567 | } |
2557 | 2568 | ||
2558 | int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | 2569 | int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, |
2559 | struct netdev_queue *txq) | 2570 | struct netdev_queue *txq) |
2560 | { | 2571 | { |
2561 | const struct net_device_ops *ops = dev->netdev_ops; | 2572 | const struct net_device_ops *ops = dev->netdev_ops; |
2562 | int rc = NETDEV_TX_OK; | 2573 | int rc = NETDEV_TX_OK; |
2563 | unsigned int skb_len; | 2574 | unsigned int skb_len; |
2564 | 2575 | ||
2565 | if (likely(!skb->next)) { | 2576 | if (likely(!skb->next)) { |
2566 | netdev_features_t features; | 2577 | netdev_features_t features; |
2567 | 2578 | ||
2568 | /* | 2579 | /* |
2569 | * If device doesn't need skb->dst, release it right now while | 2580 | * If device doesn't need skb->dst, release it right now while |
2570 | * it's hot in this cpu cache | 2581 | * it's hot in this cpu cache |
2571 | */ | 2582 | */ |
2572 | if (dev->priv_flags & IFF_XMIT_DST_RELEASE) | 2583 | if (dev->priv_flags & IFF_XMIT_DST_RELEASE) |
2573 | skb_dst_drop(skb); | 2584 | skb_dst_drop(skb); |
2574 | 2585 | ||
2575 | features = netif_skb_features(skb); | 2586 | features = netif_skb_features(skb); |
2576 | 2587 | ||
2577 | if (vlan_tx_tag_present(skb) && | 2588 | if (vlan_tx_tag_present(skb) && |
2578 | !(features & NETIF_F_HW_VLAN_TX)) { | 2589 | !(features & NETIF_F_HW_VLAN_TX)) { |
2579 | skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); | 2590 | skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); |
2580 | if (unlikely(!skb)) | 2591 | if (unlikely(!skb)) |
2581 | goto out; | 2592 | goto out; |
2582 | 2593 | ||
2583 | skb->vlan_tci = 0; | 2594 | skb->vlan_tci = 0; |
2584 | } | 2595 | } |
2585 | 2596 | ||
2586 | /* If encapsulation offload request, verify we are testing | 2597 | /* If encapsulation offload request, verify we are testing |
2587 | * hardware encapsulation features instead of standard | 2598 | * hardware encapsulation features instead of standard |
2588 | * features for the netdev | 2599 | * features for the netdev |
2589 | */ | 2600 | */ |
2590 | if (skb->encapsulation) | 2601 | if (skb->encapsulation) |
2591 | features &= dev->hw_enc_features; | 2602 | features &= dev->hw_enc_features; |
2592 | 2603 | ||
2593 | if (netif_needs_gso(skb, features)) { | 2604 | if (netif_needs_gso(skb, features)) { |
2594 | if (unlikely(dev_gso_segment(skb, features))) | 2605 | if (unlikely(dev_gso_segment(skb, features))) |
2595 | goto out_kfree_skb; | 2606 | goto out_kfree_skb; |
2596 | if (skb->next) | 2607 | if (skb->next) |
2597 | goto gso; | 2608 | goto gso; |
2598 | } else { | 2609 | } else { |
2599 | if (skb_needs_linearize(skb, features) && | 2610 | if (skb_needs_linearize(skb, features) && |
2600 | __skb_linearize(skb)) | 2611 | __skb_linearize(skb)) |
2601 | goto out_kfree_skb; | 2612 | goto out_kfree_skb; |
2602 | 2613 | ||
2603 | /* If packet is not checksummed and device does not | 2614 | /* If packet is not checksummed and device does not |
2604 | * support checksumming for this protocol, complete | 2615 | * support checksumming for this protocol, complete |
2605 | * checksumming here. | 2616 | * checksumming here. |
2606 | */ | 2617 | */ |
2607 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | 2618 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
2608 | if (skb->encapsulation) | 2619 | if (skb->encapsulation) |
2609 | skb_set_inner_transport_header(skb, | 2620 | skb_set_inner_transport_header(skb, |
2610 | skb_checksum_start_offset(skb)); | 2621 | skb_checksum_start_offset(skb)); |
2611 | else | 2622 | else |
2612 | skb_set_transport_header(skb, | 2623 | skb_set_transport_header(skb, |
2613 | skb_checksum_start_offset(skb)); | 2624 | skb_checksum_start_offset(skb)); |
2614 | if (!(features & NETIF_F_ALL_CSUM) && | 2625 | if (!(features & NETIF_F_ALL_CSUM) && |
2615 | skb_checksum_help(skb)) | 2626 | skb_checksum_help(skb)) |
2616 | goto out_kfree_skb; | 2627 | goto out_kfree_skb; |
2617 | } | 2628 | } |
2618 | } | 2629 | } |
2619 | 2630 | ||
2620 | if (!list_empty(&ptype_all)) | 2631 | if (!list_empty(&ptype_all)) |
2621 | dev_queue_xmit_nit(skb, dev); | 2632 | dev_queue_xmit_nit(skb, dev); |
2622 | 2633 | ||
2623 | skb_len = skb->len; | 2634 | skb_len = skb->len; |
2624 | rc = ops->ndo_start_xmit(skb, dev); | 2635 | rc = ops->ndo_start_xmit(skb, dev); |
2625 | trace_net_dev_xmit(skb, rc, dev, skb_len); | 2636 | trace_net_dev_xmit(skb, rc, dev, skb_len); |
2626 | if (rc == NETDEV_TX_OK) | 2637 | if (rc == NETDEV_TX_OK) |
2627 | txq_trans_update(txq); | 2638 | txq_trans_update(txq); |
2628 | return rc; | 2639 | return rc; |
2629 | } | 2640 | } |
2630 | 2641 | ||
2631 | gso: | 2642 | gso: |
2632 | do { | 2643 | do { |
2633 | struct sk_buff *nskb = skb->next; | 2644 | struct sk_buff *nskb = skb->next; |
2634 | 2645 | ||
2635 | skb->next = nskb->next; | 2646 | skb->next = nskb->next; |
2636 | nskb->next = NULL; | 2647 | nskb->next = NULL; |
2637 | 2648 | ||
2638 | /* | 2649 | /* |
2639 | * If device doesn't need nskb->dst, release it right now while | 2650 | * If device doesn't need nskb->dst, release it right now while |
2640 | * it's hot in this cpu cache | 2651 | * it's hot in this cpu cache |
2641 | */ | 2652 | */ |
2642 | if (dev->priv_flags & IFF_XMIT_DST_RELEASE) | 2653 | if (dev->priv_flags & IFF_XMIT_DST_RELEASE) |
2643 | skb_dst_drop(nskb); | 2654 | skb_dst_drop(nskb); |
2644 | 2655 | ||
2645 | if (!list_empty(&ptype_all)) | 2656 | if (!list_empty(&ptype_all)) |
2646 | dev_queue_xmit_nit(nskb, dev); | 2657 | dev_queue_xmit_nit(nskb, dev); |
2647 | 2658 | ||
2648 | skb_len = nskb->len; | 2659 | skb_len = nskb->len; |
2649 | rc = ops->ndo_start_xmit(nskb, dev); | 2660 | rc = ops->ndo_start_xmit(nskb, dev); |
2650 | trace_net_dev_xmit(nskb, rc, dev, skb_len); | 2661 | trace_net_dev_xmit(nskb, rc, dev, skb_len); |
2651 | if (unlikely(rc != NETDEV_TX_OK)) { | 2662 | if (unlikely(rc != NETDEV_TX_OK)) { |
2652 | if (rc & ~NETDEV_TX_MASK) | 2663 | if (rc & ~NETDEV_TX_MASK) |
2653 | goto out_kfree_gso_skb; | 2664 | goto out_kfree_gso_skb; |
2654 | nskb->next = skb->next; | 2665 | nskb->next = skb->next; |
2655 | skb->next = nskb; | 2666 | skb->next = nskb; |
2656 | return rc; | 2667 | return rc; |
2657 | } | 2668 | } |
2658 | txq_trans_update(txq); | 2669 | txq_trans_update(txq); |
2659 | if (unlikely(netif_xmit_stopped(txq) && skb->next)) | 2670 | if (unlikely(netif_xmit_stopped(txq) && skb->next)) |
2660 | return NETDEV_TX_BUSY; | 2671 | return NETDEV_TX_BUSY; |
2661 | } while (skb->next); | 2672 | } while (skb->next); |
2662 | 2673 | ||
2663 | out_kfree_gso_skb: | 2674 | out_kfree_gso_skb: |
2664 | if (likely(skb->next == NULL)) | 2675 | if (likely(skb->next == NULL)) |
2665 | skb->destructor = DEV_GSO_CB(skb)->destructor; | 2676 | skb->destructor = DEV_GSO_CB(skb)->destructor; |
2666 | out_kfree_skb: | 2677 | out_kfree_skb: |
2667 | kfree_skb(skb); | 2678 | kfree_skb(skb); |
2668 | out: | 2679 | out: |
2669 | return rc; | 2680 | return rc; |
2670 | } | 2681 | } |
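The gso: loop above detaches one segment at a time from skb->next, hands it to the driver, and on a soft failure re-links the unsent segment so the caller can requeue the remainder. A simplified stand-alone sketch of that walk follows; the seg type and the driver_xmit() stub (which pretends the queue fills up at segment 3) are invented for the example.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for a GSO segment, chained through ->next the way
 * dev_hard_start_xmit() walks skb->next. */
struct seg { int id; struct seg *next; };

/* Pretend driver: reports "queue full" when asked to send segment 3. */
static bool driver_xmit(struct seg *s) { return s->id != 3; }

/* Detach the head, transmit it, and on failure re-link the unsent segment
 * in front of the remainder so the caller can requeue from there. */
static struct seg *xmit_list(struct seg *head)
{
    while (head) {
        struct seg *nseg = head;

        head = nseg->next;
        nseg->next = NULL;

        if (!driver_xmit(nseg)) {
            nseg->next = head;          /* put it back in front */
            return nseg;                /* caller requeues from here */
        }
        printf("sent segment %d\n", nseg->id);
    }
    return NULL;
}

int main(void)
{
    struct seg segs[5] = {
        { 1, &segs[1] }, { 2, &segs[2] }, { 3, &segs[3] },
        { 4, &segs[4] }, { 5, NULL },
    };
    struct seg *left = xmit_list(&segs[0]);

    for (; left; left = left->next)
        printf("still queued: %d\n", left->id);
    return 0;
}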
2671 | 2682 | ||
2672 | static void qdisc_pkt_len_init(struct sk_buff *skb) | 2683 | static void qdisc_pkt_len_init(struct sk_buff *skb) |
2673 | { | 2684 | { |
2674 | const struct skb_shared_info *shinfo = skb_shinfo(skb); | 2685 | const struct skb_shared_info *shinfo = skb_shinfo(skb); |
2675 | 2686 | ||
2676 | qdisc_skb_cb(skb)->pkt_len = skb->len; | 2687 | qdisc_skb_cb(skb)->pkt_len = skb->len; |
2677 | 2688 | ||
2678 | /* To get more precise estimation of bytes sent on wire, | 2689 | /* To get more precise estimation of bytes sent on wire, |
2679 | * we add to pkt_len the headers size of all segments | 2690 | * we add to pkt_len the headers size of all segments |
2680 | */ | 2691 | */ |
2681 | if (shinfo->gso_size) { | 2692 | if (shinfo->gso_size) { |
2682 | unsigned int hdr_len; | 2693 | unsigned int hdr_len; |
2683 | 2694 | ||
2684 | /* mac layer + network layer */ | 2695 | /* mac layer + network layer */ |
2685 | hdr_len = skb_transport_header(skb) - skb_mac_header(skb); | 2696 | hdr_len = skb_transport_header(skb) - skb_mac_header(skb); |
2686 | 2697 | ||
2687 | /* + transport layer */ | 2698 | /* + transport layer */ |
2688 | if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) | 2699 | if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) |
2689 | hdr_len += tcp_hdrlen(skb); | 2700 | hdr_len += tcp_hdrlen(skb); |
2690 | else | 2701 | else |
2691 | hdr_len += sizeof(struct udphdr); | 2702 | hdr_len += sizeof(struct udphdr); |
2692 | qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len; | 2703 | qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len; |
2693 | } | 2704 | } |
2694 | } | 2705 | } |
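/* Worked example (illustrative only, not part of this file): for a
 * hypothetical TCPv4 GSO skb with skb->len = 65226, gso_segs = 45 and
 * hdr_len = 14 (MAC) + 20 (IP) + 20 (TCP) = 54 bytes, the estimate above
 * charges the header once per extra segment:
 *
 *	pkt_len = 65226 + (45 - 1) * 54 = 67602 bytes on the wire
 */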
2695 | 2706 | ||
2696 | static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | 2707 | static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, |
2697 | struct net_device *dev, | 2708 | struct net_device *dev, |
2698 | struct netdev_queue *txq) | 2709 | struct netdev_queue *txq) |
2699 | { | 2710 | { |
2700 | spinlock_t *root_lock = qdisc_lock(q); | 2711 | spinlock_t *root_lock = qdisc_lock(q); |
2701 | bool contended; | 2712 | bool contended; |
2702 | int rc; | 2713 | int rc; |
2703 | 2714 | ||
2704 | qdisc_pkt_len_init(skb); | 2715 | qdisc_pkt_len_init(skb); |
2705 | qdisc_calculate_pkt_len(skb, q); | 2716 | qdisc_calculate_pkt_len(skb, q); |
2706 | /* | 2717 | /* |
2707 | * Heuristic to force contended enqueues to serialize on a | 2718 | * Heuristic to force contended enqueues to serialize on a |
2708 | * separate lock before trying to get qdisc main lock. | 2719 | * separate lock before trying to get qdisc main lock. |
2709 | * This permits __QDISC_STATE_RUNNING owner to get the lock more often | 2720 | * This permits __QDISC_STATE_RUNNING owner to get the lock more often |
2710 | * and dequeue packets faster. | 2721 | * and dequeue packets faster. |
2711 | */ | 2722 | */ |
2712 | contended = qdisc_is_running(q); | 2723 | contended = qdisc_is_running(q); |
2713 | if (unlikely(contended)) | 2724 | if (unlikely(contended)) |
2714 | spin_lock(&q->busylock); | 2725 | spin_lock(&q->busylock); |
2715 | 2726 | ||
2716 | spin_lock(root_lock); | 2727 | spin_lock(root_lock); |
2717 | if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { | 2728 | if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { |
2718 | kfree_skb(skb); | 2729 | kfree_skb(skb); |
2719 | rc = NET_XMIT_DROP; | 2730 | rc = NET_XMIT_DROP; |
2720 | } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) && | 2731 | } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) && |
2721 | qdisc_run_begin(q)) { | 2732 | qdisc_run_begin(q)) { |
2722 | /* | 2733 | /* |
2723 | * This is a work-conserving queue; there are no old skbs | 2734 | * This is a work-conserving queue; there are no old skbs |
2724 | * waiting to be sent out; and the qdisc is not running - | 2735 | * waiting to be sent out; and the qdisc is not running - |
2725 | * xmit the skb directly. | 2736 | * xmit the skb directly. |
2726 | */ | 2737 | */ |
2727 | if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) | 2738 | if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) |
2728 | skb_dst_force(skb); | 2739 | skb_dst_force(skb); |
2729 | 2740 | ||
2730 | qdisc_bstats_update(q, skb); | 2741 | qdisc_bstats_update(q, skb); |
2731 | 2742 | ||
2732 | if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { | 2743 | if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { |
2733 | if (unlikely(contended)) { | 2744 | if (unlikely(contended)) { |
2734 | spin_unlock(&q->busylock); | 2745 | spin_unlock(&q->busylock); |
2735 | contended = false; | 2746 | contended = false; |
2736 | } | 2747 | } |
2737 | __qdisc_run(q); | 2748 | __qdisc_run(q); |
2738 | } else | 2749 | } else |
2739 | qdisc_run_end(q); | 2750 | qdisc_run_end(q); |
2740 | 2751 | ||
2741 | rc = NET_XMIT_SUCCESS; | 2752 | rc = NET_XMIT_SUCCESS; |
2742 | } else { | 2753 | } else { |
2743 | skb_dst_force(skb); | 2754 | skb_dst_force(skb); |
2744 | rc = q->enqueue(skb, q) & NET_XMIT_MASK; | 2755 | rc = q->enqueue(skb, q) & NET_XMIT_MASK; |
2745 | if (qdisc_run_begin(q)) { | 2756 | if (qdisc_run_begin(q)) { |
2746 | if (unlikely(contended)) { | 2757 | if (unlikely(contended)) { |
2747 | spin_unlock(&q->busylock); | 2758 | spin_unlock(&q->busylock); |
2748 | contended = false; | 2759 | contended = false; |
2749 | } | 2760 | } |
2750 | __qdisc_run(q); | 2761 | __qdisc_run(q); |
2751 | } | 2762 | } |
2752 | } | 2763 | } |
2753 | spin_unlock(root_lock); | 2764 | spin_unlock(root_lock); |
2754 | if (unlikely(contended)) | 2765 | if (unlikely(contended)) |
2755 | spin_unlock(&q->busylock); | 2766 | spin_unlock(&q->busylock); |
2756 | return rc; | 2767 | return rc; |
2757 | } | 2768 | } |
2758 | 2769 | ||
2759 | #if IS_ENABLED(CONFIG_NETPRIO_CGROUP) | 2770 | #if IS_ENABLED(CONFIG_NETPRIO_CGROUP) |
2760 | static void skb_update_prio(struct sk_buff *skb) | 2771 | static void skb_update_prio(struct sk_buff *skb) |
2761 | { | 2772 | { |
2762 | struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); | 2773 | struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); |
2763 | 2774 | ||
2764 | if (!skb->priority && skb->sk && map) { | 2775 | if (!skb->priority && skb->sk && map) { |
2765 | unsigned int prioidx = skb->sk->sk_cgrp_prioidx; | 2776 | unsigned int prioidx = skb->sk->sk_cgrp_prioidx; |
2766 | 2777 | ||
2767 | if (prioidx < map->priomap_len) | 2778 | if (prioidx < map->priomap_len) |
2768 | skb->priority = map->priomap[prioidx]; | 2779 | skb->priority = map->priomap[prioidx]; |
2769 | } | 2780 | } |
2770 | } | 2781 | } |
2771 | #else | 2782 | #else |
2772 | #define skb_update_prio(skb) | 2783 | #define skb_update_prio(skb) |
2773 | #endif | 2784 | #endif |
2774 | 2785 | ||
2775 | static DEFINE_PER_CPU(int, xmit_recursion); | 2786 | static DEFINE_PER_CPU(int, xmit_recursion); |
2776 | #define RECURSION_LIMIT 10 | 2787 | #define RECURSION_LIMIT 10 |
2777 | 2788 | ||
2778 | /** | 2789 | /** |
2779 | * dev_loopback_xmit - loop back @skb | 2790 | * dev_loopback_xmit - loop back @skb |
2780 | * @skb: buffer to transmit | 2791 | * @skb: buffer to transmit |
2781 | */ | 2792 | */ |
2782 | int dev_loopback_xmit(struct sk_buff *skb) | 2793 | int dev_loopback_xmit(struct sk_buff *skb) |
2783 | { | 2794 | { |
2784 | skb_reset_mac_header(skb); | 2795 | skb_reset_mac_header(skb); |
2785 | __skb_pull(skb, skb_network_offset(skb)); | 2796 | __skb_pull(skb, skb_network_offset(skb)); |
2786 | skb->pkt_type = PACKET_LOOPBACK; | 2797 | skb->pkt_type = PACKET_LOOPBACK; |
2787 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 2798 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
2788 | WARN_ON(!skb_dst(skb)); | 2799 | WARN_ON(!skb_dst(skb)); |
2789 | skb_dst_force(skb); | 2800 | skb_dst_force(skb); |
2790 | netif_rx_ni(skb); | 2801 | netif_rx_ni(skb); |
2791 | return 0; | 2802 | return 0; |
2792 | } | 2803 | } |
2793 | EXPORT_SYMBOL(dev_loopback_xmit); | 2804 | EXPORT_SYMBOL(dev_loopback_xmit); |
2794 | 2805 | ||
2795 | /** | 2806 | /** |
2796 | * dev_queue_xmit - transmit a buffer | 2807 | * dev_queue_xmit - transmit a buffer |
2797 | * @skb: buffer to transmit | 2808 | * @skb: buffer to transmit |
2798 | * | 2809 | * |
2799 | * Queue a buffer for transmission to a network device. The caller must | 2810 | * Queue a buffer for transmission to a network device. The caller must |
2800 | * have set the device and priority and built the buffer before calling | 2811 | * have set the device and priority and built the buffer before calling |
2801 | * this function. The function can be called from an interrupt. | 2812 | * this function. The function can be called from an interrupt. |
2802 | * | 2813 | * |
2803 | * A negative errno code is returned on a failure. A success does not | 2814 | * A negative errno code is returned on a failure. A success does not |
2804 | * guarantee the frame will be transmitted as it may be dropped due | 2815 | * guarantee the frame will be transmitted as it may be dropped due |
2805 | * to congestion or traffic shaping. | 2816 | * to congestion or traffic shaping. |
2806 | * | 2817 | * |
2807 | * ----------------------------------------------------------------------------------- | 2818 | * ----------------------------------------------------------------------------------- |
2808 | * I notice this method can also return errors from the queue disciplines, | 2819 | * I notice this method can also return errors from the queue disciplines, |
2809 | * including NET_XMIT_DROP, which is a positive value. So, errors can also | 2820 | * including NET_XMIT_DROP, which is a positive value. So, errors can also |
2810 | * be positive. | 2821 | * be positive. |
2811 | * | 2822 | * |
2812 | * Regardless of the return value, the skb is consumed, so it is currently | 2823 | * Regardless of the return value, the skb is consumed, so it is currently |
2813 | * difficult to retry a send to this method. (You can bump the ref count | 2824 | * difficult to retry a send to this method. (You can bump the ref count |
2814 | * before sending to hold a reference for retry if you are careful.) | 2825 | * before sending to hold a reference for retry if you are careful.) |
2815 | * | 2826 | * |
2816 | * When calling this method, interrupts MUST be enabled. This is because | 2827 | * When calling this method, interrupts MUST be enabled. This is because |
2817 | * the BH enable code must have IRQs enabled so that it will not deadlock. | 2828 | * the BH enable code must have IRQs enabled so that it will not deadlock. |
2818 | * --BLG | 2829 | * --BLG |
2819 | */ | 2830 | */ |
2820 | int dev_queue_xmit(struct sk_buff *skb) | 2831 | int dev_queue_xmit(struct sk_buff *skb) |
2821 | { | 2832 | { |
2822 | struct net_device *dev = skb->dev; | 2833 | struct net_device *dev = skb->dev; |
2823 | struct netdev_queue *txq; | 2834 | struct netdev_queue *txq; |
2824 | struct Qdisc *q; | 2835 | struct Qdisc *q; |
2825 | int rc = -ENOMEM; | 2836 | int rc = -ENOMEM; |
2826 | 2837 | ||
2827 | /* Disable soft irqs for various locks below. Also | 2838 | /* Disable soft irqs for various locks below. Also |
2828 | * stops preemption for RCU. | 2839 | * stops preemption for RCU. |
2829 | */ | 2840 | */ |
2830 | rcu_read_lock_bh(); | 2841 | rcu_read_lock_bh(); |
2831 | 2842 | ||
2832 | skb_update_prio(skb); | 2843 | skb_update_prio(skb); |
2833 | 2844 | ||
2834 | txq = netdev_pick_tx(dev, skb); | 2845 | txq = netdev_pick_tx(dev, skb); |
2835 | q = rcu_dereference_bh(txq->qdisc); | 2846 | q = rcu_dereference_bh(txq->qdisc); |
2836 | 2847 | ||
2837 | #ifdef CONFIG_NET_CLS_ACT | 2848 | #ifdef CONFIG_NET_CLS_ACT |
2838 | skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); | 2849 | skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); |
2839 | #endif | 2850 | #endif |
2840 | trace_net_dev_queue(skb); | 2851 | trace_net_dev_queue(skb); |
2841 | if (q->enqueue) { | 2852 | if (q->enqueue) { |
2842 | rc = __dev_xmit_skb(skb, q, dev, txq); | 2853 | rc = __dev_xmit_skb(skb, q, dev, txq); |
2843 | goto out; | 2854 | goto out; |
2844 | } | 2855 | } |
2845 | 2856 | ||
2846 | /* The device has no queue. Common case for software devices: | 2857 | /* The device has no queue. Common case for software devices: |
2847 | loopback, all sorts of tunnels... | 2858 | loopback, all sorts of tunnels... |
2848 | 2859 | ||
2849 | Really, it is unlikely that netif_tx_lock protection is necessary | 2860 | Really, it is unlikely that netif_tx_lock protection is necessary |
2850 | here. (f.e. loopback and IP tunnels are clean ignoring statistics | 2861 | here. (f.e. loopback and IP tunnels are clean ignoring statistics |
2851 | counters.) | 2862 | counters.) |
2852 | However, it is possible, that they rely on protection | 2863 | However, it is possible, that they rely on protection |
2853 | made by us here. | 2864 | made by us here. |
2854 | 2865 | ||
2855 | Check this and take the lock. It is not prone to deadlocks. | 2866 | Check this and take the lock. It is not prone to deadlocks. |
2856 | Or take the noqueue qdisc path, it is even simpler 8) | 2867 | Or take the noqueue qdisc path, it is even simpler 8) |
2857 | */ | 2868 | */ |
2858 | if (dev->flags & IFF_UP) { | 2869 | if (dev->flags & IFF_UP) { |
2859 | int cpu = smp_processor_id(); /* ok because BHs are off */ | 2870 | int cpu = smp_processor_id(); /* ok because BHs are off */ |
2860 | 2871 | ||
2861 | if (txq->xmit_lock_owner != cpu) { | 2872 | if (txq->xmit_lock_owner != cpu) { |
2862 | 2873 | ||
2863 | if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT) | 2874 | if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT) |
2864 | goto recursion_alert; | 2875 | goto recursion_alert; |
2865 | 2876 | ||
2866 | HARD_TX_LOCK(dev, txq, cpu); | 2877 | HARD_TX_LOCK(dev, txq, cpu); |
2867 | 2878 | ||
2868 | if (!netif_xmit_stopped(txq)) { | 2879 | if (!netif_xmit_stopped(txq)) { |
2869 | __this_cpu_inc(xmit_recursion); | 2880 | __this_cpu_inc(xmit_recursion); |
2870 | rc = dev_hard_start_xmit(skb, dev, txq); | 2881 | rc = dev_hard_start_xmit(skb, dev, txq); |
2871 | __this_cpu_dec(xmit_recursion); | 2882 | __this_cpu_dec(xmit_recursion); |
2872 | if (dev_xmit_complete(rc)) { | 2883 | if (dev_xmit_complete(rc)) { |
2873 | HARD_TX_UNLOCK(dev, txq); | 2884 | HARD_TX_UNLOCK(dev, txq); |
2874 | goto out; | 2885 | goto out; |
2875 | } | 2886 | } |
2876 | } | 2887 | } |
2877 | HARD_TX_UNLOCK(dev, txq); | 2888 | HARD_TX_UNLOCK(dev, txq); |
2878 | net_crit_ratelimited("Virtual device %s asks to queue packet!\n", | 2889 | net_crit_ratelimited("Virtual device %s asks to queue packet!\n", |
2879 | dev->name); | 2890 | dev->name); |
2880 | } else { | 2891 | } else { |
2881 | /* Recursion is detected! It is possible, | 2892 | /* Recursion is detected! It is possible, |
2882 | * unfortunately | 2893 | * unfortunately |
2883 | */ | 2894 | */ |
2884 | recursion_alert: | 2895 | recursion_alert: |
2885 | net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n", | 2896 | net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n", |
2886 | dev->name); | 2897 | dev->name); |
2887 | } | 2898 | } |
2888 | } | 2899 | } |
2889 | 2900 | ||
2890 | rc = -ENETDOWN; | 2901 | rc = -ENETDOWN; |
2891 | rcu_read_unlock_bh(); | 2902 | rcu_read_unlock_bh(); |
2892 | 2903 | ||
2893 | kfree_skb(skb); | 2904 | kfree_skb(skb); |
2894 | return rc; | 2905 | return rc; |
2895 | out: | 2906 | out: |
2896 | rcu_read_unlock_bh(); | 2907 | rcu_read_unlock_bh(); |
2897 | return rc; | 2908 | return rc; |
2898 | } | 2909 | } |
2899 | EXPORT_SYMBOL(dev_queue_xmit); | 2910 | EXPORT_SYMBOL(dev_queue_xmit); |
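/* Minimal usage sketch (illustrative only, not part of this commit): queueing
 * a pre-built frame for transmission.  The buffer is assumed to already hold
 * the complete frame including the link-layer header, and my_send_frame() is
 * an invented name.
 */
static int my_send_frame(struct net_device *dev, const void *frame,
			 unsigned int len)
{
	struct sk_buff *skb = alloc_skb(len, GFP_ATOMIC);

	if (!skb)
		return -ENOMEM;

	memcpy(skb_put(skb, len), frame, len);
	skb_reset_mac_header(skb);
	skb->dev = dev;
	skb->protocol = htons(ETH_P_IP);	/* assumption: IPv4 payload */

	/* The skb is consumed whatever the outcome; qdisc codes such as
	 * NET_XMIT_DROP are positive, real failures are negative errnos.
	 */
	return dev_queue_xmit(skb);
}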
2900 | 2911 | ||
2901 | 2912 | ||
2902 | /*======================================================================= | 2913 | /*======================================================================= |
2903 | Receiver routines | 2914 | Receiver routines |
2904 | =======================================================================*/ | 2915 | =======================================================================*/ |
2905 | 2916 | ||
2906 | int netdev_max_backlog __read_mostly = 1000; | 2917 | int netdev_max_backlog __read_mostly = 1000; |
2907 | EXPORT_SYMBOL(netdev_max_backlog); | 2918 | EXPORT_SYMBOL(netdev_max_backlog); |
2908 | 2919 | ||
2909 | int netdev_tstamp_prequeue __read_mostly = 1; | 2920 | int netdev_tstamp_prequeue __read_mostly = 1; |
2910 | int netdev_budget __read_mostly = 300; | 2921 | int netdev_budget __read_mostly = 300; |
2911 | int weight_p __read_mostly = 64; /* old backlog weight */ | 2922 | int weight_p __read_mostly = 64; /* old backlog weight */ |
2912 | 2923 | ||
2913 | /* Called with irq disabled */ | 2924 | /* Called with irq disabled */ |
2914 | static inline void ____napi_schedule(struct softnet_data *sd, | 2925 | static inline void ____napi_schedule(struct softnet_data *sd, |
2915 | struct napi_struct *napi) | 2926 | struct napi_struct *napi) |
2916 | { | 2927 | { |
2917 | list_add_tail(&napi->poll_list, &sd->poll_list); | 2928 | list_add_tail(&napi->poll_list, &sd->poll_list); |
2918 | __raise_softirq_irqoff(NET_RX_SOFTIRQ); | 2929 | __raise_softirq_irqoff(NET_RX_SOFTIRQ); |
2919 | } | 2930 | } |
2920 | 2931 | ||
2921 | #ifdef CONFIG_RPS | 2932 | #ifdef CONFIG_RPS |
2922 | 2933 | ||
2923 | /* One global table that all flow-based protocols share. */ | 2934 | /* One global table that all flow-based protocols share. */ |
2924 | struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; | 2935 | struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; |
2925 | EXPORT_SYMBOL(rps_sock_flow_table); | 2936 | EXPORT_SYMBOL(rps_sock_flow_table); |
2926 | 2937 | ||
2927 | struct static_key rps_needed __read_mostly; | 2938 | struct static_key rps_needed __read_mostly; |
2928 | 2939 | ||
2929 | static struct rps_dev_flow * | 2940 | static struct rps_dev_flow * |
2930 | set_rps_cpu(struct net_device *dev, struct sk_buff *skb, | 2941 | set_rps_cpu(struct net_device *dev, struct sk_buff *skb, |
2931 | struct rps_dev_flow *rflow, u16 next_cpu) | 2942 | struct rps_dev_flow *rflow, u16 next_cpu) |
2932 | { | 2943 | { |
2933 | if (next_cpu != RPS_NO_CPU) { | 2944 | if (next_cpu != RPS_NO_CPU) { |
2934 | #ifdef CONFIG_RFS_ACCEL | 2945 | #ifdef CONFIG_RFS_ACCEL |
2935 | struct netdev_rx_queue *rxqueue; | 2946 | struct netdev_rx_queue *rxqueue; |
2936 | struct rps_dev_flow_table *flow_table; | 2947 | struct rps_dev_flow_table *flow_table; |
2937 | struct rps_dev_flow *old_rflow; | 2948 | struct rps_dev_flow *old_rflow; |
2938 | u32 flow_id; | 2949 | u32 flow_id; |
2939 | u16 rxq_index; | 2950 | u16 rxq_index; |
2940 | int rc; | 2951 | int rc; |
2941 | 2952 | ||
2942 | /* Should we steer this flow to a different hardware queue? */ | 2953 | /* Should we steer this flow to a different hardware queue? */ |
2943 | if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap || | 2954 | if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap || |
2944 | !(dev->features & NETIF_F_NTUPLE)) | 2955 | !(dev->features & NETIF_F_NTUPLE)) |
2945 | goto out; | 2956 | goto out; |
2946 | rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu); | 2957 | rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu); |
2947 | if (rxq_index == skb_get_rx_queue(skb)) | 2958 | if (rxq_index == skb_get_rx_queue(skb)) |
2948 | goto out; | 2959 | goto out; |
2949 | 2960 | ||
2950 | rxqueue = dev->_rx + rxq_index; | 2961 | rxqueue = dev->_rx + rxq_index; |
2951 | flow_table = rcu_dereference(rxqueue->rps_flow_table); | 2962 | flow_table = rcu_dereference(rxqueue->rps_flow_table); |
2952 | if (!flow_table) | 2963 | if (!flow_table) |
2953 | goto out; | 2964 | goto out; |
2954 | flow_id = skb->rxhash & flow_table->mask; | 2965 | flow_id = skb->rxhash & flow_table->mask; |
2955 | rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, | 2966 | rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, |
2956 | rxq_index, flow_id); | 2967 | rxq_index, flow_id); |
2957 | if (rc < 0) | 2968 | if (rc < 0) |
2958 | goto out; | 2969 | goto out; |
2959 | old_rflow = rflow; | 2970 | old_rflow = rflow; |
2960 | rflow = &flow_table->flows[flow_id]; | 2971 | rflow = &flow_table->flows[flow_id]; |
2961 | rflow->filter = rc; | 2972 | rflow->filter = rc; |
2962 | if (old_rflow->filter == rflow->filter) | 2973 | if (old_rflow->filter == rflow->filter) |
2963 | old_rflow->filter = RPS_NO_FILTER; | 2974 | old_rflow->filter = RPS_NO_FILTER; |
2964 | out: | 2975 | out: |
2965 | #endif | 2976 | #endif |
2966 | rflow->last_qtail = | 2977 | rflow->last_qtail = |
2967 | per_cpu(softnet_data, next_cpu).input_queue_head; | 2978 | per_cpu(softnet_data, next_cpu).input_queue_head; |
2968 | } | 2979 | } |
2969 | 2980 | ||
2970 | rflow->cpu = next_cpu; | 2981 | rflow->cpu = next_cpu; |
2971 | return rflow; | 2982 | return rflow; |
2972 | } | 2983 | } |
2973 | 2984 | ||
2974 | /* | 2985 | /* |
2975 | * get_rps_cpu is called from netif_receive_skb and returns the target | 2986 | * get_rps_cpu is called from netif_receive_skb and returns the target |
2976 | * CPU from the RPS map of the receiving queue for a given skb. | 2987 | * CPU from the RPS map of the receiving queue for a given skb. |
2977 | * rcu_read_lock must be held on entry. | 2988 | * rcu_read_lock must be held on entry. |
2978 | */ | 2989 | */ |
2979 | static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, | 2990 | static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, |
2980 | struct rps_dev_flow **rflowp) | 2991 | struct rps_dev_flow **rflowp) |
2981 | { | 2992 | { |
2982 | struct netdev_rx_queue *rxqueue; | 2993 | struct netdev_rx_queue *rxqueue; |
2983 | struct rps_map *map; | 2994 | struct rps_map *map; |
2984 | struct rps_dev_flow_table *flow_table; | 2995 | struct rps_dev_flow_table *flow_table; |
2985 | struct rps_sock_flow_table *sock_flow_table; | 2996 | struct rps_sock_flow_table *sock_flow_table; |
2986 | int cpu = -1; | 2997 | int cpu = -1; |
2987 | u16 tcpu; | 2998 | u16 tcpu; |
2988 | 2999 | ||
2989 | if (skb_rx_queue_recorded(skb)) { | 3000 | if (skb_rx_queue_recorded(skb)) { |
2990 | u16 index = skb_get_rx_queue(skb); | 3001 | u16 index = skb_get_rx_queue(skb); |
2991 | if (unlikely(index >= dev->real_num_rx_queues)) { | 3002 | if (unlikely(index >= dev->real_num_rx_queues)) { |
2992 | WARN_ONCE(dev->real_num_rx_queues > 1, | 3003 | WARN_ONCE(dev->real_num_rx_queues > 1, |
2993 | "%s received packet on queue %u, but number " | 3004 | "%s received packet on queue %u, but number " |
2994 | "of RX queues is %u\n", | 3005 | "of RX queues is %u\n", |
2995 | dev->name, index, dev->real_num_rx_queues); | 3006 | dev->name, index, dev->real_num_rx_queues); |
2996 | goto done; | 3007 | goto done; |
2997 | } | 3008 | } |
2998 | rxqueue = dev->_rx + index; | 3009 | rxqueue = dev->_rx + index; |
2999 | } else | 3010 | } else |
3000 | rxqueue = dev->_rx; | 3011 | rxqueue = dev->_rx; |
3001 | 3012 | ||
3002 | map = rcu_dereference(rxqueue->rps_map); | 3013 | map = rcu_dereference(rxqueue->rps_map); |
3003 | if (map) { | 3014 | if (map) { |
3004 | if (map->len == 1 && | 3015 | if (map->len == 1 && |
3005 | !rcu_access_pointer(rxqueue->rps_flow_table)) { | 3016 | !rcu_access_pointer(rxqueue->rps_flow_table)) { |
3006 | tcpu = map->cpus[0]; | 3017 | tcpu = map->cpus[0]; |
3007 | if (cpu_online(tcpu)) | 3018 | if (cpu_online(tcpu)) |
3008 | cpu = tcpu; | 3019 | cpu = tcpu; |
3009 | goto done; | 3020 | goto done; |
3010 | } | 3021 | } |
3011 | } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) { | 3022 | } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) { |
3012 | goto done; | 3023 | goto done; |
3013 | } | 3024 | } |
3014 | 3025 | ||
3015 | skb_reset_network_header(skb); | 3026 | skb_reset_network_header(skb); |
3016 | if (!skb_get_rxhash(skb)) | 3027 | if (!skb_get_rxhash(skb)) |
3017 | goto done; | 3028 | goto done; |
3018 | 3029 | ||
3019 | flow_table = rcu_dereference(rxqueue->rps_flow_table); | 3030 | flow_table = rcu_dereference(rxqueue->rps_flow_table); |
3020 | sock_flow_table = rcu_dereference(rps_sock_flow_table); | 3031 | sock_flow_table = rcu_dereference(rps_sock_flow_table); |
3021 | if (flow_table && sock_flow_table) { | 3032 | if (flow_table && sock_flow_table) { |
3022 | u16 next_cpu; | 3033 | u16 next_cpu; |
3023 | struct rps_dev_flow *rflow; | 3034 | struct rps_dev_flow *rflow; |
3024 | 3035 | ||
3025 | rflow = &flow_table->flows[skb->rxhash & flow_table->mask]; | 3036 | rflow = &flow_table->flows[skb->rxhash & flow_table->mask]; |
3026 | tcpu = rflow->cpu; | 3037 | tcpu = rflow->cpu; |
3027 | 3038 | ||
3028 | next_cpu = sock_flow_table->ents[skb->rxhash & | 3039 | next_cpu = sock_flow_table->ents[skb->rxhash & |
3029 | sock_flow_table->mask]; | 3040 | sock_flow_table->mask]; |
3030 | 3041 | ||
3031 | /* | 3042 | /* |
3032 | * If the desired CPU (where last recvmsg was done) is | 3043 | * If the desired CPU (where last recvmsg was done) is |
3033 | * different from current CPU (one in the rx-queue flow | 3044 | * different from current CPU (one in the rx-queue flow |
3034 | * table entry), switch if one of the following holds: | 3045 | * table entry), switch if one of the following holds: |
3035 | * - Current CPU is unset (equal to RPS_NO_CPU). | 3046 | * - Current CPU is unset (equal to RPS_NO_CPU). |
3036 | * - Current CPU is offline. | 3047 | * - Current CPU is offline. |
3037 | * - The current CPU's queue tail has advanced beyond the | 3048 | * - The current CPU's queue tail has advanced beyond the |
3038 | * last packet that was enqueued using this table entry. | 3049 | * last packet that was enqueued using this table entry. |
3039 | * This guarantees that all previous packets for the flow | 3050 | * This guarantees that all previous packets for the flow |
3040 | * have been dequeued, thus preserving in-order delivery. | 3051 | * have been dequeued, thus preserving in-order delivery. |
3041 | */ | 3052 | */ |
3042 | if (unlikely(tcpu != next_cpu) && | 3053 | if (unlikely(tcpu != next_cpu) && |
3043 | (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || | 3054 | (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || |
3044 | ((int)(per_cpu(softnet_data, tcpu).input_queue_head - | 3055 | ((int)(per_cpu(softnet_data, tcpu).input_queue_head - |
3045 | rflow->last_qtail)) >= 0)) { | 3056 | rflow->last_qtail)) >= 0)) { |
3046 | tcpu = next_cpu; | 3057 | tcpu = next_cpu; |
3047 | rflow = set_rps_cpu(dev, skb, rflow, next_cpu); | 3058 | rflow = set_rps_cpu(dev, skb, rflow, next_cpu); |
3048 | } | 3059 | } |
3049 | 3060 | ||
3050 | if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { | 3061 | if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { |
3051 | *rflowp = rflow; | 3062 | *rflowp = rflow; |
3052 | cpu = tcpu; | 3063 | cpu = tcpu; |
3053 | goto done; | 3064 | goto done; |
3054 | } | 3065 | } |
3055 | } | 3066 | } |
3056 | 3067 | ||
3057 | if (map) { | 3068 | if (map) { |
3058 | tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; | 3069 | tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; |
3059 | 3070 | ||
3060 | if (cpu_online(tcpu)) { | 3071 | if (cpu_online(tcpu)) { |
3061 | cpu = tcpu; | 3072 | cpu = tcpu; |
3062 | goto done; | 3073 | goto done; |
3063 | } | 3074 | } |
3064 | } | 3075 | } |
3065 | 3076 | ||
3066 | done: | 3077 | done: |
3067 | return cpu; | 3078 | return cpu; |
3068 | } | 3079 | } |
3069 | 3080 | ||
3070 | #ifdef CONFIG_RFS_ACCEL | 3081 | #ifdef CONFIG_RFS_ACCEL |
3071 | 3082 | ||
3072 | /** | 3083 | /** |
3073 | * rps_may_expire_flow - check whether an RFS hardware filter may be removed | 3084 | * rps_may_expire_flow - check whether an RFS hardware filter may be removed |
3074 | * @dev: Device on which the filter was set | 3085 | * @dev: Device on which the filter was set |
3075 | * @rxq_index: RX queue index | 3086 | * @rxq_index: RX queue index |
3076 | * @flow_id: Flow ID passed to ndo_rx_flow_steer() | 3087 | * @flow_id: Flow ID passed to ndo_rx_flow_steer() |
3077 | * @filter_id: Filter ID returned by ndo_rx_flow_steer() | 3088 | * @filter_id: Filter ID returned by ndo_rx_flow_steer() |
3078 | * | 3089 | * |
3079 | * Drivers that implement ndo_rx_flow_steer() should periodically call | 3090 | * Drivers that implement ndo_rx_flow_steer() should periodically call |
3080 | * this function for each installed filter and remove the filters for | 3091 | * this function for each installed filter and remove the filters for |
3081 | * which it returns %true. | 3092 | * which it returns %true. |
3082 | */ | 3093 | */ |
3083 | bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, | 3094 | bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, |
3084 | u32 flow_id, u16 filter_id) | 3095 | u32 flow_id, u16 filter_id) |
3085 | { | 3096 | { |
3086 | struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index; | 3097 | struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index; |
3087 | struct rps_dev_flow_table *flow_table; | 3098 | struct rps_dev_flow_table *flow_table; |
3088 | struct rps_dev_flow *rflow; | 3099 | struct rps_dev_flow *rflow; |
3089 | bool expire = true; | 3100 | bool expire = true; |
3090 | int cpu; | 3101 | int cpu; |
3091 | 3102 | ||
3092 | rcu_read_lock(); | 3103 | rcu_read_lock(); |
3093 | flow_table = rcu_dereference(rxqueue->rps_flow_table); | 3104 | flow_table = rcu_dereference(rxqueue->rps_flow_table); |
3094 | if (flow_table && flow_id <= flow_table->mask) { | 3105 | if (flow_table && flow_id <= flow_table->mask) { |
3095 | rflow = &flow_table->flows[flow_id]; | 3106 | rflow = &flow_table->flows[flow_id]; |
3096 | cpu = ACCESS_ONCE(rflow->cpu); | 3107 | cpu = ACCESS_ONCE(rflow->cpu); |
3097 | if (rflow->filter == filter_id && cpu != RPS_NO_CPU && | 3108 | if (rflow->filter == filter_id && cpu != RPS_NO_CPU && |
3098 | ((int)(per_cpu(softnet_data, cpu).input_queue_head - | 3109 | ((int)(per_cpu(softnet_data, cpu).input_queue_head - |
3099 | rflow->last_qtail) < | 3110 | rflow->last_qtail) < |
3100 | (int)(10 * flow_table->mask))) | 3111 | (int)(10 * flow_table->mask))) |
3101 | expire = false; | 3112 | expire = false; |
3102 | } | 3113 | } |
3103 | rcu_read_unlock(); | 3114 | rcu_read_unlock(); |
3104 | return expire; | 3115 | return expire; |
3105 | } | 3116 | } |
3106 | EXPORT_SYMBOL(rps_may_expire_flow); | 3117 | EXPORT_SYMBOL(rps_may_expire_flow); |
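/* Sketch only (not part of this commit): how a driver that implements
 * ndo_rx_flow_steer() might scan its installed filters from a periodic work
 * item.  struct my_priv, struct my_filter and my_hw_remove_filter() are
 * invented names standing in for real driver state.
 */
static void my_expire_rfs_filters(struct net_device *dev, struct my_priv *priv)
{
	unsigned int i;

	for (i = 0; i < priv->n_filters; i++) {
		struct my_filter *f = &priv->filters[i];

		if (!f->installed)
			continue;
		if (rps_may_expire_flow(dev, f->rxq_index,
					f->flow_id, f->filter_id)) {
			my_hw_remove_filter(priv, f);
			f->installed = false;
		}
	}
}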
3107 | 3118 | ||
3108 | #endif /* CONFIG_RFS_ACCEL */ | 3119 | #endif /* CONFIG_RFS_ACCEL */ |
3109 | 3120 | ||
3110 | /* Called from hardirq (IPI) context */ | 3121 | /* Called from hardirq (IPI) context */ |
3111 | static void rps_trigger_softirq(void *data) | 3122 | static void rps_trigger_softirq(void *data) |
3112 | { | 3123 | { |
3113 | struct softnet_data *sd = data; | 3124 | struct softnet_data *sd = data; |
3114 | 3125 | ||
3115 | ____napi_schedule(sd, &sd->backlog); | 3126 | ____napi_schedule(sd, &sd->backlog); |
3116 | sd->received_rps++; | 3127 | sd->received_rps++; |
3117 | } | 3128 | } |
3118 | 3129 | ||
3119 | #endif /* CONFIG_RPS */ | 3130 | #endif /* CONFIG_RPS */ |
3120 | 3131 | ||
3121 | /* | 3132 | /* |
3122 | * Check if this softnet_data structure belongs to another CPU | 3133 | * Check if this softnet_data structure belongs to another CPU |
3123 | * If yes, queue it to our IPI list and return 1 | 3134 | * If yes, queue it to our IPI list and return 1 |
3124 | * If no, return 0 | 3135 | * If no, return 0 |
3125 | */ | 3136 | */ |
3126 | static int rps_ipi_queued(struct softnet_data *sd) | 3137 | static int rps_ipi_queued(struct softnet_data *sd) |
3127 | { | 3138 | { |
3128 | #ifdef CONFIG_RPS | 3139 | #ifdef CONFIG_RPS |
3129 | struct softnet_data *mysd = &__get_cpu_var(softnet_data); | 3140 | struct softnet_data *mysd = &__get_cpu_var(softnet_data); |
3130 | 3141 | ||
3131 | if (sd != mysd) { | 3142 | if (sd != mysd) { |
3132 | sd->rps_ipi_next = mysd->rps_ipi_list; | 3143 | sd->rps_ipi_next = mysd->rps_ipi_list; |
3133 | mysd->rps_ipi_list = sd; | 3144 | mysd->rps_ipi_list = sd; |
3134 | 3145 | ||
3135 | __raise_softirq_irqoff(NET_RX_SOFTIRQ); | 3146 | __raise_softirq_irqoff(NET_RX_SOFTIRQ); |
3136 | return 1; | 3147 | return 1; |
3137 | } | 3148 | } |
3138 | #endif /* CONFIG_RPS */ | 3149 | #endif /* CONFIG_RPS */ |
3139 | return 0; | 3150 | return 0; |
3140 | } | 3151 | } |
3141 | 3152 | ||
3142 | /* | 3153 | /* |
3143 | * enqueue_to_backlog is called to queue an skb to a per CPU backlog | 3154 | * enqueue_to_backlog is called to queue an skb to a per CPU backlog |
3144 | * queue (may be a remote CPU queue). | 3155 | * queue (may be a remote CPU queue). |
3145 | */ | 3156 | */ |
3146 | static int enqueue_to_backlog(struct sk_buff *skb, int cpu, | 3157 | static int enqueue_to_backlog(struct sk_buff *skb, int cpu, |
3147 | unsigned int *qtail) | 3158 | unsigned int *qtail) |
3148 | { | 3159 | { |
3149 | struct softnet_data *sd; | 3160 | struct softnet_data *sd; |
3150 | unsigned long flags; | 3161 | unsigned long flags; |
3151 | 3162 | ||
3152 | sd = &per_cpu(softnet_data, cpu); | 3163 | sd = &per_cpu(softnet_data, cpu); |
3153 | 3164 | ||
3154 | local_irq_save(flags); | 3165 | local_irq_save(flags); |
3155 | 3166 | ||
3156 | rps_lock(sd); | 3167 | rps_lock(sd); |
3157 | if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) { | 3168 | if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) { |
3158 | if (skb_queue_len(&sd->input_pkt_queue)) { | 3169 | if (skb_queue_len(&sd->input_pkt_queue)) { |
3159 | enqueue: | 3170 | enqueue: |
3160 | __skb_queue_tail(&sd->input_pkt_queue, skb); | 3171 | __skb_queue_tail(&sd->input_pkt_queue, skb); |
3161 | input_queue_tail_incr_save(sd, qtail); | 3172 | input_queue_tail_incr_save(sd, qtail); |
3162 | rps_unlock(sd); | 3173 | rps_unlock(sd); |
3163 | local_irq_restore(flags); | 3174 | local_irq_restore(flags); |
3164 | return NET_RX_SUCCESS; | 3175 | return NET_RX_SUCCESS; |
3165 | } | 3176 | } |
3166 | 3177 | ||
3167 | /* Schedule NAPI for backlog device | 3178 | /* Schedule NAPI for backlog device |
3168 | * We can use a non-atomic operation since we own the queue lock | 3179 | * We can use a non-atomic operation since we own the queue lock |
3169 | */ | 3180 | */ |
3170 | if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) { | 3181 | if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) { |
3171 | if (!rps_ipi_queued(sd)) | 3182 | if (!rps_ipi_queued(sd)) |
3172 | ____napi_schedule(sd, &sd->backlog); | 3183 | ____napi_schedule(sd, &sd->backlog); |
3173 | } | 3184 | } |
3174 | goto enqueue; | 3185 | goto enqueue; |
3175 | } | 3186 | } |
3176 | 3187 | ||
3177 | sd->dropped++; | 3188 | sd->dropped++; |
3178 | rps_unlock(sd); | 3189 | rps_unlock(sd); |
3179 | 3190 | ||
3180 | local_irq_restore(flags); | 3191 | local_irq_restore(flags); |
3181 | 3192 | ||
3182 | atomic_long_inc(&skb->dev->rx_dropped); | 3193 | atomic_long_inc(&skb->dev->rx_dropped); |
3183 | kfree_skb(skb); | 3194 | kfree_skb(skb); |
3184 | return NET_RX_DROP; | 3195 | return NET_RX_DROP; |
3185 | } | 3196 | } |
3186 | 3197 | ||
3187 | /** | 3198 | /** |
3188 | * netif_rx - post buffer to the network code | 3199 | * netif_rx - post buffer to the network code |
3189 | * @skb: buffer to post | 3200 | * @skb: buffer to post |
3190 | * | 3201 | * |
3191 | * This function receives a packet from a device driver and queues it for | 3202 | * This function receives a packet from a device driver and queues it for |
3192 | * the upper (protocol) levels to process. It always succeeds. The buffer | 3203 | * the upper (protocol) levels to process. It always succeeds. The buffer |
3193 | * may be dropped during processing for congestion control or by the | 3204 | * may be dropped during processing for congestion control or by the |
3194 | * protocol layers. | 3205 | * protocol layers. |
3195 | * | 3206 | * |
3196 | * return values: | 3207 | * return values: |
3197 | * NET_RX_SUCCESS (no congestion) | 3208 | * NET_RX_SUCCESS (no congestion) |
3198 | * NET_RX_DROP (packet was dropped) | 3209 | * NET_RX_DROP (packet was dropped) |
3199 | * | 3210 | * |
3200 | */ | 3211 | */ |
3201 | 3212 | ||
3202 | int netif_rx(struct sk_buff *skb) | 3213 | int netif_rx(struct sk_buff *skb) |
3203 | { | 3214 | { |
3204 | int ret; | 3215 | int ret; |
3205 | 3216 | ||
3206 | /* if netpoll wants it, pretend we never saw it */ | 3217 | /* if netpoll wants it, pretend we never saw it */ |
3207 | if (netpoll_rx(skb)) | 3218 | if (netpoll_rx(skb)) |
3208 | return NET_RX_DROP; | 3219 | return NET_RX_DROP; |
3209 | 3220 | ||
3210 | net_timestamp_check(netdev_tstamp_prequeue, skb); | 3221 | net_timestamp_check(netdev_tstamp_prequeue, skb); |
3211 | 3222 | ||
3212 | trace_netif_rx(skb); | 3223 | trace_netif_rx(skb); |
3213 | #ifdef CONFIG_RPS | 3224 | #ifdef CONFIG_RPS |
3214 | if (static_key_false(&rps_needed)) { | 3225 | if (static_key_false(&rps_needed)) { |
3215 | struct rps_dev_flow voidflow, *rflow = &voidflow; | 3226 | struct rps_dev_flow voidflow, *rflow = &voidflow; |
3216 | int cpu; | 3227 | int cpu; |
3217 | 3228 | ||
3218 | preempt_disable(); | 3229 | preempt_disable(); |
3219 | rcu_read_lock(); | 3230 | rcu_read_lock(); |
3220 | 3231 | ||
3221 | cpu = get_rps_cpu(skb->dev, skb, &rflow); | 3232 | cpu = get_rps_cpu(skb->dev, skb, &rflow); |
3222 | if (cpu < 0) | 3233 | if (cpu < 0) |
3223 | cpu = smp_processor_id(); | 3234 | cpu = smp_processor_id(); |
3224 | 3235 | ||
3225 | ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); | 3236 | ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); |
3226 | 3237 | ||
3227 | rcu_read_unlock(); | 3238 | rcu_read_unlock(); |
3228 | preempt_enable(); | 3239 | preempt_enable(); |
3229 | } else | 3240 | } else |
3230 | #endif | 3241 | #endif |
3231 | { | 3242 | { |
3232 | unsigned int qtail; | 3243 | unsigned int qtail; |
3233 | ret = enqueue_to_backlog(skb, get_cpu(), &qtail); | 3244 | ret = enqueue_to_backlog(skb, get_cpu(), &qtail); |
3234 | put_cpu(); | 3245 | put_cpu(); |
3235 | } | 3246 | } |
3236 | return ret; | 3247 | return ret; |
3237 | } | 3248 | } |
3238 | EXPORT_SYMBOL(netif_rx); | 3249 | EXPORT_SYMBOL(netif_rx); |
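/* Sketch only (not part of this commit): a non-NAPI driver handing one
 * received frame to the stack.  my_rx_one() is an invented name; a real
 * driver would map its DMA buffer instead of the memcpy shown here.
 */
static void my_rx_one(struct net_device *dev, const void *frame,
		      unsigned int len)
{
	struct sk_buff *skb = netdev_alloc_skb(dev, len);

	if (!skb) {
		dev->stats.rx_dropped++;
		return;
	}

	memcpy(skb_put(skb, len), frame, len);
	skb->protocol = eth_type_trans(skb, dev);	/* sets pkt_type, pulls MAC header */

	netif_rx(skb);	/* always consumes the skb; drops are counted by the core */
}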
3239 | 3250 | ||
3240 | int netif_rx_ni(struct sk_buff *skb) | 3251 | int netif_rx_ni(struct sk_buff *skb) |
3241 | { | 3252 | { |
3242 | int err; | 3253 | int err; |
3243 | 3254 | ||
3244 | preempt_disable(); | 3255 | preempt_disable(); |
3245 | err = netif_rx(skb); | 3256 | err = netif_rx(skb); |
3246 | if (local_softirq_pending()) | 3257 | if (local_softirq_pending()) |
3247 | do_softirq(); | 3258 | do_softirq(); |
3248 | preempt_enable(); | 3259 | preempt_enable(); |
3249 | 3260 | ||
3250 | return err; | 3261 | return err; |
3251 | } | 3262 | } |
3252 | EXPORT_SYMBOL(netif_rx_ni); | 3263 | EXPORT_SYMBOL(netif_rx_ni); |
3253 | 3264 | ||
3254 | static void net_tx_action(struct softirq_action *h) | 3265 | static void net_tx_action(struct softirq_action *h) |
3255 | { | 3266 | { |
3256 | struct softnet_data *sd = &__get_cpu_var(softnet_data); | 3267 | struct softnet_data *sd = &__get_cpu_var(softnet_data); |
3257 | 3268 | ||
3258 | if (sd->completion_queue) { | 3269 | if (sd->completion_queue) { |
3259 | struct sk_buff *clist; | 3270 | struct sk_buff *clist; |
3260 | 3271 | ||
3261 | local_irq_disable(); | 3272 | local_irq_disable(); |
3262 | clist = sd->completion_queue; | 3273 | clist = sd->completion_queue; |
3263 | sd->completion_queue = NULL; | 3274 | sd->completion_queue = NULL; |
3264 | local_irq_enable(); | 3275 | local_irq_enable(); |
3265 | 3276 | ||
3266 | while (clist) { | 3277 | while (clist) { |
3267 | struct sk_buff *skb = clist; | 3278 | struct sk_buff *skb = clist; |
3268 | clist = clist->next; | 3279 | clist = clist->next; |
3269 | 3280 | ||
3270 | WARN_ON(atomic_read(&skb->users)); | 3281 | WARN_ON(atomic_read(&skb->users)); |
3271 | trace_kfree_skb(skb, net_tx_action); | 3282 | trace_kfree_skb(skb, net_tx_action); |
3272 | __kfree_skb(skb); | 3283 | __kfree_skb(skb); |
3273 | } | 3284 | } |
3274 | } | 3285 | } |
3275 | 3286 | ||
3276 | if (sd->output_queue) { | 3287 | if (sd->output_queue) { |
3277 | struct Qdisc *head; | 3288 | struct Qdisc *head; |
3278 | 3289 | ||
3279 | local_irq_disable(); | 3290 | local_irq_disable(); |
3280 | head = sd->output_queue; | 3291 | head = sd->output_queue; |
3281 | sd->output_queue = NULL; | 3292 | sd->output_queue = NULL; |
3282 | sd->output_queue_tailp = &sd->output_queue; | 3293 | sd->output_queue_tailp = &sd->output_queue; |
3283 | local_irq_enable(); | 3294 | local_irq_enable(); |
3284 | 3295 | ||
3285 | while (head) { | 3296 | while (head) { |
3286 | struct Qdisc *q = head; | 3297 | struct Qdisc *q = head; |
3287 | spinlock_t *root_lock; | 3298 | spinlock_t *root_lock; |
3288 | 3299 | ||
3289 | head = head->next_sched; | 3300 | head = head->next_sched; |
3290 | 3301 | ||
3291 | root_lock = qdisc_lock(q); | 3302 | root_lock = qdisc_lock(q); |
3292 | if (spin_trylock(root_lock)) { | 3303 | if (spin_trylock(root_lock)) { |
3293 | smp_mb__before_clear_bit(); | 3304 | smp_mb__before_clear_bit(); |
3294 | clear_bit(__QDISC_STATE_SCHED, | 3305 | clear_bit(__QDISC_STATE_SCHED, |
3295 | &q->state); | 3306 | &q->state); |
3296 | qdisc_run(q); | 3307 | qdisc_run(q); |
3297 | spin_unlock(root_lock); | 3308 | spin_unlock(root_lock); |
3298 | } else { | 3309 | } else { |
3299 | if (!test_bit(__QDISC_STATE_DEACTIVATED, | 3310 | if (!test_bit(__QDISC_STATE_DEACTIVATED, |
3300 | &q->state)) { | 3311 | &q->state)) { |
3301 | __netif_reschedule(q); | 3312 | __netif_reschedule(q); |
3302 | } else { | 3313 | } else { |
3303 | smp_mb__before_clear_bit(); | 3314 | smp_mb__before_clear_bit(); |
3304 | clear_bit(__QDISC_STATE_SCHED, | 3315 | clear_bit(__QDISC_STATE_SCHED, |
3305 | &q->state); | 3316 | &q->state); |
3306 | } | 3317 | } |
3307 | } | 3318 | } |
3308 | } | 3319 | } |
3309 | } | 3320 | } |
3310 | } | 3321 | } |
3311 | 3322 | ||
3312 | #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \ | 3323 | #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \ |
3313 | (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)) | 3324 | (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)) |
3314 | /* This hook is defined here for ATM LANE */ | 3325 | /* This hook is defined here for ATM LANE */ |
3315 | int (*br_fdb_test_addr_hook)(struct net_device *dev, | 3326 | int (*br_fdb_test_addr_hook)(struct net_device *dev, |
3316 | unsigned char *addr) __read_mostly; | 3327 | unsigned char *addr) __read_mostly; |
3317 | EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); | 3328 | EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); |
3318 | #endif | 3329 | #endif |
3319 | 3330 | ||
3320 | #ifdef CONFIG_NET_CLS_ACT | 3331 | #ifdef CONFIG_NET_CLS_ACT |
3321 | /* TODO: Maybe we should just force sch_ingress to be compiled in | 3332 | /* TODO: Maybe we should just force sch_ingress to be compiled in |
3322 | * when CONFIG_NET_CLS_ACT is? Otherwise we execute a few useless | 3333 | * when CONFIG_NET_CLS_ACT is? Otherwise we execute a few useless |
3323 | * instructions (a compare and 2 extra stores) when the ingress | 3334 | * instructions (a compare and 2 extra stores) when the ingress |
3324 | * scheduler is not built but CONFIG_NET_CLS_ACT is. | 3335 | * scheduler is not built but CONFIG_NET_CLS_ACT is. |
3325 | * NOTE: This doesn't stop any functionality; if you don't have | 3336 | * NOTE: This doesn't stop any functionality; if you don't have |
3326 | * the ingress scheduler, you just can't add policies on ingress. | 3337 | * the ingress scheduler, you just can't add policies on ingress. |
3327 | * | 3338 | * |
3328 | */ | 3339 | */ |
3329 | static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq) | 3340 | static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq) |
3330 | { | 3341 | { |
3331 | struct net_device *dev = skb->dev; | 3342 | struct net_device *dev = skb->dev; |
3332 | u32 ttl = G_TC_RTTL(skb->tc_verd); | 3343 | u32 ttl = G_TC_RTTL(skb->tc_verd); |
3333 | int result = TC_ACT_OK; | 3344 | int result = TC_ACT_OK; |
3334 | struct Qdisc *q; | 3345 | struct Qdisc *q; |
3335 | 3346 | ||
3336 | if (unlikely(MAX_RED_LOOP < ttl++)) { | 3347 | if (unlikely(MAX_RED_LOOP < ttl++)) { |
3337 | net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n", | 3348 | net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n", |
3338 | skb->skb_iif, dev->ifindex); | 3349 | skb->skb_iif, dev->ifindex); |
3339 | return TC_ACT_SHOT; | 3350 | return TC_ACT_SHOT; |
3340 | } | 3351 | } |
3341 | 3352 | ||
3342 | skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); | 3353 | skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); |
3343 | skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); | 3354 | skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); |
3344 | 3355 | ||
3345 | q = rxq->qdisc; | 3356 | q = rxq->qdisc; |
3346 | if (q != &noop_qdisc) { | 3357 | if (q != &noop_qdisc) { |
3347 | spin_lock(qdisc_lock(q)); | 3358 | spin_lock(qdisc_lock(q)); |
3348 | if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) | 3359 | if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) |
3349 | result = qdisc_enqueue_root(skb, q); | 3360 | result = qdisc_enqueue_root(skb, q); |
3350 | spin_unlock(qdisc_lock(q)); | 3361 | spin_unlock(qdisc_lock(q)); |
3351 | } | 3362 | } |
3352 | 3363 | ||
3353 | return result; | 3364 | return result; |
3354 | } | 3365 | } |
3355 | 3366 | ||
3356 | static inline struct sk_buff *handle_ing(struct sk_buff *skb, | 3367 | static inline struct sk_buff *handle_ing(struct sk_buff *skb, |
3357 | struct packet_type **pt_prev, | 3368 | struct packet_type **pt_prev, |
3358 | int *ret, struct net_device *orig_dev) | 3369 | int *ret, struct net_device *orig_dev) |
3359 | { | 3370 | { |
3360 | struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue); | 3371 | struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue); |
3361 | 3372 | ||
3362 | if (!rxq || rxq->qdisc == &noop_qdisc) | 3373 | if (!rxq || rxq->qdisc == &noop_qdisc) |
3363 | goto out; | 3374 | goto out; |
3364 | 3375 | ||
3365 | if (*pt_prev) { | 3376 | if (*pt_prev) { |
3366 | *ret = deliver_skb(skb, *pt_prev, orig_dev); | 3377 | *ret = deliver_skb(skb, *pt_prev, orig_dev); |
3367 | *pt_prev = NULL; | 3378 | *pt_prev = NULL; |
3368 | } | 3379 | } |
3369 | 3380 | ||
3370 | switch (ing_filter(skb, rxq)) { | 3381 | switch (ing_filter(skb, rxq)) { |
3371 | case TC_ACT_SHOT: | 3382 | case TC_ACT_SHOT: |
3372 | case TC_ACT_STOLEN: | 3383 | case TC_ACT_STOLEN: |
3373 | kfree_skb(skb); | 3384 | kfree_skb(skb); |
3374 | return NULL; | 3385 | return NULL; |
3375 | } | 3386 | } |
3376 | 3387 | ||
3377 | out: | 3388 | out: |
3378 | skb->tc_verd = 0; | 3389 | skb->tc_verd = 0; |
3379 | return skb; | 3390 | return skb; |
3380 | } | 3391 | } |
3381 | #endif | 3392 | #endif |
3382 | 3393 | ||
3383 | /** | 3394 | /** |
3384 | * netdev_rx_handler_register - register receive handler | 3395 | * netdev_rx_handler_register - register receive handler |
3385 | * @dev: device to register a handler for | 3396 | * @dev: device to register a handler for |
3386 | * @rx_handler: receive handler to register | 3397 | * @rx_handler: receive handler to register |
3387 | * @rx_handler_data: data pointer that is used by rx handler | 3398 | * @rx_handler_data: data pointer that is used by rx handler |
3388 | * | 3399 | * |
3389 | * Register a receive handler for a device. This handler will then be | 3400 | * Register a receive handler for a device. This handler will then be |
3390 | * called from __netif_receive_skb. A negative errno code is returned | 3401 | * called from __netif_receive_skb. A negative errno code is returned |
3391 | * on a failure. | 3402 | * on a failure. |
3392 | * | 3403 | * |
3393 | * The caller must hold the rtnl_mutex. | 3404 | * The caller must hold the rtnl_mutex. |
3394 | * | 3405 | * |
3395 | * For a general description of rx_handler, see enum rx_handler_result. | 3406 | * For a general description of rx_handler, see enum rx_handler_result. |
3396 | */ | 3407 | */ |
3397 | int netdev_rx_handler_register(struct net_device *dev, | 3408 | int netdev_rx_handler_register(struct net_device *dev, |
3398 | rx_handler_func_t *rx_handler, | 3409 | rx_handler_func_t *rx_handler, |
3399 | void *rx_handler_data) | 3410 | void *rx_handler_data) |
3400 | { | 3411 | { |
3401 | ASSERT_RTNL(); | 3412 | ASSERT_RTNL(); |
3402 | 3413 | ||
3403 | if (dev->rx_handler) | 3414 | if (dev->rx_handler) |
3404 | return -EBUSY; | 3415 | return -EBUSY; |
3405 | 3416 | ||
3406 | rcu_assign_pointer(dev->rx_handler_data, rx_handler_data); | 3417 | rcu_assign_pointer(dev->rx_handler_data, rx_handler_data); |
3407 | rcu_assign_pointer(dev->rx_handler, rx_handler); | 3418 | rcu_assign_pointer(dev->rx_handler, rx_handler); |
3408 | 3419 | ||
3409 | return 0; | 3420 | return 0; |
3410 | } | 3421 | } |
3411 | EXPORT_SYMBOL_GPL(netdev_rx_handler_register); | 3422 | EXPORT_SYMBOL_GPL(netdev_rx_handler_register); |
3412 | 3423 | ||
3413 | /** | 3424 | /** |
3414 | * netdev_rx_handler_unregister - unregister receive handler | 3425 | * netdev_rx_handler_unregister - unregister receive handler |
3415 | * @dev: device to unregister a handler from | 3426 | * @dev: device to unregister a handler from |
3416 | * | 3427 | * |
3417 | * Unregister a receive handler from a device. | 3428 | * Unregister a receive handler from a device. |
3418 | * | 3429 | * |
3419 | * The caller must hold the rtnl_mutex. | 3430 | * The caller must hold the rtnl_mutex. |
3420 | */ | 3431 | */ |
3421 | void netdev_rx_handler_unregister(struct net_device *dev) | 3432 | void netdev_rx_handler_unregister(struct net_device *dev) |
3422 | { | 3433 | { |
3423 | 3434 | ||
3424 | ASSERT_RTNL(); | 3435 | ASSERT_RTNL(); |
3425 | RCU_INIT_POINTER(dev->rx_handler, NULL); | 3436 | RCU_INIT_POINTER(dev->rx_handler, NULL); |
3426 | RCU_INIT_POINTER(dev->rx_handler_data, NULL); | 3437 | RCU_INIT_POINTER(dev->rx_handler_data, NULL); |
3427 | } | 3438 | } |
3428 | EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); | 3439 | EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); |
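/* Sketch only (not part of this commit): attaching an rx_handler to a port
 * device, in the style of bridge or bonding.  my_port_rx(), struct my_port
 * and my_attach_port() are invented names.
 */
static rx_handler_result_t my_port_rx(struct sk_buff **pskb)
{
	struct my_port *port = rcu_dereference((*pskb)->dev->rx_handler_data);

	if (!port)
		return RX_HANDLER_PASS;

	/* Inspect, steal or redirect *pskb here; returning RX_HANDLER_ANOTHER
	 * after changing (*pskb)->dev makes __netif_receive_skb() loop again.
	 */
	return RX_HANDLER_PASS;
}

static int my_attach_port(struct net_device *port_dev, struct my_port *port)
{
	int err;

	rtnl_lock();	/* register/unregister must run under rtnl_mutex */
	err = netdev_rx_handler_register(port_dev, my_port_rx, port);
	rtnl_unlock();
	return err;
}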
3429 | 3440 | ||
3430 | /* | 3441 | /* |
3431 | * Limit the use of PFMEMALLOC reserves to those protocols that implement | 3442 | * Limit the use of PFMEMALLOC reserves to those protocols that implement |
3432 | * the special handling of PFMEMALLOC skbs. | 3443 | * the special handling of PFMEMALLOC skbs. |
3433 | */ | 3444 | */ |
3434 | static bool skb_pfmemalloc_protocol(struct sk_buff *skb) | 3445 | static bool skb_pfmemalloc_protocol(struct sk_buff *skb) |
3435 | { | 3446 | { |
3436 | switch (skb->protocol) { | 3447 | switch (skb->protocol) { |
3437 | case __constant_htons(ETH_P_ARP): | 3448 | case __constant_htons(ETH_P_ARP): |
3438 | case __constant_htons(ETH_P_IP): | 3449 | case __constant_htons(ETH_P_IP): |
3439 | case __constant_htons(ETH_P_IPV6): | 3450 | case __constant_htons(ETH_P_IPV6): |
3440 | case __constant_htons(ETH_P_8021Q): | 3451 | case __constant_htons(ETH_P_8021Q): |
3441 | return true; | 3452 | return true; |
3442 | default: | 3453 | default: |
3443 | return false; | 3454 | return false; |
3444 | } | 3455 | } |
3445 | } | 3456 | } |
3446 | 3457 | ||
3447 | static int __netif_receive_skb(struct sk_buff *skb) | 3458 | static int __netif_receive_skb(struct sk_buff *skb) |
3448 | { | 3459 | { |
3449 | struct packet_type *ptype, *pt_prev; | 3460 | struct packet_type *ptype, *pt_prev; |
3450 | rx_handler_func_t *rx_handler; | 3461 | rx_handler_func_t *rx_handler; |
3451 | struct net_device *orig_dev; | 3462 | struct net_device *orig_dev; |
3452 | struct net_device *null_or_dev; | 3463 | struct net_device *null_or_dev; |
3453 | bool deliver_exact = false; | 3464 | bool deliver_exact = false; |
3454 | int ret = NET_RX_DROP; | 3465 | int ret = NET_RX_DROP; |
3455 | __be16 type; | 3466 | __be16 type; |
3456 | unsigned long pflags = current->flags; | 3467 | unsigned long pflags = current->flags; |
3457 | 3468 | ||
3458 | net_timestamp_check(!netdev_tstamp_prequeue, skb); | 3469 | net_timestamp_check(!netdev_tstamp_prequeue, skb); |
3459 | 3470 | ||
3460 | trace_netif_receive_skb(skb); | 3471 | trace_netif_receive_skb(skb); |
3461 | 3472 | ||
3462 | /* | 3473 | /* |
3463 | * PFMEMALLOC skbs are special, they should | 3474 | * PFMEMALLOC skbs are special, they should |
3464 | * - be delivered to SOCK_MEMALLOC sockets only | 3475 | * - be delivered to SOCK_MEMALLOC sockets only |
3465 | * - stay away from userspace | 3476 | * - stay away from userspace |
3466 | * - have bounded memory usage | 3477 | * - have bounded memory usage |
3467 | * | 3478 | * |
3468 | * Use PF_MEMALLOC as this saves us from propagating the allocation | 3479 | * Use PF_MEMALLOC as this saves us from propagating the allocation |
3469 | * context down to all allocation sites. | 3480 | * context down to all allocation sites. |
3470 | */ | 3481 | */ |
3471 | if (sk_memalloc_socks() && skb_pfmemalloc(skb)) | 3482 | if (sk_memalloc_socks() && skb_pfmemalloc(skb)) |
3472 | current->flags |= PF_MEMALLOC; | 3483 | current->flags |= PF_MEMALLOC; |
3473 | 3484 | ||
3474 | /* if we've gotten here through NAPI, check netpoll */ | 3485 | /* if we've gotten here through NAPI, check netpoll */ |
3475 | if (netpoll_receive_skb(skb)) | 3486 | if (netpoll_receive_skb(skb)) |
3476 | goto out; | 3487 | goto out; |
3477 | 3488 | ||
3478 | orig_dev = skb->dev; | 3489 | orig_dev = skb->dev; |
3479 | 3490 | ||
3480 | skb_reset_network_header(skb); | 3491 | skb_reset_network_header(skb); |
3481 | if (!skb_transport_header_was_set(skb)) | 3492 | if (!skb_transport_header_was_set(skb)) |
3482 | skb_reset_transport_header(skb); | 3493 | skb_reset_transport_header(skb); |
3483 | skb_reset_mac_len(skb); | 3494 | skb_reset_mac_len(skb); |
3484 | 3495 | ||
3485 | pt_prev = NULL; | 3496 | pt_prev = NULL; |
3486 | 3497 | ||
3487 | rcu_read_lock(); | 3498 | rcu_read_lock(); |
3488 | 3499 | ||
3489 | another_round: | 3500 | another_round: |
3490 | skb->skb_iif = skb->dev->ifindex; | 3501 | skb->skb_iif = skb->dev->ifindex; |
3491 | 3502 | ||
3492 | __this_cpu_inc(softnet_data.processed); | 3503 | __this_cpu_inc(softnet_data.processed); |
3493 | 3504 | ||
3494 | if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { | 3505 | if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { |
3495 | skb = vlan_untag(skb); | 3506 | skb = vlan_untag(skb); |
3496 | if (unlikely(!skb)) | 3507 | if (unlikely(!skb)) |
3497 | goto unlock; | 3508 | goto unlock; |
3498 | } | 3509 | } |
3499 | 3510 | ||
3500 | #ifdef CONFIG_NET_CLS_ACT | 3511 | #ifdef CONFIG_NET_CLS_ACT |
3501 | if (skb->tc_verd & TC_NCLS) { | 3512 | if (skb->tc_verd & TC_NCLS) { |
3502 | skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); | 3513 | skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); |
3503 | goto ncls; | 3514 | goto ncls; |
3504 | } | 3515 | } |
3505 | #endif | 3516 | #endif |
3506 | 3517 | ||
3507 | if (sk_memalloc_socks() && skb_pfmemalloc(skb)) | 3518 | if (sk_memalloc_socks() && skb_pfmemalloc(skb)) |
3508 | goto skip_taps; | 3519 | goto skip_taps; |
3509 | 3520 | ||
3510 | list_for_each_entry_rcu(ptype, &ptype_all, list) { | 3521 | list_for_each_entry_rcu(ptype, &ptype_all, list) { |
3511 | if (!ptype->dev || ptype->dev == skb->dev) { | 3522 | if (!ptype->dev || ptype->dev == skb->dev) { |
3512 | if (pt_prev) | 3523 | if (pt_prev) |
3513 | ret = deliver_skb(skb, pt_prev, orig_dev); | 3524 | ret = deliver_skb(skb, pt_prev, orig_dev); |
3514 | pt_prev = ptype; | 3525 | pt_prev = ptype; |
3515 | } | 3526 | } |
3516 | } | 3527 | } |
3517 | 3528 | ||
3518 | skip_taps: | 3529 | skip_taps: |
3519 | #ifdef CONFIG_NET_CLS_ACT | 3530 | #ifdef CONFIG_NET_CLS_ACT |
3520 | skb = handle_ing(skb, &pt_prev, &ret, orig_dev); | 3531 | skb = handle_ing(skb, &pt_prev, &ret, orig_dev); |
3521 | if (!skb) | 3532 | if (!skb) |
3522 | goto unlock; | 3533 | goto unlock; |
3523 | ncls: | 3534 | ncls: |
3524 | #endif | 3535 | #endif |
3525 | 3536 | ||
3526 | if (sk_memalloc_socks() && skb_pfmemalloc(skb) | 3537 | if (sk_memalloc_socks() && skb_pfmemalloc(skb) |
3527 | && !skb_pfmemalloc_protocol(skb)) | 3538 | && !skb_pfmemalloc_protocol(skb)) |
3528 | goto drop; | 3539 | goto drop; |
3529 | 3540 | ||
3530 | if (vlan_tx_tag_present(skb)) { | 3541 | if (vlan_tx_tag_present(skb)) { |
3531 | if (pt_prev) { | 3542 | if (pt_prev) { |
3532 | ret = deliver_skb(skb, pt_prev, orig_dev); | 3543 | ret = deliver_skb(skb, pt_prev, orig_dev); |
3533 | pt_prev = NULL; | 3544 | pt_prev = NULL; |
3534 | } | 3545 | } |
3535 | if (vlan_do_receive(&skb)) | 3546 | if (vlan_do_receive(&skb)) |
3536 | goto another_round; | 3547 | goto another_round; |
3537 | else if (unlikely(!skb)) | 3548 | else if (unlikely(!skb)) |
3538 | goto unlock; | 3549 | goto unlock; |
3539 | } | 3550 | } |
3540 | 3551 | ||
3541 | rx_handler = rcu_dereference(skb->dev->rx_handler); | 3552 | rx_handler = rcu_dereference(skb->dev->rx_handler); |
3542 | if (rx_handler) { | 3553 | if (rx_handler) { |
3543 | if (pt_prev) { | 3554 | if (pt_prev) { |
3544 | ret = deliver_skb(skb, pt_prev, orig_dev); | 3555 | ret = deliver_skb(skb, pt_prev, orig_dev); |
3545 | pt_prev = NULL; | 3556 | pt_prev = NULL; |
3546 | } | 3557 | } |
3547 | switch (rx_handler(&skb)) { | 3558 | switch (rx_handler(&skb)) { |
3548 | case RX_HANDLER_CONSUMED: | 3559 | case RX_HANDLER_CONSUMED: |
3549 | goto unlock; | 3560 | goto unlock; |
3550 | case RX_HANDLER_ANOTHER: | 3561 | case RX_HANDLER_ANOTHER: |
3551 | goto another_round; | 3562 | goto another_round; |
3552 | case RX_HANDLER_EXACT: | 3563 | case RX_HANDLER_EXACT: |
3553 | deliver_exact = true; | 3564 | deliver_exact = true; |
3554 | case RX_HANDLER_PASS: | 3565 | case RX_HANDLER_PASS: |
3555 | break; | 3566 | break; |
3556 | default: | 3567 | default: |
3557 | BUG(); | 3568 | BUG(); |
3558 | } | 3569 | } |
3559 | } | 3570 | } |
3560 | 3571 | ||
3561 | if (vlan_tx_nonzero_tag_present(skb)) | 3572 | if (vlan_tx_nonzero_tag_present(skb)) |
3562 | skb->pkt_type = PACKET_OTHERHOST; | 3573 | skb->pkt_type = PACKET_OTHERHOST; |
3563 | 3574 | ||
3564 | /* deliver only exact match when indicated */ | 3575 | /* deliver only exact match when indicated */ |
3565 | null_or_dev = deliver_exact ? skb->dev : NULL; | 3576 | null_or_dev = deliver_exact ? skb->dev : NULL; |
3566 | 3577 | ||
3567 | type = skb->protocol; | 3578 | type = skb->protocol; |
3568 | list_for_each_entry_rcu(ptype, | 3579 | list_for_each_entry_rcu(ptype, |
3569 | &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { | 3580 | &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { |
3570 | if (ptype->type == type && | 3581 | if (ptype->type == type && |
3571 | (ptype->dev == null_or_dev || ptype->dev == skb->dev || | 3582 | (ptype->dev == null_or_dev || ptype->dev == skb->dev || |
3572 | ptype->dev == orig_dev)) { | 3583 | ptype->dev == orig_dev)) { |
3573 | if (pt_prev) | 3584 | if (pt_prev) |
3574 | ret = deliver_skb(skb, pt_prev, orig_dev); | 3585 | ret = deliver_skb(skb, pt_prev, orig_dev); |
3575 | pt_prev = ptype; | 3586 | pt_prev = ptype; |
3576 | } | 3587 | } |
3577 | } | 3588 | } |
3578 | 3589 | ||
3579 | if (pt_prev) { | 3590 | if (pt_prev) { |
3580 | if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) | 3591 | if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) |
3581 | goto drop; | 3592 | goto drop; |
3582 | else | 3593 | else |
3583 | ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); | 3594 | ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); |
3584 | } else { | 3595 | } else { |
3585 | drop: | 3596 | drop: |
3586 | atomic_long_inc(&skb->dev->rx_dropped); | 3597 | atomic_long_inc(&skb->dev->rx_dropped); |
3587 | kfree_skb(skb); | 3598 | kfree_skb(skb); |
3588 | /* Jamal, now you will not be able to escape explaining | 3599 | /* Jamal, now you will not be able to escape explaining |
3589 | * to me how you were going to use this. :-) | 3600 | * to me how you were going to use this. :-) |
3590 | */ | 3601 | */ |
3591 | ret = NET_RX_DROP; | 3602 | ret = NET_RX_DROP; |
3592 | } | 3603 | } |
3593 | 3604 | ||
3594 | unlock: | 3605 | unlock: |
3595 | rcu_read_unlock(); | 3606 | rcu_read_unlock(); |
3596 | out: | 3607 | out: |
3597 | tsk_restore_flags(current, pflags, PF_MEMALLOC); | 3608 | tsk_restore_flags(current, pflags, PF_MEMALLOC); |
3598 | return ret; | 3609 | return ret; |
3599 | } | 3610 | } |
3600 | 3611 | ||
3601 | /** | 3612 | /** |
3602 | * netif_receive_skb - process receive buffer from network | 3613 | * netif_receive_skb - process receive buffer from network |
3603 | * @skb: buffer to process | 3614 | * @skb: buffer to process |
3604 | * | 3615 | * |
3605 | * netif_receive_skb() is the main receive data processing function. | 3616 | * netif_receive_skb() is the main receive data processing function. |
3606 | * It always succeeds. The buffer may be dropped during processing | 3617 | * It always succeeds. The buffer may be dropped during processing |
3607 | * for congestion control or by the protocol layers. | 3618 | * for congestion control or by the protocol layers. |
3608 | * | 3619 | * |
3609 | * This function may only be called from softirq context and interrupts | 3620 | * This function may only be called from softirq context and interrupts |
3610 | * should be enabled. | 3621 | * should be enabled. |
3611 | * | 3622 | * |
3612 | * Return values (usually ignored): | 3623 | * Return values (usually ignored): |
3613 | * NET_RX_SUCCESS: no congestion | 3624 | * NET_RX_SUCCESS: no congestion |
3614 | * NET_RX_DROP: packet was dropped | 3625 | * NET_RX_DROP: packet was dropped |
3615 | */ | 3626 | */ |
3616 | int netif_receive_skb(struct sk_buff *skb) | 3627 | int netif_receive_skb(struct sk_buff *skb) |
3617 | { | 3628 | { |
3618 | net_timestamp_check(netdev_tstamp_prequeue, skb); | 3629 | net_timestamp_check(netdev_tstamp_prequeue, skb); |
3619 | 3630 | ||
3620 | if (skb_defer_rx_timestamp(skb)) | 3631 | if (skb_defer_rx_timestamp(skb)) |
3621 | return NET_RX_SUCCESS; | 3632 | return NET_RX_SUCCESS; |
3622 | 3633 | ||
3623 | #ifdef CONFIG_RPS | 3634 | #ifdef CONFIG_RPS |
3624 | if (static_key_false(&rps_needed)) { | 3635 | if (static_key_false(&rps_needed)) { |
3625 | struct rps_dev_flow voidflow, *rflow = &voidflow; | 3636 | struct rps_dev_flow voidflow, *rflow = &voidflow; |
3626 | int cpu, ret; | 3637 | int cpu, ret; |
3627 | 3638 | ||
3628 | rcu_read_lock(); | 3639 | rcu_read_lock(); |
3629 | 3640 | ||
3630 | cpu = get_rps_cpu(skb->dev, skb, &rflow); | 3641 | cpu = get_rps_cpu(skb->dev, skb, &rflow); |
3631 | 3642 | ||
3632 | if (cpu >= 0) { | 3643 | if (cpu >= 0) { |
3633 | ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); | 3644 | ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); |
3634 | rcu_read_unlock(); | 3645 | rcu_read_unlock(); |
3635 | return ret; | 3646 | return ret; |
3636 | } | 3647 | } |
3637 | rcu_read_unlock(); | 3648 | rcu_read_unlock(); |
3638 | } | 3649 | } |
3639 | #endif | 3650 | #endif |
3640 | return __netif_receive_skb(skb); | 3651 | return __netif_receive_skb(skb); |
3641 | } | 3652 | } |
3642 | EXPORT_SYMBOL(netif_receive_skb); | 3653 | EXPORT_SYMBOL(netif_receive_skb); |
3643 | 3654 | ||
3644 | /* Network device is going away, flush any packets still pending | 3655 | /* Network device is going away, flush any packets still pending |
3645 | * Called with irqs disabled. | 3656 | * Called with irqs disabled. |
3646 | */ | 3657 | */ |
3647 | static void flush_backlog(void *arg) | 3658 | static void flush_backlog(void *arg) |
3648 | { | 3659 | { |
3649 | struct net_device *dev = arg; | 3660 | struct net_device *dev = arg; |
3650 | struct softnet_data *sd = &__get_cpu_var(softnet_data); | 3661 | struct softnet_data *sd = &__get_cpu_var(softnet_data); |
3651 | struct sk_buff *skb, *tmp; | 3662 | struct sk_buff *skb, *tmp; |
3652 | 3663 | ||
3653 | rps_lock(sd); | 3664 | rps_lock(sd); |
3654 | skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { | 3665 | skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { |
3655 | if (skb->dev == dev) { | 3666 | if (skb->dev == dev) { |
3656 | __skb_unlink(skb, &sd->input_pkt_queue); | 3667 | __skb_unlink(skb, &sd->input_pkt_queue); |
3657 | kfree_skb(skb); | 3668 | kfree_skb(skb); |
3658 | input_queue_head_incr(sd); | 3669 | input_queue_head_incr(sd); |
3659 | } | 3670 | } |
3660 | } | 3671 | } |
3661 | rps_unlock(sd); | 3672 | rps_unlock(sd); |
3662 | 3673 | ||
3663 | skb_queue_walk_safe(&sd->process_queue, skb, tmp) { | 3674 | skb_queue_walk_safe(&sd->process_queue, skb, tmp) { |
3664 | if (skb->dev == dev) { | 3675 | if (skb->dev == dev) { |
3665 | __skb_unlink(skb, &sd->process_queue); | 3676 | __skb_unlink(skb, &sd->process_queue); |
3666 | kfree_skb(skb); | 3677 | kfree_skb(skb); |
3667 | input_queue_head_incr(sd); | 3678 | input_queue_head_incr(sd); |
3668 | } | 3679 | } |
3669 | } | 3680 | } |
3670 | } | 3681 | } |
3671 | 3682 | ||
3672 | static int napi_gro_complete(struct sk_buff *skb) | 3683 | static int napi_gro_complete(struct sk_buff *skb) |
3673 | { | 3684 | { |
3674 | struct packet_offload *ptype; | 3685 | struct packet_offload *ptype; |
3675 | __be16 type = skb->protocol; | 3686 | __be16 type = skb->protocol; |
3676 | struct list_head *head = &offload_base; | 3687 | struct list_head *head = &offload_base; |
3677 | int err = -ENOENT; | 3688 | int err = -ENOENT; |
3678 | 3689 | ||
3679 | BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb)); | 3690 | BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb)); |
3680 | 3691 | ||
3681 | if (NAPI_GRO_CB(skb)->count == 1) { | 3692 | if (NAPI_GRO_CB(skb)->count == 1) { |
3682 | skb_shinfo(skb)->gso_size = 0; | 3693 | skb_shinfo(skb)->gso_size = 0; |
3683 | goto out; | 3694 | goto out; |
3684 | } | 3695 | } |
3685 | 3696 | ||
3686 | rcu_read_lock(); | 3697 | rcu_read_lock(); |
3687 | list_for_each_entry_rcu(ptype, head, list) { | 3698 | list_for_each_entry_rcu(ptype, head, list) { |
3688 | if (ptype->type != type || !ptype->callbacks.gro_complete) | 3699 | if (ptype->type != type || !ptype->callbacks.gro_complete) |
3689 | continue; | 3700 | continue; |
3690 | 3701 | ||
3691 | err = ptype->callbacks.gro_complete(skb); | 3702 | err = ptype->callbacks.gro_complete(skb); |
3692 | break; | 3703 | break; |
3693 | } | 3704 | } |
3694 | rcu_read_unlock(); | 3705 | rcu_read_unlock(); |
3695 | 3706 | ||
3696 | if (err) { | 3707 | if (err) { |
3697 | WARN_ON(&ptype->list == head); | 3708 | WARN_ON(&ptype->list == head); |
3698 | kfree_skb(skb); | 3709 | kfree_skb(skb); |
3699 | return NET_RX_SUCCESS; | 3710 | return NET_RX_SUCCESS; |
3700 | } | 3711 | } |
3701 | 3712 | ||
3702 | out: | 3713 | out: |
3703 | return netif_receive_skb(skb); | 3714 | return netif_receive_skb(skb); |
3704 | } | 3715 | } |
3705 | 3716 | ||
3706 | /* napi->gro_list contains packets ordered by age. | 3717 | /* napi->gro_list contains packets ordered by age. |
3707 | * Youngest packets are at the head of it. | 3718 | * Youngest packets are at the head of it. |
3708 | * Complete skbs in reverse order to reduce latencies. | 3719 | * Complete skbs in reverse order to reduce latencies. |
3709 | */ | 3720 | */ |
3710 | void napi_gro_flush(struct napi_struct *napi, bool flush_old) | 3721 | void napi_gro_flush(struct napi_struct *napi, bool flush_old) |
3711 | { | 3722 | { |
3712 | struct sk_buff *skb, *prev = NULL; | 3723 | struct sk_buff *skb, *prev = NULL; |
3713 | 3724 | ||
3714 | /* scan list and build reverse chain */ | 3725 | /* scan list and build reverse chain */ |
3715 | for (skb = napi->gro_list; skb != NULL; skb = skb->next) { | 3726 | for (skb = napi->gro_list; skb != NULL; skb = skb->next) { |
3716 | skb->prev = prev; | 3727 | skb->prev = prev; |
3717 | prev = skb; | 3728 | prev = skb; |
3718 | } | 3729 | } |
3719 | 3730 | ||
3720 | for (skb = prev; skb; skb = prev) { | 3731 | for (skb = prev; skb; skb = prev) { |
3721 | skb->next = NULL; | 3732 | skb->next = NULL; |
3722 | 3733 | ||
3723 | if (flush_old && NAPI_GRO_CB(skb)->age == jiffies) | 3734 | if (flush_old && NAPI_GRO_CB(skb)->age == jiffies) |
3724 | return; | 3735 | return; |
3725 | 3736 | ||
3726 | prev = skb->prev; | 3737 | prev = skb->prev; |
3727 | napi_gro_complete(skb); | 3738 | napi_gro_complete(skb); |
3728 | napi->gro_count--; | 3739 | napi->gro_count--; |
3729 | } | 3740 | } |
3730 | 3741 | ||
3731 | napi->gro_list = NULL; | 3742 | napi->gro_list = NULL; |
3732 | } | 3743 | } |
3733 | EXPORT_SYMBOL(napi_gro_flush); | 3744 | EXPORT_SYMBOL(napi_gro_flush); |
3734 | 3745 | ||
3735 | static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) | 3746 | static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) |
3736 | { | 3747 | { |
3737 | struct sk_buff *p; | 3748 | struct sk_buff *p; |
3738 | unsigned int maclen = skb->dev->hard_header_len; | 3749 | unsigned int maclen = skb->dev->hard_header_len; |
3739 | 3750 | ||
3740 | for (p = napi->gro_list; p; p = p->next) { | 3751 | for (p = napi->gro_list; p; p = p->next) { |
3741 | unsigned long diffs; | 3752 | unsigned long diffs; |
3742 | 3753 | ||
3743 | diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; | 3754 | diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; |
3744 | diffs |= p->vlan_tci ^ skb->vlan_tci; | 3755 | diffs |= p->vlan_tci ^ skb->vlan_tci; |
3745 | if (maclen == ETH_HLEN) | 3756 | if (maclen == ETH_HLEN) |
3746 | diffs |= compare_ether_header(skb_mac_header(p), | 3757 | diffs |= compare_ether_header(skb_mac_header(p), |
3747 | skb_gro_mac_header(skb)); | 3758 | skb_gro_mac_header(skb)); |
3748 | else if (!diffs) | 3759 | else if (!diffs) |
3749 | diffs = memcmp(skb_mac_header(p), | 3760 | diffs = memcmp(skb_mac_header(p), |
3750 | skb_gro_mac_header(skb), | 3761 | skb_gro_mac_header(skb), |
3751 | maclen); | 3762 | maclen); |
3752 | NAPI_GRO_CB(p)->same_flow = !diffs; | 3763 | NAPI_GRO_CB(p)->same_flow = !diffs; |
3753 | NAPI_GRO_CB(p)->flush = 0; | 3764 | NAPI_GRO_CB(p)->flush = 0; |
3754 | } | 3765 | } |
3755 | } | 3766 | } |
3756 | 3767 | ||
3757 | static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | 3768 | static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) |
3758 | { | 3769 | { |
3759 | struct sk_buff **pp = NULL; | 3770 | struct sk_buff **pp = NULL; |
3760 | struct packet_offload *ptype; | 3771 | struct packet_offload *ptype; |
3761 | __be16 type = skb->protocol; | 3772 | __be16 type = skb->protocol; |
3762 | struct list_head *head = &offload_base; | 3773 | struct list_head *head = &offload_base; |
3763 | int same_flow; | 3774 | int same_flow; |
3764 | int mac_len; | 3775 | int mac_len; |
3765 | enum gro_result ret; | 3776 | enum gro_result ret; |
3766 | 3777 | ||
3767 | if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) | 3778 | if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) |
3768 | goto normal; | 3779 | goto normal; |
3769 | 3780 | ||
3770 | if (skb_is_gso(skb) || skb_has_frag_list(skb)) | 3781 | if (skb_is_gso(skb) || skb_has_frag_list(skb)) |
3771 | goto normal; | 3782 | goto normal; |
3772 | 3783 | ||
3773 | gro_list_prepare(napi, skb); | 3784 | gro_list_prepare(napi, skb); |
3774 | 3785 | ||
3775 | rcu_read_lock(); | 3786 | rcu_read_lock(); |
3776 | list_for_each_entry_rcu(ptype, head, list) { | 3787 | list_for_each_entry_rcu(ptype, head, list) { |
3777 | if (ptype->type != type || !ptype->callbacks.gro_receive) | 3788 | if (ptype->type != type || !ptype->callbacks.gro_receive) |
3778 | continue; | 3789 | continue; |
3779 | 3790 | ||
3780 | skb_set_network_header(skb, skb_gro_offset(skb)); | 3791 | skb_set_network_header(skb, skb_gro_offset(skb)); |
3781 | mac_len = skb->network_header - skb->mac_header; | 3792 | mac_len = skb->network_header - skb->mac_header; |
3782 | skb->mac_len = mac_len; | 3793 | skb->mac_len = mac_len; |
3783 | NAPI_GRO_CB(skb)->same_flow = 0; | 3794 | NAPI_GRO_CB(skb)->same_flow = 0; |
3784 | NAPI_GRO_CB(skb)->flush = 0; | 3795 | NAPI_GRO_CB(skb)->flush = 0; |
3785 | NAPI_GRO_CB(skb)->free = 0; | 3796 | NAPI_GRO_CB(skb)->free = 0; |
3786 | 3797 | ||
3787 | pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); | 3798 | pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); |
3788 | break; | 3799 | break; |
3789 | } | 3800 | } |
3790 | rcu_read_unlock(); | 3801 | rcu_read_unlock(); |
3791 | 3802 | ||
3792 | if (&ptype->list == head) | 3803 | if (&ptype->list == head) |
3793 | goto normal; | 3804 | goto normal; |
3794 | 3805 | ||
3795 | same_flow = NAPI_GRO_CB(skb)->same_flow; | 3806 | same_flow = NAPI_GRO_CB(skb)->same_flow; |
3796 | ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED; | 3807 | ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED; |
3797 | 3808 | ||
3798 | if (pp) { | 3809 | if (pp) { |
3799 | struct sk_buff *nskb = *pp; | 3810 | struct sk_buff *nskb = *pp; |
3800 | 3811 | ||
3801 | *pp = nskb->next; | 3812 | *pp = nskb->next; |
3802 | nskb->next = NULL; | 3813 | nskb->next = NULL; |
3803 | napi_gro_complete(nskb); | 3814 | napi_gro_complete(nskb); |
3804 | napi->gro_count--; | 3815 | napi->gro_count--; |
3805 | } | 3816 | } |
3806 | 3817 | ||
3807 | if (same_flow) | 3818 | if (same_flow) |
3808 | goto ok; | 3819 | goto ok; |
3809 | 3820 | ||
3810 | if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS) | 3821 | if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS) |
3811 | goto normal; | 3822 | goto normal; |
3812 | 3823 | ||
3813 | napi->gro_count++; | 3824 | napi->gro_count++; |
3814 | NAPI_GRO_CB(skb)->count = 1; | 3825 | NAPI_GRO_CB(skb)->count = 1; |
3815 | NAPI_GRO_CB(skb)->age = jiffies; | 3826 | NAPI_GRO_CB(skb)->age = jiffies; |
3816 | skb_shinfo(skb)->gso_size = skb_gro_len(skb); | 3827 | skb_shinfo(skb)->gso_size = skb_gro_len(skb); |
3817 | skb->next = napi->gro_list; | 3828 | skb->next = napi->gro_list; |
3818 | napi->gro_list = skb; | 3829 | napi->gro_list = skb; |
3819 | ret = GRO_HELD; | 3830 | ret = GRO_HELD; |
3820 | 3831 | ||
3821 | pull: | 3832 | pull: |
3822 | if (skb_headlen(skb) < skb_gro_offset(skb)) { | 3833 | if (skb_headlen(skb) < skb_gro_offset(skb)) { |
3823 | int grow = skb_gro_offset(skb) - skb_headlen(skb); | 3834 | int grow = skb_gro_offset(skb) - skb_headlen(skb); |
3824 | 3835 | ||
3825 | BUG_ON(skb->end - skb->tail < grow); | 3836 | BUG_ON(skb->end - skb->tail < grow); |
3826 | 3837 | ||
3827 | memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow); | 3838 | memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow); |
3828 | 3839 | ||
3829 | skb->tail += grow; | 3840 | skb->tail += grow; |
3830 | skb->data_len -= grow; | 3841 | skb->data_len -= grow; |
3831 | 3842 | ||
3832 | skb_shinfo(skb)->frags[0].page_offset += grow; | 3843 | skb_shinfo(skb)->frags[0].page_offset += grow; |
3833 | skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow); | 3844 | skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow); |
3834 | 3845 | ||
3835 | if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) { | 3846 | if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) { |
3836 | skb_frag_unref(skb, 0); | 3847 | skb_frag_unref(skb, 0); |
3837 | memmove(skb_shinfo(skb)->frags, | 3848 | memmove(skb_shinfo(skb)->frags, |
3838 | skb_shinfo(skb)->frags + 1, | 3849 | skb_shinfo(skb)->frags + 1, |
3839 | --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); | 3850 | --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); |
3840 | } | 3851 | } |
3841 | } | 3852 | } |
3842 | 3853 | ||
3843 | ok: | 3854 | ok: |
3844 | return ret; | 3855 | return ret; |
3845 | 3856 | ||
3846 | normal: | 3857 | normal: |
3847 | ret = GRO_NORMAL; | 3858 | ret = GRO_NORMAL; |
3848 | goto pull; | 3859 | goto pull; |
3849 | } | 3860 | } |
3850 | 3861 | ||
3851 | 3862 | ||
3852 | static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) | 3863 | static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) |
3853 | { | 3864 | { |
3854 | switch (ret) { | 3865 | switch (ret) { |
3855 | case GRO_NORMAL: | 3866 | case GRO_NORMAL: |
3856 | if (netif_receive_skb(skb)) | 3867 | if (netif_receive_skb(skb)) |
3857 | ret = GRO_DROP; | 3868 | ret = GRO_DROP; |
3858 | break; | 3869 | break; |
3859 | 3870 | ||
3860 | case GRO_DROP: | 3871 | case GRO_DROP: |
3861 | kfree_skb(skb); | 3872 | kfree_skb(skb); |
3862 | break; | 3873 | break; |
3863 | 3874 | ||
3864 | case GRO_MERGED_FREE: | 3875 | case GRO_MERGED_FREE: |
3865 | if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) | 3876 | if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) |
3866 | kmem_cache_free(skbuff_head_cache, skb); | 3877 | kmem_cache_free(skbuff_head_cache, skb); |
3867 | else | 3878 | else |
3868 | __kfree_skb(skb); | 3879 | __kfree_skb(skb); |
3869 | break; | 3880 | break; |
3870 | 3881 | ||
3871 | case GRO_HELD: | 3882 | case GRO_HELD: |
3872 | case GRO_MERGED: | 3883 | case GRO_MERGED: |
3873 | break; | 3884 | break; |
3874 | } | 3885 | } |
3875 | 3886 | ||
3876 | return ret; | 3887 | return ret; |
3877 | } | 3888 | } |
3878 | 3889 | ||
3879 | static void skb_gro_reset_offset(struct sk_buff *skb) | 3890 | static void skb_gro_reset_offset(struct sk_buff *skb) |
3880 | { | 3891 | { |
3881 | const struct skb_shared_info *pinfo = skb_shinfo(skb); | 3892 | const struct skb_shared_info *pinfo = skb_shinfo(skb); |
3882 | const skb_frag_t *frag0 = &pinfo->frags[0]; | 3893 | const skb_frag_t *frag0 = &pinfo->frags[0]; |
3883 | 3894 | ||
3884 | NAPI_GRO_CB(skb)->data_offset = 0; | 3895 | NAPI_GRO_CB(skb)->data_offset = 0; |
3885 | NAPI_GRO_CB(skb)->frag0 = NULL; | 3896 | NAPI_GRO_CB(skb)->frag0 = NULL; |
3886 | NAPI_GRO_CB(skb)->frag0_len = 0; | 3897 | NAPI_GRO_CB(skb)->frag0_len = 0; |
3887 | 3898 | ||
3888 | if (skb->mac_header == skb->tail && | 3899 | if (skb->mac_header == skb->tail && |
3889 | pinfo->nr_frags && | 3900 | pinfo->nr_frags && |
3890 | !PageHighMem(skb_frag_page(frag0))) { | 3901 | !PageHighMem(skb_frag_page(frag0))) { |
3891 | NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); | 3902 | NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); |
3892 | NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0); | 3903 | NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0); |
3893 | } | 3904 | } |
3894 | } | 3905 | } |
3895 | 3906 | ||
3896 | gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | 3907 | gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) |
3897 | { | 3908 | { |
3898 | skb_gro_reset_offset(skb); | 3909 | skb_gro_reset_offset(skb); |
3899 | 3910 | ||
3900 | return napi_skb_finish(dev_gro_receive(napi, skb), skb); | 3911 | return napi_skb_finish(dev_gro_receive(napi, skb), skb); |
3901 | } | 3912 | } |
3902 | EXPORT_SYMBOL(napi_gro_receive); | 3913 | EXPORT_SYMBOL(napi_gro_receive); |
3903 | 3914 | ||
3904 | static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) | 3915 | static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) |
3905 | { | 3916 | { |
3906 | __skb_pull(skb, skb_headlen(skb)); | 3917 | __skb_pull(skb, skb_headlen(skb)); |
3907 | /* restore the reserve we had after netdev_alloc_skb_ip_align() */ | 3918 | /* restore the reserve we had after netdev_alloc_skb_ip_align() */ |
3908 | skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb)); | 3919 | skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb)); |
3909 | skb->vlan_tci = 0; | 3920 | skb->vlan_tci = 0; |
3910 | skb->dev = napi->dev; | 3921 | skb->dev = napi->dev; |
3911 | skb->skb_iif = 0; | 3922 | skb->skb_iif = 0; |
3912 | 3923 | ||
3913 | napi->skb = skb; | 3924 | napi->skb = skb; |
3914 | } | 3925 | } |
3915 | 3926 | ||
3916 | struct sk_buff *napi_get_frags(struct napi_struct *napi) | 3927 | struct sk_buff *napi_get_frags(struct napi_struct *napi) |
3917 | { | 3928 | { |
3918 | struct sk_buff *skb = napi->skb; | 3929 | struct sk_buff *skb = napi->skb; |
3919 | 3930 | ||
3920 | if (!skb) { | 3931 | if (!skb) { |
3921 | skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD); | 3932 | skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD); |
3922 | if (skb) | 3933 | if (skb) |
3923 | napi->skb = skb; | 3934 | napi->skb = skb; |
3924 | } | 3935 | } |
3925 | return skb; | 3936 | return skb; |
3926 | } | 3937 | } |
3927 | EXPORT_SYMBOL(napi_get_frags); | 3938 | EXPORT_SYMBOL(napi_get_frags); |
3928 | 3939 | ||
3929 | static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, | 3940 | static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, |
3930 | gro_result_t ret) | 3941 | gro_result_t ret) |
3931 | { | 3942 | { |
3932 | switch (ret) { | 3943 | switch (ret) { |
3933 | case GRO_NORMAL: | 3944 | case GRO_NORMAL: |
3934 | case GRO_HELD: | 3945 | case GRO_HELD: |
3935 | skb->protocol = eth_type_trans(skb, skb->dev); | 3946 | skb->protocol = eth_type_trans(skb, skb->dev); |
3936 | 3947 | ||
3937 | if (ret == GRO_HELD) | 3948 | if (ret == GRO_HELD) |
3938 | skb_gro_pull(skb, -ETH_HLEN); | 3949 | skb_gro_pull(skb, -ETH_HLEN); |
3939 | else if (netif_receive_skb(skb)) | 3950 | else if (netif_receive_skb(skb)) |
3940 | ret = GRO_DROP; | 3951 | ret = GRO_DROP; |
3941 | break; | 3952 | break; |
3942 | 3953 | ||
3943 | case GRO_DROP: | 3954 | case GRO_DROP: |
3944 | case GRO_MERGED_FREE: | 3955 | case GRO_MERGED_FREE: |
3945 | napi_reuse_skb(napi, skb); | 3956 | napi_reuse_skb(napi, skb); |
3946 | break; | 3957 | break; |
3947 | 3958 | ||
3948 | case GRO_MERGED: | 3959 | case GRO_MERGED: |
3949 | break; | 3960 | break; |
3950 | } | 3961 | } |
3951 | 3962 | ||
3952 | return ret; | 3963 | return ret; |
3953 | } | 3964 | } |
3954 | 3965 | ||
3955 | static struct sk_buff *napi_frags_skb(struct napi_struct *napi) | 3966 | static struct sk_buff *napi_frags_skb(struct napi_struct *napi) |
3956 | { | 3967 | { |
3957 | struct sk_buff *skb = napi->skb; | 3968 | struct sk_buff *skb = napi->skb; |
3958 | struct ethhdr *eth; | 3969 | struct ethhdr *eth; |
3959 | unsigned int hlen; | 3970 | unsigned int hlen; |
3960 | unsigned int off; | 3971 | unsigned int off; |
3961 | 3972 | ||
3962 | napi->skb = NULL; | 3973 | napi->skb = NULL; |
3963 | 3974 | ||
3964 | skb_reset_mac_header(skb); | 3975 | skb_reset_mac_header(skb); |
3965 | skb_gro_reset_offset(skb); | 3976 | skb_gro_reset_offset(skb); |
3966 | 3977 | ||
3967 | off = skb_gro_offset(skb); | 3978 | off = skb_gro_offset(skb); |
3968 | hlen = off + sizeof(*eth); | 3979 | hlen = off + sizeof(*eth); |
3969 | eth = skb_gro_header_fast(skb, off); | 3980 | eth = skb_gro_header_fast(skb, off); |
3970 | if (skb_gro_header_hard(skb, hlen)) { | 3981 | if (skb_gro_header_hard(skb, hlen)) { |
3971 | eth = skb_gro_header_slow(skb, hlen, off); | 3982 | eth = skb_gro_header_slow(skb, hlen, off); |
3972 | if (unlikely(!eth)) { | 3983 | if (unlikely(!eth)) { |
3973 | napi_reuse_skb(napi, skb); | 3984 | napi_reuse_skb(napi, skb); |
3974 | skb = NULL; | 3985 | skb = NULL; |
3975 | goto out; | 3986 | goto out; |
3976 | } | 3987 | } |
3977 | } | 3988 | } |
3978 | 3989 | ||
3979 | skb_gro_pull(skb, sizeof(*eth)); | 3990 | skb_gro_pull(skb, sizeof(*eth)); |
3980 | 3991 | ||
3981 | /* | 3992 | /* |
3982 | * This works because the only protocols we care about don't require | 3993 | * This works because the only protocols we care about don't require |
3983 | * special handling. We'll fix it up properly at the end. | 3994 | * special handling. We'll fix it up properly at the end. |
3984 | */ | 3995 | */ |
3985 | skb->protocol = eth->h_proto; | 3996 | skb->protocol = eth->h_proto; |
3986 | 3997 | ||
3987 | out: | 3998 | out: |
3988 | return skb; | 3999 | return skb; |
3989 | } | 4000 | } |
3990 | 4001 | ||
3991 | gro_result_t napi_gro_frags(struct napi_struct *napi) | 4002 | gro_result_t napi_gro_frags(struct napi_struct *napi) |
3992 | { | 4003 | { |
3993 | struct sk_buff *skb = napi_frags_skb(napi); | 4004 | struct sk_buff *skb = napi_frags_skb(napi); |
3994 | 4005 | ||
3995 | if (!skb) | 4006 | if (!skb) |
3996 | return GRO_DROP; | 4007 | return GRO_DROP; |
3997 | 4008 | ||
3998 | return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb)); | 4009 | return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb)); |
3999 | } | 4010 | } |
4000 | EXPORT_SYMBOL(napi_gro_frags); | 4011 | EXPORT_SYMBOL(napi_gro_frags); |
4001 | 4012 | ||
4002 | /* | 4013 | /* |
4003 | * net_rps_action sends any pending IPI's for rps. | 4014 | * net_rps_action sends any pending IPI's for rps. |
4004 | * Note: called with local irq disabled, but exits with local irq enabled. | 4015 | * Note: called with local irq disabled, but exits with local irq enabled. |
4005 | */ | 4016 | */ |
4006 | static void net_rps_action_and_irq_enable(struct softnet_data *sd) | 4017 | static void net_rps_action_and_irq_enable(struct softnet_data *sd) |
4007 | { | 4018 | { |
4008 | #ifdef CONFIG_RPS | 4019 | #ifdef CONFIG_RPS |
4009 | struct softnet_data *remsd = sd->rps_ipi_list; | 4020 | struct softnet_data *remsd = sd->rps_ipi_list; |
4010 | 4021 | ||
4011 | if (remsd) { | 4022 | if (remsd) { |
4012 | sd->rps_ipi_list = NULL; | 4023 | sd->rps_ipi_list = NULL; |
4013 | 4024 | ||
4014 | local_irq_enable(); | 4025 | local_irq_enable(); |
4015 | 4026 | ||
4016 | /* Send pending IPI's to kick RPS processing on remote cpus. */ | 4027 | /* Send pending IPI's to kick RPS processing on remote cpus. */ |
4017 | while (remsd) { | 4028 | while (remsd) { |
4018 | struct softnet_data *next = remsd->rps_ipi_next; | 4029 | struct softnet_data *next = remsd->rps_ipi_next; |
4019 | 4030 | ||
4020 | if (cpu_online(remsd->cpu)) | 4031 | if (cpu_online(remsd->cpu)) |
4021 | __smp_call_function_single(remsd->cpu, | 4032 | __smp_call_function_single(remsd->cpu, |
4022 | &remsd->csd, 0); | 4033 | &remsd->csd, 0); |
4023 | remsd = next; | 4034 | remsd = next; |
4024 | } | 4035 | } |
4025 | } else | 4036 | } else |
4026 | #endif | 4037 | #endif |
4027 | local_irq_enable(); | 4038 | local_irq_enable(); |
4028 | } | 4039 | } |
4029 | 4040 | ||
4030 | static int process_backlog(struct napi_struct *napi, int quota) | 4041 | static int process_backlog(struct napi_struct *napi, int quota) |
4031 | { | 4042 | { |
4032 | int work = 0; | 4043 | int work = 0; |
4033 | struct softnet_data *sd = container_of(napi, struct softnet_data, backlog); | 4044 | struct softnet_data *sd = container_of(napi, struct softnet_data, backlog); |
4034 | 4045 | ||
4035 | #ifdef CONFIG_RPS | 4046 | #ifdef CONFIG_RPS |
4036 | /* Check if we have pending ipi, it's better to send them now, | 4047 | /* Check if we have pending ipi, it's better to send them now, |
4037 | * not waiting net_rx_action() end. | 4048 | * not waiting net_rx_action() end. |
4038 | */ | 4049 | */ |
4039 | if (sd->rps_ipi_list) { | 4050 | if (sd->rps_ipi_list) { |
4040 | local_irq_disable(); | 4051 | local_irq_disable(); |
4041 | net_rps_action_and_irq_enable(sd); | 4052 | net_rps_action_and_irq_enable(sd); |
4042 | } | 4053 | } |
4043 | #endif | 4054 | #endif |
4044 | napi->weight = weight_p; | 4055 | napi->weight = weight_p; |
4045 | local_irq_disable(); | 4056 | local_irq_disable(); |
4046 | while (work < quota) { | 4057 | while (work < quota) { |
4047 | struct sk_buff *skb; | 4058 | struct sk_buff *skb; |
4048 | unsigned int qlen; | 4059 | unsigned int qlen; |
4049 | 4060 | ||
4050 | while ((skb = __skb_dequeue(&sd->process_queue))) { | 4061 | while ((skb = __skb_dequeue(&sd->process_queue))) { |
4051 | local_irq_enable(); | 4062 | local_irq_enable(); |
4052 | __netif_receive_skb(skb); | 4063 | __netif_receive_skb(skb); |
4053 | local_irq_disable(); | 4064 | local_irq_disable(); |
4054 | input_queue_head_incr(sd); | 4065 | input_queue_head_incr(sd); |
4055 | if (++work >= quota) { | 4066 | if (++work >= quota) { |
4056 | local_irq_enable(); | 4067 | local_irq_enable(); |
4057 | return work; | 4068 | return work; |
4058 | } | 4069 | } |
4059 | } | 4070 | } |
4060 | 4071 | ||
4061 | rps_lock(sd); | 4072 | rps_lock(sd); |
4062 | qlen = skb_queue_len(&sd->input_pkt_queue); | 4073 | qlen = skb_queue_len(&sd->input_pkt_queue); |
4063 | if (qlen) | 4074 | if (qlen) |
4064 | skb_queue_splice_tail_init(&sd->input_pkt_queue, | 4075 | skb_queue_splice_tail_init(&sd->input_pkt_queue, |
4065 | &sd->process_queue); | 4076 | &sd->process_queue); |
4066 | 4077 | ||
4067 | if (qlen < quota - work) { | 4078 | if (qlen < quota - work) { |
4068 | /* | 4079 | /* |
4069 | * Inline a custom version of __napi_complete(). | 4080 | * Inline a custom version of __napi_complete(). |
4070 | * only current cpu owns and manipulates this napi, | 4081 | * only current cpu owns and manipulates this napi, |
4071 | * and NAPI_STATE_SCHED is the only possible flag set on backlog. | 4082 | * and NAPI_STATE_SCHED is the only possible flag set on backlog. |
4072 | * we can use a plain write instead of clear_bit(), | 4083 | * we can use a plain write instead of clear_bit(), |
4073 | * and we don't need an smp_mb() memory barrier. | 4084 | * and we don't need an smp_mb() memory barrier. |
4074 | */ | 4085 | */ |
4075 | list_del(&napi->poll_list); | 4086 | list_del(&napi->poll_list); |
4076 | napi->state = 0; | 4087 | napi->state = 0; |
4077 | 4088 | ||
4078 | quota = work + qlen; | 4089 | quota = work + qlen; |
4079 | } | 4090 | } |
4080 | rps_unlock(sd); | 4091 | rps_unlock(sd); |
4081 | } | 4092 | } |
4082 | local_irq_enable(); | 4093 | local_irq_enable(); |
4083 | 4094 | ||
4084 | return work; | 4095 | return work; |
4085 | } | 4096 | } |
4086 | 4097 | ||
4087 | /** | 4098 | /** |
4088 | * __napi_schedule - schedule for receive | 4099 | * __napi_schedule - schedule for receive |
4089 | * @n: entry to schedule | 4100 | * @n: entry to schedule |
4090 | * | 4101 | * |
4091 | * The entry's receive function will be scheduled to run | 4102 | * The entry's receive function will be scheduled to run |
4092 | */ | 4103 | */ |
4093 | void __napi_schedule(struct napi_struct *n) | 4104 | void __napi_schedule(struct napi_struct *n) |
4094 | { | 4105 | { |
4095 | unsigned long flags; | 4106 | unsigned long flags; |
4096 | 4107 | ||
4097 | local_irq_save(flags); | 4108 | local_irq_save(flags); |
4098 | ____napi_schedule(&__get_cpu_var(softnet_data), n); | 4109 | ____napi_schedule(&__get_cpu_var(softnet_data), n); |
4099 | local_irq_restore(flags); | 4110 | local_irq_restore(flags); |
4100 | } | 4111 | } |
4101 | EXPORT_SYMBOL(__napi_schedule); | 4112 | EXPORT_SYMBOL(__napi_schedule); |
4102 | 4113 | ||
4103 | void __napi_complete(struct napi_struct *n) | 4114 | void __napi_complete(struct napi_struct *n) |
4104 | { | 4115 | { |
4105 | BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); | 4116 | BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); |
4106 | BUG_ON(n->gro_list); | 4117 | BUG_ON(n->gro_list); |
4107 | 4118 | ||
4108 | list_del(&n->poll_list); | 4119 | list_del(&n->poll_list); |
4109 | smp_mb__before_clear_bit(); | 4120 | smp_mb__before_clear_bit(); |
4110 | clear_bit(NAPI_STATE_SCHED, &n->state); | 4121 | clear_bit(NAPI_STATE_SCHED, &n->state); |
4111 | } | 4122 | } |
4112 | EXPORT_SYMBOL(__napi_complete); | 4123 | EXPORT_SYMBOL(__napi_complete); |
4113 | 4124 | ||
4114 | void napi_complete(struct napi_struct *n) | 4125 | void napi_complete(struct napi_struct *n) |
4115 | { | 4126 | { |
4116 | unsigned long flags; | 4127 | unsigned long flags; |
4117 | 4128 | ||
4118 | /* | 4129 | /* |
4119 | * don't let napi dequeue from the cpu poll list | 4130 | * don't let napi dequeue from the cpu poll list |
4120 | * just in case its running on a different cpu | 4131 | * just in case its running on a different cpu |
4121 | */ | 4132 | */ |
4122 | if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state))) | 4133 | if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state))) |
4123 | return; | 4134 | return; |
4124 | 4135 | ||
4125 | napi_gro_flush(n, false); | 4136 | napi_gro_flush(n, false); |
4126 | local_irq_save(flags); | 4137 | local_irq_save(flags); |
4127 | __napi_complete(n); | 4138 | __napi_complete(n); |
4128 | local_irq_restore(flags); | 4139 | local_irq_restore(flags); |
4129 | } | 4140 | } |
4130 | EXPORT_SYMBOL(napi_complete); | 4141 | EXPORT_SYMBOL(napi_complete); |
4131 | 4142 | ||
4132 | void netif_napi_add(struct net_device *dev, struct napi_struct *napi, | 4143 | void netif_napi_add(struct net_device *dev, struct napi_struct *napi, |
4133 | int (*poll)(struct napi_struct *, int), int weight) | 4144 | int (*poll)(struct napi_struct *, int), int weight) |
4134 | { | 4145 | { |
4135 | INIT_LIST_HEAD(&napi->poll_list); | 4146 | INIT_LIST_HEAD(&napi->poll_list); |
4136 | napi->gro_count = 0; | 4147 | napi->gro_count = 0; |
4137 | napi->gro_list = NULL; | 4148 | napi->gro_list = NULL; |
4138 | napi->skb = NULL; | 4149 | napi->skb = NULL; |
4139 | napi->poll = poll; | 4150 | napi->poll = poll; |
4140 | napi->weight = weight; | 4151 | napi->weight = weight; |
4141 | list_add(&napi->dev_list, &dev->napi_list); | 4152 | list_add(&napi->dev_list, &dev->napi_list); |
4142 | napi->dev = dev; | 4153 | napi->dev = dev; |
4143 | #ifdef CONFIG_NETPOLL | 4154 | #ifdef CONFIG_NETPOLL |
4144 | spin_lock_init(&napi->poll_lock); | 4155 | spin_lock_init(&napi->poll_lock); |
4145 | napi->poll_owner = -1; | 4156 | napi->poll_owner = -1; |
4146 | #endif | 4157 | #endif |
4147 | set_bit(NAPI_STATE_SCHED, &napi->state); | 4158 | set_bit(NAPI_STATE_SCHED, &napi->state); |
4148 | } | 4159 | } |
4149 | EXPORT_SYMBOL(netif_napi_add); | 4160 | EXPORT_SYMBOL(netif_napi_add); |
4150 | 4161 | ||
4151 | void netif_napi_del(struct napi_struct *napi) | 4162 | void netif_napi_del(struct napi_struct *napi) |
4152 | { | 4163 | { |
4153 | struct sk_buff *skb, *next; | 4164 | struct sk_buff *skb, *next; |
4154 | 4165 | ||
4155 | list_del_init(&napi->dev_list); | 4166 | list_del_init(&napi->dev_list); |
4156 | napi_free_frags(napi); | 4167 | napi_free_frags(napi); |
4157 | 4168 | ||
4158 | for (skb = napi->gro_list; skb; skb = next) { | 4169 | for (skb = napi->gro_list; skb; skb = next) { |
4159 | next = skb->next; | 4170 | next = skb->next; |
4160 | skb->next = NULL; | 4171 | skb->next = NULL; |
4161 | kfree_skb(skb); | 4172 | kfree_skb(skb); |
4162 | } | 4173 | } |
4163 | 4174 | ||
4164 | napi->gro_list = NULL; | 4175 | napi->gro_list = NULL; |
4165 | napi->gro_count = 0; | 4176 | napi->gro_count = 0; |
4166 | } | 4177 | } |
4167 | EXPORT_SYMBOL(netif_napi_del); | 4178 | EXPORT_SYMBOL(netif_napi_del); |
4168 | 4179 | ||
4169 | static void net_rx_action(struct softirq_action *h) | 4180 | static void net_rx_action(struct softirq_action *h) |
4170 | { | 4181 | { |
4171 | struct softnet_data *sd = &__get_cpu_var(softnet_data); | 4182 | struct softnet_data *sd = &__get_cpu_var(softnet_data); |
4172 | unsigned long time_limit = jiffies + 2; | 4183 | unsigned long time_limit = jiffies + 2; |
4173 | int budget = netdev_budget; | 4184 | int budget = netdev_budget; |
4174 | void *have; | 4185 | void *have; |
4175 | 4186 | ||
4176 | local_irq_disable(); | 4187 | local_irq_disable(); |
4177 | 4188 | ||
4178 | while (!list_empty(&sd->poll_list)) { | 4189 | while (!list_empty(&sd->poll_list)) { |
4179 | struct napi_struct *n; | 4190 | struct napi_struct *n; |
4180 | int work, weight; | 4191 | int work, weight; |
4181 | 4192 | ||
4182 | /* If softirq window is exhausted then punt. | 4193 | /* If softirq window is exhausted then punt. |
4183 | * Allow this to run for 2 jiffies since that will allow | 4194 | * Allow this to run for 2 jiffies since that will allow |
4184 | * an average latency of 1.5/HZ. | 4195 | * an average latency of 1.5/HZ. |
4185 | */ | 4196 | */ |
4186 | if (unlikely(budget <= 0 || time_after(jiffies, time_limit))) | 4197 | if (unlikely(budget <= 0 || time_after(jiffies, time_limit))) |
4187 | goto softnet_break; | 4198 | goto softnet_break; |
4188 | 4199 | ||
4189 | local_irq_enable(); | 4200 | local_irq_enable(); |
4190 | 4201 | ||
4191 | /* Even though interrupts have been re-enabled, this | 4202 | /* Even though interrupts have been re-enabled, this |
4192 | * access is safe because interrupts can only add new | 4203 | * access is safe because interrupts can only add new |
4193 | * entries to the tail of this list, and only ->poll() | 4204 | * entries to the tail of this list, and only ->poll() |
4194 | * calls can remove this head entry from the list. | 4205 | * calls can remove this head entry from the list. |
4195 | */ | 4206 | */ |
4196 | n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list); | 4207 | n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list); |
4197 | 4208 | ||
4198 | have = netpoll_poll_lock(n); | 4209 | have = netpoll_poll_lock(n); |
4199 | 4210 | ||
4200 | weight = n->weight; | 4211 | weight = n->weight; |
4201 | 4212 | ||
4202 | /* This NAPI_STATE_SCHED test is for avoiding a race | 4213 | /* This NAPI_STATE_SCHED test is for avoiding a race |
4203 | * with netpoll's poll_napi(). Only the entity which | 4214 | * with netpoll's poll_napi(). Only the entity which |
4204 | * obtains the lock and sees NAPI_STATE_SCHED set will | 4215 | * obtains the lock and sees NAPI_STATE_SCHED set will |
4205 | * actually make the ->poll() call. Therefore we avoid | 4216 | * actually make the ->poll() call. Therefore we avoid |
4206 | * accidentally calling ->poll() when NAPI is not scheduled. | 4217 | * accidentally calling ->poll() when NAPI is not scheduled. |
4207 | */ | 4218 | */ |
4208 | work = 0; | 4219 | work = 0; |
4209 | if (test_bit(NAPI_STATE_SCHED, &n->state)) { | 4220 | if (test_bit(NAPI_STATE_SCHED, &n->state)) { |
4210 | work = n->poll(n, weight); | 4221 | work = n->poll(n, weight); |
4211 | trace_napi_poll(n); | 4222 | trace_napi_poll(n); |
4212 | } | 4223 | } |
4213 | 4224 | ||
4214 | WARN_ON_ONCE(work > weight); | 4225 | WARN_ON_ONCE(work > weight); |
4215 | 4226 | ||
4216 | budget -= work; | 4227 | budget -= work; |
4217 | 4228 | ||
4218 | local_irq_disable(); | 4229 | local_irq_disable(); |
4219 | 4230 | ||
4220 | /* Drivers must not modify the NAPI state if they | 4231 | /* Drivers must not modify the NAPI state if they |
4221 | * consume the entire weight. In such cases this code | 4232 | * consume the entire weight. In such cases this code |
4222 | * still "owns" the NAPI instance and therefore can | 4233 | * still "owns" the NAPI instance and therefore can |
4223 | * move the instance around on the list at-will. | 4234 | * move the instance around on the list at-will. |
4224 | */ | 4235 | */ |
4225 | if (unlikely(work == weight)) { | 4236 | if (unlikely(work == weight)) { |
4226 | if (unlikely(napi_disable_pending(n))) { | 4237 | if (unlikely(napi_disable_pending(n))) { |
4227 | local_irq_enable(); | 4238 | local_irq_enable(); |
4228 | napi_complete(n); | 4239 | napi_complete(n); |
4229 | local_irq_disable(); | 4240 | local_irq_disable(); |
4230 | } else { | 4241 | } else { |
4231 | if (n->gro_list) { | 4242 | if (n->gro_list) { |
4232 | /* flush too old packets | 4243 | /* flush too old packets |
4233 | * If HZ < 1000, flush all packets. | 4244 | * If HZ < 1000, flush all packets. |
4234 | */ | 4245 | */ |
4235 | local_irq_enable(); | 4246 | local_irq_enable(); |
4236 | napi_gro_flush(n, HZ >= 1000); | 4247 | napi_gro_flush(n, HZ >= 1000); |
4237 | local_irq_disable(); | 4248 | local_irq_disable(); |
4238 | } | 4249 | } |
4239 | list_move_tail(&n->poll_list, &sd->poll_list); | 4250 | list_move_tail(&n->poll_list, &sd->poll_list); |
4240 | } | 4251 | } |
4241 | } | 4252 | } |
4242 | 4253 | ||
4243 | netpoll_poll_unlock(have); | 4254 | netpoll_poll_unlock(have); |
4244 | } | 4255 | } |
4245 | out: | 4256 | out: |
4246 | net_rps_action_and_irq_enable(sd); | 4257 | net_rps_action_and_irq_enable(sd); |
4247 | 4258 | ||
4248 | #ifdef CONFIG_NET_DMA | 4259 | #ifdef CONFIG_NET_DMA |
4249 | /* | 4260 | /* |
4250 | * There may not be any more sk_buffs coming right now, so push | 4261 | * There may not be any more sk_buffs coming right now, so push |
4251 | * any pending DMA copies to hardware | 4262 | * any pending DMA copies to hardware |
4252 | */ | 4263 | */ |
4253 | dma_issue_pending_all(); | 4264 | dma_issue_pending_all(); |
4254 | #endif | 4265 | #endif |
4255 | 4266 | ||
4256 | return; | 4267 | return; |
4257 | 4268 | ||
4258 | softnet_break: | 4269 | softnet_break: |
4259 | sd->time_squeeze++; | 4270 | sd->time_squeeze++; |
4260 | __raise_softirq_irqoff(NET_RX_SOFTIRQ); | 4271 | __raise_softirq_irqoff(NET_RX_SOFTIRQ); |
4261 | goto out; | 4272 | goto out; |
4262 | } | 4273 | } |
4263 | 4274 | ||
4264 | static gifconf_func_t *gifconf_list[NPROTO]; | 4275 | static gifconf_func_t *gifconf_list[NPROTO]; |
4265 | 4276 | ||
4266 | /** | 4277 | /** |
4267 | * register_gifconf - register a SIOCGIF handler | 4278 | * register_gifconf - register a SIOCGIF handler |
4268 | * @family: Address family | 4279 | * @family: Address family |
4269 | * @gifconf: Function handler | 4280 | * @gifconf: Function handler |
4270 | * | 4281 | * |
4271 | * Register protocol dependent address dumping routines. The handler | 4282 | * Register protocol dependent address dumping routines. The handler |
4272 | * that is passed must not be freed or reused until it has been replaced | 4283 | * that is passed must not be freed or reused until it has been replaced |
4273 | * by another handler. | 4284 | * by another handler. |
4274 | */ | 4285 | */ |
4275 | int register_gifconf(unsigned int family, gifconf_func_t *gifconf) | 4286 | int register_gifconf(unsigned int family, gifconf_func_t *gifconf) |
4276 | { | 4287 | { |
4277 | if (family >= NPROTO) | 4288 | if (family >= NPROTO) |
4278 | return -EINVAL; | 4289 | return -EINVAL; |
4279 | gifconf_list[family] = gifconf; | 4290 | gifconf_list[family] = gifconf; |
4280 | return 0; | 4291 | return 0; |
4281 | } | 4292 | } |
4282 | EXPORT_SYMBOL(register_gifconf); | 4293 | EXPORT_SYMBOL(register_gifconf); |
4283 | 4294 | ||
4284 | 4295 | ||
4285 | /* | 4296 | /* |
4286 | * Map an interface index to its name (SIOCGIFNAME) | 4297 | * Map an interface index to its name (SIOCGIFNAME) |
4287 | */ | 4298 | */ |
4288 | 4299 | ||
4289 | /* | 4300 | /* |
4290 | * We need this ioctl for efficient implementation of the | 4301 | * We need this ioctl for efficient implementation of the |
4291 | * if_indextoname() function required by the IPv6 API. Without | 4302 | * if_indextoname() function required by the IPv6 API. Without |
4292 | * it, we would have to search all the interfaces to find a | 4303 | * it, we would have to search all the interfaces to find a |
4293 | * match. --pb | 4304 | * match. --pb |
4294 | */ | 4305 | */ |
4295 | 4306 | ||
4296 | static int dev_ifname(struct net *net, struct ifreq __user *arg) | 4307 | static int dev_ifname(struct net *net, struct ifreq __user *arg) |
4297 | { | 4308 | { |
4298 | struct net_device *dev; | 4309 | struct net_device *dev; |
4299 | struct ifreq ifr; | 4310 | struct ifreq ifr; |
4300 | unsigned seq; | 4311 | unsigned seq; |
4301 | 4312 | ||
4302 | /* | 4313 | /* |
4303 | * Fetch the caller's info block. | 4314 | * Fetch the caller's info block. |
4304 | */ | 4315 | */ |
4305 | 4316 | ||
4306 | if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) | 4317 | if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) |
4307 | return -EFAULT; | 4318 | return -EFAULT; |
4308 | 4319 | ||
4309 | retry: | 4320 | retry: |
4310 | seq = read_seqcount_begin(&devnet_rename_seq); | 4321 | seq = read_seqcount_begin(&devnet_rename_seq); |
4311 | rcu_read_lock(); | 4322 | rcu_read_lock(); |
4312 | dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex); | 4323 | dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex); |
4313 | if (!dev) { | 4324 | if (!dev) { |
4314 | rcu_read_unlock(); | 4325 | rcu_read_unlock(); |
4315 | return -ENODEV; | 4326 | return -ENODEV; |
4316 | } | 4327 | } |
4317 | 4328 | ||
4318 | strcpy(ifr.ifr_name, dev->name); | 4329 | strcpy(ifr.ifr_name, dev->name); |
4319 | rcu_read_unlock(); | 4330 | rcu_read_unlock(); |
4320 | if (read_seqcount_retry(&devnet_rename_seq, seq)) | 4331 | if (read_seqcount_retry(&devnet_rename_seq, seq)) |
4321 | goto retry; | 4332 | goto retry; |
4322 | 4333 | ||
4323 | if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) | 4334 | if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) |
4324 | return -EFAULT; | 4335 | return -EFAULT; |
4325 | return 0; | 4336 | return 0; |
4326 | } | 4337 | } |
4327 | 4338 | ||
4328 | /* | 4339 | /* |
4329 | * Perform a SIOCGIFCONF call. This structure will change | 4340 | * Perform a SIOCGIFCONF call. This structure will change |
4330 | * size eventually, and there is nothing I can do about it. | 4341 | * size eventually, and there is nothing I can do about it. |
4331 | * Thus we will need a 'compatibility mode'. | 4342 | * Thus we will need a 'compatibility mode'. |
4332 | */ | 4343 | */ |
4333 | 4344 | ||
4334 | static int dev_ifconf(struct net *net, char __user *arg) | 4345 | static int dev_ifconf(struct net *net, char __user *arg) |
4335 | { | 4346 | { |
4336 | struct ifconf ifc; | 4347 | struct ifconf ifc; |
4337 | struct net_device *dev; | 4348 | struct net_device *dev; |
4338 | char __user *pos; | 4349 | char __user *pos; |
4339 | int len; | 4350 | int len; |
4340 | int total; | 4351 | int total; |
4341 | int i; | 4352 | int i; |
4342 | 4353 | ||
4343 | /* | 4354 | /* |
4344 | * Fetch the caller's info block. | 4355 | * Fetch the caller's info block. |
4345 | */ | 4356 | */ |
4346 | 4357 | ||
4347 | if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) | 4358 | if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) |
4348 | return -EFAULT; | 4359 | return -EFAULT; |
4349 | 4360 | ||
4350 | pos = ifc.ifc_buf; | 4361 | pos = ifc.ifc_buf; |
4351 | len = ifc.ifc_len; | 4362 | len = ifc.ifc_len; |
4352 | 4363 | ||
4353 | /* | 4364 | /* |
4354 | * Loop over the interfaces, and write an info block for each. | 4365 | * Loop over the interfaces, and write an info block for each. |
4355 | */ | 4366 | */ |
4356 | 4367 | ||
4357 | total = 0; | 4368 | total = 0; |
4358 | for_each_netdev(net, dev) { | 4369 | for_each_netdev(net, dev) { |
4359 | for (i = 0; i < NPROTO; i++) { | 4370 | for (i = 0; i < NPROTO; i++) { |
4360 | if (gifconf_list[i]) { | 4371 | if (gifconf_list[i]) { |
4361 | int done; | 4372 | int done; |
4362 | if (!pos) | 4373 | if (!pos) |
4363 | done = gifconf_list[i](dev, NULL, 0); | 4374 | done = gifconf_list[i](dev, NULL, 0); |
4364 | else | 4375 | else |
4365 | done = gifconf_list[i](dev, pos + total, | 4376 | done = gifconf_list[i](dev, pos + total, |
4366 | len - total); | 4377 | len - total); |
4367 | if (done < 0) | 4378 | if (done < 0) |
4368 | return -EFAULT; | 4379 | return -EFAULT; |
4369 | total += done; | 4380 | total += done; |
4370 | } | 4381 | } |
4371 | } | 4382 | } |
4372 | } | 4383 | } |
4373 | 4384 | ||
4374 | /* | 4385 | /* |
4375 | * All done. Write the updated control block back to the caller. | 4386 | * All done. Write the updated control block back to the caller. |
4376 | */ | 4387 | */ |
4377 | ifc.ifc_len = total; | 4388 | ifc.ifc_len = total; |
4378 | 4389 | ||
4379 | /* | 4390 | /* |
4380 | * Both BSD and Solaris return 0 here, so we do too. | 4391 | * Both BSD and Solaris return 0 here, so we do too. |
4381 | */ | 4392 | */ |
4382 | return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0; | 4393 | return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0; |
4383 | } | 4394 | } |
4384 | 4395 | ||
4385 | #ifdef CONFIG_PROC_FS | 4396 | #ifdef CONFIG_PROC_FS |
4386 | 4397 | ||
4387 | #define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1) | 4398 | #define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1) |
4388 | 4399 | ||
4389 | #define get_bucket(x) ((x) >> BUCKET_SPACE) | 4400 | #define get_bucket(x) ((x) >> BUCKET_SPACE) |
4390 | #define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1)) | 4401 | #define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1)) |
4391 | #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) | 4402 | #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) |
4392 | 4403 | ||
4393 | static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos) | 4404 | static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos) |
4394 | { | 4405 | { |
4395 | struct net *net = seq_file_net(seq); | 4406 | struct net *net = seq_file_net(seq); |
4396 | struct net_device *dev; | 4407 | struct net_device *dev; |
4397 | struct hlist_node *p; | 4408 | struct hlist_node *p; |
4398 | struct hlist_head *h; | 4409 | struct hlist_head *h; |
4399 | unsigned int count = 0, offset = get_offset(*pos); | 4410 | unsigned int count = 0, offset = get_offset(*pos); |
4400 | 4411 | ||
4401 | h = &net->dev_name_head[get_bucket(*pos)]; | 4412 | h = &net->dev_name_head[get_bucket(*pos)]; |
4402 | hlist_for_each_entry_rcu(dev, p, h, name_hlist) { | 4413 | hlist_for_each_entry_rcu(dev, p, h, name_hlist) { |
4403 | if (++count == offset) | 4414 | if (++count == offset) |
4404 | return dev; | 4415 | return dev; |
4405 | } | 4416 | } |
4406 | 4417 | ||
4407 | return NULL; | 4418 | return NULL; |
4408 | } | 4419 | } |
4409 | 4420 | ||
4410 | static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos) | 4421 | static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos) |
4411 | { | 4422 | { |
4412 | struct net_device *dev; | 4423 | struct net_device *dev; |
4413 | unsigned int bucket; | 4424 | unsigned int bucket; |
4414 | 4425 | ||
4415 | do { | 4426 | do { |
4416 | dev = dev_from_same_bucket(seq, pos); | 4427 | dev = dev_from_same_bucket(seq, pos); |
4417 | if (dev) | 4428 | if (dev) |
4418 | return dev; | 4429 | return dev; |
4419 | 4430 | ||
4420 | bucket = get_bucket(*pos) + 1; | 4431 | bucket = get_bucket(*pos) + 1; |
4421 | *pos = set_bucket_offset(bucket, 1); | 4432 | *pos = set_bucket_offset(bucket, 1); |
4422 | } while (bucket < NETDEV_HASHENTRIES); | 4433 | } while (bucket < NETDEV_HASHENTRIES); |
4423 | 4434 | ||
4424 | return NULL; | 4435 | return NULL; |
4425 | } | 4436 | } |
4426 | 4437 | ||
4427 | /* | 4438 | /* |
4428 | * This is invoked by the /proc filesystem handler to display a device | 4439 | * This is invoked by the /proc filesystem handler to display a device |
4429 | * in detail. | 4440 | * in detail. |
4430 | */ | 4441 | */ |
4431 | void *dev_seq_start(struct seq_file *seq, loff_t *pos) | 4442 | void *dev_seq_start(struct seq_file *seq, loff_t *pos) |
4432 | __acquires(RCU) | 4443 | __acquires(RCU) |
4433 | { | 4444 | { |
4434 | rcu_read_lock(); | 4445 | rcu_read_lock(); |
4435 | if (!*pos) | 4446 | if (!*pos) |
4436 | return SEQ_START_TOKEN; | 4447 | return SEQ_START_TOKEN; |
4437 | 4448 | ||
4438 | if (get_bucket(*pos) >= NETDEV_HASHENTRIES) | 4449 | if (get_bucket(*pos) >= NETDEV_HASHENTRIES) |
4439 | return NULL; | 4450 | return NULL; |
4440 | 4451 | ||
4441 | return dev_from_bucket(seq, pos); | 4452 | return dev_from_bucket(seq, pos); |
4442 | } | 4453 | } |
4443 | 4454 | ||
4444 | void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 4455 | void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
4445 | { | 4456 | { |
4446 | ++*pos; | 4457 | ++*pos; |
4447 | return dev_from_bucket(seq, pos); | 4458 | return dev_from_bucket(seq, pos); |
4448 | } | 4459 | } |
4449 | 4460 | ||
4450 | void dev_seq_stop(struct seq_file *seq, void *v) | 4461 | void dev_seq_stop(struct seq_file *seq, void *v) |
4451 | __releases(RCU) | 4462 | __releases(RCU) |
4452 | { | 4463 | { |
4453 | rcu_read_unlock(); | 4464 | rcu_read_unlock(); |
4454 | } | 4465 | } |
4455 | 4466 | ||
4456 | static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) | 4467 | static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) |
4457 | { | 4468 | { |
4458 | struct rtnl_link_stats64 temp; | 4469 | struct rtnl_link_stats64 temp; |
4459 | const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); | 4470 | const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); |
4460 | 4471 | ||
4461 | seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " | 4472 | seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " |
4462 | "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", | 4473 | "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", |
4463 | dev->name, stats->rx_bytes, stats->rx_packets, | 4474 | dev->name, stats->rx_bytes, stats->rx_packets, |
4464 | stats->rx_errors, | 4475 | stats->rx_errors, |
4465 | stats->rx_dropped + stats->rx_missed_errors, | 4476 | stats->rx_dropped + stats->rx_missed_errors, |
4466 | stats->rx_fifo_errors, | 4477 | stats->rx_fifo_errors, |
4467 | stats->rx_length_errors + stats->rx_over_errors + | 4478 | stats->rx_length_errors + stats->rx_over_errors + |
4468 | stats->rx_crc_errors + stats->rx_frame_errors, | 4479 | stats->rx_crc_errors + stats->rx_frame_errors, |
4469 | stats->rx_compressed, stats->multicast, | 4480 | stats->rx_compressed, stats->multicast, |
4470 | stats->tx_bytes, stats->tx_packets, | 4481 | stats->tx_bytes, stats->tx_packets, |
4471 | stats->tx_errors, stats->tx_dropped, | 4482 | stats->tx_errors, stats->tx_dropped, |
4472 | stats->tx_fifo_errors, stats->collisions, | 4483 | stats->tx_fifo_errors, stats->collisions, |
4473 | stats->tx_carrier_errors + | 4484 | stats->tx_carrier_errors + |
4474 | stats->tx_aborted_errors + | 4485 | stats->tx_aborted_errors + |
4475 | stats->tx_window_errors + | 4486 | stats->tx_window_errors + |
4476 | stats->tx_heartbeat_errors, | 4487 | stats->tx_heartbeat_errors, |
4477 | stats->tx_compressed); | 4488 | stats->tx_compressed); |
4478 | } | 4489 | } |
4479 | 4490 | ||
4480 | /* | 4491 | /* |
4481 | * Called from the PROCfs module. This now uses the new arbitrary sized | 4492 | * Called from the PROCfs module. This now uses the new arbitrary sized |
4482 | * /proc/net interface to create /proc/net/dev | 4493 | * /proc/net interface to create /proc/net/dev |
4483 | */ | 4494 | */ |
4484 | static int dev_seq_show(struct seq_file *seq, void *v) | 4495 | static int dev_seq_show(struct seq_file *seq, void *v) |
4485 | { | 4496 | { |
4486 | if (v == SEQ_START_TOKEN) | 4497 | if (v == SEQ_START_TOKEN) |
4487 | seq_puts(seq, "Inter-| Receive " | 4498 | seq_puts(seq, "Inter-| Receive " |
4488 | " | Transmit\n" | 4499 | " | Transmit\n" |
4489 | " face |bytes packets errs drop fifo frame " | 4500 | " face |bytes packets errs drop fifo frame " |
4490 | "compressed multicast|bytes packets errs " | 4501 | "compressed multicast|bytes packets errs " |
4491 | "drop fifo colls carrier compressed\n"); | 4502 | "drop fifo colls carrier compressed\n"); |
4492 | else | 4503 | else |
4493 | dev_seq_printf_stats(seq, v); | 4504 | dev_seq_printf_stats(seq, v); |
4494 | return 0; | 4505 | return 0; |
4495 | } | 4506 | } |
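The show callback above is what a read of /proc/net/dev ultimately produces: two header lines for SEQ_START_TOKEN, then one line per device in the field order used by dev_seq_printf_stats(). A minimal, hedged userspace sketch that consumes that format (nothing below is part of the kernel sources):

/*
 * Hedged userspace sketch: reader for the /proc/net/dev layout produced
 * above. The field order (bytes, packets, errs, drop, fifo, frame,
 * compressed, multicast for RX; bytes, packets, ... for TX) follows
 * dev_seq_printf_stats().
 */
#include <stdio.h>

int main(void)
{
        char line[512];
        FILE *f = fopen("/proc/net/dev", "r");

        if (!f)
                return 1;

        /* Skip the two header lines emitted for SEQ_START_TOKEN. */
        fgets(line, sizeof(line), f);
        fgets(line, sizeof(line), f);

        while (fgets(line, sizeof(line), f)) {
                char name[32];
                unsigned long long rx_bytes, rx_packets, skip[6];
                unsigned long long tx_bytes, tx_packets;

                if (sscanf(line,
                           " %31[^:]: %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu",
                           name, &rx_bytes, &rx_packets,
                           &skip[0], &skip[1], &skip[2],
                           &skip[3], &skip[4], &skip[5],
                           &tx_bytes, &tx_packets) == 11)
                        printf("%s: rx %llu bytes / %llu pkts, tx %llu bytes / %llu pkts\n",
                               name, rx_bytes, rx_packets, tx_bytes, tx_packets);
        }
        fclose(f);
        return 0;
}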
4496 | 4507 | ||
4497 | static struct softnet_data *softnet_get_online(loff_t *pos) | 4508 | static struct softnet_data *softnet_get_online(loff_t *pos) |
4498 | { | 4509 | { |
4499 | struct softnet_data *sd = NULL; | 4510 | struct softnet_data *sd = NULL; |
4500 | 4511 | ||
4501 | while (*pos < nr_cpu_ids) | 4512 | while (*pos < nr_cpu_ids) |
4502 | if (cpu_online(*pos)) { | 4513 | if (cpu_online(*pos)) { |
4503 | sd = &per_cpu(softnet_data, *pos); | 4514 | sd = &per_cpu(softnet_data, *pos); |
4504 | break; | 4515 | break; |
4505 | } else | 4516 | } else |
4506 | ++*pos; | 4517 | ++*pos; |
4507 | return sd; | 4518 | return sd; |
4508 | } | 4519 | } |
4509 | 4520 | ||
4510 | static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) | 4521 | static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) |
4511 | { | 4522 | { |
4512 | return softnet_get_online(pos); | 4523 | return softnet_get_online(pos); |
4513 | } | 4524 | } |
4514 | 4525 | ||
4515 | static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 4526 | static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
4516 | { | 4527 | { |
4517 | ++*pos; | 4528 | ++*pos; |
4518 | return softnet_get_online(pos); | 4529 | return softnet_get_online(pos); |
4519 | } | 4530 | } |
4520 | 4531 | ||
4521 | static void softnet_seq_stop(struct seq_file *seq, void *v) | 4532 | static void softnet_seq_stop(struct seq_file *seq, void *v) |
4522 | { | 4533 | { |
4523 | } | 4534 | } |
4524 | 4535 | ||
4525 | static int softnet_seq_show(struct seq_file *seq, void *v) | 4536 | static int softnet_seq_show(struct seq_file *seq, void *v) |
4526 | { | 4537 | { |
4527 | struct softnet_data *sd = v; | 4538 | struct softnet_data *sd = v; |
4528 | 4539 | ||
4529 | seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", | 4540 | seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", |
4530 | sd->processed, sd->dropped, sd->time_squeeze, 0, | 4541 | sd->processed, sd->dropped, sd->time_squeeze, 0, |
4531 | 0, 0, 0, 0, /* was fastroute */ | 4542 | 0, 0, 0, 0, /* was fastroute */ |
4532 | sd->cpu_collision, sd->received_rps); | 4543 | sd->cpu_collision, sd->received_rps); |
4533 | return 0; | 4544 | return 0; |
4534 | } | 4545 | } |
4535 | 4546 | ||
4536 | static const struct seq_operations dev_seq_ops = { | 4547 | static const struct seq_operations dev_seq_ops = { |
4537 | .start = dev_seq_start, | 4548 | .start = dev_seq_start, |
4538 | .next = dev_seq_next, | 4549 | .next = dev_seq_next, |
4539 | .stop = dev_seq_stop, | 4550 | .stop = dev_seq_stop, |
4540 | .show = dev_seq_show, | 4551 | .show = dev_seq_show, |
4541 | }; | 4552 | }; |
4542 | 4553 | ||
4543 | static int dev_seq_open(struct inode *inode, struct file *file) | 4554 | static int dev_seq_open(struct inode *inode, struct file *file) |
4544 | { | 4555 | { |
4545 | return seq_open_net(inode, file, &dev_seq_ops, | 4556 | return seq_open_net(inode, file, &dev_seq_ops, |
4546 | sizeof(struct seq_net_private)); | 4557 | sizeof(struct seq_net_private)); |
4547 | } | 4558 | } |
4548 | 4559 | ||
4549 | static const struct file_operations dev_seq_fops = { | 4560 | static const struct file_operations dev_seq_fops = { |
4550 | .owner = THIS_MODULE, | 4561 | .owner = THIS_MODULE, |
4551 | .open = dev_seq_open, | 4562 | .open = dev_seq_open, |
4552 | .read = seq_read, | 4563 | .read = seq_read, |
4553 | .llseek = seq_lseek, | 4564 | .llseek = seq_lseek, |
4554 | .release = seq_release_net, | 4565 | .release = seq_release_net, |
4555 | }; | 4566 | }; |
4556 | 4567 | ||
4557 | static const struct seq_operations softnet_seq_ops = { | 4568 | static const struct seq_operations softnet_seq_ops = { |
4558 | .start = softnet_seq_start, | 4569 | .start = softnet_seq_start, |
4559 | .next = softnet_seq_next, | 4570 | .next = softnet_seq_next, |
4560 | .stop = softnet_seq_stop, | 4571 | .stop = softnet_seq_stop, |
4561 | .show = softnet_seq_show, | 4572 | .show = softnet_seq_show, |
4562 | }; | 4573 | }; |
4563 | 4574 | ||
4564 | static int softnet_seq_open(struct inode *inode, struct file *file) | 4575 | static int softnet_seq_open(struct inode *inode, struct file *file) |
4565 | { | 4576 | { |
4566 | return seq_open(file, &softnet_seq_ops); | 4577 | return seq_open(file, &softnet_seq_ops); |
4567 | } | 4578 | } |
4568 | 4579 | ||
4569 | static const struct file_operations softnet_seq_fops = { | 4580 | static const struct file_operations softnet_seq_fops = { |
4570 | .owner = THIS_MODULE, | 4581 | .owner = THIS_MODULE, |
4571 | .open = softnet_seq_open, | 4582 | .open = softnet_seq_open, |
4572 | .read = seq_read, | 4583 | .read = seq_read, |
4573 | .llseek = seq_lseek, | 4584 | .llseek = seq_lseek, |
4574 | .release = seq_release, | 4585 | .release = seq_release, |
4575 | }; | 4586 | }; |
4576 | 4587 | ||
4577 | static void *ptype_get_idx(loff_t pos) | 4588 | static void *ptype_get_idx(loff_t pos) |
4578 | { | 4589 | { |
4579 | struct packet_type *pt = NULL; | 4590 | struct packet_type *pt = NULL; |
4580 | loff_t i = 0; | 4591 | loff_t i = 0; |
4581 | int t; | 4592 | int t; |
4582 | 4593 | ||
4583 | list_for_each_entry_rcu(pt, &ptype_all, list) { | 4594 | list_for_each_entry_rcu(pt, &ptype_all, list) { |
4584 | if (i == pos) | 4595 | if (i == pos) |
4585 | return pt; | 4596 | return pt; |
4586 | ++i; | 4597 | ++i; |
4587 | } | 4598 | } |
4588 | 4599 | ||
4589 | for (t = 0; t < PTYPE_HASH_SIZE; t++) { | 4600 | for (t = 0; t < PTYPE_HASH_SIZE; t++) { |
4590 | list_for_each_entry_rcu(pt, &ptype_base[t], list) { | 4601 | list_for_each_entry_rcu(pt, &ptype_base[t], list) { |
4591 | if (i == pos) | 4602 | if (i == pos) |
4592 | return pt; | 4603 | return pt; |
4593 | ++i; | 4604 | ++i; |
4594 | } | 4605 | } |
4595 | } | 4606 | } |
4596 | return NULL; | 4607 | return NULL; |
4597 | } | 4608 | } |
4598 | 4609 | ||
4599 | static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) | 4610 | static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) |
4600 | __acquires(RCU) | 4611 | __acquires(RCU) |
4601 | { | 4612 | { |
4602 | rcu_read_lock(); | 4613 | rcu_read_lock(); |
4603 | return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; | 4614 | return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; |
4604 | } | 4615 | } |
4605 | 4616 | ||
4606 | static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 4617 | static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
4607 | { | 4618 | { |
4608 | struct packet_type *pt; | 4619 | struct packet_type *pt; |
4609 | struct list_head *nxt; | 4620 | struct list_head *nxt; |
4610 | int hash; | 4621 | int hash; |
4611 | 4622 | ||
4612 | ++*pos; | 4623 | ++*pos; |
4613 | if (v == SEQ_START_TOKEN) | 4624 | if (v == SEQ_START_TOKEN) |
4614 | return ptype_get_idx(0); | 4625 | return ptype_get_idx(0); |
4615 | 4626 | ||
4616 | pt = v; | 4627 | pt = v; |
4617 | nxt = pt->list.next; | 4628 | nxt = pt->list.next; |
4618 | if (pt->type == htons(ETH_P_ALL)) { | 4629 | if (pt->type == htons(ETH_P_ALL)) { |
4619 | if (nxt != &ptype_all) | 4630 | if (nxt != &ptype_all) |
4620 | goto found; | 4631 | goto found; |
4621 | hash = 0; | 4632 | hash = 0; |
4622 | nxt = ptype_base[0].next; | 4633 | nxt = ptype_base[0].next; |
4623 | } else | 4634 | } else |
4624 | hash = ntohs(pt->type) & PTYPE_HASH_MASK; | 4635 | hash = ntohs(pt->type) & PTYPE_HASH_MASK; |
4625 | 4636 | ||
4626 | while (nxt == &ptype_base[hash]) { | 4637 | while (nxt == &ptype_base[hash]) { |
4627 | if (++hash >= PTYPE_HASH_SIZE) | 4638 | if (++hash >= PTYPE_HASH_SIZE) |
4628 | return NULL; | 4639 | return NULL; |
4629 | nxt = ptype_base[hash].next; | 4640 | nxt = ptype_base[hash].next; |
4630 | } | 4641 | } |
4631 | found: | 4642 | found: |
4632 | return list_entry(nxt, struct packet_type, list); | 4643 | return list_entry(nxt, struct packet_type, list); |
4633 | } | 4644 | } |
4634 | 4645 | ||
4635 | static void ptype_seq_stop(struct seq_file *seq, void *v) | 4646 | static void ptype_seq_stop(struct seq_file *seq, void *v) |
4636 | __releases(RCU) | 4647 | __releases(RCU) |
4637 | { | 4648 | { |
4638 | rcu_read_unlock(); | 4649 | rcu_read_unlock(); |
4639 | } | 4650 | } |
4640 | 4651 | ||
4641 | static int ptype_seq_show(struct seq_file *seq, void *v) | 4652 | static int ptype_seq_show(struct seq_file *seq, void *v) |
4642 | { | 4653 | { |
4643 | struct packet_type *pt = v; | 4654 | struct packet_type *pt = v; |
4644 | 4655 | ||
4645 | if (v == SEQ_START_TOKEN) | 4656 | if (v == SEQ_START_TOKEN) |
4646 | seq_puts(seq, "Type Device Function\n"); | 4657 | seq_puts(seq, "Type Device Function\n"); |
4647 | else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { | 4658 | else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { |
4648 | if (pt->type == htons(ETH_P_ALL)) | 4659 | if (pt->type == htons(ETH_P_ALL)) |
4649 | seq_puts(seq, "ALL "); | 4660 | seq_puts(seq, "ALL "); |
4650 | else | 4661 | else |
4651 | seq_printf(seq, "%04x", ntohs(pt->type)); | 4662 | seq_printf(seq, "%04x", ntohs(pt->type)); |
4652 | 4663 | ||
4653 | seq_printf(seq, " %-8s %pF\n", | 4664 | seq_printf(seq, " %-8s %pF\n", |
4654 | pt->dev ? pt->dev->name : "", pt->func); | 4665 | pt->dev ? pt->dev->name : "", pt->func); |
4655 | } | 4666 | } |
4656 | 4667 | ||
4657 | return 0; | 4668 | return 0; |
4658 | } | 4669 | } |
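Entries appear in /proc/net/ptype whenever a handler has been registered with dev_add_pack(). A hedged sketch of a hypothetical module doing that (everything named sample_* is made up for illustration; the handler only frees its copy of the skb):

/*
 * Hedged sketch (hypothetical module, not part of this patch): a
 * packet_type registered with dev_add_pack() is exactly what
 * ptype_seq_show() lists, here as "ALL  <device>  sample_rcv".
 */
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/if_ether.h>

static int sample_rcv(struct sk_buff *skb, struct net_device *dev,
                      struct packet_type *pt, struct net_device *orig_dev)
{
        /* A real tap would inspect the skb; we only free our copy. */
        kfree_skb(skb);
        return 0;
}

static struct packet_type sample_pt = {
        .type = cpu_to_be16(ETH_P_ALL), /* printed as "ALL" */
        .func = sample_rcv,             /* printed via %pF */
};

static int __init sample_init(void)
{
        dev_add_pack(&sample_pt);       /* lands on ptype_all */
        return 0;
}

static void __exit sample_exit(void)
{
        dev_remove_pack(&sample_pt);
}

module_init(sample_init);
module_exit(sample_exit);
MODULE_LICENSE("GPL");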
4659 | 4670 | ||
4660 | static const struct seq_operations ptype_seq_ops = { | 4671 | static const struct seq_operations ptype_seq_ops = { |
4661 | .start = ptype_seq_start, | 4672 | .start = ptype_seq_start, |
4662 | .next = ptype_seq_next, | 4673 | .next = ptype_seq_next, |
4663 | .stop = ptype_seq_stop, | 4674 | .stop = ptype_seq_stop, |
4664 | .show = ptype_seq_show, | 4675 | .show = ptype_seq_show, |
4665 | }; | 4676 | }; |
4666 | 4677 | ||
4667 | static int ptype_seq_open(struct inode *inode, struct file *file) | 4678 | static int ptype_seq_open(struct inode *inode, struct file *file) |
4668 | { | 4679 | { |
4669 | return seq_open_net(inode, file, &ptype_seq_ops, | 4680 | return seq_open_net(inode, file, &ptype_seq_ops, |
4670 | sizeof(struct seq_net_private)); | 4681 | sizeof(struct seq_net_private)); |
4671 | } | 4682 | } |
4672 | 4683 | ||
4673 | static const struct file_operations ptype_seq_fops = { | 4684 | static const struct file_operations ptype_seq_fops = { |
4674 | .owner = THIS_MODULE, | 4685 | .owner = THIS_MODULE, |
4675 | .open = ptype_seq_open, | 4686 | .open = ptype_seq_open, |
4676 | .read = seq_read, | 4687 | .read = seq_read, |
4677 | .llseek = seq_lseek, | 4688 | .llseek = seq_lseek, |
4678 | .release = seq_release_net, | 4689 | .release = seq_release_net, |
4679 | }; | 4690 | }; |
4680 | 4691 | ||
4681 | 4692 | ||
4682 | static int __net_init dev_proc_net_init(struct net *net) | 4693 | static int __net_init dev_proc_net_init(struct net *net) |
4683 | { | 4694 | { |
4684 | int rc = -ENOMEM; | 4695 | int rc = -ENOMEM; |
4685 | 4696 | ||
4686 | if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops)) | 4697 | if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops)) |
4687 | goto out; | 4698 | goto out; |
4688 | if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops)) | 4699 | if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops)) |
4689 | goto out_dev; | 4700 | goto out_dev; |
4690 | if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops)) | 4701 | if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops)) |
4691 | goto out_softnet; | 4702 | goto out_softnet; |
4692 | 4703 | ||
4693 | if (wext_proc_init(net)) | 4704 | if (wext_proc_init(net)) |
4694 | goto out_ptype; | 4705 | goto out_ptype; |
4695 | rc = 0; | 4706 | rc = 0; |
4696 | out: | 4707 | out: |
4697 | return rc; | 4708 | return rc; |
4698 | out_ptype: | 4709 | out_ptype: |
4699 | proc_net_remove(net, "ptype"); | 4710 | proc_net_remove(net, "ptype"); |
4700 | out_softnet: | 4711 | out_softnet: |
4701 | proc_net_remove(net, "softnet_stat"); | 4712 | proc_net_remove(net, "softnet_stat"); |
4702 | out_dev: | 4713 | out_dev: |
4703 | proc_net_remove(net, "dev"); | 4714 | proc_net_remove(net, "dev"); |
4704 | goto out; | 4715 | goto out; |
4705 | } | 4716 | } |
4706 | 4717 | ||
4707 | static void __net_exit dev_proc_net_exit(struct net *net) | 4718 | static void __net_exit dev_proc_net_exit(struct net *net) |
4708 | { | 4719 | { |
4709 | wext_proc_exit(net); | 4720 | wext_proc_exit(net); |
4710 | 4721 | ||
4711 | proc_net_remove(net, "ptype"); | 4722 | proc_net_remove(net, "ptype"); |
4712 | proc_net_remove(net, "softnet_stat"); | 4723 | proc_net_remove(net, "softnet_stat"); |
4713 | proc_net_remove(net, "dev"); | 4724 | proc_net_remove(net, "dev"); |
4714 | } | 4725 | } |
4715 | 4726 | ||
4716 | static struct pernet_operations __net_initdata dev_proc_ops = { | 4727 | static struct pernet_operations __net_initdata dev_proc_ops = { |
4717 | .init = dev_proc_net_init, | 4728 | .init = dev_proc_net_init, |
4718 | .exit = dev_proc_net_exit, | 4729 | .exit = dev_proc_net_exit, |
4719 | }; | 4730 | }; |
4720 | 4731 | ||
4721 | static int __init dev_proc_init(void) | 4732 | static int __init dev_proc_init(void) |
4722 | { | 4733 | { |
4723 | return register_pernet_subsys(&dev_proc_ops); | 4734 | return register_pernet_subsys(&dev_proc_ops); |
4724 | } | 4735 | } |
4725 | #else | 4736 | #else |
4726 | #define dev_proc_init() 0 | 4737 | #define dev_proc_init() 0 |
4727 | #endif /* CONFIG_PROC_FS */ | 4738 | #endif /* CONFIG_PROC_FS */ |
4728 | 4739 | ||
4729 | 4740 | ||
4730 | struct netdev_upper { | 4741 | struct netdev_upper { |
4731 | struct net_device *dev; | 4742 | struct net_device *dev; |
4732 | bool master; | 4743 | bool master; |
4733 | struct list_head list; | 4744 | struct list_head list; |
4734 | struct rcu_head rcu; | 4745 | struct rcu_head rcu; |
4735 | struct list_head search_list; | 4746 | struct list_head search_list; |
4736 | }; | 4747 | }; |
4737 | 4748 | ||
4738 | static void __append_search_uppers(struct list_head *search_list, | 4749 | static void __append_search_uppers(struct list_head *search_list, |
4739 | struct net_device *dev) | 4750 | struct net_device *dev) |
4740 | { | 4751 | { |
4741 | struct netdev_upper *upper; | 4752 | struct netdev_upper *upper; |
4742 | 4753 | ||
4743 | list_for_each_entry(upper, &dev->upper_dev_list, list) { | 4754 | list_for_each_entry(upper, &dev->upper_dev_list, list) { |
4744 | /* check if this upper is not already in search list */ | 4755 | /* check if this upper is not already in search list */ |
4745 | if (list_empty(&upper->search_list)) | 4756 | if (list_empty(&upper->search_list)) |
4746 | list_add_tail(&upper->search_list, search_list); | 4757 | list_add_tail(&upper->search_list, search_list); |
4747 | } | 4758 | } |
4748 | } | 4759 | } |
4749 | 4760 | ||
4750 | static bool __netdev_search_upper_dev(struct net_device *dev, | 4761 | static bool __netdev_search_upper_dev(struct net_device *dev, |
4751 | struct net_device *upper_dev) | 4762 | struct net_device *upper_dev) |
4752 | { | 4763 | { |
4753 | LIST_HEAD(search_list); | 4764 | LIST_HEAD(search_list); |
4754 | struct netdev_upper *upper; | 4765 | struct netdev_upper *upper; |
4755 | struct netdev_upper *tmp; | 4766 | struct netdev_upper *tmp; |
4756 | bool ret = false; | 4767 | bool ret = false; |
4757 | 4768 | ||
4758 | __append_search_uppers(&search_list, dev); | 4769 | __append_search_uppers(&search_list, dev); |
4759 | list_for_each_entry(upper, &search_list, search_list) { | 4770 | list_for_each_entry(upper, &search_list, search_list) { |
4760 | if (upper->dev == upper_dev) { | 4771 | if (upper->dev == upper_dev) { |
4761 | ret = true; | 4772 | ret = true; |
4762 | break; | 4773 | break; |
4763 | } | 4774 | } |
4764 | __append_search_uppers(&search_list, upper->dev); | 4775 | __append_search_uppers(&search_list, upper->dev); |
4765 | } | 4776 | } |
4766 | list_for_each_entry_safe(upper, tmp, &search_list, search_list) | 4777 | list_for_each_entry_safe(upper, tmp, &search_list, search_list) |
4767 | INIT_LIST_HEAD(&upper->search_list); | 4778 | INIT_LIST_HEAD(&upper->search_list); |
4768 | return ret; | 4779 | return ret; |
4769 | } | 4780 | } |
4770 | 4781 | ||
4771 | static struct netdev_upper *__netdev_find_upper(struct net_device *dev, | 4782 | static struct netdev_upper *__netdev_find_upper(struct net_device *dev, |
4772 | struct net_device *upper_dev) | 4783 | struct net_device *upper_dev) |
4773 | { | 4784 | { |
4774 | struct netdev_upper *upper; | 4785 | struct netdev_upper *upper; |
4775 | 4786 | ||
4776 | list_for_each_entry(upper, &dev->upper_dev_list, list) { | 4787 | list_for_each_entry(upper, &dev->upper_dev_list, list) { |
4777 | if (upper->dev == upper_dev) | 4788 | if (upper->dev == upper_dev) |
4778 | return upper; | 4789 | return upper; |
4779 | } | 4790 | } |
4780 | return NULL; | 4791 | return NULL; |
4781 | } | 4792 | } |
4782 | 4793 | ||
4783 | /** | 4794 | /** |
4784 | * netdev_has_upper_dev - Check if device is linked to an upper device | 4795 | * netdev_has_upper_dev - Check if device is linked to an upper device |
4785 | * @dev: device | 4796 | * @dev: device |
4786 | * @upper_dev: upper device to check | 4797 | * @upper_dev: upper device to check |
4787 | * | 4798 | * |
4788 | * Find out if a device is linked to specified upper device and return true | 4799 | * Find out if a device is linked to specified upper device and return true |
4789 | * in case it is. Note that this checks only immediate upper device, | 4800 | * in case it is. Note that this checks only immediate upper device, |
4790 | * not through a complete stack of devices. The caller must hold the RTNL lock. | 4801 | * not through a complete stack of devices. The caller must hold the RTNL lock. |
4791 | */ | 4802 | */ |
4792 | bool netdev_has_upper_dev(struct net_device *dev, | 4803 | bool netdev_has_upper_dev(struct net_device *dev, |
4793 | struct net_device *upper_dev) | 4804 | struct net_device *upper_dev) |
4794 | { | 4805 | { |
4795 | ASSERT_RTNL(); | 4806 | ASSERT_RTNL(); |
4796 | 4807 | ||
4797 | return __netdev_find_upper(dev, upper_dev); | 4808 | return __netdev_find_upper(dev, upper_dev); |
4798 | } | 4809 | } |
4799 | EXPORT_SYMBOL(netdev_has_upper_dev); | 4810 | EXPORT_SYMBOL(netdev_has_upper_dev); |
4800 | 4811 | ||
4801 | /** | 4812 | /** |
4802 | * netdev_has_any_upper_dev - Check if device is linked to some device | 4813 | * netdev_has_any_upper_dev - Check if device is linked to some device |
4803 | * @dev: device | 4814 | * @dev: device |
4804 | * | 4815 | * |
4805 | * Find out if a device is linked to an upper device and return true in case | 4816 | * Find out if a device is linked to an upper device and return true in case |
4806 | * it is. The caller must hold the RTNL lock. | 4817 | * it is. The caller must hold the RTNL lock. |
4807 | */ | 4818 | */ |
4808 | bool netdev_has_any_upper_dev(struct net_device *dev) | 4819 | bool netdev_has_any_upper_dev(struct net_device *dev) |
4809 | { | 4820 | { |
4810 | ASSERT_RTNL(); | 4821 | ASSERT_RTNL(); |
4811 | 4822 | ||
4812 | return !list_empty(&dev->upper_dev_list); | 4823 | return !list_empty(&dev->upper_dev_list); |
4813 | } | 4824 | } |
4814 | EXPORT_SYMBOL(netdev_has_any_upper_dev); | 4825 | EXPORT_SYMBOL(netdev_has_any_upper_dev); |
4815 | 4826 | ||
4816 | /** | 4827 | /** |
4817 | * netdev_master_upper_dev_get - Get master upper device | 4828 | * netdev_master_upper_dev_get - Get master upper device |
4818 | * @dev: device | 4829 | * @dev: device |
4819 | * | 4830 | * |
4820 | * Find a master upper device and return pointer to it or NULL in case | 4831 | * Find a master upper device and return pointer to it or NULL in case |
4821 | * it's not there. The caller must hold the RTNL lock. | 4832 | * it's not there. The caller must hold the RTNL lock. |
4822 | */ | 4833 | */ |
4823 | struct net_device *netdev_master_upper_dev_get(struct net_device *dev) | 4834 | struct net_device *netdev_master_upper_dev_get(struct net_device *dev) |
4824 | { | 4835 | { |
4825 | struct netdev_upper *upper; | 4836 | struct netdev_upper *upper; |
4826 | 4837 | ||
4827 | ASSERT_RTNL(); | 4838 | ASSERT_RTNL(); |
4828 | 4839 | ||
4829 | if (list_empty(&dev->upper_dev_list)) | 4840 | if (list_empty(&dev->upper_dev_list)) |
4830 | return NULL; | 4841 | return NULL; |
4831 | 4842 | ||
4832 | upper = list_first_entry(&dev->upper_dev_list, | 4843 | upper = list_first_entry(&dev->upper_dev_list, |
4833 | struct netdev_upper, list); | 4844 | struct netdev_upper, list); |
4834 | if (likely(upper->master)) | 4845 | if (likely(upper->master)) |
4835 | return upper->dev; | 4846 | return upper->dev; |
4836 | return NULL; | 4847 | return NULL; |
4837 | } | 4848 | } |
4838 | EXPORT_SYMBOL(netdev_master_upper_dev_get); | 4849 | EXPORT_SYMBOL(netdev_master_upper_dev_get); |
4839 | 4850 | ||
4840 | /** | 4851 | /** |
4841 | * netdev_master_upper_dev_get_rcu - Get master upper device | 4852 | * netdev_master_upper_dev_get_rcu - Get master upper device |
4842 | * @dev: device | 4853 | * @dev: device |
4843 | * | 4854 | * |
4844 | * Find a master upper device and return pointer to it or NULL in case | 4855 | * Find a master upper device and return pointer to it or NULL in case |
4845 | * it's not there. The caller must hold the RCU read lock. | 4856 | * it's not there. The caller must hold the RCU read lock. |
4846 | */ | 4857 | */ |
4847 | struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev) | 4858 | struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev) |
4848 | { | 4859 | { |
4849 | struct netdev_upper *upper; | 4860 | struct netdev_upper *upper; |
4850 | 4861 | ||
4851 | upper = list_first_or_null_rcu(&dev->upper_dev_list, | 4862 | upper = list_first_or_null_rcu(&dev->upper_dev_list, |
4852 | struct netdev_upper, list); | 4863 | struct netdev_upper, list); |
4853 | if (upper && likely(upper->master)) | 4864 | if (upper && likely(upper->master)) |
4854 | return upper->dev; | 4865 | return upper->dev; |
4855 | return NULL; | 4866 | return NULL; |
4856 | } | 4867 | } |
4857 | EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu); | 4868 | EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu); |
4858 | 4869 | ||
4859 | static int __netdev_upper_dev_link(struct net_device *dev, | 4870 | static int __netdev_upper_dev_link(struct net_device *dev, |
4860 | struct net_device *upper_dev, bool master) | 4871 | struct net_device *upper_dev, bool master) |
4861 | { | 4872 | { |
4862 | struct netdev_upper *upper; | 4873 | struct netdev_upper *upper; |
4863 | 4874 | ||
4864 | ASSERT_RTNL(); | 4875 | ASSERT_RTNL(); |
4865 | 4876 | ||
4866 | if (dev == upper_dev) | 4877 | if (dev == upper_dev) |
4867 | return -EBUSY; | 4878 | return -EBUSY; |
4868 | 4879 | ||
4869 | /* To prevent loops, check if dev is not upper device to upper_dev. */ | 4880 | /* To prevent loops, check if dev is not upper device to upper_dev. */ |
4870 | if (__netdev_search_upper_dev(upper_dev, dev)) | 4881 | if (__netdev_search_upper_dev(upper_dev, dev)) |
4871 | return -EBUSY; | 4882 | return -EBUSY; |
4872 | 4883 | ||
4873 | if (__netdev_find_upper(dev, upper_dev)) | 4884 | if (__netdev_find_upper(dev, upper_dev)) |
4874 | return -EEXIST; | 4885 | return -EEXIST; |
4875 | 4886 | ||
4876 | if (master && netdev_master_upper_dev_get(dev)) | 4887 | if (master && netdev_master_upper_dev_get(dev)) |
4877 | return -EBUSY; | 4888 | return -EBUSY; |
4878 | 4889 | ||
4879 | upper = kmalloc(sizeof(*upper), GFP_KERNEL); | 4890 | upper = kmalloc(sizeof(*upper), GFP_KERNEL); |
4880 | if (!upper) | 4891 | if (!upper) |
4881 | return -ENOMEM; | 4892 | return -ENOMEM; |
4882 | 4893 | ||
4883 | upper->dev = upper_dev; | 4894 | upper->dev = upper_dev; |
4884 | upper->master = master; | 4895 | upper->master = master; |
4885 | INIT_LIST_HEAD(&upper->search_list); | 4896 | INIT_LIST_HEAD(&upper->search_list); |
4886 | 4897 | ||
4887 | /* Ensure that master upper link is always the first item in list. */ | 4898 | /* Ensure that master upper link is always the first item in list. */ |
4888 | if (master) | 4899 | if (master) |
4889 | list_add_rcu(&upper->list, &dev->upper_dev_list); | 4900 | list_add_rcu(&upper->list, &dev->upper_dev_list); |
4890 | else | 4901 | else |
4891 | list_add_tail_rcu(&upper->list, &dev->upper_dev_list); | 4902 | list_add_tail_rcu(&upper->list, &dev->upper_dev_list); |
4892 | dev_hold(upper_dev); | 4903 | dev_hold(upper_dev); |
4893 | 4904 | ||
4894 | return 0; | 4905 | return 0; |
4895 | } | 4906 | } |
4896 | 4907 | ||
4897 | /** | 4908 | /** |
4898 | * netdev_upper_dev_link - Add a link to the upper device | 4909 | * netdev_upper_dev_link - Add a link to the upper device |
4899 | * @dev: device | 4910 | * @dev: device |
4900 | * @upper_dev: new upper device | 4911 | * @upper_dev: new upper device |
4901 | * | 4912 | * |
4902 | * Adds a link to device which is upper to this one. The caller must hold | 4913 | * Adds a link to device which is upper to this one. The caller must hold |
4903 | * the RTNL lock. On a failure a negative errno code is returned. | 4914 | * the RTNL lock. On a failure a negative errno code is returned. |
4904 | * On success the reference counts are adjusted and the function | 4915 | * On success the reference counts are adjusted and the function |
4905 | * returns zero. | 4916 | * returns zero. |
4906 | */ | 4917 | */ |
4907 | int netdev_upper_dev_link(struct net_device *dev, | 4918 | int netdev_upper_dev_link(struct net_device *dev, |
4908 | struct net_device *upper_dev) | 4919 | struct net_device *upper_dev) |
4909 | { | 4920 | { |
4910 | return __netdev_upper_dev_link(dev, upper_dev, false); | 4921 | return __netdev_upper_dev_link(dev, upper_dev, false); |
4911 | } | 4922 | } |
4912 | EXPORT_SYMBOL(netdev_upper_dev_link); | 4923 | EXPORT_SYMBOL(netdev_upper_dev_link); |
4913 | 4924 | ||
4914 | /** | 4925 | /** |
4915 | * netdev_master_upper_dev_link - Add a master link to the upper device | 4926 | * netdev_master_upper_dev_link - Add a master link to the upper device |
4916 | * @dev: device | 4927 | * @dev: device |
4917 | * @upper_dev: new upper device | 4928 | * @upper_dev: new upper device |
4918 | * | 4929 | * |
4919 | * Adds a link to device which is upper to this one. In this case, only | 4930 | * Adds a link to device which is upper to this one. In this case, only |
4920 | * one master upper device can be linked, although other non-master devices | 4931 | * one master upper device can be linked, although other non-master devices |
4921 | * might be linked as well. The caller must hold the RTNL lock. | 4932 | * might be linked as well. The caller must hold the RTNL lock. |
4922 | * On a failure a negative errno code is returned. On success the reference | 4933 | * On a failure a negative errno code is returned. On success the reference |
4923 | * counts are adjusted and the function returns zero. | 4934 | * counts are adjusted and the function returns zero. |
4924 | */ | 4935 | */ |
4925 | int netdev_master_upper_dev_link(struct net_device *dev, | 4936 | int netdev_master_upper_dev_link(struct net_device *dev, |
4926 | struct net_device *upper_dev) | 4937 | struct net_device *upper_dev) |
4927 | { | 4938 | { |
4928 | return __netdev_upper_dev_link(dev, upper_dev, true); | 4939 | return __netdev_upper_dev_link(dev, upper_dev, true); |
4929 | } | 4940 | } |
4930 | EXPORT_SYMBOL(netdev_master_upper_dev_link); | 4941 | EXPORT_SYMBOL(netdev_master_upper_dev_link); |
4931 | 4942 | ||
4932 | /** | 4943 | /** |
4933 | * netdev_upper_dev_unlink - Removes a link to upper device | 4944 | * netdev_upper_dev_unlink - Removes a link to upper device |
4934 | * @dev: device | 4945 | * @dev: device |
4935 | * @upper_dev: upper device to unlink | 4946 | * @upper_dev: upper device to unlink |
4936 | * | 4947 | * |
4937 | * Removes a link to device which is upper to this one. The caller must hold | 4948 | * Removes a link to device which is upper to this one. The caller must hold |
4938 | * the RTNL lock. | 4949 | * the RTNL lock. |
4939 | */ | 4950 | */ |
4940 | void netdev_upper_dev_unlink(struct net_device *dev, | 4951 | void netdev_upper_dev_unlink(struct net_device *dev, |
4941 | struct net_device *upper_dev) | 4952 | struct net_device *upper_dev) |
4942 | { | 4953 | { |
4943 | struct netdev_upper *upper; | 4954 | struct netdev_upper *upper; |
4944 | 4955 | ||
4945 | ASSERT_RTNL(); | 4956 | ASSERT_RTNL(); |
4946 | 4957 | ||
4947 | upper = __netdev_find_upper(dev, upper_dev); | 4958 | upper = __netdev_find_upper(dev, upper_dev); |
4948 | if (!upper) | 4959 | if (!upper) |
4949 | return; | 4960 | return; |
4950 | list_del_rcu(&upper->list); | 4961 | list_del_rcu(&upper->list); |
4951 | dev_put(upper_dev); | 4962 | dev_put(upper_dev); |
4952 | kfree_rcu(upper, rcu); | 4963 | kfree_rcu(upper, rcu); |
4953 | } | 4964 | } |
4954 | EXPORT_SYMBOL(netdev_upper_dev_unlink); | 4965 | EXPORT_SYMBOL(netdev_upper_dev_unlink); |
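Taken together, these helpers give stacked drivers a generic way to express master/slave relationships. A hedged sketch of how a hypothetical aggregation driver might use them (master_dev and slave_dev are assumed to be valid, already-held net_device pointers; all calls run under rtnl_lock()):

/*
 * Hedged sketch; sample_enslave()/sample_release() are hypothetical.
 */
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/errno.h>

static int sample_enslave(struct net_device *master_dev,
                          struct net_device *slave_dev)
{
        ASSERT_RTNL();

        /* Refuse to link the same pair twice. */
        if (netdev_has_upper_dev(slave_dev, master_dev))
                return -EEXIST;

        /*
         * Creates the link, takes a reference on master_dev and keeps the
         * master entry first in slave_dev->upper_dev_list.
         */
        return netdev_master_upper_dev_link(slave_dev, master_dev);
}

static void sample_release(struct net_device *master_dev,
                           struct net_device *slave_dev)
{
        ASSERT_RTNL();
        /* Drops the reference and frees the netdev_upper entry via RCU. */
        netdev_upper_dev_unlink(slave_dev, master_dev);
}

The __netdev_search_upper_dev() walk above is what makes the link call fail with -EBUSY if the new link would create a loop between the two devices.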
4955 | 4966 | ||
4956 | static void dev_change_rx_flags(struct net_device *dev, int flags) | 4967 | static void dev_change_rx_flags(struct net_device *dev, int flags) |
4957 | { | 4968 | { |
4958 | const struct net_device_ops *ops = dev->netdev_ops; | 4969 | const struct net_device_ops *ops = dev->netdev_ops; |
4959 | 4970 | ||
4960 | if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags) | 4971 | if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags) |
4961 | ops->ndo_change_rx_flags(dev, flags); | 4972 | ops->ndo_change_rx_flags(dev, flags); |
4962 | } | 4973 | } |
4963 | 4974 | ||
4964 | static int __dev_set_promiscuity(struct net_device *dev, int inc) | 4975 | static int __dev_set_promiscuity(struct net_device *dev, int inc) |
4965 | { | 4976 | { |
4966 | unsigned int old_flags = dev->flags; | 4977 | unsigned int old_flags = dev->flags; |
4967 | kuid_t uid; | 4978 | kuid_t uid; |
4968 | kgid_t gid; | 4979 | kgid_t gid; |
4969 | 4980 | ||
4970 | ASSERT_RTNL(); | 4981 | ASSERT_RTNL(); |
4971 | 4982 | ||
4972 | dev->flags |= IFF_PROMISC; | 4983 | dev->flags |= IFF_PROMISC; |
4973 | dev->promiscuity += inc; | 4984 | dev->promiscuity += inc; |
4974 | if (dev->promiscuity == 0) { | 4985 | if (dev->promiscuity == 0) { |
4975 | /* | 4986 | /* |
4976 | * Avoid overflow. | 4987 | * Avoid overflow. |
4977 | * If inc causes overflow, untouch promisc and return error. | 4988 | * If inc causes overflow, untouch promisc and return error. |
4978 | */ | 4989 | */ |
4979 | if (inc < 0) | 4990 | if (inc < 0) |
4980 | dev->flags &= ~IFF_PROMISC; | 4991 | dev->flags &= ~IFF_PROMISC; |
4981 | else { | 4992 | else { |
4982 | dev->promiscuity -= inc; | 4993 | dev->promiscuity -= inc; |
4983 | pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n", | 4994 | pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n", |
4984 | dev->name); | 4995 | dev->name); |
4985 | return -EOVERFLOW; | 4996 | return -EOVERFLOW; |
4986 | } | 4997 | } |
4987 | } | 4998 | } |
4988 | if (dev->flags != old_flags) { | 4999 | if (dev->flags != old_flags) { |
4989 | pr_info("device %s %s promiscuous mode\n", | 5000 | pr_info("device %s %s promiscuous mode\n", |
4990 | dev->name, | 5001 | dev->name, |
4991 | dev->flags & IFF_PROMISC ? "entered" : "left"); | 5002 | dev->flags & IFF_PROMISC ? "entered" : "left"); |
4992 | if (audit_enabled) { | 5003 | if (audit_enabled) { |
4993 | current_uid_gid(&uid, &gid); | 5004 | current_uid_gid(&uid, &gid); |
4994 | audit_log(current->audit_context, GFP_ATOMIC, | 5005 | audit_log(current->audit_context, GFP_ATOMIC, |
4995 | AUDIT_ANOM_PROMISCUOUS, | 5006 | AUDIT_ANOM_PROMISCUOUS, |
4996 | "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u", | 5007 | "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u", |
4997 | dev->name, (dev->flags & IFF_PROMISC), | 5008 | dev->name, (dev->flags & IFF_PROMISC), |
4998 | (old_flags & IFF_PROMISC), | 5009 | (old_flags & IFF_PROMISC), |
4999 | from_kuid(&init_user_ns, audit_get_loginuid(current)), | 5010 | from_kuid(&init_user_ns, audit_get_loginuid(current)), |
5000 | from_kuid(&init_user_ns, uid), | 5011 | from_kuid(&init_user_ns, uid), |
5001 | from_kgid(&init_user_ns, gid), | 5012 | from_kgid(&init_user_ns, gid), |
5002 | audit_get_sessionid(current)); | 5013 | audit_get_sessionid(current)); |
5003 | } | 5014 | } |
5004 | 5015 | ||
5005 | dev_change_rx_flags(dev, IFF_PROMISC); | 5016 | dev_change_rx_flags(dev, IFF_PROMISC); |
5006 | } | 5017 | } |
5007 | return 0; | 5018 | return 0; |
5008 | } | 5019 | } |
5009 | 5020 | ||
5010 | /** | 5021 | /** |
5011 | * dev_set_promiscuity - update promiscuity count on a device | 5022 | * dev_set_promiscuity - update promiscuity count on a device |
5012 | * @dev: device | 5023 | * @dev: device |
5013 | * @inc: modifier | 5024 | * @inc: modifier |
5014 | * | 5025 | * |
5015 | * Add or remove promiscuity from a device. While the count in the device | 5026 | * Add or remove promiscuity from a device. While the count in the device |
5016 | * remains above zero the interface remains promiscuous. Once it hits zero | 5027 | * remains above zero the interface remains promiscuous. Once it hits zero |
5017 | * the device reverts back to normal filtering operation. A negative inc | 5028 | * the device reverts back to normal filtering operation. A negative inc |
5018 | * value is used to drop promiscuity on the device. | 5029 | * value is used to drop promiscuity on the device. |
5019 | * Return 0 if successful or a negative errno code on error. | 5030 | * Return 0 if successful or a negative errno code on error. |
5020 | */ | 5031 | */ |
5021 | int dev_set_promiscuity(struct net_device *dev, int inc) | 5032 | int dev_set_promiscuity(struct net_device *dev, int inc) |
5022 | { | 5033 | { |
5023 | unsigned int old_flags = dev->flags; | 5034 | unsigned int old_flags = dev->flags; |
5024 | int err; | 5035 | int err; |
5025 | 5036 | ||
5026 | err = __dev_set_promiscuity(dev, inc); | 5037 | err = __dev_set_promiscuity(dev, inc); |
5027 | if (err < 0) | 5038 | if (err < 0) |
5028 | return err; | 5039 | return err; |
5029 | if (dev->flags != old_flags) | 5040 | if (dev->flags != old_flags) |
5030 | dev_set_rx_mode(dev); | 5041 | dev_set_rx_mode(dev); |
5031 | return err; | 5042 | return err; |
5032 | } | 5043 | } |
5033 | EXPORT_SYMBOL(dev_set_promiscuity); | 5044 | EXPORT_SYMBOL(dev_set_promiscuity); |
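Because promiscuity is a counter rather than a flag, each +1 is expected to be paired with a later -1. A hedged sketch from a hypothetical capture module ("dev" is an assumed, already-held net_device pointer):

/*
 * Hedged sketch: the device leaves promiscuous mode only once every user
 * has dropped its reference on the counter.
 */
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static int sample_capture_start(struct net_device *dev)
{
        int err;

        rtnl_lock();
        err = dev_set_promiscuity(dev, 1);      /* bump the counter */
        rtnl_unlock();
        return err;
}

static void sample_capture_stop(struct net_device *dev)
{
        rtnl_lock();
        dev_set_promiscuity(dev, -1);           /* and drop it again */
        rtnl_unlock();
}

dev_set_allmulti() below follows the same counted pattern for all-multicast reception.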
5034 | 5045 | ||
5035 | /** | 5046 | /** |
5036 | * dev_set_allmulti - update allmulti count on a device | 5047 | * dev_set_allmulti - update allmulti count on a device |
5037 | * @dev: device | 5048 | * @dev: device |
5038 | * @inc: modifier | 5049 | * @inc: modifier |
5039 | * | 5050 | * |
5040 | * Add or remove reception of all multicast frames to a device. While the | 5051 | * Add or remove reception of all multicast frames to a device. While the |
5041 | * count in the device remains above zero the interface remains listening | 5052 | * count in the device remains above zero the interface remains listening |
5042 | * to all multicast frames. Once it hits zero the device reverts back to normal | 5053 | * to all multicast frames. Once it hits zero the device reverts back to normal |
5043 | * filtering operation. A negative @inc value is used to drop the counter | 5054 | * filtering operation. A negative @inc value is used to drop the counter |
5044 | * when releasing a resource needing all multicasts. | 5055 | * when releasing a resource needing all multicasts. |
5045 | * Return 0 if successful or a negative errno code on error. | 5056 | * Return 0 if successful or a negative errno code on error. |
5046 | */ | 5057 | */ |
5047 | 5058 | ||
5048 | int dev_set_allmulti(struct net_device *dev, int inc) | 5059 | int dev_set_allmulti(struct net_device *dev, int inc) |
5049 | { | 5060 | { |
5050 | unsigned int old_flags = dev->flags; | 5061 | unsigned int old_flags = dev->flags; |
5051 | 5062 | ||
5052 | ASSERT_RTNL(); | 5063 | ASSERT_RTNL(); |
5053 | 5064 | ||
5054 | dev->flags |= IFF_ALLMULTI; | 5065 | dev->flags |= IFF_ALLMULTI; |
5055 | dev->allmulti += inc; | 5066 | dev->allmulti += inc; |
5056 | if (dev->allmulti == 0) { | 5067 | if (dev->allmulti == 0) { |
5057 | /* | 5068 | /* |
5058 | * Avoid overflow. | 5069 | * Avoid overflow. |
5059 | * If inc causes overflow, untouch allmulti and return error. | 5070 | * If inc causes overflow, untouch allmulti and return error. |
5060 | */ | 5071 | */ |
5061 | if (inc < 0) | 5072 | if (inc < 0) |
5062 | dev->flags &= ~IFF_ALLMULTI; | 5073 | dev->flags &= ~IFF_ALLMULTI; |
5063 | else { | 5074 | else { |
5064 | dev->allmulti -= inc; | 5075 | dev->allmulti -= inc; |
5065 | pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n", | 5076 | pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n", |
5066 | dev->name); | 5077 | dev->name); |
5067 | return -EOVERFLOW; | 5078 | return -EOVERFLOW; |
5068 | } | 5079 | } |
5069 | } | 5080 | } |
5070 | if (dev->flags ^ old_flags) { | 5081 | if (dev->flags ^ old_flags) { |
5071 | dev_change_rx_flags(dev, IFF_ALLMULTI); | 5082 | dev_change_rx_flags(dev, IFF_ALLMULTI); |
5072 | dev_set_rx_mode(dev); | 5083 | dev_set_rx_mode(dev); |
5073 | } | 5084 | } |
5074 | return 0; | 5085 | return 0; |
5075 | } | 5086 | } |
5076 | EXPORT_SYMBOL(dev_set_allmulti); | 5087 | EXPORT_SYMBOL(dev_set_allmulti); |
5077 | 5088 | ||
5078 | /* | 5089 | /* |
5079 | * Upload unicast and multicast address lists to device and | 5090 | * Upload unicast and multicast address lists to device and |
5080 | * configure RX filtering. When the device doesn't support unicast | 5091 | * configure RX filtering. When the device doesn't support unicast |
5081 | * filtering it is put in promiscuous mode while unicast addresses | 5092 | * filtering it is put in promiscuous mode while unicast addresses |
5082 | * are present. | 5093 | * are present. |
5083 | */ | 5094 | */ |
5084 | void __dev_set_rx_mode(struct net_device *dev) | 5095 | void __dev_set_rx_mode(struct net_device *dev) |
5085 | { | 5096 | { |
5086 | const struct net_device_ops *ops = dev->netdev_ops; | 5097 | const struct net_device_ops *ops = dev->netdev_ops; |
5087 | 5098 | ||
5088 | /* dev_open will call this function so the list will stay sane. */ | 5099 | /* dev_open will call this function so the list will stay sane. */ |
5089 | if (!(dev->flags&IFF_UP)) | 5100 | if (!(dev->flags&IFF_UP)) |
5090 | return; | 5101 | return; |
5091 | 5102 | ||
5092 | if (!netif_device_present(dev)) | 5103 | if (!netif_device_present(dev)) |
5093 | return; | 5104 | return; |
5094 | 5105 | ||
5095 | if (!(dev->priv_flags & IFF_UNICAST_FLT)) { | 5106 | if (!(dev->priv_flags & IFF_UNICAST_FLT)) { |
5096 | /* Unicast addresses changes may only happen under the rtnl, | 5107 | /* Unicast addresses changes may only happen under the rtnl, |
5097 | * therefore calling __dev_set_promiscuity here is safe. | 5108 | * therefore calling __dev_set_promiscuity here is safe. |
5098 | */ | 5109 | */ |
5099 | if (!netdev_uc_empty(dev) && !dev->uc_promisc) { | 5110 | if (!netdev_uc_empty(dev) && !dev->uc_promisc) { |
5100 | __dev_set_promiscuity(dev, 1); | 5111 | __dev_set_promiscuity(dev, 1); |
5101 | dev->uc_promisc = true; | 5112 | dev->uc_promisc = true; |
5102 | } else if (netdev_uc_empty(dev) && dev->uc_promisc) { | 5113 | } else if (netdev_uc_empty(dev) && dev->uc_promisc) { |
5103 | __dev_set_promiscuity(dev, -1); | 5114 | __dev_set_promiscuity(dev, -1); |
5104 | dev->uc_promisc = false; | 5115 | dev->uc_promisc = false; |
5105 | } | 5116 | } |
5106 | } | 5117 | } |
5107 | 5118 | ||
5108 | if (ops->ndo_set_rx_mode) | 5119 | if (ops->ndo_set_rx_mode) |
5109 | ops->ndo_set_rx_mode(dev); | 5120 | ops->ndo_set_rx_mode(dev); |
5110 | } | 5121 | } |
5111 | 5122 | ||
5112 | void dev_set_rx_mode(struct net_device *dev) | 5123 | void dev_set_rx_mode(struct net_device *dev) |
5113 | { | 5124 | { |
5114 | netif_addr_lock_bh(dev); | 5125 | netif_addr_lock_bh(dev); |
5115 | __dev_set_rx_mode(dev); | 5126 | __dev_set_rx_mode(dev); |
5116 | netif_addr_unlock_bh(dev); | 5127 | netif_addr_unlock_bh(dev); |
5117 | } | 5128 | } |
5118 | 5129 | ||
5119 | /** | 5130 | /** |
5120 | * dev_get_flags - get flags reported to userspace | 5131 | * dev_get_flags - get flags reported to userspace |
5121 | * @dev: device | 5132 | * @dev: device |
5122 | * | 5133 | * |
5123 | * Get the combination of flag bits exported through APIs to userspace. | 5134 | * Get the combination of flag bits exported through APIs to userspace. |
5124 | */ | 5135 | */ |
5125 | unsigned int dev_get_flags(const struct net_device *dev) | 5136 | unsigned int dev_get_flags(const struct net_device *dev) |
5126 | { | 5137 | { |
5127 | unsigned int flags; | 5138 | unsigned int flags; |
5128 | 5139 | ||
5129 | flags = (dev->flags & ~(IFF_PROMISC | | 5140 | flags = (dev->flags & ~(IFF_PROMISC | |
5130 | IFF_ALLMULTI | | 5141 | IFF_ALLMULTI | |
5131 | IFF_RUNNING | | 5142 | IFF_RUNNING | |
5132 | IFF_LOWER_UP | | 5143 | IFF_LOWER_UP | |
5133 | IFF_DORMANT)) | | 5144 | IFF_DORMANT)) | |
5134 | (dev->gflags & (IFF_PROMISC | | 5145 | (dev->gflags & (IFF_PROMISC | |
5135 | IFF_ALLMULTI)); | 5146 | IFF_ALLMULTI)); |
5136 | 5147 | ||
5137 | if (netif_running(dev)) { | 5148 | if (netif_running(dev)) { |
5138 | if (netif_oper_up(dev)) | 5149 | if (netif_oper_up(dev)) |
5139 | flags |= IFF_RUNNING; | 5150 | flags |= IFF_RUNNING; |
5140 | if (netif_carrier_ok(dev)) | 5151 | if (netif_carrier_ok(dev)) |
5141 | flags |= IFF_LOWER_UP; | 5152 | flags |= IFF_LOWER_UP; |
5142 | if (netif_dormant(dev)) | 5153 | if (netif_dormant(dev)) |
5143 | flags |= IFF_DORMANT; | 5154 | flags |= IFF_DORMANT; |
5144 | } | 5155 | } |
5145 | 5156 | ||
5146 | return flags; | 5157 | return flags; |
5147 | } | 5158 | } |
5148 | EXPORT_SYMBOL(dev_get_flags); | 5159 | EXPORT_SYMBOL(dev_get_flags); |
5149 | 5160 | ||
5150 | int __dev_change_flags(struct net_device *dev, unsigned int flags) | 5161 | int __dev_change_flags(struct net_device *dev, unsigned int flags) |
5151 | { | 5162 | { |
5152 | unsigned int old_flags = dev->flags; | 5163 | unsigned int old_flags = dev->flags; |
5153 | int ret; | 5164 | int ret; |
5154 | 5165 | ||
5155 | ASSERT_RTNL(); | 5166 | ASSERT_RTNL(); |
5156 | 5167 | ||
5157 | /* | 5168 | /* |
5158 | * Set the flags on our device. | 5169 | * Set the flags on our device. |
5159 | */ | 5170 | */ |
5160 | 5171 | ||
5161 | dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP | | 5172 | dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP | |
5162 | IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL | | 5173 | IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL | |
5163 | IFF_AUTOMEDIA)) | | 5174 | IFF_AUTOMEDIA)) | |
5164 | (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC | | 5175 | (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC | |
5165 | IFF_ALLMULTI)); | 5176 | IFF_ALLMULTI)); |
5166 | 5177 | ||
5167 | /* | 5178 | /* |
5168 | * Load in the correct multicast list now the flags have changed. | 5179 | * Load in the correct multicast list now the flags have changed. |
5169 | */ | 5180 | */ |
5170 | 5181 | ||
5171 | if ((old_flags ^ flags) & IFF_MULTICAST) | 5182 | if ((old_flags ^ flags) & IFF_MULTICAST) |
5172 | dev_change_rx_flags(dev, IFF_MULTICAST); | 5183 | dev_change_rx_flags(dev, IFF_MULTICAST); |
5173 | 5184 | ||
5174 | dev_set_rx_mode(dev); | 5185 | dev_set_rx_mode(dev); |
5175 | 5186 | ||
5176 | /* | 5187 | /* |
5177 | * Have we downed the interface. We handle IFF_UP ourselves | 5188 | * Have we downed the interface. We handle IFF_UP ourselves |
5178 | * according to user attempts to set it, rather than blindly | 5189 | * according to user attempts to set it, rather than blindly |
5179 | * setting it. | 5190 | * setting it. |
5180 | */ | 5191 | */ |
5181 | 5192 | ||
5182 | ret = 0; | 5193 | ret = 0; |
5183 | if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */ | 5194 | if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */ |
5184 | ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev); | 5195 | ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev); |
5185 | 5196 | ||
5186 | if (!ret) | 5197 | if (!ret) |
5187 | dev_set_rx_mode(dev); | 5198 | dev_set_rx_mode(dev); |
5188 | } | 5199 | } |
5189 | 5200 | ||
5190 | if ((flags ^ dev->gflags) & IFF_PROMISC) { | 5201 | if ((flags ^ dev->gflags) & IFF_PROMISC) { |
5191 | int inc = (flags & IFF_PROMISC) ? 1 : -1; | 5202 | int inc = (flags & IFF_PROMISC) ? 1 : -1; |
5192 | 5203 | ||
5193 | dev->gflags ^= IFF_PROMISC; | 5204 | dev->gflags ^= IFF_PROMISC; |
5194 | dev_set_promiscuity(dev, inc); | 5205 | dev_set_promiscuity(dev, inc); |
5195 | } | 5206 | } |
5196 | 5207 | ||
5197 | /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI | 5208 | /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI |
5198 | is important. Some (broken) drivers set IFF_PROMISC, when | 5209 | is important. Some (broken) drivers set IFF_PROMISC, when |
5199 | IFF_ALLMULTI is requested not asking us and not reporting. | 5210 | IFF_ALLMULTI is requested not asking us and not reporting. |
5200 | */ | 5211 | */ |
5201 | if ((flags ^ dev->gflags) & IFF_ALLMULTI) { | 5212 | if ((flags ^ dev->gflags) & IFF_ALLMULTI) { |
5202 | int inc = (flags & IFF_ALLMULTI) ? 1 : -1; | 5213 | int inc = (flags & IFF_ALLMULTI) ? 1 : -1; |
5203 | 5214 | ||
5204 | dev->gflags ^= IFF_ALLMULTI; | 5215 | dev->gflags ^= IFF_ALLMULTI; |
5205 | dev_set_allmulti(dev, inc); | 5216 | dev_set_allmulti(dev, inc); |
5206 | } | 5217 | } |
5207 | 5218 | ||
5208 | return ret; | 5219 | return ret; |
5209 | } | 5220 | } |
5210 | 5221 | ||
5211 | void __dev_notify_flags(struct net_device *dev, unsigned int old_flags) | 5222 | void __dev_notify_flags(struct net_device *dev, unsigned int old_flags) |
5212 | { | 5223 | { |
5213 | unsigned int changes = dev->flags ^ old_flags; | 5224 | unsigned int changes = dev->flags ^ old_flags; |
5214 | 5225 | ||
5215 | if (changes & IFF_UP) { | 5226 | if (changes & IFF_UP) { |
5216 | if (dev->flags & IFF_UP) | 5227 | if (dev->flags & IFF_UP) |
5217 | call_netdevice_notifiers(NETDEV_UP, dev); | 5228 | call_netdevice_notifiers(NETDEV_UP, dev); |
5218 | else | 5229 | else |
5219 | call_netdevice_notifiers(NETDEV_DOWN, dev); | 5230 | call_netdevice_notifiers(NETDEV_DOWN, dev); |
5220 | } | 5231 | } |
5221 | 5232 | ||
5222 | if (dev->flags & IFF_UP && | 5233 | if (dev->flags & IFF_UP && |
5223 | (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) | 5234 | (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) |
5224 | call_netdevice_notifiers(NETDEV_CHANGE, dev); | 5235 | call_netdevice_notifiers(NETDEV_CHANGE, dev); |
5225 | } | 5236 | } |
5226 | 5237 | ||
5227 | /** | 5238 | /** |
5228 | * dev_change_flags - change device settings | 5239 | * dev_change_flags - change device settings |
5229 | * @dev: device | 5240 | * @dev: device |
5230 | * @flags: device state flags | 5241 | * @flags: device state flags |
5231 | * | 5242 | * |
5232 | * Change settings on the device based on the state flags. The flags are | 5243 | * Change settings on the device based on the state flags. The flags are |
5233 | * in the userspace exported format. | 5244 | * in the userspace exported format. |
5234 | */ | 5245 | */ |
5235 | int dev_change_flags(struct net_device *dev, unsigned int flags) | 5246 | int dev_change_flags(struct net_device *dev, unsigned int flags) |
5236 | { | 5247 | { |
5237 | int ret; | 5248 | int ret; |
5238 | unsigned int changes, old_flags = dev->flags; | 5249 | unsigned int changes, old_flags = dev->flags; |
5239 | 5250 | ||
5240 | ret = __dev_change_flags(dev, flags); | 5251 | ret = __dev_change_flags(dev, flags); |
5241 | if (ret < 0) | 5252 | if (ret < 0) |
5242 | return ret; | 5253 | return ret; |
5243 | 5254 | ||
5244 | changes = old_flags ^ dev->flags; | 5255 | changes = old_flags ^ dev->flags; |
5245 | if (changes) | 5256 | if (changes) |
5246 | rtmsg_ifinfo(RTM_NEWLINK, dev, changes); | 5257 | rtmsg_ifinfo(RTM_NEWLINK, dev, changes); |
5247 | 5258 | ||
5248 | __dev_notify_flags(dev, old_flags); | 5259 | __dev_notify_flags(dev, old_flags); |
5249 | return ret; | 5260 | return ret; |
5250 | } | 5261 | } |
5251 | EXPORT_SYMBOL(dev_change_flags); | 5262 | EXPORT_SYMBOL(dev_change_flags); |
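A hedged sketch of toggling IFF_UP from kernel code; SIOCSIFFLAGS in dev_ifsioc() below reaches the same helper ("dev" is an assumed valid net_device, and RTNL is taken because __dev_change_flags() asserts it):

/*
 * Hedged sketch; sample_set_link_up() is hypothetical.
 */
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static int sample_set_link_up(struct net_device *dev, bool up)
{
        unsigned int flags;
        int err;

        rtnl_lock();
        flags = dev_get_flags(dev);
        if (up)
                flags |= IFF_UP;
        else
                flags &= ~IFF_UP;
        /*
         * NETDEV_UP/NETDEV_DOWN notifiers and the RTM_NEWLINK message are
         * generated by dev_change_flags() only when something changed.
         */
        err = dev_change_flags(dev, flags);
        rtnl_unlock();
        return err;
}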
5252 | 5263 | ||
5253 | /** | 5264 | /** |
5254 | * dev_set_mtu - Change maximum transfer unit | 5265 | * dev_set_mtu - Change maximum transfer unit |
5255 | * @dev: device | 5266 | * @dev: device |
5256 | * @new_mtu: new transfer unit | 5267 | * @new_mtu: new transfer unit |
5257 | * | 5268 | * |
5258 | * Change the maximum transfer size of the network device. | 5269 | * Change the maximum transfer size of the network device. |
5259 | */ | 5270 | */ |
5260 | int dev_set_mtu(struct net_device *dev, int new_mtu) | 5271 | int dev_set_mtu(struct net_device *dev, int new_mtu) |
5261 | { | 5272 | { |
5262 | const struct net_device_ops *ops = dev->netdev_ops; | 5273 | const struct net_device_ops *ops = dev->netdev_ops; |
5263 | int err; | 5274 | int err; |
5264 | 5275 | ||
5265 | if (new_mtu == dev->mtu) | 5276 | if (new_mtu == dev->mtu) |
5266 | return 0; | 5277 | return 0; |
5267 | 5278 | ||
5268 | /* MTU must be positive. */ | 5279 | /* MTU must be positive. */ |
5269 | if (new_mtu < 0) | 5280 | if (new_mtu < 0) |
5270 | return -EINVAL; | 5281 | return -EINVAL; |
5271 | 5282 | ||
5272 | if (!netif_device_present(dev)) | 5283 | if (!netif_device_present(dev)) |
5273 | return -ENODEV; | 5284 | return -ENODEV; |
5274 | 5285 | ||
5275 | err = 0; | 5286 | err = 0; |
5276 | if (ops->ndo_change_mtu) | 5287 | if (ops->ndo_change_mtu) |
5277 | err = ops->ndo_change_mtu(dev, new_mtu); | 5288 | err = ops->ndo_change_mtu(dev, new_mtu); |
5278 | else | 5289 | else |
5279 | dev->mtu = new_mtu; | 5290 | dev->mtu = new_mtu; |
5280 | 5291 | ||
5281 | if (!err) | 5292 | if (!err) |
5282 | call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); | 5293 | call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); |
5283 | return err; | 5294 | return err; |
5284 | } | 5295 | } |
5285 | EXPORT_SYMBOL(dev_set_mtu); | 5296 | EXPORT_SYMBOL(dev_set_mtu); |
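Drivers and stacked devices call this helper directly; the SIOCSIFMTU ioctl below reaches it through dev_ifsioc(). A hedged sketch of a kernel-side caller (sample_set_mtu is hypothetical):

/*
 * Hedged sketch: dev and mtu come from a hypothetical caller; RTNL is
 * already held, matching how dev_ifsioc() invokes dev_set_mtu().
 */
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static int sample_set_mtu(struct net_device *dev, int mtu)
{
        int err;

        ASSERT_RTNL();
        err = dev_set_mtu(dev, mtu);
        if (err)
                netdev_err(dev, "setting MTU to %d failed: %d\n", mtu, err);
        return err;
}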
5286 | 5297 | ||
5287 | /** | 5298 | /** |
5288 | * dev_set_group - Change group this device belongs to | 5299 | * dev_set_group - Change group this device belongs to |
5289 | * @dev: device | 5300 | * @dev: device |
5290 | * @new_group: group this device should belong to | 5301 | * @new_group: group this device should belong to |
5291 | */ | 5302 | */ |
5292 | void dev_set_group(struct net_device *dev, int new_group) | 5303 | void dev_set_group(struct net_device *dev, int new_group) |
5293 | { | 5304 | { |
5294 | dev->group = new_group; | 5305 | dev->group = new_group; |
5295 | } | 5306 | } |
5296 | EXPORT_SYMBOL(dev_set_group); | 5307 | EXPORT_SYMBOL(dev_set_group); |
5297 | 5308 | ||
5298 | /** | 5309 | /** |
5299 | * dev_set_mac_address - Change Media Access Control Address | 5310 | * dev_set_mac_address - Change Media Access Control Address |
5300 | * @dev: device | 5311 | * @dev: device |
5301 | * @sa: new address | 5312 | * @sa: new address |
5302 | * | 5313 | * |
5303 | * Change the hardware (MAC) address of the device | 5314 | * Change the hardware (MAC) address of the device |
5304 | */ | 5315 | */ |
5305 | int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) | 5316 | int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) |
5306 | { | 5317 | { |
5307 | const struct net_device_ops *ops = dev->netdev_ops; | 5318 | const struct net_device_ops *ops = dev->netdev_ops; |
5308 | int err; | 5319 | int err; |
5309 | 5320 | ||
5310 | if (!ops->ndo_set_mac_address) | 5321 | if (!ops->ndo_set_mac_address) |
5311 | return -EOPNOTSUPP; | 5322 | return -EOPNOTSUPP; |
5312 | if (sa->sa_family != dev->type) | 5323 | if (sa->sa_family != dev->type) |
5313 | return -EINVAL; | 5324 | return -EINVAL; |
5314 | if (!netif_device_present(dev)) | 5325 | if (!netif_device_present(dev)) |
5315 | return -ENODEV; | 5326 | return -ENODEV; |
5316 | err = ops->ndo_set_mac_address(dev, sa); | 5327 | err = ops->ndo_set_mac_address(dev, sa); |
5317 | if (err) | 5328 | if (err) |
5318 | return err; | 5329 | return err; |
5319 | dev->addr_assign_type = NET_ADDR_SET; | 5330 | dev->addr_assign_type = NET_ADDR_SET; |
5320 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); | 5331 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); |
5321 | add_device_randomness(dev->dev_addr, dev->addr_len); | 5332 | add_device_randomness(dev->dev_addr, dev->addr_len); |
5322 | return 0; | 5333 | return 0; |
5323 | } | 5334 | } |
5324 | EXPORT_SYMBOL(dev_set_mac_address); | 5335 | EXPORT_SYMBOL(dev_set_mac_address); |
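A hedged sketch of programming a new Ethernet address through dev_set_mac_address(): the sockaddr family must match dev->type and the payload length is dev->addr_len, per the checks above (sample_set_mac and its caller are hypothetical):

/*
 * Hedged sketch for an Ethernet-type device.
 */
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/rtnetlink.h>
#include <linux/errno.h>

static int sample_set_mac(struct net_device *dev, const u8 *mac)
{
        struct sockaddr sa;

        if (!is_valid_ether_addr(mac))
                return -EADDRNOTAVAIL;

        sa.sa_family = dev->type;               /* ARPHRD_ETHER for Ethernet */
        memcpy(sa.sa_data, mac, dev->addr_len);

        ASSERT_RTNL();
        return dev_set_mac_address(dev, &sa);
}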
5325 | 5336 | ||
5326 | /** | 5337 | /** |
5327 | * dev_change_carrier - Change device carrier | 5338 | * dev_change_carrier - Change device carrier |
5328 | * @dev: device | 5339 | * @dev: device |
5329 | * @new_carrier: new value | 5340 | * @new_carrier: new value |
5330 | * | 5341 | * |
5331 | * Change device carrier | 5342 | * Change device carrier |
5332 | */ | 5343 | */ |
5333 | int dev_change_carrier(struct net_device *dev, bool new_carrier) | 5344 | int dev_change_carrier(struct net_device *dev, bool new_carrier) |
5334 | { | 5345 | { |
5335 | const struct net_device_ops *ops = dev->netdev_ops; | 5346 | const struct net_device_ops *ops = dev->netdev_ops; |
5336 | 5347 | ||
5337 | if (!ops->ndo_change_carrier) | 5348 | if (!ops->ndo_change_carrier) |
5338 | return -EOPNOTSUPP; | 5349 | return -EOPNOTSUPP; |
5339 | if (!netif_device_present(dev)) | 5350 | if (!netif_device_present(dev)) |
5340 | return -ENODEV; | 5351 | return -ENODEV; |
5341 | return ops->ndo_change_carrier(dev, new_carrier); | 5352 | return ops->ndo_change_carrier(dev, new_carrier); |
5342 | } | 5353 | } |
5343 | EXPORT_SYMBOL(dev_change_carrier); | 5354 | EXPORT_SYMBOL(dev_change_carrier); |
5344 | 5355 | ||
5345 | /* | 5356 | /* |
5346 | * Perform the SIOCxIFxxx calls, inside rcu_read_lock() | 5357 | * Perform the SIOCxIFxxx calls, inside rcu_read_lock() |
5347 | */ | 5358 | */ |
5348 | static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd) | 5359 | static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd) |
5349 | { | 5360 | { |
5350 | int err; | 5361 | int err; |
5351 | struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name); | 5362 | struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name); |
5352 | 5363 | ||
5353 | if (!dev) | 5364 | if (!dev) |
5354 | return -ENODEV; | 5365 | return -ENODEV; |
5355 | 5366 | ||
5356 | switch (cmd) { | 5367 | switch (cmd) { |
5357 | case SIOCGIFFLAGS: /* Get interface flags */ | 5368 | case SIOCGIFFLAGS: /* Get interface flags */ |
5358 | ifr->ifr_flags = (short) dev_get_flags(dev); | 5369 | ifr->ifr_flags = (short) dev_get_flags(dev); |
5359 | return 0; | 5370 | return 0; |
5360 | 5371 | ||
5361 | case SIOCGIFMETRIC: /* Get the metric on the interface | 5372 | case SIOCGIFMETRIC: /* Get the metric on the interface |
5362 | (currently unused) */ | 5373 | (currently unused) */ |
5363 | ifr->ifr_metric = 0; | 5374 | ifr->ifr_metric = 0; |
5364 | return 0; | 5375 | return 0; |
5365 | 5376 | ||
5366 | case SIOCGIFMTU: /* Get the MTU of a device */ | 5377 | case SIOCGIFMTU: /* Get the MTU of a device */ |
5367 | ifr->ifr_mtu = dev->mtu; | 5378 | ifr->ifr_mtu = dev->mtu; |
5368 | return 0; | 5379 | return 0; |
5369 | 5380 | ||
5370 | case SIOCGIFHWADDR: | 5381 | case SIOCGIFHWADDR: |
5371 | if (!dev->addr_len) | 5382 | if (!dev->addr_len) |
5372 | memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); | 5383 | memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); |
5373 | else | 5384 | else |
5374 | memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, | 5385 | memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, |
5375 | min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); | 5386 | min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); |
5376 | ifr->ifr_hwaddr.sa_family = dev->type; | 5387 | ifr->ifr_hwaddr.sa_family = dev->type; |
5377 | return 0; | 5388 | return 0; |
5378 | 5389 | ||
5379 | case SIOCGIFSLAVE: | 5390 | case SIOCGIFSLAVE: |
5380 | err = -EINVAL; | 5391 | err = -EINVAL; |
5381 | break; | 5392 | break; |
5382 | 5393 | ||
5383 | case SIOCGIFMAP: | 5394 | case SIOCGIFMAP: |
5384 | ifr->ifr_map.mem_start = dev->mem_start; | 5395 | ifr->ifr_map.mem_start = dev->mem_start; |
5385 | ifr->ifr_map.mem_end = dev->mem_end; | 5396 | ifr->ifr_map.mem_end = dev->mem_end; |
5386 | ifr->ifr_map.base_addr = dev->base_addr; | 5397 | ifr->ifr_map.base_addr = dev->base_addr; |
5387 | ifr->ifr_map.irq = dev->irq; | 5398 | ifr->ifr_map.irq = dev->irq; |
5388 | ifr->ifr_map.dma = dev->dma; | 5399 | ifr->ifr_map.dma = dev->dma; |
5389 | ifr->ifr_map.port = dev->if_port; | 5400 | ifr->ifr_map.port = dev->if_port; |
5390 | return 0; | 5401 | return 0; |
5391 | 5402 | ||
5392 | case SIOCGIFINDEX: | 5403 | case SIOCGIFINDEX: |
5393 | ifr->ifr_ifindex = dev->ifindex; | 5404 | ifr->ifr_ifindex = dev->ifindex; |
5394 | return 0; | 5405 | return 0; |
5395 | 5406 | ||
5396 | case SIOCGIFTXQLEN: | 5407 | case SIOCGIFTXQLEN: |
5397 | ifr->ifr_qlen = dev->tx_queue_len; | 5408 | ifr->ifr_qlen = dev->tx_queue_len; |
5398 | return 0; | 5409 | return 0; |
5399 | 5410 | ||
5400 | default: | 5411 | default: |
5401 | /* dev_ioctl() should ensure this case | 5412 | /* dev_ioctl() should ensure this case |
5402 | * is never reached | 5413 | * is never reached |
5403 | */ | 5414 | */ |
5404 | WARN_ON(1); | 5415 | WARN_ON(1); |
5405 | err = -ENOTTY; | 5416 | err = -ENOTTY; |
5406 | break; | 5417 | break; |
5407 | 5418 | ||
5408 | } | 5419 | } |
5409 | return err; | 5420 | return err; |
5410 | } | 5421 | } |
5411 | 5422 | ||
5412 | /* | 5423 | /* |
5413 | * Perform the SIOCxIFxxx calls, inside rtnl_lock() | 5424 | * Perform the SIOCxIFxxx calls, inside rtnl_lock() |
5414 | */ | 5425 | */ |
5415 | static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) | 5426 | static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) |
5416 | { | 5427 | { |
5417 | int err; | 5428 | int err; |
5418 | struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); | 5429 | struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); |
5419 | const struct net_device_ops *ops; | 5430 | const struct net_device_ops *ops; |
5420 | 5431 | ||
5421 | if (!dev) | 5432 | if (!dev) |
5422 | return -ENODEV; | 5433 | return -ENODEV; |
5423 | 5434 | ||
5424 | ops = dev->netdev_ops; | 5435 | ops = dev->netdev_ops; |
5425 | 5436 | ||
5426 | switch (cmd) { | 5437 | switch (cmd) { |
5427 | case SIOCSIFFLAGS: /* Set interface flags */ | 5438 | case SIOCSIFFLAGS: /* Set interface flags */ |
5428 | return dev_change_flags(dev, ifr->ifr_flags); | 5439 | return dev_change_flags(dev, ifr->ifr_flags); |
5429 | 5440 | ||
5430 | case SIOCSIFMETRIC: /* Set the metric on the interface | 5441 | case SIOCSIFMETRIC: /* Set the metric on the interface |
5431 | (currently unused) */ | 5442 | (currently unused) */ |
5432 | return -EOPNOTSUPP; | 5443 | return -EOPNOTSUPP; |
5433 | 5444 | ||
5434 | case SIOCSIFMTU: /* Set the MTU of a device */ | 5445 | case SIOCSIFMTU: /* Set the MTU of a device */ |
5435 | return dev_set_mtu(dev, ifr->ifr_mtu); | 5446 | return dev_set_mtu(dev, ifr->ifr_mtu); |
5436 | 5447 | ||
5437 | case SIOCSIFHWADDR: | 5448 | case SIOCSIFHWADDR: |
5438 | return dev_set_mac_address(dev, &ifr->ifr_hwaddr); | 5449 | return dev_set_mac_address(dev, &ifr->ifr_hwaddr); |
5439 | 5450 | ||
5440 | case SIOCSIFHWBROADCAST: | 5451 | case SIOCSIFHWBROADCAST: |
5441 | if (ifr->ifr_hwaddr.sa_family != dev->type) | 5452 | if (ifr->ifr_hwaddr.sa_family != dev->type) |
5442 | return -EINVAL; | 5453 | return -EINVAL; |
5443 | memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, | 5454 | memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, |
5444 | min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); | 5455 | min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); |
5445 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); | 5456 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); |
5446 | return 0; | 5457 | return 0; |
5447 | 5458 | ||
5448 | case SIOCSIFMAP: | 5459 | case SIOCSIFMAP: |
5449 | if (ops->ndo_set_config) { | 5460 | if (ops->ndo_set_config) { |
5450 | if (!netif_device_present(dev)) | 5461 | if (!netif_device_present(dev)) |
5451 | return -ENODEV; | 5462 | return -ENODEV; |
5452 | return ops->ndo_set_config(dev, &ifr->ifr_map); | 5463 | return ops->ndo_set_config(dev, &ifr->ifr_map); |
5453 | } | 5464 | } |
5454 | return -EOPNOTSUPP; | 5465 | return -EOPNOTSUPP; |
5455 | 5466 | ||
5456 | case SIOCADDMULTI: | 5467 | case SIOCADDMULTI: |
5457 | if (!ops->ndo_set_rx_mode || | 5468 | if (!ops->ndo_set_rx_mode || |
5458 | ifr->ifr_hwaddr.sa_family != AF_UNSPEC) | 5469 | ifr->ifr_hwaddr.sa_family != AF_UNSPEC) |
5459 | return -EINVAL; | 5470 | return -EINVAL; |
5460 | if (!netif_device_present(dev)) | 5471 | if (!netif_device_present(dev)) |
5461 | return -ENODEV; | 5472 | return -ENODEV; |
5462 | return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); | 5473 | return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); |
5463 | 5474 | ||
5464 | case SIOCDELMULTI: | 5475 | case SIOCDELMULTI: |
5465 | if (!ops->ndo_set_rx_mode || | 5476 | if (!ops->ndo_set_rx_mode || |
5466 | ifr->ifr_hwaddr.sa_family != AF_UNSPEC) | 5477 | ifr->ifr_hwaddr.sa_family != AF_UNSPEC) |
5467 | return -EINVAL; | 5478 | return -EINVAL; |
5468 | if (!netif_device_present(dev)) | 5479 | if (!netif_device_present(dev)) |
5469 | return -ENODEV; | 5480 | return -ENODEV; |
5470 | return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data); | 5481 | return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data); |
5471 | 5482 | ||
5472 | case SIOCSIFTXQLEN: | 5483 | case SIOCSIFTXQLEN: |
5473 | if (ifr->ifr_qlen < 0) | 5484 | if (ifr->ifr_qlen < 0) |
5474 | return -EINVAL; | 5485 | return -EINVAL; |
5475 | dev->tx_queue_len = ifr->ifr_qlen; | 5486 | dev->tx_queue_len = ifr->ifr_qlen; |
5476 | return 0; | 5487 | return 0; |
5477 | 5488 | ||
5478 | case SIOCSIFNAME: | 5489 | case SIOCSIFNAME: |
5479 | ifr->ifr_newname[IFNAMSIZ-1] = '\0'; | 5490 | ifr->ifr_newname[IFNAMSIZ-1] = '\0'; |
5480 | return dev_change_name(dev, ifr->ifr_newname); | 5491 | return dev_change_name(dev, ifr->ifr_newname); |
5481 | 5492 | ||
5482 | case SIOCSHWTSTAMP: | 5493 | case SIOCSHWTSTAMP: |
5483 | err = net_hwtstamp_validate(ifr); | 5494 | err = net_hwtstamp_validate(ifr); |
5484 | if (err) | 5495 | if (err) |
5485 | return err; | 5496 | return err; |
5486 | /* fall through */ | 5497 | /* fall through */ |
5487 | 5498 | ||
5488 | /* | 5499 | /* |
5489 | * Unknown or private ioctl | 5500 | * Unknown or private ioctl |
5490 | */ | 5501 | */ |
5491 | default: | 5502 | default: |
5492 | if ((cmd >= SIOCDEVPRIVATE && | 5503 | if ((cmd >= SIOCDEVPRIVATE && |
5493 | cmd <= SIOCDEVPRIVATE + 15) || | 5504 | cmd <= SIOCDEVPRIVATE + 15) || |
5494 | cmd == SIOCBONDENSLAVE || | 5505 | cmd == SIOCBONDENSLAVE || |
5495 | cmd == SIOCBONDRELEASE || | 5506 | cmd == SIOCBONDRELEASE || |
5496 | cmd == SIOCBONDSETHWADDR || | 5507 | cmd == SIOCBONDSETHWADDR || |
5497 | cmd == SIOCBONDSLAVEINFOQUERY || | 5508 | cmd == SIOCBONDSLAVEINFOQUERY || |
5498 | cmd == SIOCBONDINFOQUERY || | 5509 | cmd == SIOCBONDINFOQUERY || |
5499 | cmd == SIOCBONDCHANGEACTIVE || | 5510 | cmd == SIOCBONDCHANGEACTIVE || |
5500 | cmd == SIOCGMIIPHY || | 5511 | cmd == SIOCGMIIPHY || |
5501 | cmd == SIOCGMIIREG || | 5512 | cmd == SIOCGMIIREG || |
5502 | cmd == SIOCSMIIREG || | 5513 | cmd == SIOCSMIIREG || |
5503 | cmd == SIOCBRADDIF || | 5514 | cmd == SIOCBRADDIF || |
5504 | cmd == SIOCBRDELIF || | 5515 | cmd == SIOCBRDELIF || |
5505 | cmd == SIOCSHWTSTAMP || | 5516 | cmd == SIOCSHWTSTAMP || |
5506 | cmd == SIOCWANDEV) { | 5517 | cmd == SIOCWANDEV) { |
5507 | err = -EOPNOTSUPP; | 5518 | err = -EOPNOTSUPP; |
5508 | if (ops->ndo_do_ioctl) { | 5519 | if (ops->ndo_do_ioctl) { |
5509 | if (netif_device_present(dev)) | 5520 | if (netif_device_present(dev)) |
5510 | err = ops->ndo_do_ioctl(dev, ifr, cmd); | 5521 | err = ops->ndo_do_ioctl(dev, ifr, cmd); |
5511 | else | 5522 | else |
5512 | err = -ENODEV; | 5523 | err = -ENODEV; |
5513 | } | 5524 | } |
5514 | } else | 5525 | } else |
5515 | err = -EINVAL; | 5526 | err = -EINVAL; |
5516 | 5527 | ||
5517 | } | 5528 | } |
5518 | return err; | 5529 | return err; |
5519 | } | 5530 | } |
5520 | 5531 | ||
5521 | /* | 5532 | /* |
5522 | * This function handles all "interface"-type I/O control requests. The actual | 5533 | * This function handles all "interface"-type I/O control requests. The actual |
5523 | * 'doing' part of this is dev_ifsioc above. | 5534 | * 'doing' part of this is dev_ifsioc above. |
5524 | */ | 5535 | */ |
5525 | 5536 | ||
5526 | /** | 5537 | /** |
5527 | * dev_ioctl - network device ioctl | 5538 | * dev_ioctl - network device ioctl |
5528 | * @net: the applicable net namespace | 5539 | * @net: the applicable net namespace |
5529 | * @cmd: command to issue | 5540 | * @cmd: command to issue |
5530 | * @arg: pointer to a struct ifreq in user space | 5541 | * @arg: pointer to a struct ifreq in user space |
5531 | * | 5542 | * |
5532 | * Issue ioctl functions to devices. This is normally called by the | 5543 | * Issue ioctl functions to devices. This is normally called by the |
5533 | * user space syscall interfaces but can sometimes be useful for | 5544 | * user space syscall interfaces but can sometimes be useful for |
5534 | * other purposes. The return value is the return from the syscall if | 5545 | * other purposes. The return value is the return from the syscall if |
5535 | * positive or a negative errno code on error. | 5546 | * positive or a negative errno code on error. |
5536 | */ | 5547 | */ |
5537 | 5548 | ||
5538 | int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) | 5549 | int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) |
5539 | { | 5550 | { |
5540 | struct ifreq ifr; | 5551 | struct ifreq ifr; |
5541 | int ret; | 5552 | int ret; |
5542 | char *colon; | 5553 | char *colon; |
5543 | 5554 | ||
5544 | /* One special case: SIOCGIFCONF takes ifconf argument | 5555 | /* One special case: SIOCGIFCONF takes ifconf argument |
5545 | and requires shared lock, because it sleeps writing | 5556 | and requires shared lock, because it sleeps writing |
5546 | to user space. | 5557 | to user space. |
5547 | */ | 5558 | */ |
5548 | 5559 | ||
5549 | if (cmd == SIOCGIFCONF) { | 5560 | if (cmd == SIOCGIFCONF) { |
5550 | rtnl_lock(); | 5561 | rtnl_lock(); |
5551 | ret = dev_ifconf(net, (char __user *) arg); | 5562 | ret = dev_ifconf(net, (char __user *) arg); |
5552 | rtnl_unlock(); | 5563 | rtnl_unlock(); |
5553 | return ret; | 5564 | return ret; |
5554 | } | 5565 | } |
5555 | if (cmd == SIOCGIFNAME) | 5566 | if (cmd == SIOCGIFNAME) |
5556 | return dev_ifname(net, (struct ifreq __user *)arg); | 5567 | return dev_ifname(net, (struct ifreq __user *)arg); |
5557 | 5568 | ||
5558 | if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) | 5569 | if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) |
5559 | return -EFAULT; | 5570 | return -EFAULT; |
5560 | 5571 | ||
5561 | ifr.ifr_name[IFNAMSIZ-1] = 0; | 5572 | ifr.ifr_name[IFNAMSIZ-1] = 0; |
5562 | 5573 | ||
5563 | colon = strchr(ifr.ifr_name, ':'); | 5574 | colon = strchr(ifr.ifr_name, ':'); |
5564 | if (colon) | 5575 | if (colon) |
5565 | *colon = 0; | 5576 | *colon = 0; |
5566 | 5577 | ||
5567 | /* | 5578 | /* |
5568 | * See which interface the caller is talking about. | 5579 | * See which interface the caller is talking about. |
5569 | */ | 5580 | */ |
5570 | 5581 | ||
5571 | switch (cmd) { | 5582 | switch (cmd) { |
5572 | /* | 5583 | /* |
5573 | * These ioctl calls: | 5584 | * These ioctl calls: |
5574 | * - can be done by all. | 5585 | * - can be done by all. |
5575 | * - atomic and do not require locking. | 5586 | * - atomic and do not require locking. |
5576 | * - return a value | 5587 | * - return a value |
5577 | */ | 5588 | */ |
5578 | case SIOCGIFFLAGS: | 5589 | case SIOCGIFFLAGS: |
5579 | case SIOCGIFMETRIC: | 5590 | case SIOCGIFMETRIC: |
5580 | case SIOCGIFMTU: | 5591 | case SIOCGIFMTU: |
5581 | case SIOCGIFHWADDR: | 5592 | case SIOCGIFHWADDR: |
5582 | case SIOCGIFSLAVE: | 5593 | case SIOCGIFSLAVE: |
5583 | case SIOCGIFMAP: | 5594 | case SIOCGIFMAP: |
5584 | case SIOCGIFINDEX: | 5595 | case SIOCGIFINDEX: |
5585 | case SIOCGIFTXQLEN: | 5596 | case SIOCGIFTXQLEN: |
5586 | dev_load(net, ifr.ifr_name); | 5597 | dev_load(net, ifr.ifr_name); |
5587 | rcu_read_lock(); | 5598 | rcu_read_lock(); |
5588 | ret = dev_ifsioc_locked(net, &ifr, cmd); | 5599 | ret = dev_ifsioc_locked(net, &ifr, cmd); |
5589 | rcu_read_unlock(); | 5600 | rcu_read_unlock(); |
5590 | if (!ret) { | 5601 | if (!ret) { |
5591 | if (colon) | 5602 | if (colon) |
5592 | *colon = ':'; | 5603 | *colon = ':'; |
5593 | if (copy_to_user(arg, &ifr, | 5604 | if (copy_to_user(arg, &ifr, |
5594 | sizeof(struct ifreq))) | 5605 | sizeof(struct ifreq))) |
5595 | ret = -EFAULT; | 5606 | ret = -EFAULT; |
5596 | } | 5607 | } |
5597 | return ret; | 5608 | return ret; |
5598 | 5609 | ||
5599 | case SIOCETHTOOL: | 5610 | case SIOCETHTOOL: |
5600 | dev_load(net, ifr.ifr_name); | 5611 | dev_load(net, ifr.ifr_name); |
5601 | rtnl_lock(); | 5612 | rtnl_lock(); |
5602 | ret = dev_ethtool(net, &ifr); | 5613 | ret = dev_ethtool(net, &ifr); |
5603 | rtnl_unlock(); | 5614 | rtnl_unlock(); |
5604 | if (!ret) { | 5615 | if (!ret) { |
5605 | if (colon) | 5616 | if (colon) |
5606 | *colon = ':'; | 5617 | *colon = ':'; |
5607 | if (copy_to_user(arg, &ifr, | 5618 | if (copy_to_user(arg, &ifr, |
5608 | sizeof(struct ifreq))) | 5619 | sizeof(struct ifreq))) |
5609 | ret = -EFAULT; | 5620 | ret = -EFAULT; |
5610 | } | 5621 | } |
5611 | return ret; | 5622 | return ret; |
5612 | 5623 | ||
5613 | /* | 5624 | /* |
5614 | * These ioctl calls: | 5625 | * These ioctl calls: |
5615 | * - require superuser power. | 5626 | * - require superuser power. |
5616 | * - require strict serialization. | 5627 | * - require strict serialization. |
5617 | * - return a value | 5628 | * - return a value |
5618 | */ | 5629 | */ |
5619 | case SIOCGMIIPHY: | 5630 | case SIOCGMIIPHY: |
5620 | case SIOCGMIIREG: | 5631 | case SIOCGMIIREG: |
5621 | case SIOCSIFNAME: | 5632 | case SIOCSIFNAME: |
5622 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) | 5633 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) |
5623 | return -EPERM; | 5634 | return -EPERM; |
5624 | dev_load(net, ifr.ifr_name); | 5635 | dev_load(net, ifr.ifr_name); |
5625 | rtnl_lock(); | 5636 | rtnl_lock(); |
5626 | ret = dev_ifsioc(net, &ifr, cmd); | 5637 | ret = dev_ifsioc(net, &ifr, cmd); |
5627 | rtnl_unlock(); | 5638 | rtnl_unlock(); |
5628 | if (!ret) { | 5639 | if (!ret) { |
5629 | if (colon) | 5640 | if (colon) |
5630 | *colon = ':'; | 5641 | *colon = ':'; |
5631 | if (copy_to_user(arg, &ifr, | 5642 | if (copy_to_user(arg, &ifr, |
5632 | sizeof(struct ifreq))) | 5643 | sizeof(struct ifreq))) |
5633 | ret = -EFAULT; | 5644 | ret = -EFAULT; |
5634 | } | 5645 | } |
5635 | return ret; | 5646 | return ret; |
5636 | 5647 | ||
5637 | /* | 5648 | /* |
5638 | * These ioctl calls: | 5649 | * These ioctl calls: |
5639 | * - require superuser power. | 5650 | * - require superuser power. |
5640 | * - require strict serialization. | 5651 | * - require strict serialization. |
5641 | * - do not return a value | 5652 | * - do not return a value |
5642 | */ | 5653 | */ |
5643 | case SIOCSIFMAP: | 5654 | case SIOCSIFMAP: |
5644 | case SIOCSIFTXQLEN: | 5655 | case SIOCSIFTXQLEN: |
5645 | if (!capable(CAP_NET_ADMIN)) | 5656 | if (!capable(CAP_NET_ADMIN)) |
5646 | return -EPERM; | 5657 | return -EPERM; |
5647 | /* fall through */ | 5658 | /* fall through */ |
5648 | /* | 5659 | /* |
5649 | * These ioctl calls: | 5660 | * These ioctl calls: |
5650 | * - require local superuser power. | 5661 | * - require local superuser power. |
5651 | * - require strict serialization. | 5662 | * - require strict serialization. |
5652 | * - do not return a value | 5663 | * - do not return a value |
5653 | */ | 5664 | */ |
5654 | case SIOCSIFFLAGS: | 5665 | case SIOCSIFFLAGS: |
5655 | case SIOCSIFMETRIC: | 5666 | case SIOCSIFMETRIC: |
5656 | case SIOCSIFMTU: | 5667 | case SIOCSIFMTU: |
5657 | case SIOCSIFHWADDR: | 5668 | case SIOCSIFHWADDR: |
5658 | case SIOCSIFSLAVE: | 5669 | case SIOCSIFSLAVE: |
5659 | case SIOCADDMULTI: | 5670 | case SIOCADDMULTI: |
5660 | case SIOCDELMULTI: | 5671 | case SIOCDELMULTI: |
5661 | case SIOCSIFHWBROADCAST: | 5672 | case SIOCSIFHWBROADCAST: |
5662 | case SIOCSMIIREG: | 5673 | case SIOCSMIIREG: |
5663 | case SIOCBONDENSLAVE: | 5674 | case SIOCBONDENSLAVE: |
5664 | case SIOCBONDRELEASE: | 5675 | case SIOCBONDRELEASE: |
5665 | case SIOCBONDSETHWADDR: | 5676 | case SIOCBONDSETHWADDR: |
5666 | case SIOCBONDCHANGEACTIVE: | 5677 | case SIOCBONDCHANGEACTIVE: |
5667 | case SIOCBRADDIF: | 5678 | case SIOCBRADDIF: |
5668 | case SIOCBRDELIF: | 5679 | case SIOCBRDELIF: |
5669 | case SIOCSHWTSTAMP: | 5680 | case SIOCSHWTSTAMP: |
5670 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) | 5681 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) |
5671 | return -EPERM; | 5682 | return -EPERM; |
5672 | /* fall through */ | 5683 | /* fall through */ |
5673 | case SIOCBONDSLAVEINFOQUERY: | 5684 | case SIOCBONDSLAVEINFOQUERY: |
5674 | case SIOCBONDINFOQUERY: | 5685 | case SIOCBONDINFOQUERY: |
5675 | dev_load(net, ifr.ifr_name); | 5686 | dev_load(net, ifr.ifr_name); |
5676 | rtnl_lock(); | 5687 | rtnl_lock(); |
5677 | ret = dev_ifsioc(net, &ifr, cmd); | 5688 | ret = dev_ifsioc(net, &ifr, cmd); |
5678 | rtnl_unlock(); | 5689 | rtnl_unlock(); |
5679 | return ret; | 5690 | return ret; |
5680 | 5691 | ||
5681 | case SIOCGIFMEM: | 5692 | case SIOCGIFMEM: |
5682 | /* Get the per device memory space. We can add this but | 5693 | /* Get the per device memory space. We can add this but |
5683 | * currently do not support it */ | 5694 | * currently do not support it */ |
5684 | case SIOCSIFMEM: | 5695 | case SIOCSIFMEM: |
5685 | /* Set the per device memory buffer space. | 5696 | /* Set the per device memory buffer space. |
5686 | * Not applicable in our case */ | 5697 | * Not applicable in our case */ |
5687 | case SIOCSIFLINK: | 5698 | case SIOCSIFLINK: |
5688 | return -ENOTTY; | 5699 | return -ENOTTY; |
5689 | 5700 | ||
5690 | /* | 5701 | /* |
5691 | * Unknown or private ioctl. | 5702 | * Unknown or private ioctl. |
5692 | */ | 5703 | */ |
5693 | default: | 5704 | default: |
5694 | if (cmd == SIOCWANDEV || | 5705 | if (cmd == SIOCWANDEV || |
5695 | (cmd >= SIOCDEVPRIVATE && | 5706 | (cmd >= SIOCDEVPRIVATE && |
5696 | cmd <= SIOCDEVPRIVATE + 15)) { | 5707 | cmd <= SIOCDEVPRIVATE + 15)) { |
5697 | dev_load(net, ifr.ifr_name); | 5708 | dev_load(net, ifr.ifr_name); |
5698 | rtnl_lock(); | 5709 | rtnl_lock(); |
5699 | ret = dev_ifsioc(net, &ifr, cmd); | 5710 | ret = dev_ifsioc(net, &ifr, cmd); |
5700 | rtnl_unlock(); | 5711 | rtnl_unlock(); |
5701 | if (!ret && copy_to_user(arg, &ifr, | 5712 | if (!ret && copy_to_user(arg, &ifr, |
5702 | sizeof(struct ifreq))) | 5713 | sizeof(struct ifreq))) |
5703 | ret = -EFAULT; | 5714 | ret = -EFAULT; |
5704 | return ret; | 5715 | return ret; |
5705 | } | 5716 | } |
5706 | /* Take care of Wireless Extensions */ | 5717 | /* Take care of Wireless Extensions */ |
5707 | if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) | 5718 | if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) |
5708 | return wext_handle_ioctl(net, &ifr, cmd, arg); | 5719 | return wext_handle_ioctl(net, &ifr, cmd, arg); |
5709 | return -ENOTTY; | 5720 | return -ENOTTY; |
5710 | } | 5721 | } |
5711 | } | 5722 | } |
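For context, requests like the ones handled above normally arrive from user space via ioctl(2) on an ordinary socket. A minimal caller exercising the SIOCGIFMTU branch could look like the sketch below (illustrative only; the interface name "eth0" is an assumption):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>

int main(void)
{
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);	/* any socket works as an ioctl handle */

	if (fd < 0)
		return 1;
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);	/* assumed interface name */
	if (ioctl(fd, SIOCGIFMTU, &ifr) == 0)		/* serviced by dev_ifsioc_locked() */
		printf("mtu=%d\n", ifr.ifr_mtu);
	close(fd);
	return 0;
}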
5712 | 5723 | ||
5713 | 5724 | ||
5714 | /** | 5725 | /** |
5715 | * dev_new_index - allocate an ifindex | 5726 | * dev_new_index - allocate an ifindex |
5716 | * @net: the applicable net namespace | 5727 | * @net: the applicable net namespace |
5717 | * | 5728 | * |
5718 | * Returns a suitable unique value for a new device interface | 5729 | * Returns a suitable unique value for a new device interface |
5719 | * number. The caller must hold the rtnl semaphore or the | 5730 | * number. The caller must hold the rtnl semaphore or the |
5720 | * dev_base_lock to be sure it remains unique. | 5731 | * dev_base_lock to be sure it remains unique. |
5721 | */ | 5732 | */ |
5722 | static int dev_new_index(struct net *net) | 5733 | static int dev_new_index(struct net *net) |
5723 | { | 5734 | { |
5724 | int ifindex = net->ifindex; | 5735 | int ifindex = net->ifindex; |
5725 | for (;;) { | 5736 | for (;;) { |
5726 | if (++ifindex <= 0) | 5737 | if (++ifindex <= 0) |
5727 | ifindex = 1; | 5738 | ifindex = 1; |
5728 | if (!__dev_get_by_index(net, ifindex)) | 5739 | if (!__dev_get_by_index(net, ifindex)) |
5729 | return net->ifindex = ifindex; | 5740 | return net->ifindex = ifindex; |
5730 | } | 5741 | } |
5731 | } | 5742 | } |
5732 | 5743 | ||
5733 | /* Delayed registration/unregistration */ | 5744 | /* Delayed registration/unregistration */ |
5734 | static LIST_HEAD(net_todo_list); | 5745 | static LIST_HEAD(net_todo_list); |
5735 | 5746 | ||
5736 | static void net_set_todo(struct net_device *dev) | 5747 | static void net_set_todo(struct net_device *dev) |
5737 | { | 5748 | { |
5738 | list_add_tail(&dev->todo_list, &net_todo_list); | 5749 | list_add_tail(&dev->todo_list, &net_todo_list); |
5739 | } | 5750 | } |
5740 | 5751 | ||
5741 | static void rollback_registered_many(struct list_head *head) | 5752 | static void rollback_registered_many(struct list_head *head) |
5742 | { | 5753 | { |
5743 | struct net_device *dev, *tmp; | 5754 | struct net_device *dev, *tmp; |
5744 | 5755 | ||
5745 | BUG_ON(dev_boot_phase); | 5756 | BUG_ON(dev_boot_phase); |
5746 | ASSERT_RTNL(); | 5757 | ASSERT_RTNL(); |
5747 | 5758 | ||
5748 | list_for_each_entry_safe(dev, tmp, head, unreg_list) { | 5759 | list_for_each_entry_safe(dev, tmp, head, unreg_list) { |
5749 | /* Some devices call without registering | 5760 | /* Some devices call without registering |
5750 | * for initialization unwind. Remove those | 5761 | * for initialization unwind. Remove those |
5751 | * devices and proceed with the remaining. | 5762 | * devices and proceed with the remaining. |
5752 | */ | 5763 | */ |
5753 | if (dev->reg_state == NETREG_UNINITIALIZED) { | 5764 | if (dev->reg_state == NETREG_UNINITIALIZED) { |
5754 | pr_debug("unregister_netdevice: device %s/%p never was registered\n", | 5765 | pr_debug("unregister_netdevice: device %s/%p never was registered\n", |
5755 | dev->name, dev); | 5766 | dev->name, dev); |
5756 | 5767 | ||
5757 | WARN_ON(1); | 5768 | WARN_ON(1); |
5758 | list_del(&dev->unreg_list); | 5769 | list_del(&dev->unreg_list); |
5759 | continue; | 5770 | continue; |
5760 | } | 5771 | } |
5761 | dev->dismantle = true; | 5772 | dev->dismantle = true; |
5762 | BUG_ON(dev->reg_state != NETREG_REGISTERED); | 5773 | BUG_ON(dev->reg_state != NETREG_REGISTERED); |
5763 | } | 5774 | } |
5764 | 5775 | ||
5765 | /* If device is running, close it first. */ | 5776 | /* If device is running, close it first. */ |
5766 | dev_close_many(head); | 5777 | dev_close_many(head); |
5767 | 5778 | ||
5768 | list_for_each_entry(dev, head, unreg_list) { | 5779 | list_for_each_entry(dev, head, unreg_list) { |
5769 | /* And unlink it from device chain. */ | 5780 | /* And unlink it from device chain. */ |
5770 | unlist_netdevice(dev); | 5781 | unlist_netdevice(dev); |
5771 | 5782 | ||
5772 | dev->reg_state = NETREG_UNREGISTERING; | 5783 | dev->reg_state = NETREG_UNREGISTERING; |
5773 | } | 5784 | } |
5774 | 5785 | ||
5775 | synchronize_net(); | 5786 | synchronize_net(); |
5776 | 5787 | ||
5777 | list_for_each_entry(dev, head, unreg_list) { | 5788 | list_for_each_entry(dev, head, unreg_list) { |
5778 | /* Shutdown queueing discipline. */ | 5789 | /* Shutdown queueing discipline. */ |
5779 | dev_shutdown(dev); | 5790 | dev_shutdown(dev); |
5780 | 5791 | ||
5781 | 5792 | ||
5782 | /* Notify protocols that we are about to destroy | 5793 | /* Notify protocols that we are about to destroy |
5783 | this device. They should clean all the things. | 5794 | this device. They should clean all the things. |
5784 | */ | 5795 | */ |
5785 | call_netdevice_notifiers(NETDEV_UNREGISTER, dev); | 5796 | call_netdevice_notifiers(NETDEV_UNREGISTER, dev); |
5786 | 5797 | ||
5787 | if (!dev->rtnl_link_ops || | 5798 | if (!dev->rtnl_link_ops || |
5788 | dev->rtnl_link_state == RTNL_LINK_INITIALIZED) | 5799 | dev->rtnl_link_state == RTNL_LINK_INITIALIZED) |
5789 | rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); | 5800 | rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); |
5790 | 5801 | ||
5791 | /* | 5802 | /* |
5792 | * Flush the unicast and multicast chains | 5803 | * Flush the unicast and multicast chains |
5793 | */ | 5804 | */ |
5794 | dev_uc_flush(dev); | 5805 | dev_uc_flush(dev); |
5795 | dev_mc_flush(dev); | 5806 | dev_mc_flush(dev); |
5796 | 5807 | ||
5797 | if (dev->netdev_ops->ndo_uninit) | 5808 | if (dev->netdev_ops->ndo_uninit) |
5798 | dev->netdev_ops->ndo_uninit(dev); | 5809 | dev->netdev_ops->ndo_uninit(dev); |
5799 | 5810 | ||
5800 | /* Notifier chain MUST detach all upper devices from us. */ | 5811 | /* Notifier chain MUST detach all upper devices from us. */ |
5801 | WARN_ON(netdev_has_any_upper_dev(dev)); | 5812 | WARN_ON(netdev_has_any_upper_dev(dev)); |
5802 | 5813 | ||
5803 | /* Remove entries from kobject tree */ | 5814 | /* Remove entries from kobject tree */ |
5804 | netdev_unregister_kobject(dev); | 5815 | netdev_unregister_kobject(dev); |
5805 | #ifdef CONFIG_XPS | 5816 | #ifdef CONFIG_XPS |
5806 | /* Remove XPS queueing entries */ | 5817 | /* Remove XPS queueing entries */ |
5807 | netif_reset_xps_queues_gt(dev, 0); | 5818 | netif_reset_xps_queues_gt(dev, 0); |
5808 | #endif | 5819 | #endif |
5809 | } | 5820 | } |
5810 | 5821 | ||
5811 | synchronize_net(); | 5822 | synchronize_net(); |
5812 | 5823 | ||
5813 | list_for_each_entry(dev, head, unreg_list) | 5824 | list_for_each_entry(dev, head, unreg_list) |
5814 | dev_put(dev); | 5825 | dev_put(dev); |
5815 | } | 5826 | } |
5816 | 5827 | ||
5817 | static void rollback_registered(struct net_device *dev) | 5828 | static void rollback_registered(struct net_device *dev) |
5818 | { | 5829 | { |
5819 | LIST_HEAD(single); | 5830 | LIST_HEAD(single); |
5820 | 5831 | ||
5821 | list_add(&dev->unreg_list, &single); | 5832 | list_add(&dev->unreg_list, &single); |
5822 | rollback_registered_many(&single); | 5833 | rollback_registered_many(&single); |
5823 | list_del(&single); | 5834 | list_del(&single); |
5824 | } | 5835 | } |
5825 | 5836 | ||
5826 | static netdev_features_t netdev_fix_features(struct net_device *dev, | 5837 | static netdev_features_t netdev_fix_features(struct net_device *dev, |
5827 | netdev_features_t features) | 5838 | netdev_features_t features) |
5828 | { | 5839 | { |
5829 | /* Fix illegal checksum combinations */ | 5840 | /* Fix illegal checksum combinations */ |
5830 | if ((features & NETIF_F_HW_CSUM) && | 5841 | if ((features & NETIF_F_HW_CSUM) && |
5831 | (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { | 5842 | (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { |
5832 | netdev_warn(dev, "mixed HW and IP checksum settings.\n"); | 5843 | netdev_warn(dev, "mixed HW and IP checksum settings.\n"); |
5833 | features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); | 5844 | features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); |
5834 | } | 5845 | } |
5835 | 5846 | ||
5836 | /* Fix illegal SG+CSUM combinations. */ | 5847 | /* Fix illegal SG+CSUM combinations. */ |
5837 | if ((features & NETIF_F_SG) && | 5848 | if ((features & NETIF_F_SG) && |
5838 | !(features & NETIF_F_ALL_CSUM)) { | 5849 | !(features & NETIF_F_ALL_CSUM)) { |
5839 | netdev_dbg(dev, | 5850 | netdev_dbg(dev, |
5840 | "Dropping NETIF_F_SG since no checksum feature.\n"); | 5851 | "Dropping NETIF_F_SG since no checksum feature.\n"); |
5841 | features &= ~NETIF_F_SG; | 5852 | features &= ~NETIF_F_SG; |
5842 | } | 5853 | } |
5843 | 5854 | ||
5844 | /* TSO requires that SG is present as well. */ | 5855 | /* TSO requires that SG is present as well. */ |
5845 | if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) { | 5856 | if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) { |
5846 | netdev_dbg(dev, "Dropping TSO features since no SG feature.\n"); | 5857 | netdev_dbg(dev, "Dropping TSO features since no SG feature.\n"); |
5847 | features &= ~NETIF_F_ALL_TSO; | 5858 | features &= ~NETIF_F_ALL_TSO; |
5848 | } | 5859 | } |
5849 | 5860 | ||
5850 | /* TSO ECN requires that TSO is present as well. */ | 5861 | /* TSO ECN requires that TSO is present as well. */ |
5851 | if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN) | 5862 | if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN) |
5852 | features &= ~NETIF_F_TSO_ECN; | 5863 | features &= ~NETIF_F_TSO_ECN; |
5853 | 5864 | ||
5854 | /* Software GSO depends on SG. */ | 5865 | /* Software GSO depends on SG. */ |
5855 | if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) { | 5866 | if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) { |
5856 | netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n"); | 5867 | netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n"); |
5857 | features &= ~NETIF_F_GSO; | 5868 | features &= ~NETIF_F_GSO; |
5858 | } | 5869 | } |
5859 | 5870 | ||
5860 | /* UFO needs SG and checksumming */ | 5871 | /* UFO needs SG and checksumming */ |
5861 | if (features & NETIF_F_UFO) { | 5872 | if (features & NETIF_F_UFO) { |
5862 | /* maybe split UFO into V4 and V6? */ | 5873 | /* maybe split UFO into V4 and V6? */ |
5863 | if (!((features & NETIF_F_GEN_CSUM) || | 5874 | if (!((features & NETIF_F_GEN_CSUM) || |
5864 | (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) | 5875 | (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) |
5865 | == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { | 5876 | == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { |
5866 | netdev_dbg(dev, | 5877 | netdev_dbg(dev, |
5867 | "Dropping NETIF_F_UFO since no checksum offload features.\n"); | 5878 | "Dropping NETIF_F_UFO since no checksum offload features.\n"); |
5868 | features &= ~NETIF_F_UFO; | 5879 | features &= ~NETIF_F_UFO; |
5869 | } | 5880 | } |
5870 | 5881 | ||
5871 | if (!(features & NETIF_F_SG)) { | 5882 | if (!(features & NETIF_F_SG)) { |
5872 | netdev_dbg(dev, | 5883 | netdev_dbg(dev, |
5873 | "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n"); | 5884 | "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n"); |
5874 | features &= ~NETIF_F_UFO; | 5885 | features &= ~NETIF_F_UFO; |
5875 | } | 5886 | } |
5876 | } | 5887 | } |
5877 | 5888 | ||
5878 | return features; | 5889 | return features; |
5879 | } | 5890 | } |
5880 | 5891 | ||
5881 | int __netdev_update_features(struct net_device *dev) | 5892 | int __netdev_update_features(struct net_device *dev) |
5882 | { | 5893 | { |
5883 | netdev_features_t features; | 5894 | netdev_features_t features; |
5884 | int err = 0; | 5895 | int err = 0; |
5885 | 5896 | ||
5886 | ASSERT_RTNL(); | 5897 | ASSERT_RTNL(); |
5887 | 5898 | ||
5888 | features = netdev_get_wanted_features(dev); | 5899 | features = netdev_get_wanted_features(dev); |
5889 | 5900 | ||
5890 | if (dev->netdev_ops->ndo_fix_features) | 5901 | if (dev->netdev_ops->ndo_fix_features) |
5891 | features = dev->netdev_ops->ndo_fix_features(dev, features); | 5902 | features = dev->netdev_ops->ndo_fix_features(dev, features); |
5892 | 5903 | ||
5893 | /* driver might be less strict about feature dependencies */ | 5904 | /* driver might be less strict about feature dependencies */ |
5894 | features = netdev_fix_features(dev, features); | 5905 | features = netdev_fix_features(dev, features); |
5895 | 5906 | ||
5896 | if (dev->features == features) | 5907 | if (dev->features == features) |
5897 | return 0; | 5908 | return 0; |
5898 | 5909 | ||
5899 | netdev_dbg(dev, "Features changed: %pNF -> %pNF\n", | 5910 | netdev_dbg(dev, "Features changed: %pNF -> %pNF\n", |
5900 | &dev->features, &features); | 5911 | &dev->features, &features); |
5901 | 5912 | ||
5902 | if (dev->netdev_ops->ndo_set_features) | 5913 | if (dev->netdev_ops->ndo_set_features) |
5903 | err = dev->netdev_ops->ndo_set_features(dev, features); | 5914 | err = dev->netdev_ops->ndo_set_features(dev, features); |
5904 | 5915 | ||
5905 | if (unlikely(err < 0)) { | 5916 | if (unlikely(err < 0)) { |
5906 | netdev_err(dev, | 5917 | netdev_err(dev, |
5907 | "set_features() failed (%d); wanted %pNF, left %pNF\n", | 5918 | "set_features() failed (%d); wanted %pNF, left %pNF\n", |
5908 | err, &features, &dev->features); | 5919 | err, &features, &dev->features); |
5909 | return -1; | 5920 | return -1; |
5910 | } | 5921 | } |
5911 | 5922 | ||
5912 | if (!err) | 5923 | if (!err) |
5913 | dev->features = features; | 5924 | dev->features = features; |
5914 | 5925 | ||
5915 | return 1; | 5926 | return 1; |
5916 | } | 5927 | } |
5917 | 5928 | ||
5918 | /** | 5929 | /** |
5919 | * netdev_update_features - recalculate device features | 5930 | * netdev_update_features - recalculate device features |
5920 | * @dev: the device to check | 5931 | * @dev: the device to check |
5921 | * | 5932 | * |
5922 | * Recalculate dev->features set and send notifications if it | 5933 | * Recalculate dev->features set and send notifications if it |
5923 | * has changed. Should be called after driver or hardware dependent | 5934 | * has changed. Should be called after driver or hardware dependent |
5924 | * conditions might have changed that influence the features. | 5935 | * conditions might have changed that influence the features. |
5925 | */ | 5936 | */ |
5926 | void netdev_update_features(struct net_device *dev) | 5937 | void netdev_update_features(struct net_device *dev) |
5927 | { | 5938 | { |
5928 | if (__netdev_update_features(dev)) | 5939 | if (__netdev_update_features(dev)) |
5929 | netdev_features_change(dev); | 5940 | netdev_features_change(dev); |
5930 | } | 5941 | } |
5931 | EXPORT_SYMBOL(netdev_update_features); | 5942 | EXPORT_SYMBOL(netdev_update_features); |
5932 | 5943 | ||
5933 | /** | 5944 | /** |
5934 | * netdev_change_features - recalculate device features | 5945 | * netdev_change_features - recalculate device features |
5935 | * @dev: the device to check | 5946 | * @dev: the device to check |
5936 | * | 5947 | * |
5937 | * Recalculate dev->features set and send notifications even | 5948 | * Recalculate dev->features set and send notifications even |
5938 | * if they have not changed. Should be called instead of | 5949 | * if they have not changed. Should be called instead of |
5939 | * netdev_update_features() if also dev->vlan_features might | 5950 | * netdev_update_features() if also dev->vlan_features might |
5940 | * have changed to allow the changes to be propagated to stacked | 5951 | * have changed to allow the changes to be propagated to stacked |
5941 | * VLAN devices. | 5952 | * VLAN devices. |
5942 | */ | 5953 | */ |
5943 | void netdev_change_features(struct net_device *dev) | 5954 | void netdev_change_features(struct net_device *dev) |
5944 | { | 5955 | { |
5945 | __netdev_update_features(dev); | 5956 | __netdev_update_features(dev); |
5946 | netdev_features_change(dev); | 5957 | netdev_features_change(dev); |
5947 | } | 5958 | } |
5948 | EXPORT_SYMBOL(netdev_change_features); | 5959 | EXPORT_SYMBOL(netdev_change_features); |
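To illustrate when a driver would call netdev_update_features(), here is a rough sketch of a (hypothetical) NIC whose TSO support depends on the MTU; after flipping hw_features the driver lets the core recompute dev->features and emit notifications:

#include <linux/netdevice.h>

/* Hypothetical ndo_change_mtu implementation; runs under rtnl_lock(). */
static int mydrv_change_mtu(struct net_device *dev, int new_mtu)
{
	dev->mtu = new_mtu;

	/* Assume this hardware cannot do TSO with jumbo frames. */
	if (new_mtu > 4096)
		dev->hw_features &= ~NETIF_F_ALL_TSO;
	else
		dev->hw_features |= NETIF_F_ALL_TSO;

	netdev_update_features(dev);	/* recompute and notify only if changed */
	return 0;
}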
5949 | 5960 | ||
5950 | /** | 5961 | /** |
5951 | * netif_stacked_transfer_operstate - transfer operstate | 5962 | * netif_stacked_transfer_operstate - transfer operstate |
5952 | * @rootdev: the root or lower level device to transfer state from | 5963 | * @rootdev: the root or lower level device to transfer state from |
5953 | * @dev: the device to transfer operstate to | 5964 | * @dev: the device to transfer operstate to |
5954 | * | 5965 | * |
5955 | * Transfer operational state from root to device. This is normally | 5966 | * Transfer operational state from root to device. This is normally |
5956 | * called when a stacking relationship exists between the root | 5967 | * called when a stacking relationship exists between the root |
5957 | * device and the device (a leaf device). | 5968 | * device and the device (a leaf device). |
5958 | */ | 5969 | */ |
5959 | void netif_stacked_transfer_operstate(const struct net_device *rootdev, | 5970 | void netif_stacked_transfer_operstate(const struct net_device *rootdev, |
5960 | struct net_device *dev) | 5971 | struct net_device *dev) |
5961 | { | 5972 | { |
5962 | if (rootdev->operstate == IF_OPER_DORMANT) | 5973 | if (rootdev->operstate == IF_OPER_DORMANT) |
5963 | netif_dormant_on(dev); | 5974 | netif_dormant_on(dev); |
5964 | else | 5975 | else |
5965 | netif_dormant_off(dev); | 5976 | netif_dormant_off(dev); |
5966 | 5977 | ||
5967 | if (netif_carrier_ok(rootdev)) { | 5978 | if (netif_carrier_ok(rootdev)) { |
5968 | if (!netif_carrier_ok(dev)) | 5979 | if (!netif_carrier_ok(dev)) |
5969 | netif_carrier_on(dev); | 5980 | netif_carrier_on(dev); |
5970 | } else { | 5981 | } else { |
5971 | if (netif_carrier_ok(dev)) | 5982 | if (netif_carrier_ok(dev)) |
5972 | netif_carrier_off(dev); | 5983 | netif_carrier_off(dev); |
5973 | } | 5984 | } |
5974 | } | 5985 | } |
5975 | EXPORT_SYMBOL(netif_stacked_transfer_operstate); | 5986 | EXPORT_SYMBOL(netif_stacked_transfer_operstate); |
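A stacking driver typically calls this helper from its netdevice notifier when the lower device changes state. A sketch for a kernel of this vintage, where the notifier's data pointer is the net_device itself, might look as follows; mydrv_get_upper() is an assumed lookup helper, not a real API:

#include <linux/netdevice.h>
#include <linux/notifier.h>

static struct net_device *mydrv_get_upper(struct net_device *lower);	/* hypothetical */

static int mydrv_netdev_event(struct notifier_block *nb,
			      unsigned long event, void *ptr)
{
	struct net_device *lower = ptr;				/* notifier payload in this era */
	struct net_device *upper = mydrv_get_upper(lower);	/* hypothetical lookup */

	if (upper && event == NETDEV_CHANGE)
		netif_stacked_transfer_operstate(lower, upper);
	return NOTIFY_DONE;
}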
5976 | 5987 | ||
5977 | #ifdef CONFIG_RPS | 5988 | #ifdef CONFIG_RPS |
5978 | static int netif_alloc_rx_queues(struct net_device *dev) | 5989 | static int netif_alloc_rx_queues(struct net_device *dev) |
5979 | { | 5990 | { |
5980 | unsigned int i, count = dev->num_rx_queues; | 5991 | unsigned int i, count = dev->num_rx_queues; |
5981 | struct netdev_rx_queue *rx; | 5992 | struct netdev_rx_queue *rx; |
5982 | 5993 | ||
5983 | BUG_ON(count < 1); | 5994 | BUG_ON(count < 1); |
5984 | 5995 | ||
5985 | rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL); | 5996 | rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL); |
5986 | if (!rx) | 5997 | if (!rx) |
5987 | return -ENOMEM; | 5998 | return -ENOMEM; |
5988 | 5999 | ||
5989 | dev->_rx = rx; | 6000 | dev->_rx = rx; |
5990 | 6001 | ||
5991 | for (i = 0; i < count; i++) | 6002 | for (i = 0; i < count; i++) |
5992 | rx[i].dev = dev; | 6003 | rx[i].dev = dev; |
5993 | return 0; | 6004 | return 0; |
5994 | } | 6005 | } |
5995 | #endif | 6006 | #endif |
5996 | 6007 | ||
5997 | static void netdev_init_one_queue(struct net_device *dev, | 6008 | static void netdev_init_one_queue(struct net_device *dev, |
5998 | struct netdev_queue *queue, void *_unused) | 6009 | struct netdev_queue *queue, void *_unused) |
5999 | { | 6010 | { |
6000 | /* Initialize queue lock */ | 6011 | /* Initialize queue lock */ |
6001 | spin_lock_init(&queue->_xmit_lock); | 6012 | spin_lock_init(&queue->_xmit_lock); |
6002 | netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); | 6013 | netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); |
6003 | queue->xmit_lock_owner = -1; | 6014 | queue->xmit_lock_owner = -1; |
6004 | netdev_queue_numa_node_write(queue, NUMA_NO_NODE); | 6015 | netdev_queue_numa_node_write(queue, NUMA_NO_NODE); |
6005 | queue->dev = dev; | 6016 | queue->dev = dev; |
6006 | #ifdef CONFIG_BQL | 6017 | #ifdef CONFIG_BQL |
6007 | dql_init(&queue->dql, HZ); | 6018 | dql_init(&queue->dql, HZ); |
6008 | #endif | 6019 | #endif |
6009 | } | 6020 | } |
6010 | 6021 | ||
6011 | static int netif_alloc_netdev_queues(struct net_device *dev) | 6022 | static int netif_alloc_netdev_queues(struct net_device *dev) |
6012 | { | 6023 | { |
6013 | unsigned int count = dev->num_tx_queues; | 6024 | unsigned int count = dev->num_tx_queues; |
6014 | struct netdev_queue *tx; | 6025 | struct netdev_queue *tx; |
6015 | 6026 | ||
6016 | BUG_ON(count < 1); | 6027 | BUG_ON(count < 1); |
6017 | 6028 | ||
6018 | tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL); | 6029 | tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL); |
6019 | if (!tx) | 6030 | if (!tx) |
6020 | return -ENOMEM; | 6031 | return -ENOMEM; |
6021 | 6032 | ||
6022 | dev->_tx = tx; | 6033 | dev->_tx = tx; |
6023 | 6034 | ||
6024 | netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); | 6035 | netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); |
6025 | spin_lock_init(&dev->tx_global_lock); | 6036 | spin_lock_init(&dev->tx_global_lock); |
6026 | 6037 | ||
6027 | return 0; | 6038 | return 0; |
6028 | } | 6039 | } |
6029 | 6040 | ||
6030 | /** | 6041 | /** |
6031 | * register_netdevice - register a network device | 6042 | * register_netdevice - register a network device |
6032 | * @dev: device to register | 6043 | * @dev: device to register |
6033 | * | 6044 | * |
6034 | * Take a completed network device structure and add it to the kernel | 6045 | * Take a completed network device structure and add it to the kernel |
6035 | * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier | 6046 | * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier |
6036 | * chain. 0 is returned on success. A negative errno code is returned | 6047 | * chain. 0 is returned on success. A negative errno code is returned |
6037 | * on a failure to set up the device, or if the name is a duplicate. | 6048 | * on a failure to set up the device, or if the name is a duplicate. |
6038 | * | 6049 | * |
6039 | * Callers must hold the rtnl semaphore. You may want | 6050 | * Callers must hold the rtnl semaphore. You may want |
6040 | * register_netdev() instead of this. | 6051 | * register_netdev() instead of this. |
6041 | * | 6052 | * |
6042 | * BUGS: | 6053 | * BUGS: |
6043 | * The locking appears insufficient to guarantee two parallel registers | 6054 | * The locking appears insufficient to guarantee two parallel registers |
6044 | * will not get the same name. | 6055 | * will not get the same name. |
6045 | */ | 6056 | */ |
6046 | 6057 | ||
6047 | int register_netdevice(struct net_device *dev) | 6058 | int register_netdevice(struct net_device *dev) |
6048 | { | 6059 | { |
6049 | int ret; | 6060 | int ret; |
6050 | struct net *net = dev_net(dev); | 6061 | struct net *net = dev_net(dev); |
6051 | 6062 | ||
6052 | BUG_ON(dev_boot_phase); | 6063 | BUG_ON(dev_boot_phase); |
6053 | ASSERT_RTNL(); | 6064 | ASSERT_RTNL(); |
6054 | 6065 | ||
6055 | might_sleep(); | 6066 | might_sleep(); |
6056 | 6067 | ||
6057 | /* When net_device's are persistent, this will be fatal. */ | 6068 | /* When net_device's are persistent, this will be fatal. */ |
6058 | BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); | 6069 | BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); |
6059 | BUG_ON(!net); | 6070 | BUG_ON(!net); |
6060 | 6071 | ||
6061 | spin_lock_init(&dev->addr_list_lock); | 6072 | spin_lock_init(&dev->addr_list_lock); |
6062 | netdev_set_addr_lockdep_class(dev); | 6073 | netdev_set_addr_lockdep_class(dev); |
6063 | 6074 | ||
6064 | dev->iflink = -1; | 6075 | dev->iflink = -1; |
6065 | 6076 | ||
6066 | ret = dev_get_valid_name(net, dev, dev->name); | 6077 | ret = dev_get_valid_name(net, dev, dev->name); |
6067 | if (ret < 0) | 6078 | if (ret < 0) |
6068 | goto out; | 6079 | goto out; |
6069 | 6080 | ||
6070 | /* Init, if this function is available */ | 6081 | /* Init, if this function is available */ |
6071 | if (dev->netdev_ops->ndo_init) { | 6082 | if (dev->netdev_ops->ndo_init) { |
6072 | ret = dev->netdev_ops->ndo_init(dev); | 6083 | ret = dev->netdev_ops->ndo_init(dev); |
6073 | if (ret) { | 6084 | if (ret) { |
6074 | if (ret > 0) | 6085 | if (ret > 0) |
6075 | ret = -EIO; | 6086 | ret = -EIO; |
6076 | goto out; | 6087 | goto out; |
6077 | } | 6088 | } |
6078 | } | 6089 | } |
6079 | 6090 | ||
6080 | if (((dev->hw_features | dev->features) & NETIF_F_HW_VLAN_FILTER) && | 6091 | if (((dev->hw_features | dev->features) & NETIF_F_HW_VLAN_FILTER) && |
6081 | (!dev->netdev_ops->ndo_vlan_rx_add_vid || | 6092 | (!dev->netdev_ops->ndo_vlan_rx_add_vid || |
6082 | !dev->netdev_ops->ndo_vlan_rx_kill_vid)) { | 6093 | !dev->netdev_ops->ndo_vlan_rx_kill_vid)) { |
6083 | netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n"); | 6094 | netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n"); |
6084 | ret = -EINVAL; | 6095 | ret = -EINVAL; |
6085 | goto err_uninit; | 6096 | goto err_uninit; |
6086 | } | 6097 | } |
6087 | 6098 | ||
6088 | ret = -EBUSY; | 6099 | ret = -EBUSY; |
6089 | if (!dev->ifindex) | 6100 | if (!dev->ifindex) |
6090 | dev->ifindex = dev_new_index(net); | 6101 | dev->ifindex = dev_new_index(net); |
6091 | else if (__dev_get_by_index(net, dev->ifindex)) | 6102 | else if (__dev_get_by_index(net, dev->ifindex)) |
6092 | goto err_uninit; | 6103 | goto err_uninit; |
6093 | 6104 | ||
6094 | if (dev->iflink == -1) | 6105 | if (dev->iflink == -1) |
6095 | dev->iflink = dev->ifindex; | 6106 | dev->iflink = dev->ifindex; |
6096 | 6107 | ||
6097 | /* Transfer changeable features to wanted_features and enable | 6108 | /* Transfer changeable features to wanted_features and enable |
6098 | * software offloads (GSO and GRO). | 6109 | * software offloads (GSO and GRO). |
6099 | */ | 6110 | */ |
6100 | dev->hw_features |= NETIF_F_SOFT_FEATURES; | 6111 | dev->hw_features |= NETIF_F_SOFT_FEATURES; |
6101 | dev->features |= NETIF_F_SOFT_FEATURES; | 6112 | dev->features |= NETIF_F_SOFT_FEATURES; |
6102 | dev->wanted_features = dev->features & dev->hw_features; | 6113 | dev->wanted_features = dev->features & dev->hw_features; |
6103 | 6114 | ||
6104 | /* Turn on no cache copy if HW is doing checksum */ | 6115 | /* Turn on no cache copy if HW is doing checksum */ |
6105 | if (!(dev->flags & IFF_LOOPBACK)) { | 6116 | if (!(dev->flags & IFF_LOOPBACK)) { |
6106 | dev->hw_features |= NETIF_F_NOCACHE_COPY; | 6117 | dev->hw_features |= NETIF_F_NOCACHE_COPY; |
6107 | if (dev->features & NETIF_F_ALL_CSUM) { | 6118 | if (dev->features & NETIF_F_ALL_CSUM) { |
6108 | dev->wanted_features |= NETIF_F_NOCACHE_COPY; | 6119 | dev->wanted_features |= NETIF_F_NOCACHE_COPY; |
6109 | dev->features |= NETIF_F_NOCACHE_COPY; | 6120 | dev->features |= NETIF_F_NOCACHE_COPY; |
6110 | } | 6121 | } |
6111 | } | 6122 | } |
6112 | 6123 | ||
6113 | /* Make NETIF_F_HIGHDMA inheritable to VLAN devices. | 6124 | /* Make NETIF_F_HIGHDMA inheritable to VLAN devices. |
6114 | */ | 6125 | */ |
6115 | dev->vlan_features |= NETIF_F_HIGHDMA; | 6126 | dev->vlan_features |= NETIF_F_HIGHDMA; |
6116 | 6127 | ||
6117 | ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); | 6128 | ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); |
6118 | ret = notifier_to_errno(ret); | 6129 | ret = notifier_to_errno(ret); |
6119 | if (ret) | 6130 | if (ret) |
6120 | goto err_uninit; | 6131 | goto err_uninit; |
6121 | 6132 | ||
6122 | ret = netdev_register_kobject(dev); | 6133 | ret = netdev_register_kobject(dev); |
6123 | if (ret) | 6134 | if (ret) |
6124 | goto err_uninit; | 6135 | goto err_uninit; |
6125 | dev->reg_state = NETREG_REGISTERED; | 6136 | dev->reg_state = NETREG_REGISTERED; |
6126 | 6137 | ||
6127 | __netdev_update_features(dev); | 6138 | __netdev_update_features(dev); |
6128 | 6139 | ||
6129 | /* | 6140 | /* |
6130 | * Default initial state at registration is that the | 6141 | * Default initial state at registration is that the |
6131 | * device is present. | 6142 | * device is present. |
6132 | */ | 6143 | */ |
6133 | 6144 | ||
6134 | set_bit(__LINK_STATE_PRESENT, &dev->state); | 6145 | set_bit(__LINK_STATE_PRESENT, &dev->state); |
6135 | 6146 | ||
6136 | linkwatch_init_dev(dev); | 6147 | linkwatch_init_dev(dev); |
6137 | 6148 | ||
6138 | dev_init_scheduler(dev); | 6149 | dev_init_scheduler(dev); |
6139 | dev_hold(dev); | 6150 | dev_hold(dev); |
6140 | list_netdevice(dev); | 6151 | list_netdevice(dev); |
6141 | add_device_randomness(dev->dev_addr, dev->addr_len); | 6152 | add_device_randomness(dev->dev_addr, dev->addr_len); |
6142 | 6153 | ||
6143 | /* If the device has permanent device address, driver should | 6154 | /* If the device has permanent device address, driver should |
6144 | * set dev_addr and also addr_assign_type should be set to | 6155 | * set dev_addr and also addr_assign_type should be set to |
6145 | * NET_ADDR_PERM (default value). | 6156 | * NET_ADDR_PERM (default value). |
6146 | */ | 6157 | */ |
6147 | if (dev->addr_assign_type == NET_ADDR_PERM) | 6158 | if (dev->addr_assign_type == NET_ADDR_PERM) |
6148 | memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); | 6159 | memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); |
6149 | 6160 | ||
6150 | /* Notify protocols that a new device appeared. */ | 6161 | /* Notify protocols that a new device appeared. */ |
6151 | ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); | 6162 | ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); |
6152 | ret = notifier_to_errno(ret); | 6163 | ret = notifier_to_errno(ret); |
6153 | if (ret) { | 6164 | if (ret) { |
6154 | rollback_registered(dev); | 6165 | rollback_registered(dev); |
6155 | dev->reg_state = NETREG_UNREGISTERED; | 6166 | dev->reg_state = NETREG_UNREGISTERED; |
6156 | } | 6167 | } |
6157 | /* | 6168 | /* |
6158 | * Prevent userspace races by waiting until the network | 6169 | * Prevent userspace races by waiting until the network |
6159 | * device is fully setup before sending notifications. | 6170 | * device is fully setup before sending notifications. |
6160 | */ | 6171 | */ |
6161 | if (!dev->rtnl_link_ops || | 6172 | if (!dev->rtnl_link_ops || |
6162 | dev->rtnl_link_state == RTNL_LINK_INITIALIZED) | 6173 | dev->rtnl_link_state == RTNL_LINK_INITIALIZED) |
6163 | rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); | 6174 | rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); |
6164 | 6175 | ||
6165 | out: | 6176 | out: |
6166 | return ret; | 6177 | return ret; |
6167 | 6178 | ||
6168 | err_uninit: | 6179 | err_uninit: |
6169 | if (dev->netdev_ops->ndo_uninit) | 6180 | if (dev->netdev_ops->ndo_uninit) |
6170 | dev->netdev_ops->ndo_uninit(dev); | 6181 | dev->netdev_ops->ndo_uninit(dev); |
6171 | goto out; | 6182 | goto out; |
6172 | } | 6183 | } |
6173 | EXPORT_SYMBOL(register_netdevice); | 6184 | EXPORT_SYMBOL(register_netdevice); |
6174 | 6185 | ||
6175 | /** | 6186 | /** |
6176 | * init_dummy_netdev - init a dummy network device for NAPI | 6187 | * init_dummy_netdev - init a dummy network device for NAPI |
6177 | * @dev: device to init | 6188 | * @dev: device to init |
6178 | * | 6189 | * |
6179 | * This takes a network device structure and initializes the minimum | 6190 | * This takes a network device structure and initializes the minimum |
6180 | * number of fields so it can be used to schedule NAPI polls without | 6191 | * number of fields so it can be used to schedule NAPI polls without |
6181 | * registering a full blown interface. This is to be used by drivers | 6192 | * registering a full blown interface. This is to be used by drivers |
6182 | * that need to tie several hardware interfaces to a single NAPI | 6193 | * that need to tie several hardware interfaces to a single NAPI |
6183 | * poll scheduler due to HW limitations. | 6194 | * poll scheduler due to HW limitations. |
6184 | */ | 6195 | */ |
6185 | int init_dummy_netdev(struct net_device *dev) | 6196 | int init_dummy_netdev(struct net_device *dev) |
6186 | { | 6197 | { |
6187 | /* Clear everything. Note we don't initialize spinlocks | 6198 | /* Clear everything. Note we don't initialize spinlocks |
6188 | * as they aren't supposed to be taken by any of the | 6199 | * as they aren't supposed to be taken by any of the |
6189 | * NAPI code and this dummy netdev is supposed to be | 6200 | * NAPI code and this dummy netdev is supposed to be |
6190 | * only ever used for NAPI polls | 6201 | * only ever used for NAPI polls |
6191 | */ | 6202 | */ |
6192 | memset(dev, 0, sizeof(struct net_device)); | 6203 | memset(dev, 0, sizeof(struct net_device)); |
6193 | 6204 | ||
6194 | /* make sure we BUG if trying to hit standard | 6205 | /* make sure we BUG if trying to hit standard |
6195 | * register/unregister code path | 6206 | * register/unregister code path |
6196 | */ | 6207 | */ |
6197 | dev->reg_state = NETREG_DUMMY; | 6208 | dev->reg_state = NETREG_DUMMY; |
6198 | 6209 | ||
6199 | /* NAPI wants this */ | 6210 | /* NAPI wants this */ |
6200 | INIT_LIST_HEAD(&dev->napi_list); | 6211 | INIT_LIST_HEAD(&dev->napi_list); |
6201 | 6212 | ||
6202 | /* a dummy interface is started by default */ | 6213 | /* a dummy interface is started by default */ |
6203 | set_bit(__LINK_STATE_PRESENT, &dev->state); | 6214 | set_bit(__LINK_STATE_PRESENT, &dev->state); |
6204 | set_bit(__LINK_STATE_START, &dev->state); | 6215 | set_bit(__LINK_STATE_START, &dev->state); |
6205 | 6216 | ||
6206 | /* Note: We don't allocate pcpu_refcnt for dummy devices, | 6217 | /* Note: We don't allocate pcpu_refcnt for dummy devices, |
6207 | * because users of this 'device' don't need to change | 6218 | * because users of this 'device' don't need to change |
6208 | * its refcount. | 6219 | * its refcount. |
6209 | */ | 6220 | */ |
6210 | 6221 | ||
6211 | return 0; | 6222 | return 0; |
6212 | } | 6223 | } |
6213 | EXPORT_SYMBOL_GPL(init_dummy_netdev); | 6224 | EXPORT_SYMBOL_GPL(init_dummy_netdev); |
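The usage pattern the comment above alludes to is roughly the following: the driver embeds a never-registered dummy netdev in its private state purely so NAPI has something to hang off (all mydrv_* names are hypothetical):

#include <linux/netdevice.h>

struct mydrv_priv {
	struct net_device napi_dev;	/* dummy, never registered */
	struct napi_struct napi;
};

static int mydrv_poll(struct napi_struct *napi, int budget);	/* assumed poll handler */

static void mydrv_setup_napi(struct mydrv_priv *priv)
{
	init_dummy_netdev(&priv->napi_dev);
	netif_napi_add(&priv->napi_dev, &priv->napi, mydrv_poll, 64);
	napi_enable(&priv->napi);
}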
6214 | 6225 | ||
6215 | 6226 | ||
6216 | /** | 6227 | /** |
6217 | * register_netdev - register a network device | 6228 | * register_netdev - register a network device |
6218 | * @dev: device to register | 6229 | * @dev: device to register |
6219 | * | 6230 | * |
6220 | * Take a completed network device structure and add it to the kernel | 6231 | * Take a completed network device structure and add it to the kernel |
6221 | * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier | 6232 | * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier |
6222 | * chain. 0 is returned on success. A negative errno code is returned | 6233 | * chain. 0 is returned on success. A negative errno code is returned |
6223 | * on a failure to set up the device, or if the name is a duplicate. | 6234 | * on a failure to set up the device, or if the name is a duplicate. |
6224 | * | 6235 | * |
6225 | * This is a wrapper around register_netdevice that takes the rtnl semaphore | 6236 | * This is a wrapper around register_netdevice that takes the rtnl semaphore |
6226 | * and expands the device name if you passed a format string to | 6237 | * and expands the device name if you passed a format string to |
6227 | * alloc_netdev. | 6238 | * alloc_netdev. |
6228 | */ | 6239 | */ |
6229 | int register_netdev(struct net_device *dev) | 6240 | int register_netdev(struct net_device *dev) |
6230 | { | 6241 | { |
6231 | int err; | 6242 | int err; |
6232 | 6243 | ||
6233 | rtnl_lock(); | 6244 | rtnl_lock(); |
6234 | err = register_netdevice(dev); | 6245 | err = register_netdevice(dev); |
6235 | rtnl_unlock(); | 6246 | rtnl_unlock(); |
6236 | return err; | 6247 | return err; |
6237 | } | 6248 | } |
6238 | EXPORT_SYMBOL(register_netdev); | 6249 | EXPORT_SYMBOL(register_netdev); |
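A typical probe-time sequence built on this wrapper, with error handling trimmed and all mydrv_* names hypothetical:

#include <linux/device.h>
#include <linux/etherdevice.h>
#include <linux/netdevice.h>

struct mydrv_priv { int dummy; };	/* placeholder private area */

static const struct net_device_ops mydrv_netdev_ops = {
	/* ndo_open, ndo_stop, ndo_start_xmit, ... elided */
};

static int mydrv_probe(struct device *parent)
{
	struct net_device *dev;
	int err;

	dev = alloc_etherdev(sizeof(struct mydrv_priv));
	if (!dev)
		return -ENOMEM;
	SET_NETDEV_DEV(dev, parent);
	dev->netdev_ops = &mydrv_netdev_ops;
	err = register_netdev(dev);	/* takes and releases the rtnl lock itself */
	if (err)
		free_netdev(dev);
	return err;
}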
6239 | 6250 | ||
6240 | int netdev_refcnt_read(const struct net_device *dev) | 6251 | int netdev_refcnt_read(const struct net_device *dev) |
6241 | { | 6252 | { |
6242 | int i, refcnt = 0; | 6253 | int i, refcnt = 0; |
6243 | 6254 | ||
6244 | for_each_possible_cpu(i) | 6255 | for_each_possible_cpu(i) |
6245 | refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i); | 6256 | refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i); |
6246 | return refcnt; | 6257 | return refcnt; |
6247 | } | 6258 | } |
6248 | EXPORT_SYMBOL(netdev_refcnt_read); | 6259 | EXPORT_SYMBOL(netdev_refcnt_read); |
6249 | 6260 | ||
6250 | /** | 6261 | /** |
6251 | * netdev_wait_allrefs - wait until all references are gone. | 6262 | * netdev_wait_allrefs - wait until all references are gone. |
6252 | * @dev: target net_device | 6263 | * @dev: target net_device |
6253 | * | 6264 | * |
6254 | * This is called when unregistering network devices. | 6265 | * This is called when unregistering network devices. |
6255 | * | 6266 | * |
6256 | * Any protocol or device that holds a reference should register | 6267 | * Any protocol or device that holds a reference should register |
6257 | * for netdevice notification, and clean up and put back the | 6268 | * for netdevice notification, and clean up and put back the |
6258 | * reference if they receive an UNREGISTER event. | 6269 | * reference if they receive an UNREGISTER event. |
6259 | * We can get stuck here if buggy protocols don't correctly | 6270 | * We can get stuck here if buggy protocols don't correctly |
6260 | * call dev_put. | 6271 | * call dev_put. |
6261 | */ | 6272 | */ |
6262 | static void netdev_wait_allrefs(struct net_device *dev) | 6273 | static void netdev_wait_allrefs(struct net_device *dev) |
6263 | { | 6274 | { |
6264 | unsigned long rebroadcast_time, warning_time; | 6275 | unsigned long rebroadcast_time, warning_time; |
6265 | int refcnt; | 6276 | int refcnt; |
6266 | 6277 | ||
6267 | linkwatch_forget_dev(dev); | 6278 | linkwatch_forget_dev(dev); |
6268 | 6279 | ||
6269 | rebroadcast_time = warning_time = jiffies; | 6280 | rebroadcast_time = warning_time = jiffies; |
6270 | refcnt = netdev_refcnt_read(dev); | 6281 | refcnt = netdev_refcnt_read(dev); |
6271 | 6282 | ||
6272 | while (refcnt != 0) { | 6283 | while (refcnt != 0) { |
6273 | if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { | 6284 | if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { |
6274 | rtnl_lock(); | 6285 | rtnl_lock(); |
6275 | 6286 | ||
6276 | /* Rebroadcast unregister notification */ | 6287 | /* Rebroadcast unregister notification */ |
6277 | call_netdevice_notifiers(NETDEV_UNREGISTER, dev); | 6288 | call_netdevice_notifiers(NETDEV_UNREGISTER, dev); |
6278 | 6289 | ||
6279 | __rtnl_unlock(); | 6290 | __rtnl_unlock(); |
6280 | rcu_barrier(); | 6291 | rcu_barrier(); |
6281 | rtnl_lock(); | 6292 | rtnl_lock(); |
6282 | 6293 | ||
6283 | call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); | 6294 | call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); |
6284 | if (test_bit(__LINK_STATE_LINKWATCH_PENDING, | 6295 | if (test_bit(__LINK_STATE_LINKWATCH_PENDING, |
6285 | &dev->state)) { | 6296 | &dev->state)) { |
6286 | /* We must not have linkwatch events | 6297 | /* We must not have linkwatch events |
6287 | * pending on unregister. If this | 6298 | * pending on unregister. If this |
6288 | * happens, we simply run the queue | 6299 | * happens, we simply run the queue |
6289 | * unscheduled, resulting in a noop | 6300 | * unscheduled, resulting in a noop |
6290 | * for this device. | 6301 | * for this device. |
6291 | */ | 6302 | */ |
6292 | linkwatch_run_queue(); | 6303 | linkwatch_run_queue(); |
6293 | } | 6304 | } |
6294 | 6305 | ||
6295 | __rtnl_unlock(); | 6306 | __rtnl_unlock(); |
6296 | 6307 | ||
6297 | rebroadcast_time = jiffies; | 6308 | rebroadcast_time = jiffies; |
6298 | } | 6309 | } |
6299 | 6310 | ||
6300 | msleep(250); | 6311 | msleep(250); |
6301 | 6312 | ||
6302 | refcnt = netdev_refcnt_read(dev); | 6313 | refcnt = netdev_refcnt_read(dev); |
6303 | 6314 | ||
6304 | if (time_after(jiffies, warning_time + 10 * HZ)) { | 6315 | if (time_after(jiffies, warning_time + 10 * HZ)) { |
6305 | pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n", | 6316 | pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n", |
6306 | dev->name, refcnt); | 6317 | dev->name, refcnt); |
6307 | warning_time = jiffies; | 6318 | warning_time = jiffies; |
6308 | } | 6319 | } |
6309 | } | 6320 | } |
6310 | } | 6321 | } |
6311 | 6322 | ||
6312 | /* The sequence is: | 6323 | /* The sequence is: |
6313 | * | 6324 | * |
6314 | * rtnl_lock(); | 6325 | * rtnl_lock(); |
6315 | * ... | 6326 | * ... |
6316 | * register_netdevice(x1); | 6327 | * register_netdevice(x1); |
6317 | * register_netdevice(x2); | 6328 | * register_netdevice(x2); |
6318 | * ... | 6329 | * ... |
6319 | * unregister_netdevice(y1); | 6330 | * unregister_netdevice(y1); |
6320 | * unregister_netdevice(y2); | 6331 | * unregister_netdevice(y2); |
6321 | * ... | 6332 | * ... |
6322 | * rtnl_unlock(); | 6333 | * rtnl_unlock(); |
6323 | * free_netdev(y1); | 6334 | * free_netdev(y1); |
6324 | * free_netdev(y2); | 6335 | * free_netdev(y2); |
6325 | * | 6336 | * |
6326 | * We are invoked by rtnl_unlock(). | 6337 | * We are invoked by rtnl_unlock(). |
6327 | * This allows us to deal with problems: | 6338 | * This allows us to deal with problems: |
6328 | * 1) We can delete sysfs objects which invoke hotplug | 6339 | * 1) We can delete sysfs objects which invoke hotplug |
6329 | * without deadlocking with linkwatch via keventd. | 6340 | * without deadlocking with linkwatch via keventd. |
6330 | * 2) Since we run with the RTNL semaphore not held, we can sleep | 6341 | * 2) Since we run with the RTNL semaphore not held, we can sleep |
6331 | * safely in order to wait for the netdev refcnt to drop to zero. | 6342 | * safely in order to wait for the netdev refcnt to drop to zero. |
6332 | * | 6343 | * |
6333 | * We must not return until all unregister events added during | 6344 | * We must not return until all unregister events added during |
6334 | * the interval the lock was held have been completed. | 6345 | * the interval the lock was held have been completed. |
6335 | */ | 6346 | */ |
6336 | void netdev_run_todo(void) | 6347 | void netdev_run_todo(void) |
6337 | { | 6348 | { |
6338 | struct list_head list; | 6349 | struct list_head list; |
6339 | 6350 | ||
6340 | /* Snapshot list, allow later requests */ | 6351 | /* Snapshot list, allow later requests */ |
6341 | list_replace_init(&net_todo_list, &list); | 6352 | list_replace_init(&net_todo_list, &list); |
6342 | 6353 | ||
6343 | __rtnl_unlock(); | 6354 | __rtnl_unlock(); |
6344 | 6355 | ||
6345 | 6356 | ||
6346 | /* Wait for rcu callbacks to finish before next phase */ | 6357 | /* Wait for rcu callbacks to finish before next phase */ |
6347 | if (!list_empty(&list)) | 6358 | if (!list_empty(&list)) |
6348 | rcu_barrier(); | 6359 | rcu_barrier(); |
6349 | 6360 | ||
6350 | while (!list_empty(&list)) { | 6361 | while (!list_empty(&list)) { |
6351 | struct net_device *dev | 6362 | struct net_device *dev |
6352 | = list_first_entry(&list, struct net_device, todo_list); | 6363 | = list_first_entry(&list, struct net_device, todo_list); |
6353 | list_del(&dev->todo_list); | 6364 | list_del(&dev->todo_list); |
6354 | 6365 | ||
6355 | rtnl_lock(); | 6366 | rtnl_lock(); |
6356 | call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); | 6367 | call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); |
6357 | __rtnl_unlock(); | 6368 | __rtnl_unlock(); |
6358 | 6369 | ||
6359 | if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { | 6370 | if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { |
6360 | pr_err("network todo '%s' but state %d\n", | 6371 | pr_err("network todo '%s' but state %d\n", |
6361 | dev->name, dev->reg_state); | 6372 | dev->name, dev->reg_state); |
6362 | dump_stack(); | 6373 | dump_stack(); |
6363 | continue; | 6374 | continue; |
6364 | } | 6375 | } |
6365 | 6376 | ||
6366 | dev->reg_state = NETREG_UNREGISTERED; | 6377 | dev->reg_state = NETREG_UNREGISTERED; |
6367 | 6378 | ||
6368 | on_each_cpu(flush_backlog, dev, 1); | 6379 | on_each_cpu(flush_backlog, dev, 1); |
6369 | 6380 | ||
6370 | netdev_wait_allrefs(dev); | 6381 | netdev_wait_allrefs(dev); |
6371 | 6382 | ||
6372 | /* paranoia */ | 6383 | /* paranoia */ |
6373 | BUG_ON(netdev_refcnt_read(dev)); | 6384 | BUG_ON(netdev_refcnt_read(dev)); |
6374 | WARN_ON(rcu_access_pointer(dev->ip_ptr)); | 6385 | WARN_ON(rcu_access_pointer(dev->ip_ptr)); |
6375 | WARN_ON(rcu_access_pointer(dev->ip6_ptr)); | 6386 | WARN_ON(rcu_access_pointer(dev->ip6_ptr)); |
6376 | WARN_ON(dev->dn_ptr); | 6387 | WARN_ON(dev->dn_ptr); |
6377 | 6388 | ||
6378 | if (dev->destructor) | 6389 | if (dev->destructor) |
6379 | dev->destructor(dev); | 6390 | dev->destructor(dev); |
6380 | 6391 | ||
6381 | /* Free network device */ | 6392 | /* Free network device */ |
6382 | kobject_put(&dev->dev.kobj); | 6393 | kobject_put(&dev->dev.kobj); |
6383 | } | 6394 | } |
6384 | } | 6395 | } |
6385 | 6396 | ||
6386 | /* Convert net_device_stats to rtnl_link_stats64. They have the same | 6397 | /* Convert net_device_stats to rtnl_link_stats64. They have the same |
6387 | * fields in the same order, with only the type differing. | 6398 | * fields in the same order, with only the type differing. |
6388 | */ | 6399 | */ |
6389 | void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, | 6400 | void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, |
6390 | const struct net_device_stats *netdev_stats) | 6401 | const struct net_device_stats *netdev_stats) |
6391 | { | 6402 | { |
6392 | #if BITS_PER_LONG == 64 | 6403 | #if BITS_PER_LONG == 64 |
6393 | BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats)); | 6404 | BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats)); |
6394 | memcpy(stats64, netdev_stats, sizeof(*stats64)); | 6405 | memcpy(stats64, netdev_stats, sizeof(*stats64)); |
6395 | #else | 6406 | #else |
6396 | size_t i, n = sizeof(*stats64) / sizeof(u64); | 6407 | size_t i, n = sizeof(*stats64) / sizeof(u64); |
6397 | const unsigned long *src = (const unsigned long *)netdev_stats; | 6408 | const unsigned long *src = (const unsigned long *)netdev_stats; |
6398 | u64 *dst = (u64 *)stats64; | 6409 | u64 *dst = (u64 *)stats64; |
6399 | 6410 | ||
6400 | BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) != | 6411 | BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) != |
6401 | sizeof(*stats64) / sizeof(u64)); | 6412 | sizeof(*stats64) / sizeof(u64)); |
6402 | for (i = 0; i < n; i++) | 6413 | for (i = 0; i < n; i++) |
6403 | dst[i] = src[i]; | 6414 | dst[i] = src[i]; |
6404 | #endif | 6415 | #endif |
6405 | } | 6416 | } |
6406 | EXPORT_SYMBOL(netdev_stats_to_stats64); | 6417 | EXPORT_SYMBOL(netdev_stats_to_stats64); |
6407 | 6418 | ||
6408 | /** | 6419 | /** |
6409 | * dev_get_stats - get network device statistics | 6420 | * dev_get_stats - get network device statistics |
6410 | * @dev: device to get statistics from | 6421 | * @dev: device to get statistics from |
6411 | * @storage: place to store stats | 6422 | * @storage: place to store stats |
6412 | * | 6423 | * |
6413 | * Get network statistics from device. Return @storage. | 6424 | * Get network statistics from device. Return @storage. |
6414 | * The device driver may provide its own method by setting | 6425 | * The device driver may provide its own method by setting |
6415 | * dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats; | 6426 | * dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats; |
6416 | * otherwise the internal statistics structure is used. | 6427 | * otherwise the internal statistics structure is used. |
6417 | */ | 6428 | */ |
6418 | struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, | 6429 | struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, |
6419 | struct rtnl_link_stats64 *storage) | 6430 | struct rtnl_link_stats64 *storage) |
6420 | { | 6431 | { |
6421 | const struct net_device_ops *ops = dev->netdev_ops; | 6432 | const struct net_device_ops *ops = dev->netdev_ops; |
6422 | 6433 | ||
6423 | if (ops->ndo_get_stats64) { | 6434 | if (ops->ndo_get_stats64) { |
6424 | memset(storage, 0, sizeof(*storage)); | 6435 | memset(storage, 0, sizeof(*storage)); |
6425 | ops->ndo_get_stats64(dev, storage); | 6436 | ops->ndo_get_stats64(dev, storage); |
6426 | } else if (ops->ndo_get_stats) { | 6437 | } else if (ops->ndo_get_stats) { |
6427 | netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev)); | 6438 | netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev)); |
6428 | } else { | 6439 | } else { |
6429 | netdev_stats_to_stats64(storage, &dev->stats); | 6440 | netdev_stats_to_stats64(storage, &dev->stats); |
6430 | } | 6441 | } |
6431 | storage->rx_dropped += atomic_long_read(&dev->rx_dropped); | 6442 | storage->rx_dropped += atomic_long_read(&dev->rx_dropped); |
6432 | return storage; | 6443 | return storage; |
6433 | } | 6444 | } |
6434 | EXPORT_SYMBOL(dev_get_stats); | 6445 | EXPORT_SYMBOL(dev_get_stats); |
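/*
 * A minimal sketch of how a driver hooks into dev_get_stats() via
 * ndo_get_stats64; "my_priv", "my_get_stats64" and "my_netdev_ops" are
 * hypothetical names used only for illustration.
 */
struct my_priv {
	u64 rx_packets;
	u64 tx_packets;
};

static struct rtnl_link_stats64 *my_get_stats64(struct net_device *dev,
						struct rtnl_link_stats64 *storage)
{
	struct my_priv *priv = netdev_priv(dev);

	/* dev_get_stats() has already zeroed *storage for us */
	storage->rx_packets = priv->rx_packets;
	storage->tx_packets = priv->tx_packets;
	return storage;
}

static const struct net_device_ops my_netdev_ops = {
	.ndo_get_stats64	= my_get_stats64,
};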
6435 | 6446 | ||
6436 | struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) | 6447 | struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) |
6437 | { | 6448 | { |
6438 | struct netdev_queue *queue = dev_ingress_queue(dev); | 6449 | struct netdev_queue *queue = dev_ingress_queue(dev); |
6439 | 6450 | ||
6440 | #ifdef CONFIG_NET_CLS_ACT | 6451 | #ifdef CONFIG_NET_CLS_ACT |
6441 | if (queue) | 6452 | if (queue) |
6442 | return queue; | 6453 | return queue; |
6443 | queue = kzalloc(sizeof(*queue), GFP_KERNEL); | 6454 | queue = kzalloc(sizeof(*queue), GFP_KERNEL); |
6444 | if (!queue) | 6455 | if (!queue) |
6445 | return NULL; | 6456 | return NULL; |
6446 | netdev_init_one_queue(dev, queue, NULL); | 6457 | netdev_init_one_queue(dev, queue, NULL); |
6447 | queue->qdisc = &noop_qdisc; | 6458 | queue->qdisc = &noop_qdisc; |
6448 | queue->qdisc_sleeping = &noop_qdisc; | 6459 | queue->qdisc_sleeping = &noop_qdisc; |
6449 | rcu_assign_pointer(dev->ingress_queue, queue); | 6460 | rcu_assign_pointer(dev->ingress_queue, queue); |
6450 | #endif | 6461 | #endif |
6451 | return queue; | 6462 | return queue; |
6452 | } | 6463 | } |
6453 | 6464 | ||
6454 | static const struct ethtool_ops default_ethtool_ops; | 6465 | static const struct ethtool_ops default_ethtool_ops; |
6455 | 6466 | ||
6456 | void netdev_set_default_ethtool_ops(struct net_device *dev, | 6467 | void netdev_set_default_ethtool_ops(struct net_device *dev, |
6457 | const struct ethtool_ops *ops) | 6468 | const struct ethtool_ops *ops) |
6458 | { | 6469 | { |
6459 | if (dev->ethtool_ops == &default_ethtool_ops) | 6470 | if (dev->ethtool_ops == &default_ethtool_ops) |
6460 | dev->ethtool_ops = ops; | 6471 | dev->ethtool_ops = ops; |
6461 | } | 6472 | } |
6462 | EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops); | 6473 | EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops); |
6463 | 6474 | ||
6464 | /** | 6475 | /** |
6465 | * alloc_netdev_mqs - allocate network device | 6476 | * alloc_netdev_mqs - allocate network device |
6466 | * @sizeof_priv: size of private data to allocate space for | 6477 | * @sizeof_priv: size of private data to allocate space for |
6467 | * @name: device name format string | 6478 | * @name: device name format string |
6468 | * @setup: callback to initialize device | 6479 | * @setup: callback to initialize device |
6469 | * @txqs: the number of TX subqueues to allocate | 6480 | * @txqs: the number of TX subqueues to allocate |
6470 | * @rxqs: the number of RX subqueues to allocate | 6481 | * @rxqs: the number of RX subqueues to allocate |
6471 | * | 6482 | * |
6472 | * Allocates a struct net_device with private data area for driver use | 6483 | * Allocates a struct net_device with private data area for driver use |
6473 | * and performs basic initialization. Also allocates subqueue structs | 6484 | * and performs basic initialization. Also allocates subqueue structs |
6474 | * for each queue on the device. | 6485 | * for each queue on the device. |
6475 | */ | 6486 | */ |
6476 | struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, | 6487 | struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, |
6477 | void (*setup)(struct net_device *), | 6488 | void (*setup)(struct net_device *), |
6478 | unsigned int txqs, unsigned int rxqs) | 6489 | unsigned int txqs, unsigned int rxqs) |
6479 | { | 6490 | { |
6480 | struct net_device *dev; | 6491 | struct net_device *dev; |
6481 | size_t alloc_size; | 6492 | size_t alloc_size; |
6482 | struct net_device *p; | 6493 | struct net_device *p; |
6483 | 6494 | ||
6484 | BUG_ON(strlen(name) >= sizeof(dev->name)); | 6495 | BUG_ON(strlen(name) >= sizeof(dev->name)); |
6485 | 6496 | ||
6486 | if (txqs < 1) { | 6497 | if (txqs < 1) { |
6487 | pr_err("alloc_netdev: Unable to allocate device with zero queues\n"); | 6498 | pr_err("alloc_netdev: Unable to allocate device with zero queues\n"); |
6488 | return NULL; | 6499 | return NULL; |
6489 | } | 6500 | } |
6490 | 6501 | ||
6491 | #ifdef CONFIG_RPS | 6502 | #ifdef CONFIG_RPS |
6492 | if (rxqs < 1) { | 6503 | if (rxqs < 1) { |
6493 | pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n"); | 6504 | pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n"); |
6494 | return NULL; | 6505 | return NULL; |
6495 | } | 6506 | } |
6496 | #endif | 6507 | #endif |
6497 | 6508 | ||
6498 | alloc_size = sizeof(struct net_device); | 6509 | alloc_size = sizeof(struct net_device); |
6499 | if (sizeof_priv) { | 6510 | if (sizeof_priv) { |
6500 | /* ensure 32-byte alignment of private area */ | 6511 | /* ensure 32-byte alignment of private area */ |
6501 | alloc_size = ALIGN(alloc_size, NETDEV_ALIGN); | 6512 | alloc_size = ALIGN(alloc_size, NETDEV_ALIGN); |
6502 | alloc_size += sizeof_priv; | 6513 | alloc_size += sizeof_priv; |
6503 | } | 6514 | } |
6504 | /* ensure 32-byte alignment of whole construct */ | 6515 | /* ensure 32-byte alignment of whole construct */ |
6505 | alloc_size += NETDEV_ALIGN - 1; | 6516 | alloc_size += NETDEV_ALIGN - 1; |
6506 | 6517 | ||
6507 | p = kzalloc(alloc_size, GFP_KERNEL); | 6518 | p = kzalloc(alloc_size, GFP_KERNEL); |
6508 | if (!p) | 6519 | if (!p) |
6509 | return NULL; | 6520 | return NULL; |
6510 | 6521 | ||
6511 | dev = PTR_ALIGN(p, NETDEV_ALIGN); | 6522 | dev = PTR_ALIGN(p, NETDEV_ALIGN); |
6512 | dev->padded = (char *)dev - (char *)p; | 6523 | dev->padded = (char *)dev - (char *)p; |
6513 | 6524 | ||
6514 | dev->pcpu_refcnt = alloc_percpu(int); | 6525 | dev->pcpu_refcnt = alloc_percpu(int); |
6515 | if (!dev->pcpu_refcnt) | 6526 | if (!dev->pcpu_refcnt) |
6516 | goto free_p; | 6527 | goto free_p; |
6517 | 6528 | ||
6518 | if (dev_addr_init(dev)) | 6529 | if (dev_addr_init(dev)) |
6519 | goto free_pcpu; | 6530 | goto free_pcpu; |
6520 | 6531 | ||
6521 | dev_mc_init(dev); | 6532 | dev_mc_init(dev); |
6522 | dev_uc_init(dev); | 6533 | dev_uc_init(dev); |
6523 | 6534 | ||
6524 | dev_net_set(dev, &init_net); | 6535 | dev_net_set(dev, &init_net); |
6525 | 6536 | ||
6526 | dev->gso_max_size = GSO_MAX_SIZE; | 6537 | dev->gso_max_size = GSO_MAX_SIZE; |
6527 | dev->gso_max_segs = GSO_MAX_SEGS; | 6538 | dev->gso_max_segs = GSO_MAX_SEGS; |
6528 | 6539 | ||
6529 | INIT_LIST_HEAD(&dev->napi_list); | 6540 | INIT_LIST_HEAD(&dev->napi_list); |
6530 | INIT_LIST_HEAD(&dev->unreg_list); | 6541 | INIT_LIST_HEAD(&dev->unreg_list); |
6531 | INIT_LIST_HEAD(&dev->link_watch_list); | 6542 | INIT_LIST_HEAD(&dev->link_watch_list); |
6532 | INIT_LIST_HEAD(&dev->upper_dev_list); | 6543 | INIT_LIST_HEAD(&dev->upper_dev_list); |
6533 | dev->priv_flags = IFF_XMIT_DST_RELEASE; | 6544 | dev->priv_flags = IFF_XMIT_DST_RELEASE; |
6534 | setup(dev); | 6545 | setup(dev); |
6535 | 6546 | ||
6536 | dev->num_tx_queues = txqs; | 6547 | dev->num_tx_queues = txqs; |
6537 | dev->real_num_tx_queues = txqs; | 6548 | dev->real_num_tx_queues = txqs; |
6538 | if (netif_alloc_netdev_queues(dev)) | 6549 | if (netif_alloc_netdev_queues(dev)) |
6539 | goto free_all; | 6550 | goto free_all; |
6540 | 6551 | ||
6541 | #ifdef CONFIG_RPS | 6552 | #ifdef CONFIG_RPS |
6542 | dev->num_rx_queues = rxqs; | 6553 | dev->num_rx_queues = rxqs; |
6543 | dev->real_num_rx_queues = rxqs; | 6554 | dev->real_num_rx_queues = rxqs; |
6544 | if (netif_alloc_rx_queues(dev)) | 6555 | if (netif_alloc_rx_queues(dev)) |
6545 | goto free_all; | 6556 | goto free_all; |
6546 | #endif | 6557 | #endif |
6547 | 6558 | ||
6548 | strcpy(dev->name, name); | 6559 | strcpy(dev->name, name); |
6549 | dev->group = INIT_NETDEV_GROUP; | 6560 | dev->group = INIT_NETDEV_GROUP; |
6550 | if (!dev->ethtool_ops) | 6561 | if (!dev->ethtool_ops) |
6551 | dev->ethtool_ops = &default_ethtool_ops; | 6562 | dev->ethtool_ops = &default_ethtool_ops; |
6552 | return dev; | 6563 | return dev; |
6553 | 6564 | ||
6554 | free_all: | 6565 | free_all: |
6555 | free_netdev(dev); | 6566 | free_netdev(dev); |
6556 | return NULL; | 6567 | return NULL; |
6557 | 6568 | ||
6558 | free_pcpu: | 6569 | free_pcpu: |
6559 | free_percpu(dev->pcpu_refcnt); | 6570 | free_percpu(dev->pcpu_refcnt); |
6560 | kfree(dev->_tx); | 6571 | kfree(dev->_tx); |
6561 | #ifdef CONFIG_RPS | 6572 | #ifdef CONFIG_RPS |
6562 | kfree(dev->_rx); | 6573 | kfree(dev->_rx); |
6563 | #endif | 6574 | #endif |
6564 | 6575 | ||
6565 | free_p: | 6576 | free_p: |
6566 | kfree(p); | 6577 | kfree(p); |
6567 | return NULL; | 6578 | return NULL; |
6568 | } | 6579 | } |
6569 | EXPORT_SYMBOL(alloc_netdev_mqs); | 6580 | EXPORT_SYMBOL(alloc_netdev_mqs); |
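/*
 * Typical use of the allocator above, assuming the hypothetical
 * struct my_priv and my_netdev_ops sketched earlier.  alloc_netdev()
 * and alloc_etherdev() are thin wrappers that end up here as well.
 */
static void my_setup(struct net_device *dev)
{
	ether_setup(dev);			/* sane Ethernet defaults */
	dev->netdev_ops = &my_netdev_ops;
}

static struct net_device *my_alloc(void)
{
	/* 4 TX and 4 RX queues; "%d" is expanded later by register_netdev() */
	return alloc_netdev_mqs(sizeof(struct my_priv), "myeth%d", my_setup, 4, 4);
}
/* netdev_priv() returns the NETDEV_ALIGN-aligned private area allocated here. */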
6570 | 6581 | ||
6571 | /** | 6582 | /** |
6572 | * free_netdev - free network device | 6583 | * free_netdev - free network device |
6573 | * @dev: device | 6584 | * @dev: device |
6574 | * | 6585 | * |
6575 | * This function does the last stage of destroying an allocated device | 6586 | * This function does the last stage of destroying an allocated device |
6576 | * interface. The reference to the device object is released. | 6587 | * interface. The reference to the device object is released. |
6577 | * If this is the last reference then it will be freed. | 6588 | * If this is the last reference then it will be freed. |
6578 | */ | 6589 | */ |
6579 | void free_netdev(struct net_device *dev) | 6590 | void free_netdev(struct net_device *dev) |
6580 | { | 6591 | { |
6581 | struct napi_struct *p, *n; | 6592 | struct napi_struct *p, *n; |
6582 | 6593 | ||
6583 | release_net(dev_net(dev)); | 6594 | release_net(dev_net(dev)); |
6584 | 6595 | ||
6585 | kfree(dev->_tx); | 6596 | kfree(dev->_tx); |
6586 | #ifdef CONFIG_RPS | 6597 | #ifdef CONFIG_RPS |
6587 | kfree(dev->_rx); | 6598 | kfree(dev->_rx); |
6588 | #endif | 6599 | #endif |
6589 | 6600 | ||
6590 | kfree(rcu_dereference_protected(dev->ingress_queue, 1)); | 6601 | kfree(rcu_dereference_protected(dev->ingress_queue, 1)); |
6591 | 6602 | ||
6592 | /* Flush device addresses */ | 6603 | /* Flush device addresses */ |
6593 | dev_addr_flush(dev); | 6604 | dev_addr_flush(dev); |
6594 | 6605 | ||
6595 | list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) | 6606 | list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) |
6596 | netif_napi_del(p); | 6607 | netif_napi_del(p); |
6597 | 6608 | ||
6598 | free_percpu(dev->pcpu_refcnt); | 6609 | free_percpu(dev->pcpu_refcnt); |
6599 | dev->pcpu_refcnt = NULL; | 6610 | dev->pcpu_refcnt = NULL; |
6600 | 6611 | ||
6601 | /* Compatibility with error handling in drivers */ | 6612 | /* Compatibility with error handling in drivers */ |
6602 | if (dev->reg_state == NETREG_UNINITIALIZED) { | 6613 | if (dev->reg_state == NETREG_UNINITIALIZED) { |
6603 | kfree((char *)dev - dev->padded); | 6614 | kfree((char *)dev - dev->padded); |
6604 | return; | 6615 | return; |
6605 | } | 6616 | } |
6606 | 6617 | ||
6607 | BUG_ON(dev->reg_state != NETREG_UNREGISTERED); | 6618 | BUG_ON(dev->reg_state != NETREG_UNREGISTERED); |
6608 | dev->reg_state = NETREG_RELEASED; | 6619 | dev->reg_state = NETREG_RELEASED; |
6609 | 6620 | ||
6610 | /* will free via device release */ | 6621 | /* will free via device release */ |
6611 | put_device(&dev->dev); | 6622 | put_device(&dev->dev); |
6612 | } | 6623 | } |
6613 | EXPORT_SYMBOL(free_netdev); | 6624 | EXPORT_SYMBOL(free_netdev); |
6614 | 6625 | ||
6615 | /** | 6626 | /** |
6616 | * synchronize_net - Synchronize with packet receive processing | 6627 | * synchronize_net - Synchronize with packet receive processing |
6617 | * | 6628 | * |
6618 | * Wait for packets currently being received to be done. | 6629 | * Wait for packets currently being received to be done. |
6619 | * Does not block later packets from starting. | 6630 | * Does not block later packets from starting. |
6620 | */ | 6631 | */ |
6621 | void synchronize_net(void) | 6632 | void synchronize_net(void) |
6622 | { | 6633 | { |
6623 | might_sleep(); | 6634 | might_sleep(); |
6624 | if (rtnl_is_locked()) | 6635 | if (rtnl_is_locked()) |
6625 | synchronize_rcu_expedited(); | 6636 | synchronize_rcu_expedited(); |
6626 | else | 6637 | else |
6627 | synchronize_rcu(); | 6638 | synchronize_rcu(); |
6628 | } | 6639 | } |
6629 | EXPORT_SYMBOL(synchronize_net); | 6640 | EXPORT_SYMBOL(synchronize_net); |
6630 | 6641 | ||
6631 | /** | 6642 | /** |
6632 | * unregister_netdevice_queue - remove device from the kernel | 6643 | * unregister_netdevice_queue - remove device from the kernel |
6633 | * @dev: device | 6644 | * @dev: device |
6634 | * @head: list | 6645 | * @head: list |
6635 | * | 6646 | * |
6636 | * This function shuts down a device interface and removes it | 6647 | * This function shuts down a device interface and removes it |
6637 | * from the kernel tables. | 6648 | * from the kernel tables. |
6638 | * If head not NULL, device is queued to be unregistered later. | 6649 | * If head not NULL, device is queued to be unregistered later. |
6639 | * | 6650 | * |
6640 | * Callers must hold the rtnl semaphore. You may want | 6651 | * Callers must hold the rtnl semaphore. You may want |
6641 | * unregister_netdev() instead of this. | 6652 | * unregister_netdev() instead of this. |
6642 | */ | 6653 | */ |
6643 | 6654 | ||
6644 | void unregister_netdevice_queue(struct net_device *dev, struct list_head *head) | 6655 | void unregister_netdevice_queue(struct net_device *dev, struct list_head *head) |
6645 | { | 6656 | { |
6646 | ASSERT_RTNL(); | 6657 | ASSERT_RTNL(); |
6647 | 6658 | ||
6648 | if (head) { | 6659 | if (head) { |
6649 | list_move_tail(&dev->unreg_list, head); | 6660 | list_move_tail(&dev->unreg_list, head); |
6650 | } else { | 6661 | } else { |
6651 | rollback_registered(dev); | 6662 | rollback_registered(dev); |
6652 | /* Finish processing unregister after unlock */ | 6663 | /* Finish processing unregister after unlock */ |
6653 | net_set_todo(dev); | 6664 | net_set_todo(dev); |
6654 | } | 6665 | } |
6655 | } | 6666 | } |
6656 | EXPORT_SYMBOL(unregister_netdevice_queue); | 6667 | EXPORT_SYMBOL(unregister_netdevice_queue); |
6657 | 6668 | ||
6658 | /** | 6669 | /** |
6659 | * unregister_netdevice_many - unregister many devices | 6670 | * unregister_netdevice_many - unregister many devices |
6660 | * @head: list of devices | 6671 | * @head: list of devices |
6661 | */ | 6672 | */ |
6662 | void unregister_netdevice_many(struct list_head *head) | 6673 | void unregister_netdevice_many(struct list_head *head) |
6663 | { | 6674 | { |
6664 | struct net_device *dev; | 6675 | struct net_device *dev; |
6665 | 6676 | ||
6666 | if (!list_empty(head)) { | 6677 | if (!list_empty(head)) { |
6667 | rollback_registered_many(head); | 6678 | rollback_registered_many(head); |
6668 | list_for_each_entry(dev, head, unreg_list) | 6679 | list_for_each_entry(dev, head, unreg_list) |
6669 | net_set_todo(dev); | 6680 | net_set_todo(dev); |
6670 | } | 6681 | } |
6671 | } | 6682 | } |
6672 | EXPORT_SYMBOL(unregister_netdevice_many); | 6683 | EXPORT_SYMBOL(unregister_netdevice_many); |
6673 | 6684 | ||
6674 | /** | 6685 | /** |
6675 | * unregister_netdev - remove device from the kernel | 6686 | * unregister_netdev - remove device from the kernel |
6676 | * @dev: device | 6687 | * @dev: device |
6677 | * | 6688 | * |
6678 | * This function shuts down a device interface and removes it | 6689 | * This function shuts down a device interface and removes it |
6679 | * from the kernel tables. | 6690 | * from the kernel tables. |
6680 | * | 6691 | * |
6681 | * This is just a wrapper for unregister_netdevice that takes | 6692 | * This is just a wrapper for unregister_netdevice that takes |
6682 | * the rtnl semaphore. In general you want to use this and not | 6693 | * the rtnl semaphore. In general you want to use this and not |
6683 | * unregister_netdevice. | 6694 | * unregister_netdevice. |
6684 | */ | 6695 | */ |
6685 | void unregister_netdev(struct net_device *dev) | 6696 | void unregister_netdev(struct net_device *dev) |
6686 | { | 6697 | { |
6687 | rtnl_lock(); | 6698 | rtnl_lock(); |
6688 | unregister_netdevice(dev); | 6699 | unregister_netdevice(dev); |
6689 | rtnl_unlock(); | 6700 | rtnl_unlock(); |
6690 | } | 6701 | } |
6691 | EXPORT_SYMBOL(unregister_netdev); | 6702 | EXPORT_SYMBOL(unregister_netdev); |
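/*
 * The register/unregister pair above gives the usual driver lifecycle;
 * a minimal sketch, assuming the hypothetical my_alloc() from earlier.
 */
static struct net_device *my_dev;

static int __init my_init(void)
{
	int err;

	my_dev = my_alloc();
	if (!my_dev)
		return -ENOMEM;

	err = register_netdev(my_dev);	/* takes rtnl, expands "myeth%d" */
	if (err)
		free_netdev(my_dev);	/* never registered, plain free */
	return err;
}
module_init(my_init);

static void __exit my_exit(void)
{
	unregister_netdev(my_dev);	/* takes rtnl; todo work runs at unlock */
	free_netdev(my_dev);		/* reg_state is UNREGISTERED by now */
}
module_exit(my_exit);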
6692 | 6703 | ||
6693 | /** | 6704 | /** |
6694 | * dev_change_net_namespace - move device to different network namespace | 6705 | * dev_change_net_namespace - move device to different network namespace |
6695 | * @dev: device | 6706 | * @dev: device |
6696 | * @net: network namespace | 6707 | * @net: network namespace |
6697 | * @pat: If not NULL name pattern to try if the current device name | 6708 | * @pat: If not NULL name pattern to try if the current device name |
6698 | * is already taken in the destination network namespace. | 6709 | * is already taken in the destination network namespace. |
6699 | * | 6710 | * |
6700 | * This function shuts down a device interface and moves it | 6711 | * This function shuts down a device interface and moves it |
6701 | * to a new network namespace. On success 0 is returned, on | 6712 | * to a new network namespace. On success 0 is returned, on |
6702 | * a failure a negative errno code is returned. | 6713 | * a failure a negative errno code is returned. |
6703 | * | 6714 | * |
6704 | * Callers must hold the rtnl semaphore. | 6715 | * Callers must hold the rtnl semaphore. |
6705 | */ | 6716 | */ |
6706 | 6717 | ||
6707 | int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) | 6718 | int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) |
6708 | { | 6719 | { |
6709 | int err; | 6720 | int err; |
6710 | 6721 | ||
6711 | ASSERT_RTNL(); | 6722 | ASSERT_RTNL(); |
6712 | 6723 | ||
6713 | /* Don't allow namespace local devices to be moved. */ | 6724 | /* Don't allow namespace local devices to be moved. */ |
6714 | err = -EINVAL; | 6725 | err = -EINVAL; |
6715 | if (dev->features & NETIF_F_NETNS_LOCAL) | 6726 | if (dev->features & NETIF_F_NETNS_LOCAL) |
6716 | goto out; | 6727 | goto out; |
6717 | 6728 | ||
6718 | /* Ensure the device has been registered */ | 6729 | /* Ensure the device has been registered */ |
6719 | if (dev->reg_state != NETREG_REGISTERED) | 6730 | if (dev->reg_state != NETREG_REGISTERED) |
6720 | goto out; | 6731 | goto out; |
6721 | 6732 | ||
6722 | /* Get out if there is nothing to do */ | 6733 | /* Get out if there is nothing to do */ |
6723 | err = 0; | 6734 | err = 0; |
6724 | if (net_eq(dev_net(dev), net)) | 6735 | if (net_eq(dev_net(dev), net)) |
6725 | goto out; | 6736 | goto out; |
6726 | 6737 | ||
6727 | /* Pick the destination device name, and ensure | 6738 | /* Pick the destination device name, and ensure |
6728 | * we can use it in the destination network namespace. | 6739 | * we can use it in the destination network namespace. |
6729 | */ | 6740 | */ |
6730 | err = -EEXIST; | 6741 | err = -EEXIST; |
6731 | if (__dev_get_by_name(net, dev->name)) { | 6742 | if (__dev_get_by_name(net, dev->name)) { |
6732 | /* We get here if we can't use the current device name */ | 6743 | /* We get here if we can't use the current device name */ |
6733 | if (!pat) | 6744 | if (!pat) |
6734 | goto out; | 6745 | goto out; |
6735 | if (dev_get_valid_name(net, dev, pat) < 0) | 6746 | if (dev_get_valid_name(net, dev, pat) < 0) |
6736 | goto out; | 6747 | goto out; |
6737 | } | 6748 | } |
6738 | 6749 | ||
6739 | /* | 6750 | /* |
6740 | * And now a mini version of register_netdevice unregister_netdevice. | 6751 | * And now a mini version of register_netdevice unregister_netdevice. |
6741 | */ | 6752 | */ |
6742 | 6753 | ||
6743 | /* If device is running close it first. */ | 6754 | /* If device is running close it first. */ |
6744 | dev_close(dev); | 6755 | dev_close(dev); |
6745 | 6756 | ||
6746 | /* And unlink it from device chain */ | 6757 | /* And unlink it from device chain */ |
6747 | err = -ENODEV; | 6758 | err = -ENODEV; |
6748 | unlist_netdevice(dev); | 6759 | unlist_netdevice(dev); |
6749 | 6760 | ||
6750 | synchronize_net(); | 6761 | synchronize_net(); |
6751 | 6762 | ||
6752 | /* Shutdown queueing discipline. */ | 6763 | /* Shutdown queueing discipline. */ |
6753 | dev_shutdown(dev); | 6764 | dev_shutdown(dev); |
6754 | 6765 | ||
6755 | /* Notify protocols, that we are about to destroy | 6766 | /* Notify protocols, that we are about to destroy |
6756 | this device. They should clean all the things. | 6767 | this device. They should clean all the things. |
6757 | 6768 | ||
6758 | Note that dev->reg_state stays at NETREG_REGISTERED. | 6769 | Note that dev->reg_state stays at NETREG_REGISTERED. |
6759 | This is wanted because this way 8021q and macvlan know | 6770 | This is wanted because this way 8021q and macvlan know |
6760 | the device is just moving and can keep their slaves up. | 6771 | the device is just moving and can keep their slaves up. |
6761 | */ | 6772 | */ |
6762 | call_netdevice_notifiers(NETDEV_UNREGISTER, dev); | 6773 | call_netdevice_notifiers(NETDEV_UNREGISTER, dev); |
6763 | rcu_barrier(); | 6774 | rcu_barrier(); |
6764 | call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); | 6775 | call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); |
6765 | rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); | 6776 | rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); |
6766 | 6777 | ||
6767 | /* | 6778 | /* |
6768 | * Flush the unicast and multicast chains | 6779 | * Flush the unicast and multicast chains |
6769 | */ | 6780 | */ |
6770 | dev_uc_flush(dev); | 6781 | dev_uc_flush(dev); |
6771 | dev_mc_flush(dev); | 6782 | dev_mc_flush(dev); |
6772 | 6783 | ||
6773 | /* Send a netdev-removed uevent to the old namespace */ | 6784 | /* Send a netdev-removed uevent to the old namespace */ |
6774 | kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE); | 6785 | kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE); |
6775 | 6786 | ||
6776 | /* Actually switch the network namespace */ | 6787 | /* Actually switch the network namespace */ |
6777 | dev_net_set(dev, net); | 6788 | dev_net_set(dev, net); |
6778 | 6789 | ||
6779 | /* If there is an ifindex conflict assign a new one */ | 6790 | /* If there is an ifindex conflict assign a new one */ |
6780 | if (__dev_get_by_index(net, dev->ifindex)) { | 6791 | if (__dev_get_by_index(net, dev->ifindex)) { |
6781 | int iflink = (dev->iflink == dev->ifindex); | 6792 | int iflink = (dev->iflink == dev->ifindex); |
6782 | dev->ifindex = dev_new_index(net); | 6793 | dev->ifindex = dev_new_index(net); |
6783 | if (iflink) | 6794 | if (iflink) |
6784 | dev->iflink = dev->ifindex; | 6795 | dev->iflink = dev->ifindex; |
6785 | } | 6796 | } |
6786 | 6797 | ||
6787 | /* Send a netdev-add uevent to the new namespace */ | 6798 | /* Send a netdev-add uevent to the new namespace */ |
6788 | kobject_uevent(&dev->dev.kobj, KOBJ_ADD); | 6799 | kobject_uevent(&dev->dev.kobj, KOBJ_ADD); |
6789 | 6800 | ||
6790 | /* Fixup kobjects */ | 6801 | /* Fixup kobjects */ |
6791 | err = device_rename(&dev->dev, dev->name); | 6802 | err = device_rename(&dev->dev, dev->name); |
6792 | WARN_ON(err); | 6803 | WARN_ON(err); |
6793 | 6804 | ||
6794 | /* Add the device back in the hashes */ | 6805 | /* Add the device back in the hashes */ |
6795 | list_netdevice(dev); | 6806 | list_netdevice(dev); |
6796 | 6807 | ||
6797 | /* Notify protocols, that a new device appeared. */ | 6808 | /* Notify protocols, that a new device appeared. */ |
6798 | call_netdevice_notifiers(NETDEV_REGISTER, dev); | 6809 | call_netdevice_notifiers(NETDEV_REGISTER, dev); |
6799 | 6810 | ||
6800 | /* | 6811 | /* |
6801 | * Prevent userspace races by waiting until the network | 6812 | * Prevent userspace races by waiting until the network |
6802 | * device is fully setup before sending notifications. | 6813 | * device is fully setup before sending notifications. |
6803 | */ | 6814 | */ |
6804 | rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); | 6815 | rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); |
6805 | 6816 | ||
6806 | synchronize_net(); | 6817 | synchronize_net(); |
6807 | err = 0; | 6818 | err = 0; |
6808 | out: | 6819 | out: |
6809 | return err; | 6820 | return err; |
6810 | } | 6821 | } |
6811 | EXPORT_SYMBOL_GPL(dev_change_net_namespace); | 6822 | EXPORT_SYMBOL_GPL(dev_change_net_namespace); |
6812 | 6823 | ||
6813 | static int dev_cpu_callback(struct notifier_block *nfb, | 6824 | static int dev_cpu_callback(struct notifier_block *nfb, |
6814 | unsigned long action, | 6825 | unsigned long action, |
6815 | void *ocpu) | 6826 | void *ocpu) |
6816 | { | 6827 | { |
6817 | struct sk_buff **list_skb; | 6828 | struct sk_buff **list_skb; |
6818 | struct sk_buff *skb; | 6829 | struct sk_buff *skb; |
6819 | unsigned int cpu, oldcpu = (unsigned long)ocpu; | 6830 | unsigned int cpu, oldcpu = (unsigned long)ocpu; |
6820 | struct softnet_data *sd, *oldsd; | 6831 | struct softnet_data *sd, *oldsd; |
6821 | 6832 | ||
6822 | if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) | 6833 | if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) |
6823 | return NOTIFY_OK; | 6834 | return NOTIFY_OK; |
6824 | 6835 | ||
6825 | local_irq_disable(); | 6836 | local_irq_disable(); |
6826 | cpu = smp_processor_id(); | 6837 | cpu = smp_processor_id(); |
6827 | sd = &per_cpu(softnet_data, cpu); | 6838 | sd = &per_cpu(softnet_data, cpu); |
6828 | oldsd = &per_cpu(softnet_data, oldcpu); | 6839 | oldsd = &per_cpu(softnet_data, oldcpu); |
6829 | 6840 | ||
6830 | /* Find end of our completion_queue. */ | 6841 | /* Find end of our completion_queue. */ |
6831 | list_skb = &sd->completion_queue; | 6842 | list_skb = &sd->completion_queue; |
6832 | while (*list_skb) | 6843 | while (*list_skb) |
6833 | list_skb = &(*list_skb)->next; | 6844 | list_skb = &(*list_skb)->next; |
6834 | /* Append completion queue from offline CPU. */ | 6845 | /* Append completion queue from offline CPU. */ |
6835 | *list_skb = oldsd->completion_queue; | 6846 | *list_skb = oldsd->completion_queue; |
6836 | oldsd->completion_queue = NULL; | 6847 | oldsd->completion_queue = NULL; |
6837 | 6848 | ||
6838 | /* Append output queue from offline CPU. */ | 6849 | /* Append output queue from offline CPU. */ |
6839 | if (oldsd->output_queue) { | 6850 | if (oldsd->output_queue) { |
6840 | *sd->output_queue_tailp = oldsd->output_queue; | 6851 | *sd->output_queue_tailp = oldsd->output_queue; |
6841 | sd->output_queue_tailp = oldsd->output_queue_tailp; | 6852 | sd->output_queue_tailp = oldsd->output_queue_tailp; |
6842 | oldsd->output_queue = NULL; | 6853 | oldsd->output_queue = NULL; |
6843 | oldsd->output_queue_tailp = &oldsd->output_queue; | 6854 | oldsd->output_queue_tailp = &oldsd->output_queue; |
6844 | } | 6855 | } |
6845 | /* Append NAPI poll list from offline CPU. */ | 6856 | /* Append NAPI poll list from offline CPU. */ |
6846 | if (!list_empty(&oldsd->poll_list)) { | 6857 | if (!list_empty(&oldsd->poll_list)) { |
6847 | list_splice_init(&oldsd->poll_list, &sd->poll_list); | 6858 | list_splice_init(&oldsd->poll_list, &sd->poll_list); |
6848 | raise_softirq_irqoff(NET_RX_SOFTIRQ); | 6859 | raise_softirq_irqoff(NET_RX_SOFTIRQ); |
6849 | } | 6860 | } |
6850 | 6861 | ||
6851 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | 6862 | raise_softirq_irqoff(NET_TX_SOFTIRQ); |
6852 | local_irq_enable(); | 6863 | local_irq_enable(); |
6853 | 6864 | ||
6854 | /* Process offline CPU's input_pkt_queue */ | 6865 | /* Process offline CPU's input_pkt_queue */ |
6855 | while ((skb = __skb_dequeue(&oldsd->process_queue))) { | 6866 | while ((skb = __skb_dequeue(&oldsd->process_queue))) { |
6856 | netif_rx(skb); | 6867 | netif_rx(skb); |
6857 | input_queue_head_incr(oldsd); | 6868 | input_queue_head_incr(oldsd); |
6858 | } | 6869 | } |
6859 | while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { | 6870 | while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { |
6860 | netif_rx(skb); | 6871 | netif_rx(skb); |
6861 | input_queue_head_incr(oldsd); | 6872 | input_queue_head_incr(oldsd); |
6862 | } | 6873 | } |
6863 | 6874 | ||
6864 | return NOTIFY_OK; | 6875 | return NOTIFY_OK; |
6865 | } | 6876 | } |
6866 | 6877 | ||
6867 | 6878 | ||
6868 | /** | 6879 | /** |
6869 | * netdev_increment_features - increment feature set by one | 6880 | * netdev_increment_features - increment feature set by one |
6870 | * @all: current feature set | 6881 | * @all: current feature set |
6871 | * @one: new feature set | 6882 | * @one: new feature set |
6872 | * @mask: mask feature set | 6883 | * @mask: mask feature set |
6873 | * | 6884 | * |
6874 | * Computes a new feature set after adding a device with feature set | 6885 | * Computes a new feature set after adding a device with feature set |
6875 | * @one to the master device with current feature set @all. Will not | 6886 | * @one to the master device with current feature set @all. Will not |
6876 | * enable anything that is off in @mask. Returns the new feature set. | 6887 | * enable anything that is off in @mask. Returns the new feature set. |
6877 | */ | 6888 | */ |
6878 | netdev_features_t netdev_increment_features(netdev_features_t all, | 6889 | netdev_features_t netdev_increment_features(netdev_features_t all, |
6879 | netdev_features_t one, netdev_features_t mask) | 6890 | netdev_features_t one, netdev_features_t mask) |
6880 | { | 6891 | { |
6881 | if (mask & NETIF_F_GEN_CSUM) | 6892 | if (mask & NETIF_F_GEN_CSUM) |
6882 | mask |= NETIF_F_ALL_CSUM; | 6893 | mask |= NETIF_F_ALL_CSUM; |
6883 | mask |= NETIF_F_VLAN_CHALLENGED; | 6894 | mask |= NETIF_F_VLAN_CHALLENGED; |
6884 | 6895 | ||
6885 | all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask; | 6896 | all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask; |
6886 | all &= one | ~NETIF_F_ALL_FOR_ALL; | 6897 | all &= one | ~NETIF_F_ALL_FOR_ALL; |
6887 | 6898 | ||
6888 | /* If one device supports hw checksumming, set for all. */ | 6899 | /* If one device supports hw checksumming, set for all. */ |
6889 | if (all & NETIF_F_GEN_CSUM) | 6900 | if (all & NETIF_F_GEN_CSUM) |
6890 | all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); | 6901 | all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); |
6891 | 6902 | ||
6892 | return all; | 6903 | return all; |
6893 | } | 6904 | } |
6894 | EXPORT_SYMBOL(netdev_increment_features); | 6905 | EXPORT_SYMBOL(netdev_increment_features); |
6895 | 6906 | ||
6896 | static struct hlist_head *netdev_create_hash(void) | 6907 | static struct hlist_head *netdev_create_hash(void) |
6897 | { | 6908 | { |
6898 | int i; | 6909 | int i; |
6899 | struct hlist_head *hash; | 6910 | struct hlist_head *hash; |
6900 | 6911 | ||
6901 | hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL); | 6912 | hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL); |
6902 | if (hash != NULL) | 6913 | if (hash != NULL) |
6903 | for (i = 0; i < NETDEV_HASHENTRIES; i++) | 6914 | for (i = 0; i < NETDEV_HASHENTRIES; i++) |
6904 | INIT_HLIST_HEAD(&hash[i]); | 6915 | INIT_HLIST_HEAD(&hash[i]); |
6905 | 6916 | ||
6906 | return hash; | 6917 | return hash; |
6907 | } | 6918 | } |
6908 | 6919 | ||
6909 | /* Initialize per network namespace state */ | 6920 | /* Initialize per network namespace state */ |
6910 | static int __net_init netdev_init(struct net *net) | 6921 | static int __net_init netdev_init(struct net *net) |
6911 | { | 6922 | { |
6912 | if (net != &init_net) | 6923 | if (net != &init_net) |
6913 | INIT_LIST_HEAD(&net->dev_base_head); | 6924 | INIT_LIST_HEAD(&net->dev_base_head); |
6914 | 6925 | ||
6915 | net->dev_name_head = netdev_create_hash(); | 6926 | net->dev_name_head = netdev_create_hash(); |
6916 | if (net->dev_name_head == NULL) | 6927 | if (net->dev_name_head == NULL) |
6917 | goto err_name; | 6928 | goto err_name; |
6918 | 6929 | ||
6919 | net->dev_index_head = netdev_create_hash(); | 6930 | net->dev_index_head = netdev_create_hash(); |
6920 | if (net->dev_index_head == NULL) | 6931 | if (net->dev_index_head == NULL) |
6921 | goto err_idx; | 6932 | goto err_idx; |
6922 | 6933 | ||
6923 | return 0; | 6934 | return 0; |
6924 | 6935 | ||
6925 | err_idx: | 6936 | err_idx: |
6926 | kfree(net->dev_name_head); | 6937 | kfree(net->dev_name_head); |
6927 | err_name: | 6938 | err_name: |
6928 | return -ENOMEM; | 6939 | return -ENOMEM; |
6929 | } | 6940 | } |
6930 | 6941 | ||
6931 | /** | 6942 | /** |
6932 | * netdev_drivername - network driver for the device | 6943 | * netdev_drivername - network driver for the device |
6933 | * @dev: network device | 6944 | * @dev: network device |
6934 | * | 6945 | * |
6935 | * Determine network driver for device. | 6946 | * Determine network driver for device. |
6936 | */ | 6947 | */ |
6937 | const char *netdev_drivername(const struct net_device *dev) | 6948 | const char *netdev_drivername(const struct net_device *dev) |
6938 | { | 6949 | { |
6939 | const struct device_driver *driver; | 6950 | const struct device_driver *driver; |
6940 | const struct device *parent; | 6951 | const struct device *parent; |
6941 | const char *empty = ""; | 6952 | const char *empty = ""; |
6942 | 6953 | ||
6943 | parent = dev->dev.parent; | 6954 | parent = dev->dev.parent; |
6944 | if (!parent) | 6955 | if (!parent) |
6945 | return empty; | 6956 | return empty; |
6946 | 6957 | ||
6947 | driver = parent->driver; | 6958 | driver = parent->driver; |
6948 | if (driver && driver->name) | 6959 | if (driver && driver->name) |
6949 | return driver->name; | 6960 | return driver->name; |
6950 | return empty; | 6961 | return empty; |
6951 | } | 6962 | } |
6952 | 6963 | ||
6953 | static int __netdev_printk(const char *level, const struct net_device *dev, | 6964 | static int __netdev_printk(const char *level, const struct net_device *dev, |
6954 | struct va_format *vaf) | 6965 | struct va_format *vaf) |
6955 | { | 6966 | { |
6956 | int r; | 6967 | int r; |
6957 | 6968 | ||
6958 | if (dev && dev->dev.parent) { | 6969 | if (dev && dev->dev.parent) { |
6959 | r = dev_printk_emit(level[1] - '0', | 6970 | r = dev_printk_emit(level[1] - '0', |
6960 | dev->dev.parent, | 6971 | dev->dev.parent, |
6961 | "%s %s %s: %pV", | 6972 | "%s %s %s: %pV", |
6962 | dev_driver_string(dev->dev.parent), | 6973 | dev_driver_string(dev->dev.parent), |
6963 | dev_name(dev->dev.parent), | 6974 | dev_name(dev->dev.parent), |
6964 | netdev_name(dev), vaf); | 6975 | netdev_name(dev), vaf); |
6965 | } else if (dev) { | 6976 | } else if (dev) { |
6966 | r = printk("%s%s: %pV", level, netdev_name(dev), vaf); | 6977 | r = printk("%s%s: %pV", level, netdev_name(dev), vaf); |
6967 | } else { | 6978 | } else { |
6968 | r = printk("%s(NULL net_device): %pV", level, vaf); | 6979 | r = printk("%s(NULL net_device): %pV", level, vaf); |
6969 | } | 6980 | } |
6970 | 6981 | ||
6971 | return r; | 6982 | return r; |
6972 | } | 6983 | } |
6973 | 6984 | ||
6974 | int netdev_printk(const char *level, const struct net_device *dev, | 6985 | int netdev_printk(const char *level, const struct net_device *dev, |
6975 | const char *format, ...) | 6986 | const char *format, ...) |
6976 | { | 6987 | { |
6977 | struct va_format vaf; | 6988 | struct va_format vaf; |
6978 | va_list args; | 6989 | va_list args; |
6979 | int r; | 6990 | int r; |
6980 | 6991 | ||
6981 | va_start(args, format); | 6992 | va_start(args, format); |
6982 | 6993 | ||
6983 | vaf.fmt = format; | 6994 | vaf.fmt = format; |
6984 | vaf.va = &args; | 6995 | vaf.va = &args; |
6985 | 6996 | ||
6986 | r = __netdev_printk(level, dev, &vaf); | 6997 | r = __netdev_printk(level, dev, &vaf); |
6987 | 6998 | ||
6988 | va_end(args); | 6999 | va_end(args); |
6989 | 7000 | ||
6990 | return r; | 7001 | return r; |
6991 | } | 7002 | } |
6992 | EXPORT_SYMBOL(netdev_printk); | 7003 | EXPORT_SYMBOL(netdev_printk); |
6993 | 7004 | ||
6994 | #define define_netdev_printk_level(func, level) \ | 7005 | #define define_netdev_printk_level(func, level) \ |
6995 | int func(const struct net_device *dev, const char *fmt, ...) \ | 7006 | int func(const struct net_device *dev, const char *fmt, ...) \ |
6996 | { \ | 7007 | { \ |
6997 | int r; \ | 7008 | int r; \ |
6998 | struct va_format vaf; \ | 7009 | struct va_format vaf; \ |
6999 | va_list args; \ | 7010 | va_list args; \ |
7000 | \ | 7011 | \ |
7001 | va_start(args, fmt); \ | 7012 | va_start(args, fmt); \ |
7002 | \ | 7013 | \ |
7003 | vaf.fmt = fmt; \ | 7014 | vaf.fmt = fmt; \ |
7004 | vaf.va = &args; \ | 7015 | vaf.va = &args; \ |
7005 | \ | 7016 | \ |
7006 | r = __netdev_printk(level, dev, &vaf); \ | 7017 | r = __netdev_printk(level, dev, &vaf); \ |
7007 | \ | 7018 | \ |
7008 | va_end(args); \ | 7019 | va_end(args); \ |
7009 | \ | 7020 | \ |
7010 | return r; \ | 7021 | return r; \ |
7011 | } \ | 7022 | } \ |
7012 | EXPORT_SYMBOL(func); | 7023 | EXPORT_SYMBOL(func); |
7013 | 7024 | ||
7014 | define_netdev_printk_level(netdev_emerg, KERN_EMERG); | 7025 | define_netdev_printk_level(netdev_emerg, KERN_EMERG); |
7015 | define_netdev_printk_level(netdev_alert, KERN_ALERT); | 7026 | define_netdev_printk_level(netdev_alert, KERN_ALERT); |
7016 | define_netdev_printk_level(netdev_crit, KERN_CRIT); | 7027 | define_netdev_printk_level(netdev_crit, KERN_CRIT); |
7017 | define_netdev_printk_level(netdev_err, KERN_ERR); | 7028 | define_netdev_printk_level(netdev_err, KERN_ERR); |
7018 | define_netdev_printk_level(netdev_warn, KERN_WARNING); | 7029 | define_netdev_printk_level(netdev_warn, KERN_WARNING); |
7019 | define_netdev_printk_level(netdev_notice, KERN_NOTICE); | 7030 | define_netdev_printk_level(netdev_notice, KERN_NOTICE); |
7020 | define_netdev_printk_level(netdev_info, KERN_INFO); | 7031 | define_netdev_printk_level(netdev_info, KERN_INFO); |
7021 | 7032 | ||
7022 | static void __net_exit netdev_exit(struct net *net) | 7033 | static void __net_exit netdev_exit(struct net *net) |
7023 | { | 7034 | { |
7024 | kfree(net->dev_name_head); | 7035 | kfree(net->dev_name_head); |
7025 | kfree(net->dev_index_head); | 7036 | kfree(net->dev_index_head); |
7026 | } | 7037 | } |
7027 | 7038 | ||
7028 | static struct pernet_operations __net_initdata netdev_net_ops = { | 7039 | static struct pernet_operations __net_initdata netdev_net_ops = { |
7029 | .init = netdev_init, | 7040 | .init = netdev_init, |
7030 | .exit = netdev_exit, | 7041 | .exit = netdev_exit, |
7031 | }; | 7042 | }; |
7032 | 7043 | ||
7033 | static void __net_exit default_device_exit(struct net *net) | 7044 | static void __net_exit default_device_exit(struct net *net) |
7034 | { | 7045 | { |
7035 | struct net_device *dev, *aux; | 7046 | struct net_device *dev, *aux; |
7036 | /* | 7047 | /* |
7037 | * Push all migratable network devices back to the | 7048 | * Push all migratable network devices back to the |
7038 | * initial network namespace | 7049 | * initial network namespace |
7039 | */ | 7050 | */ |
7040 | rtnl_lock(); | 7051 | rtnl_lock(); |
7041 | for_each_netdev_safe(net, dev, aux) { | 7052 | for_each_netdev_safe(net, dev, aux) { |
7042 | int err; | 7053 | int err; |
7043 | char fb_name[IFNAMSIZ]; | 7054 | char fb_name[IFNAMSIZ]; |
7044 | 7055 | ||
7045 | /* Ignore unmoveable devices (i.e. loopback) */ | 7056 | /* Ignore unmoveable devices (i.e. loopback) */ |
7046 | if (dev->features & NETIF_F_NETNS_LOCAL) | 7057 | if (dev->features & NETIF_F_NETNS_LOCAL) |
7047 | continue; | 7058 | continue; |
7048 | 7059 | ||
7049 | /* Leave virtual devices for the generic cleanup */ | 7060 | /* Leave virtual devices for the generic cleanup */ |
7050 | if (dev->rtnl_link_ops) | 7061 | if (dev->rtnl_link_ops) |
7051 | continue; | 7062 | continue; |
7052 | 7063 | ||
7053 | /* Push remaining network devices to init_net */ | 7064 | /* Push remaining network devices to init_net */ |
7054 | snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); | 7065 | snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); |
7055 | err = dev_change_net_namespace(dev, &init_net, fb_name); | 7066 | err = dev_change_net_namespace(dev, &init_net, fb_name); |
7056 | if (err) { | 7067 | if (err) { |
7057 | pr_emerg("%s: failed to move %s to init_net: %d\n", | 7068 | pr_emerg("%s: failed to move %s to init_net: %d\n", |
7058 | __func__, dev->name, err); | 7069 | __func__, dev->name, err); |
7059 | BUG(); | 7070 | BUG(); |
7060 | } | 7071 | } |
7061 | } | 7072 | } |
7062 | rtnl_unlock(); | 7073 | rtnl_unlock(); |
7063 | } | 7074 | } |
7064 | 7075 | ||
7065 | static void __net_exit default_device_exit_batch(struct list_head *net_list) | 7076 | static void __net_exit default_device_exit_batch(struct list_head *net_list) |
7066 | { | 7077 | { |
7067 | /* At exit all network devices must be removed from a network | 7078 | /* At exit all network devices must be removed from a network |
7068 | * namespace. Do this in the reverse order of registration. | 7079 | * namespace. Do this in the reverse order of registration. |
7069 | * Do this across as many network namespaces as possible to | 7080 | * Do this across as many network namespaces as possible to |
7070 | * improve batching efficiency. | 7081 | * improve batching efficiency. |
7071 | */ | 7082 | */ |
7072 | struct net_device *dev; | 7083 | struct net_device *dev; |
7073 | struct net *net; | 7084 | struct net *net; |
7074 | LIST_HEAD(dev_kill_list); | 7085 | LIST_HEAD(dev_kill_list); |
7075 | 7086 | ||
7076 | rtnl_lock(); | 7087 | rtnl_lock(); |
7077 | list_for_each_entry(net, net_list, exit_list) { | 7088 | list_for_each_entry(net, net_list, exit_list) { |
7078 | for_each_netdev_reverse(net, dev) { | 7089 | for_each_netdev_reverse(net, dev) { |
7079 | if (dev->rtnl_link_ops) | 7090 | if (dev->rtnl_link_ops) |
7080 | dev->rtnl_link_ops->dellink(dev, &dev_kill_list); | 7091 | dev->rtnl_link_ops->dellink(dev, &dev_kill_list); |
7081 | else | 7092 | else |
7082 | unregister_netdevice_queue(dev, &dev_kill_list); | 7093 | unregister_netdevice_queue(dev, &dev_kill_list); |
7083 | } | 7094 | } |
7084 | } | 7095 | } |
7085 | unregister_netdevice_many(&dev_kill_list); | 7096 | unregister_netdevice_many(&dev_kill_list); |
7086 | list_del(&dev_kill_list); | 7097 | list_del(&dev_kill_list); |
7087 | rtnl_unlock(); | 7098 | rtnl_unlock(); |
7088 | } | 7099 | } |
7089 | 7100 | ||
7090 | static struct pernet_operations __net_initdata default_device_ops = { | 7101 | static struct pernet_operations __net_initdata default_device_ops = { |
7091 | .exit = default_device_exit, | 7102 | .exit = default_device_exit, |
7092 | .exit_batch = default_device_exit_batch, | 7103 | .exit_batch = default_device_exit_batch, |
7093 | }; | 7104 | }; |
7094 | 7105 | ||
7095 | /* | 7106 | /* |
7096 | * Initialize the DEV module. At boot time this walks the device list and | 7107 | * Initialize the DEV module. At boot time this walks the device list and |
7097 | * unhooks any devices that fail to initialise (normally hardware not | 7108 | * unhooks any devices that fail to initialise (normally hardware not |
7098 | * present) and leaves us with a valid list of present and active devices. | 7109 | * present) and leaves us with a valid list of present and active devices. |
7099 | * | 7110 | * |
7100 | */ | 7111 | */ |
7101 | 7112 | ||
7102 | /* | 7113 | /* |
7103 | * This is called single threaded during boot, so no need | 7114 | * This is called single threaded during boot, so no need |
7104 | * to take the rtnl semaphore. | 7115 | * to take the rtnl semaphore. |
7105 | */ | 7116 | */ |
7106 | static int __init net_dev_init(void) | 7117 | static int __init net_dev_init(void) |
7107 | { | 7118 | { |
7108 | int i, rc = -ENOMEM; | 7119 | int i, rc = -ENOMEM; |
7109 | 7120 | ||
7110 | BUG_ON(!dev_boot_phase); | 7121 | BUG_ON(!dev_boot_phase); |
7111 | 7122 | ||
7112 | if (dev_proc_init()) | 7123 | if (dev_proc_init()) |
7113 | goto out; | 7124 | goto out; |
7114 | 7125 | ||
7115 | if (netdev_kobject_init()) | 7126 | if (netdev_kobject_init()) |
7116 | goto out; | 7127 | goto out; |
7117 | 7128 | ||
7118 | INIT_LIST_HEAD(&ptype_all); | 7129 | INIT_LIST_HEAD(&ptype_all); |
7119 | for (i = 0; i < PTYPE_HASH_SIZE; i++) | 7130 | for (i = 0; i < PTYPE_HASH_SIZE; i++) |
7120 | INIT_LIST_HEAD(&ptype_base[i]); | 7131 | INIT_LIST_HEAD(&ptype_base[i]); |
7121 | 7132 | ||
7122 | INIT_LIST_HEAD(&offload_base); | 7133 | INIT_LIST_HEAD(&offload_base); |
7123 | 7134 | ||
7124 | if (register_pernet_subsys(&netdev_net_ops)) | 7135 | if (register_pernet_subsys(&netdev_net_ops)) |
7125 | goto out; | 7136 | goto out; |
7126 | 7137 | ||
7127 | /* | 7138 | /* |
7128 | * Initialise the packet receive queues. | 7139 | * Initialise the packet receive queues. |
7129 | */ | 7140 | */ |
7130 | 7141 | ||
7131 | for_each_possible_cpu(i) { | 7142 | for_each_possible_cpu(i) { |
7132 | struct softnet_data *sd = &per_cpu(softnet_data, i); | 7143 | struct softnet_data *sd = &per_cpu(softnet_data, i); |
7133 | 7144 | ||
7134 | memset(sd, 0, sizeof(*sd)); | 7145 | memset(sd, 0, sizeof(*sd)); |
7135 | skb_queue_head_init(&sd->input_pkt_queue); | 7146 | skb_queue_head_init(&sd->input_pkt_queue); |
7136 | skb_queue_head_init(&sd->process_queue); | 7147 | skb_queue_head_init(&sd->process_queue); |
7137 | sd->completion_queue = NULL; | 7148 | sd->completion_queue = NULL; |
7138 | INIT_LIST_HEAD(&sd->poll_list); | 7149 | INIT_LIST_HEAD(&sd->poll_list); |
7139 | sd->output_queue = NULL; | 7150 | sd->output_queue = NULL; |
7140 | sd->output_queue_tailp = &sd->output_queue; | 7151 | sd->output_queue_tailp = &sd->output_queue; |
7141 | #ifdef CONFIG_RPS | 7152 | #ifdef CONFIG_RPS |
7142 | sd->csd.func = rps_trigger_softirq; | 7153 | sd->csd.func = rps_trigger_softirq; |
7143 | sd->csd.info = sd; | 7154 | sd->csd.info = sd; |
7144 | sd->csd.flags = 0; | 7155 | sd->csd.flags = 0; |
7145 | sd->cpu = i; | 7156 | sd->cpu = i; |
7146 | #endif | 7157 | #endif |
7147 | 7158 | ||
7148 | sd->backlog.poll = process_backlog; | 7159 | sd->backlog.poll = process_backlog; |
7149 | sd->backlog.weight = weight_p; | 7160 | sd->backlog.weight = weight_p; |
7150 | sd->backlog.gro_list = NULL; | 7161 | sd->backlog.gro_list = NULL; |
7151 | sd->backlog.gro_count = 0; | 7162 | sd->backlog.gro_count = 0; |
7152 | } | 7163 | } |
7153 | 7164 | ||
7154 | dev_boot_phase = 0; | 7165 | dev_boot_phase = 0; |
7155 | 7166 | ||
7156 | /* The loopback device is special: if any other network device | 7167 | /* The loopback device is special: if any other network device |
7157 | * is present in a network namespace, the loopback device must | 7168 | * is present in a network namespace, the loopback device must |
7158 | * be present. Since we now dynamically allocate and free the | 7169 | * be present. Since we now dynamically allocate and free the |
7159 | * loopback device, ensure this invariant is maintained by | 7170 | * loopback device, ensure this invariant is maintained by |
7160 | * keeping the loopback device as the first device on the | 7171 | * keeping the loopback device as the first device on the |
7161 | * list of network devices. Ensuring the loopback device | 7172 | * list of network devices. Ensuring the loopback device |
7162 | * is the first device that appears and the last network device | 7173 | * is the first device that appears and the last network device |
7163 | * that disappears. | 7174 | * that disappears. |
7164 | */ | 7175 | */ |
7165 | if (register_pernet_device(&loopback_net_ops)) | 7176 | if (register_pernet_device(&loopback_net_ops)) |
7166 | goto out; | 7177 | goto out; |
7167 | 7178 | ||
7168 | if (register_pernet_device(&default_device_ops)) | 7179 | if (register_pernet_device(&default_device_ops)) |
7169 | goto out; | 7180 | goto out; |
7170 | 7181 | ||
7171 | open_softirq(NET_TX_SOFTIRQ, net_tx_action); | 7182 | open_softirq(NET_TX_SOFTIRQ, net_tx_action); |
7172 | open_softirq(NET_RX_SOFTIRQ, net_rx_action); | 7183 | open_softirq(NET_RX_SOFTIRQ, net_rx_action); |
7173 | 7184 | ||
7174 | hotcpu_notifier(dev_cpu_callback, 0); | 7185 | hotcpu_notifier(dev_cpu_callback, 0); |
7175 | dst_init(); | 7186 | dst_init(); |
7176 | dev_mcast_init(); | 7187 | dev_mcast_init(); |
7177 | rc = 0; | 7188 | rc = 0; |
7178 | out: | 7189 | out: |
7179 | return rc; | 7190 | return rc; |
7180 | } | 7191 | } |
7181 | 7192 | ||
7182 | subsys_initcall(net_dev_init); | 7193 | subsys_initcall(net_dev_init); |
7183 | 7194 |
net/openvswitch/datapath.c
1 | /* | 1 | /* |
2 | * Copyright (c) 2007-2012 Nicira, Inc. | 2 | * Copyright (c) 2007-2012 Nicira, Inc. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or | 4 | * This program is free software; you can redistribute it and/or |
5 | * modify it under the terms of version 2 of the GNU General Public | 5 | * modify it under the terms of version 2 of the GNU General Public |
6 | * License as published by the Free Software Foundation. | 6 | * License as published by the Free Software Foundation. |
7 | * | 7 | * |
8 | * This program is distributed in the hope that it will be useful, but | 8 | * This program is distributed in the hope that it will be useful, but |
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | 9 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
11 | * General Public License for more details. | 11 | * General Public License for more details. |
12 | * | 12 | * |
13 | * You should have received a copy of the GNU General Public License | 13 | * You should have received a copy of the GNU General Public License |
14 | * along with this program; if not, write to the Free Software | 14 | * along with this program; if not, write to the Free Software |
15 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | 15 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
16 | * 02110-1301, USA | 16 | * 02110-1301, USA |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | 19 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
20 | 20 | ||
21 | #include <linux/init.h> | 21 | #include <linux/init.h> |
22 | #include <linux/module.h> | 22 | #include <linux/module.h> |
23 | #include <linux/if_arp.h> | 23 | #include <linux/if_arp.h> |
24 | #include <linux/if_vlan.h> | 24 | #include <linux/if_vlan.h> |
25 | #include <linux/in.h> | 25 | #include <linux/in.h> |
26 | #include <linux/ip.h> | 26 | #include <linux/ip.h> |
27 | #include <linux/jhash.h> | 27 | #include <linux/jhash.h> |
28 | #include <linux/delay.h> | 28 | #include <linux/delay.h> |
29 | #include <linux/time.h> | 29 | #include <linux/time.h> |
30 | #include <linux/etherdevice.h> | 30 | #include <linux/etherdevice.h> |
31 | #include <linux/genetlink.h> | 31 | #include <linux/genetlink.h> |
32 | #include <linux/kernel.h> | 32 | #include <linux/kernel.h> |
33 | #include <linux/kthread.h> | 33 | #include <linux/kthread.h> |
34 | #include <linux/mutex.h> | 34 | #include <linux/mutex.h> |
35 | #include <linux/percpu.h> | 35 | #include <linux/percpu.h> |
36 | #include <linux/rcupdate.h> | 36 | #include <linux/rcupdate.h> |
37 | #include <linux/tcp.h> | 37 | #include <linux/tcp.h> |
38 | #include <linux/udp.h> | 38 | #include <linux/udp.h> |
39 | #include <linux/ethtool.h> | 39 | #include <linux/ethtool.h> |
40 | #include <linux/wait.h> | 40 | #include <linux/wait.h> |
41 | #include <asm/div64.h> | 41 | #include <asm/div64.h> |
42 | #include <linux/highmem.h> | 42 | #include <linux/highmem.h> |
43 | #include <linux/netfilter_bridge.h> | 43 | #include <linux/netfilter_bridge.h> |
44 | #include <linux/netfilter_ipv4.h> | 44 | #include <linux/netfilter_ipv4.h> |
45 | #include <linux/inetdevice.h> | 45 | #include <linux/inetdevice.h> |
46 | #include <linux/list.h> | 46 | #include <linux/list.h> |
47 | #include <linux/openvswitch.h> | 47 | #include <linux/openvswitch.h> |
48 | #include <linux/rculist.h> | 48 | #include <linux/rculist.h> |
49 | #include <linux/dmi.h> | 49 | #include <linux/dmi.h> |
50 | #include <linux/workqueue.h> | 50 | #include <linux/workqueue.h> |
51 | #include <net/genetlink.h> | 51 | #include <net/genetlink.h> |
52 | #include <net/net_namespace.h> | 52 | #include <net/net_namespace.h> |
53 | #include <net/netns/generic.h> | 53 | #include <net/netns/generic.h> |
54 | 54 | ||
55 | #include "datapath.h" | 55 | #include "datapath.h" |
56 | #include "flow.h" | 56 | #include "flow.h" |
57 | #include "vport-internal_dev.h" | 57 | #include "vport-internal_dev.h" |
58 | 58 | ||
59 | /** | 59 | /** |
60 | * struct ovs_net - Per net-namespace data for ovs. | 60 | * struct ovs_net - Per net-namespace data for ovs. |
61 | * @dps: List of datapaths to enable dumping them all out. | 61 | * @dps: List of datapaths to enable dumping them all out. |
62 | * Protected by genl_mutex. | 62 | * Protected by genl_mutex. |
63 | */ | 63 | */ |
64 | struct ovs_net { | 64 | struct ovs_net { |
65 | struct list_head dps; | 65 | struct list_head dps; |
66 | }; | 66 | }; |
67 | 67 | ||
68 | static int ovs_net_id __read_mostly; | 68 | static int ovs_net_id __read_mostly; |
69 | 69 | ||
70 | #define REHASH_FLOW_INTERVAL (10 * 60 * HZ) | 70 | #define REHASH_FLOW_INTERVAL (10 * 60 * HZ) |
71 | static void rehash_flow_table(struct work_struct *work); | 71 | static void rehash_flow_table(struct work_struct *work); |
72 | static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table); | 72 | static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table); |
73 | 73 | ||
74 | /** | 74 | /** |
75 | * DOC: Locking: | 75 | * DOC: Locking: |
76 | * | 76 | * |
77 | * Writes to device state (add/remove datapath, port, set operations on vports, | 77 | * Writes to device state (add/remove datapath, port, set operations on vports, |
78 | * etc.) are protected by RTNL. | 78 | * etc.) are protected by RTNL. |
79 | * | 79 | * |
80 | * Writes to other state (flow table modifications, set miscellaneous datapath | 80 | * Writes to other state (flow table modifications, set miscellaneous datapath |
81 | * parameters, etc.) are protected by genl_mutex. The RTNL lock nests inside | 81 | * parameters, etc.) are protected by genl_mutex. The RTNL lock nests inside |
82 | * genl_mutex. | 82 | * genl_mutex. |
83 | * | 83 | * |
84 | * Reads are protected by RCU. | 84 | * Reads are protected by RCU. |
85 | * | 85 | * |
86 | * There are a few special cases (mostly stats) that have their own | 86 | * There are a few special cases (mostly stats) that have their own |
87 | * synchronization but they nest under all of the above and don't interact with | 87 | * synchronization but they nest under all of the above and don't interact with |
88 | * each other. | 88 | * each other. |
89 | */ | 89 | */ |
90 | 90 | ||
91 | static struct vport *new_vport(const struct vport_parms *); | 91 | static struct vport *new_vport(const struct vport_parms *); |
92 | static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *, | 92 | static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *, |
93 | const struct dp_upcall_info *); | 93 | const struct dp_upcall_info *); |
94 | static int queue_userspace_packet(struct net *, int dp_ifindex, | 94 | static int queue_userspace_packet(struct net *, int dp_ifindex, |
95 | struct sk_buff *, | 95 | struct sk_buff *, |
96 | const struct dp_upcall_info *); | 96 | const struct dp_upcall_info *); |
97 | 97 | ||
98 | /* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */ | 98 | /* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */ |
99 | static struct datapath *get_dp(struct net *net, int dp_ifindex) | 99 | static struct datapath *get_dp(struct net *net, int dp_ifindex) |
100 | { | 100 | { |
101 | struct datapath *dp = NULL; | 101 | struct datapath *dp = NULL; |
102 | struct net_device *dev; | 102 | struct net_device *dev; |
103 | 103 | ||
104 | rcu_read_lock(); | 104 | rcu_read_lock(); |
105 | dev = dev_get_by_index_rcu(net, dp_ifindex); | 105 | dev = dev_get_by_index_rcu(net, dp_ifindex); |
106 | if (dev) { | 106 | if (dev) { |
107 | struct vport *vport = ovs_internal_dev_get_vport(dev); | 107 | struct vport *vport = ovs_internal_dev_get_vport(dev); |
108 | if (vport) | 108 | if (vport) |
109 | dp = vport->dp; | 109 | dp = vport->dp; |
110 | } | 110 | } |
111 | rcu_read_unlock(); | 111 | rcu_read_unlock(); |
112 | 112 | ||
113 | return dp; | 113 | return dp; |
114 | } | 114 | } |
115 | 115 | ||
116 | /* Must be called with rcu_read_lock or RTNL lock. */ | 116 | /* Must be called with rcu_read_lock or RTNL lock. */ |
117 | const char *ovs_dp_name(const struct datapath *dp) | 117 | const char *ovs_dp_name(const struct datapath *dp) |
118 | { | 118 | { |
119 | struct vport *vport = ovs_vport_rtnl_rcu(dp, OVSP_LOCAL); | 119 | struct vport *vport = ovs_vport_rtnl_rcu(dp, OVSP_LOCAL); |
120 | return vport->ops->get_name(vport); | 120 | return vport->ops->get_name(vport); |
121 | } | 121 | } |
122 | 122 | ||
123 | static int get_dpifindex(struct datapath *dp) | 123 | static int get_dpifindex(struct datapath *dp) |
124 | { | 124 | { |
125 | struct vport *local; | 125 | struct vport *local; |
126 | int ifindex; | 126 | int ifindex; |
127 | 127 | ||
128 | rcu_read_lock(); | 128 | rcu_read_lock(); |
129 | 129 | ||
130 | local = ovs_vport_rcu(dp, OVSP_LOCAL); | 130 | local = ovs_vport_rcu(dp, OVSP_LOCAL); |
131 | if (local) | 131 | if (local) |
132 | ifindex = local->ops->get_ifindex(local); | 132 | ifindex = local->ops->get_ifindex(local); |
133 | else | 133 | else |
134 | ifindex = 0; | 134 | ifindex = 0; |
135 | 135 | ||
136 | rcu_read_unlock(); | 136 | rcu_read_unlock(); |
137 | 137 | ||
138 | return ifindex; | 138 | return ifindex; |
139 | } | 139 | } |
140 | 140 | ||
141 | static void destroy_dp_rcu(struct rcu_head *rcu) | 141 | static void destroy_dp_rcu(struct rcu_head *rcu) |
142 | { | 142 | { |
143 | struct datapath *dp = container_of(rcu, struct datapath, rcu); | 143 | struct datapath *dp = container_of(rcu, struct datapath, rcu); |
144 | 144 | ||
145 | ovs_flow_tbl_destroy((__force struct flow_table *)dp->table); | 145 | ovs_flow_tbl_destroy((__force struct flow_table *)dp->table); |
146 | free_percpu(dp->stats_percpu); | 146 | free_percpu(dp->stats_percpu); |
147 | release_net(ovs_dp_get_net(dp)); | 147 | release_net(ovs_dp_get_net(dp)); |
148 | kfree(dp->ports); | 148 | kfree(dp->ports); |
149 | kfree(dp); | 149 | kfree(dp); |
150 | } | 150 | } |
151 | 151 | ||
152 | static struct hlist_head *vport_hash_bucket(const struct datapath *dp, | 152 | static struct hlist_head *vport_hash_bucket(const struct datapath *dp, |
153 | u16 port_no) | 153 | u16 port_no) |
154 | { | 154 | { |
155 | return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)]; | 155 | return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)]; |
156 | } | 156 | } |
157 | 157 | ||
158 | struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no) | 158 | struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no) |
159 | { | 159 | { |
160 | struct vport *vport; | 160 | struct vport *vport; |
161 | struct hlist_node *n; | 161 | struct hlist_node *n; |
162 | struct hlist_head *head; | 162 | struct hlist_head *head; |
163 | 163 | ||
164 | head = vport_hash_bucket(dp, port_no); | 164 | head = vport_hash_bucket(dp, port_no); |
165 | hlist_for_each_entry_rcu(vport, n, head, dp_hash_node) { | 165 | hlist_for_each_entry_rcu(vport, n, head, dp_hash_node) { |
166 | if (vport->port_no == port_no) | 166 | if (vport->port_no == port_no) |
167 | return vport; | 167 | return vport; |
168 | } | 168 | } |
169 | return NULL; | 169 | return NULL; |
170 | } | 170 | } |
171 | 171 | ||
172 | /* Called with RTNL lock and genl_lock. */ | 172 | /* Called with RTNL lock and genl_lock. */ |
173 | static struct vport *new_vport(const struct vport_parms *parms) | 173 | static struct vport *new_vport(const struct vport_parms *parms) |
174 | { | 174 | { |
175 | struct vport *vport; | 175 | struct vport *vport; |
176 | 176 | ||
177 | vport = ovs_vport_add(parms); | 177 | vport = ovs_vport_add(parms); |
178 | if (!IS_ERR(vport)) { | 178 | if (!IS_ERR(vport)) { |
179 | struct datapath *dp = parms->dp; | 179 | struct datapath *dp = parms->dp; |
180 | struct hlist_head *head = vport_hash_bucket(dp, vport->port_no); | 180 | struct hlist_head *head = vport_hash_bucket(dp, vport->port_no); |
181 | 181 | ||
182 | hlist_add_head_rcu(&vport->dp_hash_node, head); | 182 | hlist_add_head_rcu(&vport->dp_hash_node, head); |
183 | } | 183 | } |
184 | 184 | ||
185 | return vport; | 185 | return vport; |
186 | } | 186 | } |
187 | 187 | ||
188 | /* Called with RTNL lock. */ | 188 | /* Called with RTNL lock. */ |
189 | void ovs_dp_detach_port(struct vport *p) | 189 | void ovs_dp_detach_port(struct vport *p) |
190 | { | 190 | { |
191 | ASSERT_RTNL(); | 191 | ASSERT_RTNL(); |
192 | 192 | ||
193 | /* First drop references to device. */ | 193 | /* First drop references to device. */ |
194 | hlist_del_rcu(&p->dp_hash_node); | 194 | hlist_del_rcu(&p->dp_hash_node); |
195 | 195 | ||
196 | /* Then destroy it. */ | 196 | /* Then destroy it. */ |
197 | ovs_vport_del(p); | 197 | ovs_vport_del(p); |
198 | } | 198 | } |
199 | 199 | ||
200 | /* Must be called with rcu_read_lock. */ | 200 | /* Must be called with rcu_read_lock. */ |
201 | void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) | 201 | void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) |
202 | { | 202 | { |
203 | struct datapath *dp = p->dp; | 203 | struct datapath *dp = p->dp; |
204 | struct sw_flow *flow; | 204 | struct sw_flow *flow; |
205 | struct dp_stats_percpu *stats; | 205 | struct dp_stats_percpu *stats; |
206 | struct sw_flow_key key; | 206 | struct sw_flow_key key; |
207 | u64 *stats_counter; | 207 | u64 *stats_counter; |
208 | int error; | 208 | int error; |
209 | int key_len; | 209 | int key_len; |
210 | 210 | ||
211 | stats = this_cpu_ptr(dp->stats_percpu); | 211 | stats = this_cpu_ptr(dp->stats_percpu); |
212 | 212 | ||
213 | /* Extract flow from 'skb' into 'key'. */ | 213 | /* Extract flow from 'skb' into 'key'. */ |
214 | error = ovs_flow_extract(skb, p->port_no, &key, &key_len); | 214 | error = ovs_flow_extract(skb, p->port_no, &key, &key_len); |
215 | if (unlikely(error)) { | 215 | if (unlikely(error)) { |
216 | kfree_skb(skb); | 216 | kfree_skb(skb); |
217 | return; | 217 | return; |
218 | } | 218 | } |
219 | 219 | ||
220 | /* Look up flow. */ | 220 | /* Look up flow. */ |
221 | flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len); | 221 | flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len); |
222 | if (unlikely(!flow)) { | 222 | if (unlikely(!flow)) { |
223 | struct dp_upcall_info upcall; | 223 | struct dp_upcall_info upcall; |
224 | 224 | ||
225 | upcall.cmd = OVS_PACKET_CMD_MISS; | 225 | upcall.cmd = OVS_PACKET_CMD_MISS; |
226 | upcall.key = &key; | 226 | upcall.key = &key; |
227 | upcall.userdata = NULL; | 227 | upcall.userdata = NULL; |
228 | upcall.portid = p->upcall_portid; | 228 | upcall.portid = p->upcall_portid; |
229 | ovs_dp_upcall(dp, skb, &upcall); | 229 | ovs_dp_upcall(dp, skb, &upcall); |
230 | consume_skb(skb); | 230 | consume_skb(skb); |
231 | stats_counter = &stats->n_missed; | 231 | stats_counter = &stats->n_missed; |
232 | goto out; | 232 | goto out; |
233 | } | 233 | } |
234 | 234 | ||
235 | OVS_CB(skb)->flow = flow; | 235 | OVS_CB(skb)->flow = flow; |
236 | 236 | ||
237 | stats_counter = &stats->n_hit; | 237 | stats_counter = &stats->n_hit; |
238 | ovs_flow_used(OVS_CB(skb)->flow, skb); | 238 | ovs_flow_used(OVS_CB(skb)->flow, skb); |
239 | ovs_execute_actions(dp, skb); | 239 | ovs_execute_actions(dp, skb); |
240 | 240 | ||
241 | out: | 241 | out: |
242 | /* Update datapath statistics. */ | 242 | /* Update datapath statistics. */ |
243 | u64_stats_update_begin(&stats->sync); | 243 | u64_stats_update_begin(&stats->sync); |
244 | (*stats_counter)++; | 244 | (*stats_counter)++; |
245 | u64_stats_update_end(&stats->sync); | 245 | u64_stats_update_end(&stats->sync); |
246 | } | 246 | } |
247 | 247 | ||
248 | static struct genl_family dp_packet_genl_family = { | 248 | static struct genl_family dp_packet_genl_family = { |
249 | .id = GENL_ID_GENERATE, | 249 | .id = GENL_ID_GENERATE, |
250 | .hdrsize = sizeof(struct ovs_header), | 250 | .hdrsize = sizeof(struct ovs_header), |
251 | .name = OVS_PACKET_FAMILY, | 251 | .name = OVS_PACKET_FAMILY, |
252 | .version = OVS_PACKET_VERSION, | 252 | .version = OVS_PACKET_VERSION, |
253 | .maxattr = OVS_PACKET_ATTR_MAX, | 253 | .maxattr = OVS_PACKET_ATTR_MAX, |
254 | .netnsok = true | 254 | .netnsok = true |
255 | }; | 255 | }; |
256 | 256 | ||
257 | int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, | 257 | int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, |
258 | const struct dp_upcall_info *upcall_info) | 258 | const struct dp_upcall_info *upcall_info) |
259 | { | 259 | { |
260 | struct dp_stats_percpu *stats; | 260 | struct dp_stats_percpu *stats; |
261 | int dp_ifindex; | 261 | int dp_ifindex; |
262 | int err; | 262 | int err; |
263 | 263 | ||
264 | if (upcall_info->portid == 0) { | 264 | if (upcall_info->portid == 0) { |
265 | err = -ENOTCONN; | 265 | err = -ENOTCONN; |
266 | goto err; | 266 | goto err; |
267 | } | 267 | } |
268 | 268 | ||
269 | dp_ifindex = get_dpifindex(dp); | 269 | dp_ifindex = get_dpifindex(dp); |
270 | if (!dp_ifindex) { | 270 | if (!dp_ifindex) { |
271 | err = -ENODEV; | 271 | err = -ENODEV; |
272 | goto err; | 272 | goto err; |
273 | } | 273 | } |
274 | 274 | ||
275 | if (!skb_is_gso(skb)) | 275 | if (!skb_is_gso(skb)) |
276 | err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info); | 276 | err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info); |
277 | else | 277 | else |
278 | err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info); | 278 | err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info); |
279 | if (err) | 279 | if (err) |
280 | goto err; | 280 | goto err; |
281 | 281 | ||
282 | return 0; | 282 | return 0; |
283 | 283 | ||
284 | err: | 284 | err: |
285 | stats = this_cpu_ptr(dp->stats_percpu); | 285 | stats = this_cpu_ptr(dp->stats_percpu); |
286 | 286 | ||
287 | u64_stats_update_begin(&stats->sync); | 287 | u64_stats_update_begin(&stats->sync); |
288 | stats->n_lost++; | 288 | stats->n_lost++; |
289 | u64_stats_update_end(&stats->sync); | 289 | u64_stats_update_end(&stats->sync); |
290 | 290 | ||
291 | return err; | 291 | return err; |
292 | } | 292 | } |
293 | 293 | ||
294 | static int queue_gso_packets(struct net *net, int dp_ifindex, | 294 | static int queue_gso_packets(struct net *net, int dp_ifindex, |
295 | struct sk_buff *skb, | 295 | struct sk_buff *skb, |
296 | const struct dp_upcall_info *upcall_info) | 296 | const struct dp_upcall_info *upcall_info) |
297 | { | 297 | { |
298 | unsigned short gso_type = skb_shinfo(skb)->gso_type; | 298 | unsigned short gso_type = skb_shinfo(skb)->gso_type; |
299 | struct dp_upcall_info later_info; | 299 | struct dp_upcall_info later_info; |
300 | struct sw_flow_key later_key; | 300 | struct sw_flow_key later_key; |
301 | struct sk_buff *segs, *nskb; | 301 | struct sk_buff *segs, *nskb; |
302 | int err; | 302 | int err; |
303 | 303 | ||
304 | segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM); | 304 | segs = __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM, false); |
305 | if (IS_ERR(segs)) | 305 | if (IS_ERR(segs)) |
306 | return PTR_ERR(segs); | 306 | return PTR_ERR(segs); |
307 | 307 | ||
308 | /* Queue all of the segments. */ | 308 | /* Queue all of the segments. */ |
309 | skb = segs; | 309 | skb = segs; |
310 | do { | 310 | do { |
311 | err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info); | 311 | err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info); |
312 | if (err) | 312 | if (err) |
313 | break; | 313 | break; |
314 | 314 | ||
315 | if (skb == segs && gso_type & SKB_GSO_UDP) { | 315 | if (skb == segs && gso_type & SKB_GSO_UDP) { |
316 | /* The initial flow key extracted by ovs_flow_extract() | 316 | /* The initial flow key extracted by ovs_flow_extract() |
317 | * in this case is for a first fragment, so we need to | 317 | * in this case is for a first fragment, so we need to |
318 | * properly mark later fragments. | 318 | * properly mark later fragments. |
319 | */ | 319 | */ |
320 | later_key = *upcall_info->key; | 320 | later_key = *upcall_info->key; |
321 | later_key.ip.frag = OVS_FRAG_TYPE_LATER; | 321 | later_key.ip.frag = OVS_FRAG_TYPE_LATER; |
322 | 322 | ||
323 | later_info = *upcall_info; | 323 | later_info = *upcall_info; |
324 | later_info.key = &later_key; | 324 | later_info.key = &later_key; |
325 | upcall_info = &later_info; | 325 | upcall_info = &later_info; |
326 | } | 326 | } |
327 | } while ((skb = skb->next)); | 327 | } while ((skb = skb->next)); |
328 | 328 | ||
329 | /* Free all of the segments. */ | 329 | /* Free all of the segments. */ |
330 | skb = segs; | 330 | skb = segs; |
331 | do { | 331 | do { |
332 | nskb = skb->next; | 332 | nskb = skb->next; |
333 | if (err) | 333 | if (err) |
334 | kfree_skb(skb); | 334 | kfree_skb(skb); |
335 | else | 335 | else |
336 | consume_skb(skb); | 336 | consume_skb(skb); |
337 | } while ((skb = nskb)); | 337 | } while ((skb = nskb)); |
338 | return err; | 338 | return err; |
339 | } | 339 | } |
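
The functional change in queue_gso_packets() above is the call on line 304: skb_gso_segment() becomes __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM, false). The new third argument is a tx_path flag. Passing false tells the segmentation core that the skb was taken off the receive path, where ->ip_summed is typically CHECKSUM_NONE or CHECKSUM_UNNECESSARY rather than the CHECKSUM_PARTIAL that transmit-side callers provide, so the transmit-time checksum sanity check (and its warning) should not be applied as-is. The companion hunks in include/linux/netdevice.h and net/core/dev.c are not shown in this portion of the diff; the sketch below is a reconstruction of their assumed shape, for illustration only, not text copied from the commit.

	/* Sketch only: assumed shape of the companion __skb_gso_segment() change. */
	static bool skb_needs_check(struct sk_buff *skb, bool tx_path)
	{
		if (tx_path)
			/* tx: anything other than CHECKSUM_PARTIAL is suspect */
			return skb->ip_summed != CHECKSUM_PARTIAL;
		else
			/* rx: only a completely unchecksummed skb is worth a warning */
			return skb->ip_summed == CHECKSUM_NONE;
	}

	struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
					  netdev_features_t features, bool tx_path)
	{
		if (unlikely(skb_needs_check(skb, tx_path)))
			skb_warn_bad_offload(skb);
		/* ... existing segmentation logic continues unchanged ... */
	}

	/* Transmit-side callers keep the old behaviour through a wrapper. */
	static inline struct sk_buff *skb_gso_segment(struct sk_buff *skb,
						      netdev_features_t features)
	{
		return __skb_gso_segment(skb, features, true);
	}

With the wrapper preserving the old semantics for transmit-side users, only this receive-side call site needs to pass false.
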
340 | 340 | ||
341 | static int queue_userspace_packet(struct net *net, int dp_ifindex, | 341 | static int queue_userspace_packet(struct net *net, int dp_ifindex, |
342 | struct sk_buff *skb, | 342 | struct sk_buff *skb, |
343 | const struct dp_upcall_info *upcall_info) | 343 | const struct dp_upcall_info *upcall_info) |
344 | { | 344 | { |
345 | struct ovs_header *upcall; | 345 | struct ovs_header *upcall; |
346 | struct sk_buff *nskb = NULL; | 346 | struct sk_buff *nskb = NULL; |
347 | struct sk_buff *user_skb; /* to be queued to userspace */ | 347 | struct sk_buff *user_skb; /* to be queued to userspace */ |
348 | struct nlattr *nla; | 348 | struct nlattr *nla; |
349 | unsigned int len; | 349 | unsigned int len; |
350 | int err; | 350 | int err; |
351 | 351 | ||
352 | if (vlan_tx_tag_present(skb)) { | 352 | if (vlan_tx_tag_present(skb)) { |
353 | nskb = skb_clone(skb, GFP_ATOMIC); | 353 | nskb = skb_clone(skb, GFP_ATOMIC); |
354 | if (!nskb) | 354 | if (!nskb) |
355 | return -ENOMEM; | 355 | return -ENOMEM; |
356 | 356 | ||
357 | nskb = __vlan_put_tag(nskb, vlan_tx_tag_get(nskb)); | 357 | nskb = __vlan_put_tag(nskb, vlan_tx_tag_get(nskb)); |
358 | if (!nskb) | 358 | if (!nskb) |
359 | return -ENOMEM; | 359 | return -ENOMEM; |
360 | 360 | ||
361 | nskb->vlan_tci = 0; | 361 | nskb->vlan_tci = 0; |
362 | skb = nskb; | 362 | skb = nskb; |
363 | } | 363 | } |
364 | 364 | ||
365 | if (nla_attr_size(skb->len) > USHRT_MAX) { | 365 | if (nla_attr_size(skb->len) > USHRT_MAX) { |
366 | err = -EFBIG; | 366 | err = -EFBIG; |
367 | goto out; | 367 | goto out; |
368 | } | 368 | } |
369 | 369 | ||
370 | len = sizeof(struct ovs_header); | 370 | len = sizeof(struct ovs_header); |
371 | len += nla_total_size(skb->len); | 371 | len += nla_total_size(skb->len); |
372 | len += nla_total_size(FLOW_BUFSIZE); | 372 | len += nla_total_size(FLOW_BUFSIZE); |
373 | if (upcall_info->cmd == OVS_PACKET_CMD_ACTION) | 373 | if (upcall_info->cmd == OVS_PACKET_CMD_ACTION) |
374 | len += nla_total_size(8); | 374 | len += nla_total_size(8); |
375 | 375 | ||
376 | user_skb = genlmsg_new(len, GFP_ATOMIC); | 376 | user_skb = genlmsg_new(len, GFP_ATOMIC); |
377 | if (!user_skb) { | 377 | if (!user_skb) { |
378 | err = -ENOMEM; | 378 | err = -ENOMEM; |
379 | goto out; | 379 | goto out; |
380 | } | 380 | } |
381 | 381 | ||
382 | upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, | 382 | upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, |
383 | 0, upcall_info->cmd); | 383 | 0, upcall_info->cmd); |
384 | upcall->dp_ifindex = dp_ifindex; | 384 | upcall->dp_ifindex = dp_ifindex; |
385 | 385 | ||
386 | nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY); | 386 | nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY); |
387 | ovs_flow_to_nlattrs(upcall_info->key, user_skb); | 387 | ovs_flow_to_nlattrs(upcall_info->key, user_skb); |
388 | nla_nest_end(user_skb, nla); | 388 | nla_nest_end(user_skb, nla); |
389 | 389 | ||
390 | if (upcall_info->userdata) | 390 | if (upcall_info->userdata) |
391 | nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA, | 391 | nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA, |
392 | nla_get_u64(upcall_info->userdata)); | 392 | nla_get_u64(upcall_info->userdata)); |
393 | 393 | ||
394 | nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len); | 394 | nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len); |
395 | 395 | ||
396 | skb_copy_and_csum_dev(skb, nla_data(nla)); | 396 | skb_copy_and_csum_dev(skb, nla_data(nla)); |
397 | 397 | ||
398 | err = genlmsg_unicast(net, user_skb, upcall_info->portid); | 398 | err = genlmsg_unicast(net, user_skb, upcall_info->portid); |
399 | 399 | ||
400 | out: | 400 | out: |
401 | kfree_skb(nskb); | 401 | kfree_skb(nskb); |
402 | return err; | 402 | return err; |
403 | } | 403 | } |
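
A related detail in queue_userspace_packet() above: the packet bytes land in the OVS_PACKET_ATTR_PACKET attribute via skb_copy_and_csum_dev() on line 396. Because queue_gso_packets() asks the segmenter for NETIF_F_SG | NETIF_F_HW_CSUM, a segment can still carry a pending CHECKSUM_PARTIAL, and this copy is where that checksum finally gets computed, with the copy routine standing in for the hardware. The sketch below paraphrases the helper's behaviour from memory for orientation; it is not code from this commit.

	/* Paraphrased behaviour of skb_copy_and_csum_dev() (net/core/skbuff.c):
	 * copy everything up to the checksum start verbatim, checksum the rest
	 * while copying, and if the skb was CHECKSUM_PARTIAL fold the result
	 * into the field that csum_offset points at.
	 */
	void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
	{
		long csstart = skb->ip_summed == CHECKSUM_PARTIAL ?
			       skb_checksum_start_offset(skb) : skb_headlen(skb);
		__wsum csum = 0;

		skb_copy_from_linear_data(skb, to, csstart);
		if (csstart != skb->len)
			csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
						      skb->len - csstart, 0);
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			*(__sum16 *)(to + csstart + skb->csum_offset) = csum_fold(csum);
	}

So a pending CHECKSUM_PARTIAL never reaches userspace as an unfinished checksum, whatever ->ip_summed state the segments arrive with.
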
404 | 404 | ||
405 | /* Called with genl_mutex. */ | 405 | /* Called with genl_mutex. */ |
406 | static int flush_flows(struct datapath *dp) | 406 | static int flush_flows(struct datapath *dp) |
407 | { | 407 | { |
408 | struct flow_table *old_table; | 408 | struct flow_table *old_table; |
409 | struct flow_table *new_table; | 409 | struct flow_table *new_table; |
410 | 410 | ||
411 | old_table = genl_dereference(dp->table); | 411 | old_table = genl_dereference(dp->table); |
412 | new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS); | 412 | new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS); |
413 | if (!new_table) | 413 | if (!new_table) |
414 | return -ENOMEM; | 414 | return -ENOMEM; |
415 | 415 | ||
416 | rcu_assign_pointer(dp->table, new_table); | 416 | rcu_assign_pointer(dp->table, new_table); |
417 | 417 | ||
418 | ovs_flow_tbl_deferred_destroy(old_table); | 418 | ovs_flow_tbl_deferred_destroy(old_table); |
419 | return 0; | 419 | return 0; |
420 | } | 420 | } |
421 | 421 | ||
422 | static int validate_actions(const struct nlattr *attr, | 422 | static int validate_actions(const struct nlattr *attr, |
423 | const struct sw_flow_key *key, int depth); | 423 | const struct sw_flow_key *key, int depth); |
424 | 424 | ||
425 | static int validate_sample(const struct nlattr *attr, | 425 | static int validate_sample(const struct nlattr *attr, |
426 | const struct sw_flow_key *key, int depth) | 426 | const struct sw_flow_key *key, int depth) |
427 | { | 427 | { |
428 | const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; | 428 | const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; |
429 | const struct nlattr *probability, *actions; | 429 | const struct nlattr *probability, *actions; |
430 | const struct nlattr *a; | 430 | const struct nlattr *a; |
431 | int rem; | 431 | int rem; |
432 | 432 | ||
433 | memset(attrs, 0, sizeof(attrs)); | 433 | memset(attrs, 0, sizeof(attrs)); |
434 | nla_for_each_nested(a, attr, rem) { | 434 | nla_for_each_nested(a, attr, rem) { |
435 | int type = nla_type(a); | 435 | int type = nla_type(a); |
436 | if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type]) | 436 | if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type]) |
437 | return -EINVAL; | 437 | return -EINVAL; |
438 | attrs[type] = a; | 438 | attrs[type] = a; |
439 | } | 439 | } |
440 | if (rem) | 440 | if (rem) |
441 | return -EINVAL; | 441 | return -EINVAL; |
442 | 442 | ||
443 | probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY]; | 443 | probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY]; |
444 | if (!probability || nla_len(probability) != sizeof(u32)) | 444 | if (!probability || nla_len(probability) != sizeof(u32)) |
445 | return -EINVAL; | 445 | return -EINVAL; |
446 | 446 | ||
447 | actions = attrs[OVS_SAMPLE_ATTR_ACTIONS]; | 447 | actions = attrs[OVS_SAMPLE_ATTR_ACTIONS]; |
448 | if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) | 448 | if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) |
449 | return -EINVAL; | 449 | return -EINVAL; |
450 | return validate_actions(actions, key, depth + 1); | 450 | return validate_actions(actions, key, depth + 1); |
451 | } | 451 | } |
452 | 452 | ||
453 | static int validate_tp_port(const struct sw_flow_key *flow_key) | 453 | static int validate_tp_port(const struct sw_flow_key *flow_key) |
454 | { | 454 | { |
455 | if (flow_key->eth.type == htons(ETH_P_IP)) { | 455 | if (flow_key->eth.type == htons(ETH_P_IP)) { |
456 | if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst) | 456 | if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst) |
457 | return 0; | 457 | return 0; |
458 | } else if (flow_key->eth.type == htons(ETH_P_IPV6)) { | 458 | } else if (flow_key->eth.type == htons(ETH_P_IPV6)) { |
459 | if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst) | 459 | if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst) |
460 | return 0; | 460 | return 0; |
461 | } | 461 | } |
462 | 462 | ||
463 | return -EINVAL; | 463 | return -EINVAL; |
464 | } | 464 | } |
465 | 465 | ||
466 | static int validate_set(const struct nlattr *a, | 466 | static int validate_set(const struct nlattr *a, |
467 | const struct sw_flow_key *flow_key) | 467 | const struct sw_flow_key *flow_key) |
468 | { | 468 | { |
469 | const struct nlattr *ovs_key = nla_data(a); | 469 | const struct nlattr *ovs_key = nla_data(a); |
470 | int key_type = nla_type(ovs_key); | 470 | int key_type = nla_type(ovs_key); |
471 | 471 | ||
472 | /* There can be only one key in an action */ | 472 | /* There can be only one key in an action */ |
473 | if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) | 473 | if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) |
474 | return -EINVAL; | 474 | return -EINVAL; |
475 | 475 | ||
476 | if (key_type > OVS_KEY_ATTR_MAX || | 476 | if (key_type > OVS_KEY_ATTR_MAX || |
477 | nla_len(ovs_key) != ovs_key_lens[key_type]) | 477 | nla_len(ovs_key) != ovs_key_lens[key_type]) |
478 | return -EINVAL; | 478 | return -EINVAL; |
479 | 479 | ||
480 | switch (key_type) { | 480 | switch (key_type) { |
481 | const struct ovs_key_ipv4 *ipv4_key; | 481 | const struct ovs_key_ipv4 *ipv4_key; |
482 | const struct ovs_key_ipv6 *ipv6_key; | 482 | const struct ovs_key_ipv6 *ipv6_key; |
483 | 483 | ||
484 | case OVS_KEY_ATTR_PRIORITY: | 484 | case OVS_KEY_ATTR_PRIORITY: |
485 | case OVS_KEY_ATTR_SKB_MARK: | 485 | case OVS_KEY_ATTR_SKB_MARK: |
486 | case OVS_KEY_ATTR_ETHERNET: | 486 | case OVS_KEY_ATTR_ETHERNET: |
487 | break; | 487 | break; |
488 | 488 | ||
489 | case OVS_KEY_ATTR_IPV4: | 489 | case OVS_KEY_ATTR_IPV4: |
490 | if (flow_key->eth.type != htons(ETH_P_IP)) | 490 | if (flow_key->eth.type != htons(ETH_P_IP)) |
491 | return -EINVAL; | 491 | return -EINVAL; |
492 | 492 | ||
493 | if (!flow_key->ip.proto) | 493 | if (!flow_key->ip.proto) |
494 | return -EINVAL; | 494 | return -EINVAL; |
495 | 495 | ||
496 | ipv4_key = nla_data(ovs_key); | 496 | ipv4_key = nla_data(ovs_key); |
497 | if (ipv4_key->ipv4_proto != flow_key->ip.proto) | 497 | if (ipv4_key->ipv4_proto != flow_key->ip.proto) |
498 | return -EINVAL; | 498 | return -EINVAL; |
499 | 499 | ||
500 | if (ipv4_key->ipv4_frag != flow_key->ip.frag) | 500 | if (ipv4_key->ipv4_frag != flow_key->ip.frag) |
501 | return -EINVAL; | 501 | return -EINVAL; |
502 | 502 | ||
503 | break; | 503 | break; |
504 | 504 | ||
505 | case OVS_KEY_ATTR_IPV6: | 505 | case OVS_KEY_ATTR_IPV6: |
506 | if (flow_key->eth.type != htons(ETH_P_IPV6)) | 506 | if (flow_key->eth.type != htons(ETH_P_IPV6)) |
507 | return -EINVAL; | 507 | return -EINVAL; |
508 | 508 | ||
509 | if (!flow_key->ip.proto) | 509 | if (!flow_key->ip.proto) |
510 | return -EINVAL; | 510 | return -EINVAL; |
511 | 511 | ||
512 | ipv6_key = nla_data(ovs_key); | 512 | ipv6_key = nla_data(ovs_key); |
513 | if (ipv6_key->ipv6_proto != flow_key->ip.proto) | 513 | if (ipv6_key->ipv6_proto != flow_key->ip.proto) |
514 | return -EINVAL; | 514 | return -EINVAL; |
515 | 515 | ||
516 | if (ipv6_key->ipv6_frag != flow_key->ip.frag) | 516 | if (ipv6_key->ipv6_frag != flow_key->ip.frag) |
517 | return -EINVAL; | 517 | return -EINVAL; |
518 | 518 | ||
519 | if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) | 519 | if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) |
520 | return -EINVAL; | 520 | return -EINVAL; |
521 | 521 | ||
522 | break; | 522 | break; |
523 | 523 | ||
524 | case OVS_KEY_ATTR_TCP: | 524 | case OVS_KEY_ATTR_TCP: |
525 | if (flow_key->ip.proto != IPPROTO_TCP) | 525 | if (flow_key->ip.proto != IPPROTO_TCP) |
526 | return -EINVAL; | 526 | return -EINVAL; |
527 | 527 | ||
528 | return validate_tp_port(flow_key); | 528 | return validate_tp_port(flow_key); |
529 | 529 | ||
530 | case OVS_KEY_ATTR_UDP: | 530 | case OVS_KEY_ATTR_UDP: |
531 | if (flow_key->ip.proto != IPPROTO_UDP) | 531 | if (flow_key->ip.proto != IPPROTO_UDP) |
532 | return -EINVAL; | 532 | return -EINVAL; |
533 | 533 | ||
534 | return validate_tp_port(flow_key); | 534 | return validate_tp_port(flow_key); |
535 | 535 | ||
536 | default: | 536 | default: |
537 | return -EINVAL; | 537 | return -EINVAL; |
538 | } | 538 | } |
539 | 539 | ||
540 | return 0; | 540 | return 0; |
541 | } | 541 | } |
542 | 542 | ||
543 | static int validate_userspace(const struct nlattr *attr) | 543 | static int validate_userspace(const struct nlattr *attr) |
544 | { | 544 | { |
545 | static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { | 545 | static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { |
546 | [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, | 546 | [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, |
547 | [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_U64 }, | 547 | [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_U64 }, |
548 | }; | 548 | }; |
549 | struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; | 549 | struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; |
550 | int error; | 550 | int error; |
551 | 551 | ||
552 | error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, | 552 | error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, |
553 | attr, userspace_policy); | 553 | attr, userspace_policy); |
554 | if (error) | 554 | if (error) |
555 | return error; | 555 | return error; |
556 | 556 | ||
557 | if (!a[OVS_USERSPACE_ATTR_PID] || | 557 | if (!a[OVS_USERSPACE_ATTR_PID] || |
558 | !nla_get_u32(a[OVS_USERSPACE_ATTR_PID])) | 558 | !nla_get_u32(a[OVS_USERSPACE_ATTR_PID])) |
559 | return -EINVAL; | 559 | return -EINVAL; |
560 | 560 | ||
561 | return 0; | 561 | return 0; |
562 | } | 562 | } |
563 | 563 | ||
564 | static int validate_actions(const struct nlattr *attr, | 564 | static int validate_actions(const struct nlattr *attr, |
565 | const struct sw_flow_key *key, int depth) | 565 | const struct sw_flow_key *key, int depth) |
566 | { | 566 | { |
567 | const struct nlattr *a; | 567 | const struct nlattr *a; |
568 | int rem, err; | 568 | int rem, err; |
569 | 569 | ||
570 | if (depth >= SAMPLE_ACTION_DEPTH) | 570 | if (depth >= SAMPLE_ACTION_DEPTH) |
571 | return -EOVERFLOW; | 571 | return -EOVERFLOW; |
572 | 572 | ||
573 | nla_for_each_nested(a, attr, rem) { | 573 | nla_for_each_nested(a, attr, rem) { |
574 | /* Expected argument lengths, (u32)-1 for variable length. */ | 574 | /* Expected argument lengths, (u32)-1 for variable length. */ |
575 | static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { | 575 | static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { |
576 | [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), | 576 | [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), |
577 | [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, | 577 | [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, |
578 | [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), | 578 | [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), |
579 | [OVS_ACTION_ATTR_POP_VLAN] = 0, | 579 | [OVS_ACTION_ATTR_POP_VLAN] = 0, |
580 | [OVS_ACTION_ATTR_SET] = (u32)-1, | 580 | [OVS_ACTION_ATTR_SET] = (u32)-1, |
581 | [OVS_ACTION_ATTR_SAMPLE] = (u32)-1 | 581 | [OVS_ACTION_ATTR_SAMPLE] = (u32)-1 |
582 | }; | 582 | }; |
583 | const struct ovs_action_push_vlan *vlan; | 583 | const struct ovs_action_push_vlan *vlan; |
584 | int type = nla_type(a); | 584 | int type = nla_type(a); |
585 | 585 | ||
586 | if (type > OVS_ACTION_ATTR_MAX || | 586 | if (type > OVS_ACTION_ATTR_MAX || |
587 | (action_lens[type] != nla_len(a) && | 587 | (action_lens[type] != nla_len(a) && |
588 | action_lens[type] != (u32)-1)) | 588 | action_lens[type] != (u32)-1)) |
589 | return -EINVAL; | 589 | return -EINVAL; |
590 | 590 | ||
591 | switch (type) { | 591 | switch (type) { |
592 | case OVS_ACTION_ATTR_UNSPEC: | 592 | case OVS_ACTION_ATTR_UNSPEC: |
593 | return -EINVAL; | 593 | return -EINVAL; |
594 | 594 | ||
595 | case OVS_ACTION_ATTR_USERSPACE: | 595 | case OVS_ACTION_ATTR_USERSPACE: |
596 | err = validate_userspace(a); | 596 | err = validate_userspace(a); |
597 | if (err) | 597 | if (err) |
598 | return err; | 598 | return err; |
599 | break; | 599 | break; |
600 | 600 | ||
601 | case OVS_ACTION_ATTR_OUTPUT: | 601 | case OVS_ACTION_ATTR_OUTPUT: |
602 | if (nla_get_u32(a) >= DP_MAX_PORTS) | 602 | if (nla_get_u32(a) >= DP_MAX_PORTS) |
603 | return -EINVAL; | 603 | return -EINVAL; |
604 | break; | 604 | break; |
605 | 605 | ||
606 | 606 | ||
607 | case OVS_ACTION_ATTR_POP_VLAN: | 607 | case OVS_ACTION_ATTR_POP_VLAN: |
608 | break; | 608 | break; |
609 | 609 | ||
610 | case OVS_ACTION_ATTR_PUSH_VLAN: | 610 | case OVS_ACTION_ATTR_PUSH_VLAN: |
611 | vlan = nla_data(a); | 611 | vlan = nla_data(a); |
612 | if (vlan->vlan_tpid != htons(ETH_P_8021Q)) | 612 | if (vlan->vlan_tpid != htons(ETH_P_8021Q)) |
613 | return -EINVAL; | 613 | return -EINVAL; |
614 | if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) | 614 | if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) |
615 | return -EINVAL; | 615 | return -EINVAL; |
616 | break; | 616 | break; |
617 | 617 | ||
618 | case OVS_ACTION_ATTR_SET: | 618 | case OVS_ACTION_ATTR_SET: |
619 | err = validate_set(a, key); | 619 | err = validate_set(a, key); |
620 | if (err) | 620 | if (err) |
621 | return err; | 621 | return err; |
622 | break; | 622 | break; |
623 | 623 | ||
624 | case OVS_ACTION_ATTR_SAMPLE: | 624 | case OVS_ACTION_ATTR_SAMPLE: |
625 | err = validate_sample(a, key, depth); | 625 | err = validate_sample(a, key, depth); |
626 | if (err) | 626 | if (err) |
627 | return err; | 627 | return err; |
628 | break; | 628 | break; |
629 | 629 | ||
630 | default: | 630 | default: |
631 | return -EINVAL; | 631 | return -EINVAL; |
632 | } | 632 | } |
633 | } | 633 | } |
634 | 634 | ||
635 | if (rem > 0) | 635 | if (rem > 0) |
636 | return -EINVAL; | 636 | return -EINVAL; |
637 | 637 | ||
638 | return 0; | 638 | return 0; |
639 | } | 639 | } |
640 | 640 | ||
641 | static void clear_stats(struct sw_flow *flow) | 641 | static void clear_stats(struct sw_flow *flow) |
642 | { | 642 | { |
643 | flow->used = 0; | 643 | flow->used = 0; |
644 | flow->tcp_flags = 0; | 644 | flow->tcp_flags = 0; |
645 | flow->packet_count = 0; | 645 | flow->packet_count = 0; |
646 | flow->byte_count = 0; | 646 | flow->byte_count = 0; |
647 | } | 647 | } |
648 | 648 | ||
649 | static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) | 649 | static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) |
650 | { | 650 | { |
651 | struct ovs_header *ovs_header = info->userhdr; | 651 | struct ovs_header *ovs_header = info->userhdr; |
652 | struct nlattr **a = info->attrs; | 652 | struct nlattr **a = info->attrs; |
653 | struct sw_flow_actions *acts; | 653 | struct sw_flow_actions *acts; |
654 | struct sk_buff *packet; | 654 | struct sk_buff *packet; |
655 | struct sw_flow *flow; | 655 | struct sw_flow *flow; |
656 | struct datapath *dp; | 656 | struct datapath *dp; |
657 | struct ethhdr *eth; | 657 | struct ethhdr *eth; |
658 | int len; | 658 | int len; |
659 | int err; | 659 | int err; |
660 | int key_len; | 660 | int key_len; |
661 | 661 | ||
662 | err = -EINVAL; | 662 | err = -EINVAL; |
663 | if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] || | 663 | if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] || |
664 | !a[OVS_PACKET_ATTR_ACTIONS] || | 664 | !a[OVS_PACKET_ATTR_ACTIONS] || |
665 | nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN) | 665 | nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN) |
666 | goto err; | 666 | goto err; |
667 | 667 | ||
668 | len = nla_len(a[OVS_PACKET_ATTR_PACKET]); | 668 | len = nla_len(a[OVS_PACKET_ATTR_PACKET]); |
669 | packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL); | 669 | packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL); |
670 | err = -ENOMEM; | 670 | err = -ENOMEM; |
671 | if (!packet) | 671 | if (!packet) |
672 | goto err; | 672 | goto err; |
673 | skb_reserve(packet, NET_IP_ALIGN); | 673 | skb_reserve(packet, NET_IP_ALIGN); |
674 | 674 | ||
675 | memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len); | 675 | memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len); |
676 | 676 | ||
677 | skb_reset_mac_header(packet); | 677 | skb_reset_mac_header(packet); |
678 | eth = eth_hdr(packet); | 678 | eth = eth_hdr(packet); |
679 | 679 | ||
680 | /* Normally, setting the skb 'protocol' field would be handled by a | 680 | /* Normally, setting the skb 'protocol' field would be handled by a |
681 | * call to eth_type_trans(), but it assumes there's a sending | 681 | * call to eth_type_trans(), but it assumes there's a sending |
682 | * device, which we may not have. */ | 682 | * device, which we may not have. */ |
683 | if (ntohs(eth->h_proto) >= 1536) | 683 | if (ntohs(eth->h_proto) >= 1536) |
684 | packet->protocol = eth->h_proto; | 684 | packet->protocol = eth->h_proto; |
685 | else | 685 | else |
686 | packet->protocol = htons(ETH_P_802_2); | 686 | packet->protocol = htons(ETH_P_802_2); |
687 | 687 | ||
688 | /* Build an sw_flow for sending this packet. */ | 688 | /* Build an sw_flow for sending this packet. */ |
689 | flow = ovs_flow_alloc(); | 689 | flow = ovs_flow_alloc(); |
690 | err = PTR_ERR(flow); | 690 | err = PTR_ERR(flow); |
691 | if (IS_ERR(flow)) | 691 | if (IS_ERR(flow)) |
692 | goto err_kfree_skb; | 692 | goto err_kfree_skb; |
693 | 693 | ||
694 | err = ovs_flow_extract(packet, -1, &flow->key, &key_len); | 694 | err = ovs_flow_extract(packet, -1, &flow->key, &key_len); |
695 | if (err) | 695 | if (err) |
696 | goto err_flow_free; | 696 | goto err_flow_free; |
697 | 697 | ||
698 | err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority, | 698 | err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority, |
699 | &flow->key.phy.skb_mark, | 699 | &flow->key.phy.skb_mark, |
700 | &flow->key.phy.in_port, | 700 | &flow->key.phy.in_port, |
701 | a[OVS_PACKET_ATTR_KEY]); | 701 | a[OVS_PACKET_ATTR_KEY]); |
702 | if (err) | 702 | if (err) |
703 | goto err_flow_free; | 703 | goto err_flow_free; |
704 | 704 | ||
705 | err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0); | 705 | err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0); |
706 | if (err) | 706 | if (err) |
707 | goto err_flow_free; | 707 | goto err_flow_free; |
708 | 708 | ||
709 | flow->hash = ovs_flow_hash(&flow->key, key_len); | 709 | flow->hash = ovs_flow_hash(&flow->key, key_len); |
710 | 710 | ||
711 | acts = ovs_flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]); | 711 | acts = ovs_flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]); |
712 | err = PTR_ERR(acts); | 712 | err = PTR_ERR(acts); |
713 | if (IS_ERR(acts)) | 713 | if (IS_ERR(acts)) |
714 | goto err_flow_free; | 714 | goto err_flow_free; |
715 | rcu_assign_pointer(flow->sf_acts, acts); | 715 | rcu_assign_pointer(flow->sf_acts, acts); |
716 | 716 | ||
717 | OVS_CB(packet)->flow = flow; | 717 | OVS_CB(packet)->flow = flow; |
718 | packet->priority = flow->key.phy.priority; | 718 | packet->priority = flow->key.phy.priority; |
719 | packet->mark = flow->key.phy.skb_mark; | 719 | packet->mark = flow->key.phy.skb_mark; |
720 | 720 | ||
721 | rcu_read_lock(); | 721 | rcu_read_lock(); |
722 | dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); | 722 | dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); |
723 | err = -ENODEV; | 723 | err = -ENODEV; |
724 | if (!dp) | 724 | if (!dp) |
725 | goto err_unlock; | 725 | goto err_unlock; |
726 | 726 | ||
727 | local_bh_disable(); | 727 | local_bh_disable(); |
728 | err = ovs_execute_actions(dp, packet); | 728 | err = ovs_execute_actions(dp, packet); |
729 | local_bh_enable(); | 729 | local_bh_enable(); |
730 | rcu_read_unlock(); | 730 | rcu_read_unlock(); |
731 | 731 | ||
732 | ovs_flow_free(flow); | 732 | ovs_flow_free(flow); |
733 | return err; | 733 | return err; |
734 | 734 | ||
735 | err_unlock: | 735 | err_unlock: |
736 | rcu_read_unlock(); | 736 | rcu_read_unlock(); |
737 | err_flow_free: | 737 | err_flow_free: |
738 | ovs_flow_free(flow); | 738 | ovs_flow_free(flow); |
739 | err_kfree_skb: | 739 | err_kfree_skb: |
740 | kfree_skb(packet); | 740 | kfree_skb(packet); |
741 | err: | 741 | err: |
742 | return err; | 742 | return err; |
743 | } | 743 | } |
744 | 744 | ||
745 | static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { | 745 | static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { |
746 | [OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC }, | 746 | [OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC }, |
747 | [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED }, | 747 | [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED }, |
748 | [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED }, | 748 | [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED }, |
749 | }; | 749 | }; |
750 | 750 | ||
751 | static struct genl_ops dp_packet_genl_ops[] = { | 751 | static struct genl_ops dp_packet_genl_ops[] = { |
752 | { .cmd = OVS_PACKET_CMD_EXECUTE, | 752 | { .cmd = OVS_PACKET_CMD_EXECUTE, |
753 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ | 753 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ |
754 | .policy = packet_policy, | 754 | .policy = packet_policy, |
755 | .doit = ovs_packet_cmd_execute | 755 | .doit = ovs_packet_cmd_execute |
756 | } | 756 | } |
757 | }; | 757 | }; |
758 | 758 | ||
759 | static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) | 759 | static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) |
760 | { | 760 | { |
761 | int i; | 761 | int i; |
762 | struct flow_table *table = genl_dereference(dp->table); | 762 | struct flow_table *table = genl_dereference(dp->table); |
763 | 763 | ||
764 | stats->n_flows = ovs_flow_tbl_count(table); | 764 | stats->n_flows = ovs_flow_tbl_count(table); |
765 | 765 | ||
766 | stats->n_hit = stats->n_missed = stats->n_lost = 0; | 766 | stats->n_hit = stats->n_missed = stats->n_lost = 0; |
767 | for_each_possible_cpu(i) { | 767 | for_each_possible_cpu(i) { |
768 | const struct dp_stats_percpu *percpu_stats; | 768 | const struct dp_stats_percpu *percpu_stats; |
769 | struct dp_stats_percpu local_stats; | 769 | struct dp_stats_percpu local_stats; |
770 | unsigned int start; | 770 | unsigned int start; |
771 | 771 | ||
772 | percpu_stats = per_cpu_ptr(dp->stats_percpu, i); | 772 | percpu_stats = per_cpu_ptr(dp->stats_percpu, i); |
773 | 773 | ||
774 | do { | 774 | do { |
775 | start = u64_stats_fetch_begin_bh(&percpu_stats->sync); | 775 | start = u64_stats_fetch_begin_bh(&percpu_stats->sync); |
776 | local_stats = *percpu_stats; | 776 | local_stats = *percpu_stats; |
777 | } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start)); | 777 | } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start)); |
778 | 778 | ||
779 | stats->n_hit += local_stats.n_hit; | 779 | stats->n_hit += local_stats.n_hit; |
780 | stats->n_missed += local_stats.n_missed; | 780 | stats->n_missed += local_stats.n_missed; |
781 | stats->n_lost += local_stats.n_lost; | 781 | stats->n_lost += local_stats.n_lost; |
782 | } | 782 | } |
783 | } | 783 | } |
784 | 784 | ||
785 | static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { | 785 | static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { |
786 | [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED }, | 786 | [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED }, |
787 | [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, | 787 | [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, |
788 | [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, | 788 | [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, |
789 | }; | 789 | }; |
790 | 790 | ||
791 | static struct genl_family dp_flow_genl_family = { | 791 | static struct genl_family dp_flow_genl_family = { |
792 | .id = GENL_ID_GENERATE, | 792 | .id = GENL_ID_GENERATE, |
793 | .hdrsize = sizeof(struct ovs_header), | 793 | .hdrsize = sizeof(struct ovs_header), |
794 | .name = OVS_FLOW_FAMILY, | 794 | .name = OVS_FLOW_FAMILY, |
795 | .version = OVS_FLOW_VERSION, | 795 | .version = OVS_FLOW_VERSION, |
796 | .maxattr = OVS_FLOW_ATTR_MAX, | 796 | .maxattr = OVS_FLOW_ATTR_MAX, |
797 | .netnsok = true | 797 | .netnsok = true |
798 | }; | 798 | }; |
799 | 799 | ||
800 | static struct genl_multicast_group ovs_dp_flow_multicast_group = { | 800 | static struct genl_multicast_group ovs_dp_flow_multicast_group = { |
801 | .name = OVS_FLOW_MCGROUP | 801 | .name = OVS_FLOW_MCGROUP |
802 | }; | 802 | }; |
803 | 803 | ||
804 | /* Called with genl_lock. */ | 804 | /* Called with genl_lock. */ |
805 | static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, | 805 | static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, |
806 | struct sk_buff *skb, u32 portid, | 806 | struct sk_buff *skb, u32 portid, |
807 | u32 seq, u32 flags, u8 cmd) | 807 | u32 seq, u32 flags, u8 cmd) |
808 | { | 808 | { |
809 | const int skb_orig_len = skb->len; | 809 | const int skb_orig_len = skb->len; |
810 | const struct sw_flow_actions *sf_acts; | 810 | const struct sw_flow_actions *sf_acts; |
811 | struct ovs_flow_stats stats; | 811 | struct ovs_flow_stats stats; |
812 | struct ovs_header *ovs_header; | 812 | struct ovs_header *ovs_header; |
813 | struct nlattr *nla; | 813 | struct nlattr *nla; |
814 | unsigned long used; | 814 | unsigned long used; |
815 | u8 tcp_flags; | 815 | u8 tcp_flags; |
816 | int err; | 816 | int err; |
817 | 817 | ||
818 | sf_acts = rcu_dereference_protected(flow->sf_acts, | 818 | sf_acts = rcu_dereference_protected(flow->sf_acts, |
819 | lockdep_genl_is_held()); | 819 | lockdep_genl_is_held()); |
820 | 820 | ||
821 | ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd); | 821 | ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd); |
822 | if (!ovs_header) | 822 | if (!ovs_header) |
823 | return -EMSGSIZE; | 823 | return -EMSGSIZE; |
824 | 824 | ||
825 | ovs_header->dp_ifindex = get_dpifindex(dp); | 825 | ovs_header->dp_ifindex = get_dpifindex(dp); |
826 | 826 | ||
827 | nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY); | 827 | nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY); |
828 | if (!nla) | 828 | if (!nla) |
829 | goto nla_put_failure; | 829 | goto nla_put_failure; |
830 | err = ovs_flow_to_nlattrs(&flow->key, skb); | 830 | err = ovs_flow_to_nlattrs(&flow->key, skb); |
831 | if (err) | 831 | if (err) |
832 | goto error; | 832 | goto error; |
833 | nla_nest_end(skb, nla); | 833 | nla_nest_end(skb, nla); |
834 | 834 | ||
835 | spin_lock_bh(&flow->lock); | 835 | spin_lock_bh(&flow->lock); |
836 | used = flow->used; | 836 | used = flow->used; |
837 | stats.n_packets = flow->packet_count; | 837 | stats.n_packets = flow->packet_count; |
838 | stats.n_bytes = flow->byte_count; | 838 | stats.n_bytes = flow->byte_count; |
839 | tcp_flags = flow->tcp_flags; | 839 | tcp_flags = flow->tcp_flags; |
840 | spin_unlock_bh(&flow->lock); | 840 | spin_unlock_bh(&flow->lock); |
841 | 841 | ||
842 | if (used && | 842 | if (used && |
843 | nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used))) | 843 | nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used))) |
844 | goto nla_put_failure; | 844 | goto nla_put_failure; |
845 | 845 | ||
846 | if (stats.n_packets && | 846 | if (stats.n_packets && |
847 | nla_put(skb, OVS_FLOW_ATTR_STATS, | 847 | nla_put(skb, OVS_FLOW_ATTR_STATS, |
848 | sizeof(struct ovs_flow_stats), &stats)) | 848 | sizeof(struct ovs_flow_stats), &stats)) |
849 | goto nla_put_failure; | 849 | goto nla_put_failure; |
850 | 850 | ||
851 | if (tcp_flags && | 851 | if (tcp_flags && |
852 | nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags)) | 852 | nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags)) |
853 | goto nla_put_failure; | 853 | goto nla_put_failure; |
854 | 854 | ||
855 | /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if | 855 | /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if |
856 | * this is the first flow to be dumped into 'skb'. This is unusual for | 856 | * this is the first flow to be dumped into 'skb'. This is unusual for |
857 | * Netlink but individual action lists can be longer than | 857 | * Netlink but individual action lists can be longer than |
858 | * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this. | 858 | * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this. |
859 | * The userspace caller can always fetch the actions separately if it | 859 | * The userspace caller can always fetch the actions separately if it |
860 | * really wants them. (Most userspace callers in fact don't care.) | 860 | * really wants them. (Most userspace callers in fact don't care.) |
861 | * | 861 | * |
862 | * This can only fail for dump operations because the skb is always | 862 | * This can only fail for dump operations because the skb is always |
863 | * properly sized for single flows. | 863 | * properly sized for single flows. |
864 | */ | 864 | */ |
865 | err = nla_put(skb, OVS_FLOW_ATTR_ACTIONS, sf_acts->actions_len, | 865 | err = nla_put(skb, OVS_FLOW_ATTR_ACTIONS, sf_acts->actions_len, |
866 | sf_acts->actions); | 866 | sf_acts->actions); |
867 | if (err < 0 && skb_orig_len) | 867 | if (err < 0 && skb_orig_len) |
868 | goto error; | 868 | goto error; |
869 | 869 | ||
870 | return genlmsg_end(skb, ovs_header); | 870 | return genlmsg_end(skb, ovs_header); |
871 | 871 | ||
872 | nla_put_failure: | 872 | nla_put_failure: |
873 | err = -EMSGSIZE; | 873 | err = -EMSGSIZE; |
874 | error: | 874 | error: |
875 | genlmsg_cancel(skb, ovs_header); | 875 | genlmsg_cancel(skb, ovs_header); |
876 | return err; | 876 | return err; |
877 | } | 877 | } |
878 | 878 | ||
879 | static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow) | 879 | static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow) |
880 | { | 880 | { |
881 | const struct sw_flow_actions *sf_acts; | 881 | const struct sw_flow_actions *sf_acts; |
882 | int len; | 882 | int len; |
883 | 883 | ||
884 | sf_acts = rcu_dereference_protected(flow->sf_acts, | 884 | sf_acts = rcu_dereference_protected(flow->sf_acts, |
885 | lockdep_genl_is_held()); | 885 | lockdep_genl_is_held()); |
886 | 886 | ||
887 | /* OVS_FLOW_ATTR_KEY */ | 887 | /* OVS_FLOW_ATTR_KEY */ |
888 | len = nla_total_size(FLOW_BUFSIZE); | 888 | len = nla_total_size(FLOW_BUFSIZE); |
889 | /* OVS_FLOW_ATTR_ACTIONS */ | 889 | /* OVS_FLOW_ATTR_ACTIONS */ |
890 | len += nla_total_size(sf_acts->actions_len); | 890 | len += nla_total_size(sf_acts->actions_len); |
891 | /* OVS_FLOW_ATTR_STATS */ | 891 | /* OVS_FLOW_ATTR_STATS */ |
892 | len += nla_total_size(sizeof(struct ovs_flow_stats)); | 892 | len += nla_total_size(sizeof(struct ovs_flow_stats)); |
893 | /* OVS_FLOW_ATTR_TCP_FLAGS */ | 893 | /* OVS_FLOW_ATTR_TCP_FLAGS */ |
894 | len += nla_total_size(1); | 894 | len += nla_total_size(1); |
895 | /* OVS_FLOW_ATTR_USED */ | 895 | /* OVS_FLOW_ATTR_USED */ |
896 | len += nla_total_size(8); | 896 | len += nla_total_size(8); |
897 | 897 | ||
898 | len += NLMSG_ALIGN(sizeof(struct ovs_header)); | 898 | len += NLMSG_ALIGN(sizeof(struct ovs_header)); |
899 | 899 | ||
900 | return genlmsg_new(len, GFP_KERNEL); | 900 | return genlmsg_new(len, GFP_KERNEL); |
901 | } | 901 | } |
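The size estimate above adds nla_total_size() for each attribute the reply will carry, plus the aligned ovs_header, so genlmsg_new() can allocate one skb that is always big enough for a single flow. A minimal sketch of that padding arithmetic, assuming the usual 4-byte netlink attribute alignment (the helper below is a local stand-in, not kernel API):

/* Illustrative sketch: how the attribute size accounting above works.
 * A netlink attribute is a 4-byte header (nla_len, nla_type) followed by
 * the payload, and each attribute is padded to a 4-byte boundary.
 */
#include <stdio.h>

#define NLA_ALIGNTO   4
#define NLA_ALIGN(x)  (((x) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
#define NLA_HDRLEN    4   /* sizeof(struct nlattr), already aligned */

static int total_size(int payload)          /* mirrors nla_total_size() */
{
	return NLA_ALIGN(NLA_HDRLEN + payload);
}

int main(void)
{
	printf("u8 attr:  %d bytes\n", total_size(1));   /* -> 8  */
	printf("u64 attr: %d bytes\n", total_size(8));   /* -> 12 */
	return 0;
}

This is why a one-byte attribute such as OVS_FLOW_ATTR_TCP_FLAGS still contributes 8 bytes to the estimate.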
902 | 902 | ||
903 | static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, | 903 | static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, |
904 | struct datapath *dp, | 904 | struct datapath *dp, |
905 | u32 portid, u32 seq, u8 cmd) | 905 | u32 portid, u32 seq, u8 cmd) |
906 | { | 906 | { |
907 | struct sk_buff *skb; | 907 | struct sk_buff *skb; |
908 | int retval; | 908 | int retval; |
909 | 909 | ||
910 | skb = ovs_flow_cmd_alloc_info(flow); | 910 | skb = ovs_flow_cmd_alloc_info(flow); |
911 | if (!skb) | 911 | if (!skb) |
912 | return ERR_PTR(-ENOMEM); | 912 | return ERR_PTR(-ENOMEM); |
913 | 913 | ||
914 | retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd); | 914 | retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd); |
915 | BUG_ON(retval < 0); | 915 | BUG_ON(retval < 0); |
916 | return skb; | 916 | return skb; |
917 | } | 917 | } |
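ovs_flow_cmd_build_info() and its callers lean on the kernel's ERR_PTR()/IS_ERR()/PTR_ERR() convention: a failure is returned as a negative errno encoded in the pointer itself, so one return value carries either a valid skb or an error. A self-contained sketch of that convention (lower-case local stand-ins, assuming the usual 4095-value error range):

/* Illustrative sketch of the error-pointer encoding used throughout
 * these handlers: small negative errno values live in the last page of
 * the address space, so they can never be valid pointers.
 */
#include <stdio.h>
#include <errno.h>

#define MAX_ERRNO 4095

static void *err_ptr(long error)       { return (void *)error; }       /* like ERR_PTR()  */
static long  ptr_err(const void *ptr)  { return (long)ptr; }           /* like PTR_ERR()  */
static int   is_err(const void *ptr)                                   /* like IS_ERR()   */
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
	void *reply = err_ptr(-ENOMEM);   /* what an allocation failure returns */

	if (is_err(reply))
		printf("error path, errno = %ld\n", -ptr_err(reply));
	return 0;
}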
918 | 918 | ||
919 | static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) | 919 | static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) |
920 | { | 920 | { |
921 | struct nlattr **a = info->attrs; | 921 | struct nlattr **a = info->attrs; |
922 | struct ovs_header *ovs_header = info->userhdr; | 922 | struct ovs_header *ovs_header = info->userhdr; |
923 | struct sw_flow_key key; | 923 | struct sw_flow_key key; |
924 | struct sw_flow *flow; | 924 | struct sw_flow *flow; |
925 | struct sk_buff *reply; | 925 | struct sk_buff *reply; |
926 | struct datapath *dp; | 926 | struct datapath *dp; |
927 | struct flow_table *table; | 927 | struct flow_table *table; |
928 | int error; | 928 | int error; |
929 | int key_len; | 929 | int key_len; |
930 | 930 | ||
931 | /* Extract key. */ | 931 | /* Extract key. */ |
932 | error = -EINVAL; | 932 | error = -EINVAL; |
933 | if (!a[OVS_FLOW_ATTR_KEY]) | 933 | if (!a[OVS_FLOW_ATTR_KEY]) |
934 | goto error; | 934 | goto error; |
935 | error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); | 935 | error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); |
936 | if (error) | 936 | if (error) |
937 | goto error; | 937 | goto error; |
938 | 938 | ||
939 | /* Validate actions. */ | 939 | /* Validate actions. */ |
940 | if (a[OVS_FLOW_ATTR_ACTIONS]) { | 940 | if (a[OVS_FLOW_ATTR_ACTIONS]) { |
941 | error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0); | 941 | error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0); |
942 | if (error) | 942 | if (error) |
943 | goto error; | 943 | goto error; |
944 | } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) { | 944 | } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) { |
945 | error = -EINVAL; | 945 | error = -EINVAL; |
946 | goto error; | 946 | goto error; |
947 | } | 947 | } |
948 | 948 | ||
949 | dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); | 949 | dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); |
950 | error = -ENODEV; | 950 | error = -ENODEV; |
951 | if (!dp) | 951 | if (!dp) |
952 | goto error; | 952 | goto error; |
953 | 953 | ||
954 | table = genl_dereference(dp->table); | 954 | table = genl_dereference(dp->table); |
955 | flow = ovs_flow_tbl_lookup(table, &key, key_len); | 955 | flow = ovs_flow_tbl_lookup(table, &key, key_len); |
956 | if (!flow) { | 956 | if (!flow) { |
957 | struct sw_flow_actions *acts; | 957 | struct sw_flow_actions *acts; |
958 | 958 | ||
959 | /* Bail out if we're not allowed to create a new flow. */ | 959 | /* Bail out if we're not allowed to create a new flow. */ |
960 | error = -ENOENT; | 960 | error = -ENOENT; |
961 | if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) | 961 | if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) |
962 | goto error; | 962 | goto error; |
963 | 963 | ||
964 | /* Expand table, if necessary, to make room. */ | 964 | /* Expand table, if necessary, to make room. */ |
965 | if (ovs_flow_tbl_need_to_expand(table)) { | 965 | if (ovs_flow_tbl_need_to_expand(table)) { |
966 | struct flow_table *new_table; | 966 | struct flow_table *new_table; |
967 | 967 | ||
968 | new_table = ovs_flow_tbl_expand(table); | 968 | new_table = ovs_flow_tbl_expand(table); |
969 | if (!IS_ERR(new_table)) { | 969 | if (!IS_ERR(new_table)) { |
970 | rcu_assign_pointer(dp->table, new_table); | 970 | rcu_assign_pointer(dp->table, new_table); |
971 | ovs_flow_tbl_deferred_destroy(table); | 971 | ovs_flow_tbl_deferred_destroy(table); |
972 | table = genl_dereference(dp->table); | 972 | table = genl_dereference(dp->table); |
973 | } | 973 | } |
974 | } | 974 | } |
975 | 975 | ||
976 | /* Allocate flow. */ | 976 | /* Allocate flow. */ |
977 | flow = ovs_flow_alloc(); | 977 | flow = ovs_flow_alloc(); |
978 | if (IS_ERR(flow)) { | 978 | if (IS_ERR(flow)) { |
979 | error = PTR_ERR(flow); | 979 | error = PTR_ERR(flow); |
980 | goto error; | 980 | goto error; |
981 | } | 981 | } |
982 | flow->key = key; | 982 | flow->key = key; |
983 | clear_stats(flow); | 983 | clear_stats(flow); |
984 | 984 | ||
985 | /* Obtain actions. */ | 985 | /* Obtain actions. */ |
986 | acts = ovs_flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]); | 986 | acts = ovs_flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]); |
987 | error = PTR_ERR(acts); | 987 | error = PTR_ERR(acts); |
988 | if (IS_ERR(acts)) | 988 | if (IS_ERR(acts)) |
989 | goto error_free_flow; | 989 | goto error_free_flow; |
990 | rcu_assign_pointer(flow->sf_acts, acts); | 990 | rcu_assign_pointer(flow->sf_acts, acts); |
991 | 991 | ||
992 | /* Put flow in bucket. */ | 992 | /* Put flow in bucket. */ |
993 | flow->hash = ovs_flow_hash(&key, key_len); | 993 | flow->hash = ovs_flow_hash(&key, key_len); |
994 | ovs_flow_tbl_insert(table, flow); | 994 | ovs_flow_tbl_insert(table, flow); |
995 | 995 | ||
996 | reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, | 996 | reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, |
997 | info->snd_seq, | 997 | info->snd_seq, |
998 | OVS_FLOW_CMD_NEW); | 998 | OVS_FLOW_CMD_NEW); |
999 | } else { | 999 | } else { |
1000 | /* We found a matching flow. */ | 1000 | /* We found a matching flow. */ |
1001 | struct sw_flow_actions *old_acts; | 1001 | struct sw_flow_actions *old_acts; |
1002 | struct nlattr *acts_attrs; | 1002 | struct nlattr *acts_attrs; |
1003 | 1003 | ||
1004 | /* Bail out if we're not allowed to modify an existing flow. | 1004 | /* Bail out if we're not allowed to modify an existing flow. |
1005 | * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL | 1005 | * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL |
1006 | * because Generic Netlink treats the latter as a dump | 1006 | * because Generic Netlink treats the latter as a dump |
1007 | * request. We also accept NLM_F_EXCL in case that bug ever | 1007 | * request. We also accept NLM_F_EXCL in case that bug ever |
1008 | * gets fixed. | 1008 | * gets fixed. |
1009 | */ | 1009 | */ |
1010 | error = -EEXIST; | 1010 | error = -EEXIST; |
1011 | if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW && | 1011 | if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW && |
1012 | info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) | 1012 | info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) |
1013 | goto error; | 1013 | goto error; |
1014 | 1014 | ||
1015 | /* Update actions. */ | 1015 | /* Update actions. */ |
1016 | old_acts = rcu_dereference_protected(flow->sf_acts, | 1016 | old_acts = rcu_dereference_protected(flow->sf_acts, |
1017 | lockdep_genl_is_held()); | 1017 | lockdep_genl_is_held()); |
1018 | acts_attrs = a[OVS_FLOW_ATTR_ACTIONS]; | 1018 | acts_attrs = a[OVS_FLOW_ATTR_ACTIONS]; |
1019 | if (acts_attrs && | 1019 | if (acts_attrs && |
1020 | (old_acts->actions_len != nla_len(acts_attrs) || | 1020 | (old_acts->actions_len != nla_len(acts_attrs) || |
1021 | memcmp(old_acts->actions, nla_data(acts_attrs), | 1021 | memcmp(old_acts->actions, nla_data(acts_attrs), |
1022 | old_acts->actions_len))) { | 1022 | old_acts->actions_len))) { |
1023 | struct sw_flow_actions *new_acts; | 1023 | struct sw_flow_actions *new_acts; |
1024 | 1024 | ||
1025 | new_acts = ovs_flow_actions_alloc(acts_attrs); | 1025 | new_acts = ovs_flow_actions_alloc(acts_attrs); |
1026 | error = PTR_ERR(new_acts); | 1026 | error = PTR_ERR(new_acts); |
1027 | if (IS_ERR(new_acts)) | 1027 | if (IS_ERR(new_acts)) |
1028 | goto error; | 1028 | goto error; |
1029 | 1029 | ||
1030 | rcu_assign_pointer(flow->sf_acts, new_acts); | 1030 | rcu_assign_pointer(flow->sf_acts, new_acts); |
1031 | ovs_flow_deferred_free_acts(old_acts); | 1031 | ovs_flow_deferred_free_acts(old_acts); |
1032 | } | 1032 | } |
1033 | 1033 | ||
1034 | reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, | 1034 | reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, |
1035 | info->snd_seq, OVS_FLOW_CMD_NEW); | 1035 | info->snd_seq, OVS_FLOW_CMD_NEW); |
1036 | 1036 | ||
1037 | /* Clear stats. */ | 1037 | /* Clear stats. */ |
1038 | if (a[OVS_FLOW_ATTR_CLEAR]) { | 1038 | if (a[OVS_FLOW_ATTR_CLEAR]) { |
1039 | spin_lock_bh(&flow->lock); | 1039 | spin_lock_bh(&flow->lock); |
1040 | clear_stats(flow); | 1040 | clear_stats(flow); |
1041 | spin_unlock_bh(&flow->lock); | 1041 | spin_unlock_bh(&flow->lock); |
1042 | } | 1042 | } |
1043 | } | 1043 | } |
1044 | 1044 | ||
1045 | if (!IS_ERR(reply)) | 1045 | if (!IS_ERR(reply)) |
1046 | genl_notify(reply, genl_info_net(info), info->snd_portid, | 1046 | genl_notify(reply, genl_info_net(info), info->snd_portid, |
1047 | ovs_dp_flow_multicast_group.id, info->nlhdr, | 1047 | ovs_dp_flow_multicast_group.id, info->nlhdr, |
1048 | GFP_KERNEL); | 1048 | GFP_KERNEL); |
1049 | else | 1049 | else |
1050 | netlink_set_err(sock_net(skb->sk)->genl_sock, 0, | 1050 | netlink_set_err(sock_net(skb->sk)->genl_sock, 0, |
1051 | ovs_dp_flow_multicast_group.id, PTR_ERR(reply)); | 1051 | ovs_dp_flow_multicast_group.id, PTR_ERR(reply)); |
1052 | return 0; | 1052 | return 0; |
1053 | 1053 | ||
1054 | error_free_flow: | 1054 | error_free_flow: |
1055 | ovs_flow_free(flow); | 1055 | ovs_flow_free(flow); |
1056 | error: | 1056 | error: |
1057 | return error; | 1057 | return error; |
1058 | } | 1058 | } |
1059 | 1059 | ||
1060 | static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) | 1060 | static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) |
1061 | { | 1061 | { |
1062 | struct nlattr **a = info->attrs; | 1062 | struct nlattr **a = info->attrs; |
1063 | struct ovs_header *ovs_header = info->userhdr; | 1063 | struct ovs_header *ovs_header = info->userhdr; |
1064 | struct sw_flow_key key; | 1064 | struct sw_flow_key key; |
1065 | struct sk_buff *reply; | 1065 | struct sk_buff *reply; |
1066 | struct sw_flow *flow; | 1066 | struct sw_flow *flow; |
1067 | struct datapath *dp; | 1067 | struct datapath *dp; |
1068 | struct flow_table *table; | 1068 | struct flow_table *table; |
1069 | int err; | 1069 | int err; |
1070 | int key_len; | 1070 | int key_len; |
1071 | 1071 | ||
1072 | if (!a[OVS_FLOW_ATTR_KEY]) | 1072 | if (!a[OVS_FLOW_ATTR_KEY]) |
1073 | return -EINVAL; | 1073 | return -EINVAL; |
1074 | err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); | 1074 | err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); |
1075 | if (err) | 1075 | if (err) |
1076 | return err; | 1076 | return err; |
1077 | 1077 | ||
1078 | dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); | 1078 | dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); |
1079 | if (!dp) | 1079 | if (!dp) |
1080 | return -ENODEV; | 1080 | return -ENODEV; |
1081 | 1081 | ||
1082 | table = genl_dereference(dp->table); | 1082 | table = genl_dereference(dp->table); |
1083 | flow = ovs_flow_tbl_lookup(table, &key, key_len); | 1083 | flow = ovs_flow_tbl_lookup(table, &key, key_len); |
1084 | if (!flow) | 1084 | if (!flow) |
1085 | return -ENOENT; | 1085 | return -ENOENT; |
1086 | 1086 | ||
1087 | reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, | 1087 | reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, |
1088 | info->snd_seq, OVS_FLOW_CMD_NEW); | 1088 | info->snd_seq, OVS_FLOW_CMD_NEW); |
1089 | if (IS_ERR(reply)) | 1089 | if (IS_ERR(reply)) |
1090 | return PTR_ERR(reply); | 1090 | return PTR_ERR(reply); |
1091 | 1091 | ||
1092 | return genlmsg_reply(reply, info); | 1092 | return genlmsg_reply(reply, info); |
1093 | } | 1093 | } |
1094 | 1094 | ||
1095 | static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) | 1095 | static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) |
1096 | { | 1096 | { |
1097 | struct nlattr **a = info->attrs; | 1097 | struct nlattr **a = info->attrs; |
1098 | struct ovs_header *ovs_header = info->userhdr; | 1098 | struct ovs_header *ovs_header = info->userhdr; |
1099 | struct sw_flow_key key; | 1099 | struct sw_flow_key key; |
1100 | struct sk_buff *reply; | 1100 | struct sk_buff *reply; |
1101 | struct sw_flow *flow; | 1101 | struct sw_flow *flow; |
1102 | struct datapath *dp; | 1102 | struct datapath *dp; |
1103 | struct flow_table *table; | 1103 | struct flow_table *table; |
1104 | int err; | 1104 | int err; |
1105 | int key_len; | 1105 | int key_len; |
1106 | 1106 | ||
1107 | dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); | 1107 | dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); |
1108 | if (!dp) | 1108 | if (!dp) |
1109 | return -ENODEV; | 1109 | return -ENODEV; |
1110 | 1110 | ||
1111 | if (!a[OVS_FLOW_ATTR_KEY]) | 1111 | if (!a[OVS_FLOW_ATTR_KEY]) |
1112 | return flush_flows(dp); | 1112 | return flush_flows(dp); |
1113 | 1113 | ||
1114 | err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); | 1114 | err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); |
1115 | if (err) | 1115 | if (err) |
1116 | return err; | 1116 | return err; |
1117 | 1117 | ||
1118 | table = genl_dereference(dp->table); | 1118 | table = genl_dereference(dp->table); |
1119 | flow = ovs_flow_tbl_lookup(table, &key, key_len); | 1119 | flow = ovs_flow_tbl_lookup(table, &key, key_len); |
1120 | if (!flow) | 1120 | if (!flow) |
1121 | return -ENOENT; | 1121 | return -ENOENT; |
1122 | 1122 | ||
1123 | reply = ovs_flow_cmd_alloc_info(flow); | 1123 | reply = ovs_flow_cmd_alloc_info(flow); |
1124 | if (!reply) | 1124 | if (!reply) |
1125 | return -ENOMEM; | 1125 | return -ENOMEM; |
1126 | 1126 | ||
1127 | ovs_flow_tbl_remove(table, flow); | 1127 | ovs_flow_tbl_remove(table, flow); |
1128 | 1128 | ||
1129 | err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, | 1129 | err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, |
1130 | info->snd_seq, 0, OVS_FLOW_CMD_DEL); | 1130 | info->snd_seq, 0, OVS_FLOW_CMD_DEL); |
1131 | BUG_ON(err < 0); | 1131 | BUG_ON(err < 0); |
1132 | 1132 | ||
1133 | ovs_flow_deferred_free(flow); | 1133 | ovs_flow_deferred_free(flow); |
1134 | 1134 | ||
1135 | genl_notify(reply, genl_info_net(info), info->snd_portid, | 1135 | genl_notify(reply, genl_info_net(info), info->snd_portid, |
1136 | ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL); | 1136 | ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL); |
1137 | return 0; | 1137 | return 0; |
1138 | } | 1138 | } |
1139 | 1139 | ||
1140 | static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) | 1140 | static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) |
1141 | { | 1141 | { |
1142 | struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); | 1142 | struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); |
1143 | struct datapath *dp; | 1143 | struct datapath *dp; |
1144 | struct flow_table *table; | 1144 | struct flow_table *table; |
1145 | 1145 | ||
1146 | dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); | 1146 | dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); |
1147 | if (!dp) | 1147 | if (!dp) |
1148 | return -ENODEV; | 1148 | return -ENODEV; |
1149 | 1149 | ||
1150 | table = genl_dereference(dp->table); | 1150 | table = genl_dereference(dp->table); |
1151 | 1151 | ||
1152 | for (;;) { | 1152 | for (;;) { |
1153 | struct sw_flow *flow; | 1153 | struct sw_flow *flow; |
1154 | u32 bucket, obj; | 1154 | u32 bucket, obj; |
1155 | 1155 | ||
1156 | bucket = cb->args[0]; | 1156 | bucket = cb->args[0]; |
1157 | obj = cb->args[1]; | 1157 | obj = cb->args[1]; |
1158 | flow = ovs_flow_tbl_next(table, &bucket, &obj); | 1158 | flow = ovs_flow_tbl_next(table, &bucket, &obj); |
1159 | if (!flow) | 1159 | if (!flow) |
1160 | break; | 1160 | break; |
1161 | 1161 | ||
1162 | if (ovs_flow_cmd_fill_info(flow, dp, skb, | 1162 | if (ovs_flow_cmd_fill_info(flow, dp, skb, |
1163 | NETLINK_CB(cb->skb).portid, | 1163 | NETLINK_CB(cb->skb).portid, |
1164 | cb->nlh->nlmsg_seq, NLM_F_MULTI, | 1164 | cb->nlh->nlmsg_seq, NLM_F_MULTI, |
1165 | OVS_FLOW_CMD_NEW) < 0) | 1165 | OVS_FLOW_CMD_NEW) < 0) |
1166 | break; | 1166 | break; |
1167 | 1167 | ||
1168 | cb->args[0] = bucket; | 1168 | cb->args[0] = bucket; |
1169 | cb->args[1] = obj; | 1169 | cb->args[1] = obj; |
1170 | } | 1170 | } |
1171 | return skb->len; | 1171 | return skb->len; |
1172 | } | 1172 | } |
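A single dump callback may not fit the whole flow table into one skb, so ovs_flow_cmd_dump() saves its position (hash bucket and object index) in cb->args[] and resumes from there on the next callback; a flow whose fill fails is retried rather than skipped, because the cursor is only advanced after a successful fill. A small stand-alone sketch of that resumable-cursor pattern (the fixed table and per-call budget below are stand-ins for the flow table and the skb running out of room):

/* Illustrative sketch of the netlink dump-resume pattern used above. */
#include <stdio.h>

#define BUCKETS     4
#define PER_BUCKET  3
#define BUDGET      5   /* items we can emit per "callback" */

/* Emit up to BUDGET items, resuming from *bucket/*obj; returns items emitted. */
static int dump_once(unsigned *bucket, unsigned *obj)
{
	int emitted = 0;

	while (*bucket < BUCKETS) {
		while (*obj < PER_BUCKET) {
			if (emitted == BUDGET)
				return emitted;   /* out of room: cursor stays put */
			printf("flow %u/%u\n", *bucket, *obj);
			(*obj)++;
			emitted++;
		}
		(*bucket)++;
		*obj = 0;
	}
	return emitted;
}

int main(void)
{
	unsigned bucket = 0, obj = 0;   /* plays the role of cb->args[0..1] */

	while (dump_once(&bucket, &obj) == BUDGET)
		printf("-- next callback --\n");
	return 0;
}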
1173 | 1173 | ||
1174 | static struct genl_ops dp_flow_genl_ops[] = { | 1174 | static struct genl_ops dp_flow_genl_ops[] = { |
1175 | { .cmd = OVS_FLOW_CMD_NEW, | 1175 | { .cmd = OVS_FLOW_CMD_NEW, |
1176 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ | 1176 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ |
1177 | .policy = flow_policy, | 1177 | .policy = flow_policy, |
1178 | .doit = ovs_flow_cmd_new_or_set | 1178 | .doit = ovs_flow_cmd_new_or_set |
1179 | }, | 1179 | }, |
1180 | { .cmd = OVS_FLOW_CMD_DEL, | 1180 | { .cmd = OVS_FLOW_CMD_DEL, |
1181 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ | 1181 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ |
1182 | .policy = flow_policy, | 1182 | .policy = flow_policy, |
1183 | .doit = ovs_flow_cmd_del | 1183 | .doit = ovs_flow_cmd_del |
1184 | }, | 1184 | }, |
1185 | { .cmd = OVS_FLOW_CMD_GET, | 1185 | { .cmd = OVS_FLOW_CMD_GET, |
1186 | .flags = 0, /* OK for unprivileged users. */ | 1186 | .flags = 0, /* OK for unprivileged users. */ |
1187 | .policy = flow_policy, | 1187 | .policy = flow_policy, |
1188 | .doit = ovs_flow_cmd_get, | 1188 | .doit = ovs_flow_cmd_get, |
1189 | .dumpit = ovs_flow_cmd_dump | 1189 | .dumpit = ovs_flow_cmd_dump |
1190 | }, | 1190 | }, |
1191 | { .cmd = OVS_FLOW_CMD_SET, | 1191 | { .cmd = OVS_FLOW_CMD_SET, |
1192 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ | 1192 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ |
1193 | .policy = flow_policy, | 1193 | .policy = flow_policy, |
1194 | .doit = ovs_flow_cmd_new_or_set, | 1194 | .doit = ovs_flow_cmd_new_or_set, |
1195 | }, | 1195 | }, |
1196 | }; | 1196 | }; |
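dp_flow_genl_ops[] is the per-command dispatch table Generic Netlink consults for this family: each entry binds an OVS_FLOW_CMD_* value to a .doit handler (plus a .dumpit for GET) and marks the mutating commands GENL_ADMIN_PERM so only CAP_NET_ADMIN callers may use them. A rough sketch of the same table-driven dispatch shape in plain C (names and the permission check are illustrative, not the genetlink implementation):

/* Illustrative sketch of a command dispatch table with a privilege flag. */
#include <stdio.h>

enum cmd { CMD_NEW, CMD_DEL, CMD_GET, CMD_SET };

struct op {
	enum cmd   cmd;
	int        admin_only;               /* stands in for GENL_ADMIN_PERM */
	int      (*doit)(const char *arg);
};

static int do_new(const char *arg) { printf("new %s\n", arg); return 0; }
static int do_del(const char *arg) { printf("del %s\n", arg); return 0; }
static int do_get(const char *arg) { printf("get %s\n", arg); return 0; }

static const struct op ops[] = {
	{ CMD_NEW, 1, do_new },
	{ CMD_DEL, 1, do_del },
	{ CMD_GET, 0, do_get },              /* read-only: unprivileged is fine */
	{ CMD_SET, 1, do_new },              /* NEW and SET share one handler   */
};

static int dispatch(enum cmd cmd, int is_admin, const char *arg)
{
	size_t i;

	for (i = 0; i < sizeof(ops) / sizeof(ops[0]); i++) {
		if (ops[i].cmd != cmd)
			continue;
		if (ops[i].admin_only && !is_admin)
			return -1;               /* would be -EPERM in the kernel */
		return ops[i].doit(arg);
	}
	return -1;
}

int main(void)
{
	printf("GET as user -> %d\n", dispatch(CMD_GET, 0, "flow"));
	printf("DEL as user -> %d\n", dispatch(CMD_DEL, 0, "flow"));
	return 0;
}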
1197 | 1197 | ||
1198 | static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { | 1198 | static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { |
1199 | [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, | 1199 | [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, |
1200 | [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, | 1200 | [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, |
1201 | }; | 1201 | }; |
1202 | 1202 | ||
1203 | static struct genl_family dp_datapath_genl_family = { | 1203 | static struct genl_family dp_datapath_genl_family = { |
1204 | .id = GENL_ID_GENERATE, | 1204 | .id = GENL_ID_GENERATE, |
1205 | .hdrsize = sizeof(struct ovs_header), | 1205 | .hdrsize = sizeof(struct ovs_header), |
1206 | .name = OVS_DATAPATH_FAMILY, | 1206 | .name = OVS_DATAPATH_FAMILY, |
1207 | .version = OVS_DATAPATH_VERSION, | 1207 | .version = OVS_DATAPATH_VERSION, |
1208 | .maxattr = OVS_DP_ATTR_MAX, | 1208 | .maxattr = OVS_DP_ATTR_MAX, |
1209 | .netnsok = true | 1209 | .netnsok = true |
1210 | }; | 1210 | }; |
1211 | 1211 | ||
1212 | static struct genl_multicast_group ovs_dp_datapath_multicast_group = { | 1212 | static struct genl_multicast_group ovs_dp_datapath_multicast_group = { |
1213 | .name = OVS_DATAPATH_MCGROUP | 1213 | .name = OVS_DATAPATH_MCGROUP |
1214 | }; | 1214 | }; |
1215 | 1215 | ||
1216 | static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, | 1216 | static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, |
1217 | u32 portid, u32 seq, u32 flags, u8 cmd) | 1217 | u32 portid, u32 seq, u32 flags, u8 cmd) |
1218 | { | 1218 | { |
1219 | struct ovs_header *ovs_header; | 1219 | struct ovs_header *ovs_header; |
1220 | struct ovs_dp_stats dp_stats; | 1220 | struct ovs_dp_stats dp_stats; |
1221 | int err; | 1221 | int err; |
1222 | 1222 | ||
1223 | ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family, | 1223 | ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family, |
1224 | flags, cmd); | 1224 | flags, cmd); |
1225 | if (!ovs_header) | 1225 | if (!ovs_header) |
1226 | goto error; | 1226 | goto error; |
1227 | 1227 | ||
1228 | ovs_header->dp_ifindex = get_dpifindex(dp); | 1228 | ovs_header->dp_ifindex = get_dpifindex(dp); |
1229 | 1229 | ||
1230 | rcu_read_lock(); | 1230 | rcu_read_lock(); |
1231 | err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp)); | 1231 | err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp)); |
1232 | rcu_read_unlock(); | 1232 | rcu_read_unlock(); |
1233 | if (err) | 1233 | if (err) |
1234 | goto nla_put_failure; | 1234 | goto nla_put_failure; |
1235 | 1235 | ||
1236 | get_dp_stats(dp, &dp_stats); | 1236 | get_dp_stats(dp, &dp_stats); |
1237 | if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats)) | 1237 | if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats)) |
1238 | goto nla_put_failure; | 1238 | goto nla_put_failure; |
1239 | 1239 | ||
1240 | return genlmsg_end(skb, ovs_header); | 1240 | return genlmsg_end(skb, ovs_header); |
1241 | 1241 | ||
1242 | nla_put_failure: | 1242 | nla_put_failure: |
1243 | genlmsg_cancel(skb, ovs_header); | 1243 | genlmsg_cancel(skb, ovs_header); |
1244 | error: | 1244 | error: |
1245 | return -EMSGSIZE; | 1245 | return -EMSGSIZE; |
1246 | } | 1246 | } |
1247 | 1247 | ||
1248 | static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid, | 1248 | static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid, |
1249 | u32 seq, u8 cmd) | 1249 | u32 seq, u8 cmd) |
1250 | { | 1250 | { |
1251 | struct sk_buff *skb; | 1251 | struct sk_buff *skb; |
1252 | int retval; | 1252 | int retval; |
1253 | 1253 | ||
1254 | skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); | 1254 | skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); |
1255 | if (!skb) | 1255 | if (!skb) |
1256 | return ERR_PTR(-ENOMEM); | 1256 | return ERR_PTR(-ENOMEM); |
1257 | 1257 | ||
1258 | retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd); | 1258 | retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd); |
1259 | if (retval < 0) { | 1259 | if (retval < 0) { |
1260 | kfree_skb(skb); | 1260 | kfree_skb(skb); |
1261 | return ERR_PTR(retval); | 1261 | return ERR_PTR(retval); |
1262 | } | 1262 | } |
1263 | return skb; | 1263 | return skb; |
1264 | } | 1264 | } |
1265 | 1265 | ||
1266 | /* Called with genl_mutex and optionally with RTNL lock also. */ | 1266 | /* Called with genl_mutex and optionally with RTNL lock also. */ |
1267 | static struct datapath *lookup_datapath(struct net *net, | 1267 | static struct datapath *lookup_datapath(struct net *net, |
1268 | struct ovs_header *ovs_header, | 1268 | struct ovs_header *ovs_header, |
1269 | struct nlattr *a[OVS_DP_ATTR_MAX + 1]) | 1269 | struct nlattr *a[OVS_DP_ATTR_MAX + 1]) |
1270 | { | 1270 | { |
1271 | struct datapath *dp; | 1271 | struct datapath *dp; |
1272 | 1272 | ||
1273 | if (!a[OVS_DP_ATTR_NAME]) | 1273 | if (!a[OVS_DP_ATTR_NAME]) |
1274 | dp = get_dp(net, ovs_header->dp_ifindex); | 1274 | dp = get_dp(net, ovs_header->dp_ifindex); |
1275 | else { | 1275 | else { |
1276 | struct vport *vport; | 1276 | struct vport *vport; |
1277 | 1277 | ||
1278 | rcu_read_lock(); | 1278 | rcu_read_lock(); |
1279 | vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME])); | 1279 | vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME])); |
1280 | dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL; | 1280 | dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL; |
1281 | rcu_read_unlock(); | 1281 | rcu_read_unlock(); |
1282 | } | 1282 | } |
1283 | return dp ? dp : ERR_PTR(-ENODEV); | 1283 | return dp ? dp : ERR_PTR(-ENODEV); |
1284 | } | 1284 | } |
1285 | 1285 | ||
1286 | static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) | 1286 | static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) |
1287 | { | 1287 | { |
1288 | struct nlattr **a = info->attrs; | 1288 | struct nlattr **a = info->attrs; |
1289 | struct vport_parms parms; | 1289 | struct vport_parms parms; |
1290 | struct sk_buff *reply; | 1290 | struct sk_buff *reply; |
1291 | struct datapath *dp; | 1291 | struct datapath *dp; |
1292 | struct vport *vport; | 1292 | struct vport *vport; |
1293 | struct ovs_net *ovs_net; | 1293 | struct ovs_net *ovs_net; |
1294 | int err, i; | 1294 | int err, i; |
1295 | 1295 | ||
1296 | err = -EINVAL; | 1296 | err = -EINVAL; |
1297 | if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) | 1297 | if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) |
1298 | goto err; | 1298 | goto err; |
1299 | 1299 | ||
1300 | rtnl_lock(); | 1300 | rtnl_lock(); |
1301 | 1301 | ||
1302 | err = -ENOMEM; | 1302 | err = -ENOMEM; |
1303 | dp = kzalloc(sizeof(*dp), GFP_KERNEL); | 1303 | dp = kzalloc(sizeof(*dp), GFP_KERNEL); |
1304 | if (dp == NULL) | 1304 | if (dp == NULL) |
1305 | goto err_unlock_rtnl; | 1305 | goto err_unlock_rtnl; |
1306 | 1306 | ||
1307 | ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); | 1307 | ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); |
1308 | 1308 | ||
1309 | /* Allocate table. */ | 1309 | /* Allocate table. */ |
1310 | err = -ENOMEM; | 1310 | err = -ENOMEM; |
1311 | rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS)); | 1311 | rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS)); |
1312 | if (!dp->table) | 1312 | if (!dp->table) |
1313 | goto err_free_dp; | 1313 | goto err_free_dp; |
1314 | 1314 | ||
1315 | dp->stats_percpu = alloc_percpu(struct dp_stats_percpu); | 1315 | dp->stats_percpu = alloc_percpu(struct dp_stats_percpu); |
1316 | if (!dp->stats_percpu) { | 1316 | if (!dp->stats_percpu) { |
1317 | err = -ENOMEM; | 1317 | err = -ENOMEM; |
1318 | goto err_destroy_table; | 1318 | goto err_destroy_table; |
1319 | } | 1319 | } |
1320 | 1320 | ||
1321 | dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head), | 1321 | dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head), |
1322 | GFP_KERNEL); | 1322 | GFP_KERNEL); |
1323 | if (!dp->ports) { | 1323 | if (!dp->ports) { |
1324 | err = -ENOMEM; | 1324 | err = -ENOMEM; |
1325 | goto err_destroy_percpu; | 1325 | goto err_destroy_percpu; |
1326 | } | 1326 | } |
1327 | 1327 | ||
1328 | for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) | 1328 | for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) |
1329 | INIT_HLIST_HEAD(&dp->ports[i]); | 1329 | INIT_HLIST_HEAD(&dp->ports[i]); |
1330 | 1330 | ||
1331 | /* Set up our datapath device. */ | 1331 | /* Set up our datapath device. */ |
1332 | parms.name = nla_data(a[OVS_DP_ATTR_NAME]); | 1332 | parms.name = nla_data(a[OVS_DP_ATTR_NAME]); |
1333 | parms.type = OVS_VPORT_TYPE_INTERNAL; | 1333 | parms.type = OVS_VPORT_TYPE_INTERNAL; |
1334 | parms.options = NULL; | 1334 | parms.options = NULL; |
1335 | parms.dp = dp; | 1335 | parms.dp = dp; |
1336 | parms.port_no = OVSP_LOCAL; | 1336 | parms.port_no = OVSP_LOCAL; |
1337 | parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]); | 1337 | parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]); |
1338 | 1338 | ||
1339 | vport = new_vport(&parms); | 1339 | vport = new_vport(&parms); |
1340 | if (IS_ERR(vport)) { | 1340 | if (IS_ERR(vport)) { |
1341 | err = PTR_ERR(vport); | 1341 | err = PTR_ERR(vport); |
1342 | if (err == -EBUSY) | 1342 | if (err == -EBUSY) |
1343 | err = -EEXIST; | 1343 | err = -EEXIST; |
1344 | 1344 | ||
1345 | goto err_destroy_ports_array; | 1345 | goto err_destroy_ports_array; |
1346 | } | 1346 | } |
1347 | 1347 | ||
1348 | reply = ovs_dp_cmd_build_info(dp, info->snd_portid, | 1348 | reply = ovs_dp_cmd_build_info(dp, info->snd_portid, |
1349 | info->snd_seq, OVS_DP_CMD_NEW); | 1349 | info->snd_seq, OVS_DP_CMD_NEW); |
1350 | err = PTR_ERR(reply); | 1350 | err = PTR_ERR(reply); |
1351 | if (IS_ERR(reply)) | 1351 | if (IS_ERR(reply)) |
1352 | goto err_destroy_local_port; | 1352 | goto err_destroy_local_port; |
1353 | 1353 | ||
1354 | ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); | 1354 | ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); |
1355 | list_add_tail(&dp->list_node, &ovs_net->dps); | 1355 | list_add_tail(&dp->list_node, &ovs_net->dps); |
1356 | rtnl_unlock(); | 1356 | rtnl_unlock(); |
1357 | 1357 | ||
1358 | genl_notify(reply, genl_info_net(info), info->snd_portid, | 1358 | genl_notify(reply, genl_info_net(info), info->snd_portid, |
1359 | ovs_dp_datapath_multicast_group.id, info->nlhdr, | 1359 | ovs_dp_datapath_multicast_group.id, info->nlhdr, |
1360 | GFP_KERNEL); | 1360 | GFP_KERNEL); |
1361 | return 0; | 1361 | return 0; |
1362 | 1362 | ||
1363 | err_destroy_local_port: | 1363 | err_destroy_local_port: |
1364 | ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL)); | 1364 | ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL)); |
1365 | err_destroy_ports_array: | 1365 | err_destroy_ports_array: |
1366 | kfree(dp->ports); | 1366 | kfree(dp->ports); |
1367 | err_destroy_percpu: | 1367 | err_destroy_percpu: |
1368 | free_percpu(dp->stats_percpu); | 1368 | free_percpu(dp->stats_percpu); |
1369 | err_destroy_table: | 1369 | err_destroy_table: |
1370 | ovs_flow_tbl_destroy(genl_dereference(dp->table)); | 1370 | ovs_flow_tbl_destroy(genl_dereference(dp->table)); |
1371 | err_free_dp: | 1371 | err_free_dp: |
1372 | release_net(ovs_dp_get_net(dp)); | 1372 | release_net(ovs_dp_get_net(dp)); |
1373 | kfree(dp); | 1373 | kfree(dp); |
1374 | err_unlock_rtnl: | 1374 | err_unlock_rtnl: |
1375 | rtnl_unlock(); | 1375 | rtnl_unlock(); |
1376 | err: | 1376 | err: |
1377 | return err; | 1377 | return err; |
1378 | } | 1378 | } |
1379 | 1379 | ||
1380 | /* Called with genl_mutex. */ | 1380 | /* Called with genl_mutex. */ |
1381 | static void __dp_destroy(struct datapath *dp) | 1381 | static void __dp_destroy(struct datapath *dp) |
1382 | { | 1382 | { |
1383 | int i; | 1383 | int i; |
1384 | 1384 | ||
1385 | rtnl_lock(); | 1385 | rtnl_lock(); |
1386 | 1386 | ||
1387 | for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { | 1387 | for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { |
1388 | struct vport *vport; | 1388 | struct vport *vport; |
1389 | struct hlist_node *node, *n; | 1389 | struct hlist_node *node, *n; |
1390 | 1390 | ||
1391 | hlist_for_each_entry_safe(vport, node, n, &dp->ports[i], dp_hash_node) | 1391 | hlist_for_each_entry_safe(vport, node, n, &dp->ports[i], dp_hash_node) |
1392 | if (vport->port_no != OVSP_LOCAL) | 1392 | if (vport->port_no != OVSP_LOCAL) |
1393 | ovs_dp_detach_port(vport); | 1393 | ovs_dp_detach_port(vport); |
1394 | } | 1394 | } |
1395 | 1395 | ||
1396 | list_del(&dp->list_node); | 1396 | list_del(&dp->list_node); |
1397 | ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL)); | 1397 | ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL)); |
1398 | 1398 | ||
1399 | /* rtnl_unlock() will wait until all the references to devices that | 1399 | /* rtnl_unlock() will wait until all the references to devices that |
1400 | * are pending unregistration have been dropped. We do it here to | 1400 | * are pending unregistration have been dropped. We do it here to |
1401 | * ensure that any internal devices (which contain DP pointers) are | 1401 | * ensure that any internal devices (which contain DP pointers) are |
1402 | * fully destroyed before freeing the datapath. | 1402 | * fully destroyed before freeing the datapath. |
1403 | */ | 1403 | */ |
1404 | rtnl_unlock(); | 1404 | rtnl_unlock(); |
1405 | 1405 | ||
1406 | call_rcu(&dp->rcu, destroy_dp_rcu); | 1406 | call_rcu(&dp->rcu, destroy_dp_rcu); |
1407 | } | 1407 | } |
1408 | 1408 | ||
1409 | static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) | 1409 | static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) |
1410 | { | 1410 | { |
1411 | struct sk_buff *reply; | 1411 | struct sk_buff *reply; |
1412 | struct datapath *dp; | 1412 | struct datapath *dp; |
1413 | int err; | 1413 | int err; |
1414 | 1414 | ||
1415 | dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); | 1415 | dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); |
1416 | err = PTR_ERR(dp); | 1416 | err = PTR_ERR(dp); |
1417 | if (IS_ERR(dp)) | 1417 | if (IS_ERR(dp)) |
1418 | return err; | 1418 | return err; |
1419 | 1419 | ||
1420 | reply = ovs_dp_cmd_build_info(dp, info->snd_portid, | 1420 | reply = ovs_dp_cmd_build_info(dp, info->snd_portid, |
1421 | info->snd_seq, OVS_DP_CMD_DEL); | 1421 | info->snd_seq, OVS_DP_CMD_DEL); |
1422 | err = PTR_ERR(reply); | 1422 | err = PTR_ERR(reply); |
1423 | if (IS_ERR(reply)) | 1423 | if (IS_ERR(reply)) |
1424 | return err; | 1424 | return err; |
1425 | 1425 | ||
1426 | __dp_destroy(dp); | 1426 | __dp_destroy(dp); |
1427 | 1427 | ||
1428 | genl_notify(reply, genl_info_net(info), info->snd_portid, | 1428 | genl_notify(reply, genl_info_net(info), info->snd_portid, |
1429 | ovs_dp_datapath_multicast_group.id, info->nlhdr, | 1429 | ovs_dp_datapath_multicast_group.id, info->nlhdr, |
1430 | GFP_KERNEL); | 1430 | GFP_KERNEL); |
1431 | 1431 | ||
1432 | return 0; | 1432 | return 0; |
1433 | } | 1433 | } |
1434 | 1434 | ||
1435 | static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) | 1435 | static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) |
1436 | { | 1436 | { |
1437 | struct sk_buff *reply; | 1437 | struct sk_buff *reply; |
1438 | struct datapath *dp; | 1438 | struct datapath *dp; |
1439 | int err; | 1439 | int err; |
1440 | 1440 | ||
1441 | dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); | 1441 | dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); |
1442 | if (IS_ERR(dp)) | 1442 | if (IS_ERR(dp)) |
1443 | return PTR_ERR(dp); | 1443 | return PTR_ERR(dp); |
1444 | 1444 | ||
1445 | reply = ovs_dp_cmd_build_info(dp, info->snd_portid, | 1445 | reply = ovs_dp_cmd_build_info(dp, info->snd_portid, |
1446 | info->snd_seq, OVS_DP_CMD_NEW); | 1446 | info->snd_seq, OVS_DP_CMD_NEW); |
1447 | if (IS_ERR(reply)) { | 1447 | if (IS_ERR(reply)) { |
1448 | err = PTR_ERR(reply); | 1448 | err = PTR_ERR(reply); |
1449 | netlink_set_err(sock_net(skb->sk)->genl_sock, 0, | 1449 | netlink_set_err(sock_net(skb->sk)->genl_sock, 0, |
1450 | ovs_dp_datapath_multicast_group.id, err); | 1450 | ovs_dp_datapath_multicast_group.id, err); |
1451 | return 0; | 1451 | return 0; |
1452 | } | 1452 | } |
1453 | 1453 | ||
1454 | genl_notify(reply, genl_info_net(info), info->snd_portid, | 1454 | genl_notify(reply, genl_info_net(info), info->snd_portid, |
1455 | ovs_dp_datapath_multicast_group.id, info->nlhdr, | 1455 | ovs_dp_datapath_multicast_group.id, info->nlhdr, |
1456 | GFP_KERNEL); | 1456 | GFP_KERNEL); |
1457 | 1457 | ||
1458 | return 0; | 1458 | return 0; |
1459 | } | 1459 | } |
1460 | 1460 | ||
1461 | static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) | 1461 | static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) |
1462 | { | 1462 | { |
1463 | struct sk_buff *reply; | 1463 | struct sk_buff *reply; |
1464 | struct datapath *dp; | 1464 | struct datapath *dp; |
1465 | 1465 | ||
1466 | dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); | 1466 | dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); |
1467 | if (IS_ERR(dp)) | 1467 | if (IS_ERR(dp)) |
1468 | return PTR_ERR(dp); | 1468 | return PTR_ERR(dp); |
1469 | 1469 | ||
1470 | reply = ovs_dp_cmd_build_info(dp, info->snd_portid, | 1470 | reply = ovs_dp_cmd_build_info(dp, info->snd_portid, |
1471 | info->snd_seq, OVS_DP_CMD_NEW); | 1471 | info->snd_seq, OVS_DP_CMD_NEW); |
1472 | if (IS_ERR(reply)) | 1472 | if (IS_ERR(reply)) |
1473 | return PTR_ERR(reply); | 1473 | return PTR_ERR(reply); |
1474 | 1474 | ||
1475 | return genlmsg_reply(reply, info); | 1475 | return genlmsg_reply(reply, info); |
1476 | } | 1476 | } |
1477 | 1477 | ||
1478 | static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) | 1478 | static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) |
1479 | { | 1479 | { |
1480 | struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id); | 1480 | struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id); |
1481 | struct datapath *dp; | 1481 | struct datapath *dp; |
1482 | int skip = cb->args[0]; | 1482 | int skip = cb->args[0]; |
1483 | int i = 0; | 1483 | int i = 0; |
1484 | 1484 | ||
1485 | list_for_each_entry(dp, &ovs_net->dps, list_node) { | 1485 | list_for_each_entry(dp, &ovs_net->dps, list_node) { |
1486 | if (i >= skip && | 1486 | if (i >= skip && |
1487 | ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid, | 1487 | ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid, |
1488 | cb->nlh->nlmsg_seq, NLM_F_MULTI, | 1488 | cb->nlh->nlmsg_seq, NLM_F_MULTI, |
1489 | OVS_DP_CMD_NEW) < 0) | 1489 | OVS_DP_CMD_NEW) < 0) |
1490 | break; | 1490 | break; |
1491 | i++; | 1491 | i++; |
1492 | } | 1492 | } |
1493 | 1493 | ||
1494 | cb->args[0] = i; | 1494 | cb->args[0] = i; |
1495 | 1495 | ||
1496 | return skb->len; | 1496 | return skb->len; |
1497 | } | 1497 | } |
1498 | 1498 | ||
1499 | static struct genl_ops dp_datapath_genl_ops[] = { | 1499 | static struct genl_ops dp_datapath_genl_ops[] = { |
1500 | { .cmd = OVS_DP_CMD_NEW, | 1500 | { .cmd = OVS_DP_CMD_NEW, |
1501 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ | 1501 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ |
1502 | .policy = datapath_policy, | 1502 | .policy = datapath_policy, |
1503 | .doit = ovs_dp_cmd_new | 1503 | .doit = ovs_dp_cmd_new |
1504 | }, | 1504 | }, |
1505 | { .cmd = OVS_DP_CMD_DEL, | 1505 | { .cmd = OVS_DP_CMD_DEL, |
1506 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ | 1506 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ |
1507 | .policy = datapath_policy, | 1507 | .policy = datapath_policy, |
1508 | .doit = ovs_dp_cmd_del | 1508 | .doit = ovs_dp_cmd_del |
1509 | }, | 1509 | }, |
1510 | { .cmd = OVS_DP_CMD_GET, | 1510 | { .cmd = OVS_DP_CMD_GET, |
1511 | .flags = 0, /* OK for unprivileged users. */ | 1511 | .flags = 0, /* OK for unprivileged users. */ |
1512 | .policy = datapath_policy, | 1512 | .policy = datapath_policy, |
1513 | .doit = ovs_dp_cmd_get, | 1513 | .doit = ovs_dp_cmd_get, |
1514 | .dumpit = ovs_dp_cmd_dump | 1514 | .dumpit = ovs_dp_cmd_dump |
1515 | }, | 1515 | }, |
1516 | { .cmd = OVS_DP_CMD_SET, | 1516 | { .cmd = OVS_DP_CMD_SET, |
1517 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ | 1517 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ |
1518 | .policy = datapath_policy, | 1518 | .policy = datapath_policy, |
1519 | .doit = ovs_dp_cmd_set, | 1519 | .doit = ovs_dp_cmd_set, |
1520 | }, | 1520 | }, |
1521 | }; | 1521 | }; |
1522 | 1522 | ||
1523 | static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { | 1523 | static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { |
1524 | [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, | 1524 | [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, |
1525 | [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) }, | 1525 | [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) }, |
1526 | [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 }, | 1526 | [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 }, |
1527 | [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, | 1527 | [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, |
1528 | [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 }, | 1528 | [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 }, |
1529 | [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, | 1529 | [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, |
1530 | }; | 1530 | }; |
1531 | 1531 | ||
1532 | static struct genl_family dp_vport_genl_family = { | 1532 | static struct genl_family dp_vport_genl_family = { |
1533 | .id = GENL_ID_GENERATE, | 1533 | .id = GENL_ID_GENERATE, |
1534 | .hdrsize = sizeof(struct ovs_header), | 1534 | .hdrsize = sizeof(struct ovs_header), |
1535 | .name = OVS_VPORT_FAMILY, | 1535 | .name = OVS_VPORT_FAMILY, |
1536 | .version = OVS_VPORT_VERSION, | 1536 | .version = OVS_VPORT_VERSION, |
1537 | .maxattr = OVS_VPORT_ATTR_MAX, | 1537 | .maxattr = OVS_VPORT_ATTR_MAX, |
1538 | .netnsok = true | 1538 | .netnsok = true |
1539 | }; | 1539 | }; |
1540 | 1540 | ||
1541 | struct genl_multicast_group ovs_dp_vport_multicast_group = { | 1541 | struct genl_multicast_group ovs_dp_vport_multicast_group = { |
1542 | .name = OVS_VPORT_MCGROUP | 1542 | .name = OVS_VPORT_MCGROUP |
1543 | }; | 1543 | }; |
1544 | 1544 | ||
1545 | /* Called with RTNL lock or RCU read lock. */ | 1545 | /* Called with RTNL lock or RCU read lock. */ |
1546 | static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, | 1546 | static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, |
1547 | u32 portid, u32 seq, u32 flags, u8 cmd) | 1547 | u32 portid, u32 seq, u32 flags, u8 cmd) |
1548 | { | 1548 | { |
1549 | struct ovs_header *ovs_header; | 1549 | struct ovs_header *ovs_header; |
1550 | struct ovs_vport_stats vport_stats; | 1550 | struct ovs_vport_stats vport_stats; |
1551 | int err; | 1551 | int err; |
1552 | 1552 | ||
1553 | ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family, | 1553 | ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family, |
1554 | flags, cmd); | 1554 | flags, cmd); |
1555 | if (!ovs_header) | 1555 | if (!ovs_header) |
1556 | return -EMSGSIZE; | 1556 | return -EMSGSIZE; |
1557 | 1557 | ||
1558 | ovs_header->dp_ifindex = get_dpifindex(vport->dp); | 1558 | ovs_header->dp_ifindex = get_dpifindex(vport->dp); |
1559 | 1559 | ||
1560 | if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) || | 1560 | if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) || |
1561 | nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) || | 1561 | nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) || |
1562 | nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) || | 1562 | nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) || |
1563 | nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid)) | 1563 | nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid)) |
1564 | goto nla_put_failure; | 1564 | goto nla_put_failure; |
1565 | 1565 | ||
1566 | ovs_vport_get_stats(vport, &vport_stats); | 1566 | ovs_vport_get_stats(vport, &vport_stats); |
1567 | if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats), | 1567 | if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats), |
1568 | &vport_stats)) | 1568 | &vport_stats)) |
1569 | goto nla_put_failure; | 1569 | goto nla_put_failure; |
1570 | 1570 | ||
1571 | err = ovs_vport_get_options(vport, skb); | 1571 | err = ovs_vport_get_options(vport, skb); |
1572 | if (err == -EMSGSIZE) | 1572 | if (err == -EMSGSIZE) |
1573 | goto error; | 1573 | goto error; |
1574 | 1574 | ||
1575 | return genlmsg_end(skb, ovs_header); | 1575 | return genlmsg_end(skb, ovs_header); |
1576 | 1576 | ||
1577 | nla_put_failure: | 1577 | nla_put_failure: |
1578 | err = -EMSGSIZE; | 1578 | err = -EMSGSIZE; |
1579 | error: | 1579 | error: |
1580 | genlmsg_cancel(skb, ovs_header); | 1580 | genlmsg_cancel(skb, ovs_header); |
1581 | return err; | 1581 | return err; |
1582 | } | 1582 | } |
1583 | 1583 | ||
1584 | /* Called with RTNL lock or RCU read lock. */ | 1584 | /* Called with RTNL lock or RCU read lock. */ |
1585 | struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, | 1585 | struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, |
1586 | u32 seq, u8 cmd) | 1586 | u32 seq, u8 cmd) |
1587 | { | 1587 | { |
1588 | struct sk_buff *skb; | 1588 | struct sk_buff *skb; |
1589 | int retval; | 1589 | int retval; |
1590 | 1590 | ||
1591 | skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); | 1591 | skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); |
1592 | if (!skb) | 1592 | if (!skb) |
1593 | return ERR_PTR(-ENOMEM); | 1593 | return ERR_PTR(-ENOMEM); |
1594 | 1594 | ||
1595 | retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd); | 1595 | retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd); |
1596 | if (retval < 0) { | 1596 | if (retval < 0) { |
1597 | kfree_skb(skb); | 1597 | kfree_skb(skb); |
1598 | return ERR_PTR(retval); | 1598 | return ERR_PTR(retval); |
1599 | } | 1599 | } |
1600 | return skb; | 1600 | return skb; |
1601 | } | 1601 | } |
1602 | 1602 | ||
1603 | /* Called with RTNL lock or RCU read lock. */ | 1603 | /* Called with RTNL lock or RCU read lock. */ |
1604 | static struct vport *lookup_vport(struct net *net, | 1604 | static struct vport *lookup_vport(struct net *net, |
1605 | struct ovs_header *ovs_header, | 1605 | struct ovs_header *ovs_header, |
1606 | struct nlattr *a[OVS_VPORT_ATTR_MAX + 1]) | 1606 | struct nlattr *a[OVS_VPORT_ATTR_MAX + 1]) |
1607 | { | 1607 | { |
1608 | struct datapath *dp; | 1608 | struct datapath *dp; |
1609 | struct vport *vport; | 1609 | struct vport *vport; |
1610 | 1610 | ||
1611 | if (a[OVS_VPORT_ATTR_NAME]) { | 1611 | if (a[OVS_VPORT_ATTR_NAME]) { |
1612 | vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME])); | 1612 | vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME])); |
1613 | if (!vport) | 1613 | if (!vport) |
1614 | return ERR_PTR(-ENODEV); | 1614 | return ERR_PTR(-ENODEV); |
1615 | if (ovs_header->dp_ifindex && | 1615 | if (ovs_header->dp_ifindex && |
1616 | ovs_header->dp_ifindex != get_dpifindex(vport->dp)) | 1616 | ovs_header->dp_ifindex != get_dpifindex(vport->dp)) |
1617 | return ERR_PTR(-ENODEV); | 1617 | return ERR_PTR(-ENODEV); |
1618 | return vport; | 1618 | return vport; |
1619 | } else if (a[OVS_VPORT_ATTR_PORT_NO]) { | 1619 | } else if (a[OVS_VPORT_ATTR_PORT_NO]) { |
1620 | u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]); | 1620 | u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]); |
1621 | 1621 | ||
1622 | if (port_no >= DP_MAX_PORTS) | 1622 | if (port_no >= DP_MAX_PORTS) |
1623 | return ERR_PTR(-EFBIG); | 1623 | return ERR_PTR(-EFBIG); |
1624 | 1624 | ||
1625 | dp = get_dp(net, ovs_header->dp_ifindex); | 1625 | dp = get_dp(net, ovs_header->dp_ifindex); |
1626 | if (!dp) | 1626 | if (!dp) |
1627 | return ERR_PTR(-ENODEV); | 1627 | return ERR_PTR(-ENODEV); |
1628 | 1628 | ||
1629 | vport = ovs_vport_rtnl_rcu(dp, port_no); | 1629 | vport = ovs_vport_rtnl_rcu(dp, port_no); |
1630 | if (!vport) | 1630 | if (!vport) |
1631 | return ERR_PTR(-ENOENT); | 1631 | return ERR_PTR(-ENOENT); |
1632 | return vport; | 1632 | return vport; |
1633 | } else | 1633 | } else |
1634 | return ERR_PTR(-EINVAL); | 1634 | return ERR_PTR(-EINVAL); |
1635 | } | 1635 | } |
1636 | 1636 | ||
1637 | static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) | 1637 | static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) |
1638 | { | 1638 | { |
1639 | struct nlattr **a = info->attrs; | 1639 | struct nlattr **a = info->attrs; |
1640 | struct ovs_header *ovs_header = info->userhdr; | 1640 | struct ovs_header *ovs_header = info->userhdr; |
1641 | struct vport_parms parms; | 1641 | struct vport_parms parms; |
1642 | struct sk_buff *reply; | 1642 | struct sk_buff *reply; |
1643 | struct vport *vport; | 1643 | struct vport *vport; |
1644 | struct datapath *dp; | 1644 | struct datapath *dp; |
1645 | u32 port_no; | 1645 | u32 port_no; |
1646 | int err; | 1646 | int err; |
1647 | 1647 | ||
1648 | err = -EINVAL; | 1648 | err = -EINVAL; |
1649 | if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] || | 1649 | if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] || |
1650 | !a[OVS_VPORT_ATTR_UPCALL_PID]) | 1650 | !a[OVS_VPORT_ATTR_UPCALL_PID]) |
1651 | goto exit; | 1651 | goto exit; |
1652 | 1652 | ||
1653 | rtnl_lock(); | 1653 | rtnl_lock(); |
1654 | dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); | 1654 | dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); |
1655 | err = -ENODEV; | 1655 | err = -ENODEV; |
1656 | if (!dp) | 1656 | if (!dp) |
1657 | goto exit_unlock; | 1657 | goto exit_unlock; |
1658 | 1658 | ||
1659 | if (a[OVS_VPORT_ATTR_PORT_NO]) { | 1659 | if (a[OVS_VPORT_ATTR_PORT_NO]) { |
1660 | port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]); | 1660 | port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]); |
1661 | 1661 | ||
1662 | err = -EFBIG; | 1662 | err = -EFBIG; |
1663 | if (port_no >= DP_MAX_PORTS) | 1663 | if (port_no >= DP_MAX_PORTS) |
1664 | goto exit_unlock; | 1664 | goto exit_unlock; |
1665 | 1665 | ||
1666 | vport = ovs_vport_rtnl_rcu(dp, port_no); | 1666 | vport = ovs_vport_rtnl_rcu(dp, port_no); |
1667 | err = -EBUSY; | 1667 | err = -EBUSY; |
1668 | if (vport) | 1668 | if (vport) |
1669 | goto exit_unlock; | 1669 | goto exit_unlock; |
1670 | } else { | 1670 | } else { |
1671 | for (port_no = 1; ; port_no++) { | 1671 | for (port_no = 1; ; port_no++) { |
1672 | if (port_no >= DP_MAX_PORTS) { | 1672 | if (port_no >= DP_MAX_PORTS) { |
1673 | err = -EFBIG; | 1673 | err = -EFBIG; |
1674 | goto exit_unlock; | 1674 | goto exit_unlock; |
1675 | } | 1675 | } |
1676 | vport = ovs_vport_rtnl(dp, port_no); | 1676 | vport = ovs_vport_rtnl(dp, port_no); |
1677 | if (!vport) | 1677 | if (!vport) |
1678 | break; | 1678 | break; |
1679 | } | 1679 | } |
1680 | } | 1680 | } |
1681 | 1681 | ||
1682 | parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]); | 1682 | parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]); |
1683 | parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]); | 1683 | parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]); |
1684 | parms.options = a[OVS_VPORT_ATTR_OPTIONS]; | 1684 | parms.options = a[OVS_VPORT_ATTR_OPTIONS]; |
1685 | parms.dp = dp; | 1685 | parms.dp = dp; |
1686 | parms.port_no = port_no; | 1686 | parms.port_no = port_no; |
1687 | parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); | 1687 | parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); |
1688 | 1688 | ||
1689 | vport = new_vport(&parms); | 1689 | vport = new_vport(&parms); |
1690 | err = PTR_ERR(vport); | 1690 | err = PTR_ERR(vport); |
1691 | if (IS_ERR(vport)) | 1691 | if (IS_ERR(vport)) |
1692 | goto exit_unlock; | 1692 | goto exit_unlock; |
1693 | 1693 | ||
1694 | reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, | 1694 | reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, |
1695 | OVS_VPORT_CMD_NEW); | 1695 | OVS_VPORT_CMD_NEW); |
1696 | if (IS_ERR(reply)) { | 1696 | if (IS_ERR(reply)) { |
1697 | err = PTR_ERR(reply); | 1697 | err = PTR_ERR(reply); |
1698 | ovs_dp_detach_port(vport); | 1698 | ovs_dp_detach_port(vport); |
1699 | goto exit_unlock; | 1699 | goto exit_unlock; |
1700 | } | 1700 | } |
1701 | genl_notify(reply, genl_info_net(info), info->snd_portid, | 1701 | genl_notify(reply, genl_info_net(info), info->snd_portid, |
1702 | ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); | 1702 | ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); |
1703 | 1703 | ||
1704 | exit_unlock: | 1704 | exit_unlock: |
1705 | rtnl_unlock(); | 1705 | rtnl_unlock(); |
1706 | exit: | 1706 | exit: |
1707 | return err; | 1707 | return err; |
1708 | } | 1708 | } |
1709 | 1709 | ||
1710 | static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) | 1710 | static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) |
1711 | { | 1711 | { |
1712 | struct nlattr **a = info->attrs; | 1712 | struct nlattr **a = info->attrs; |
1713 | struct sk_buff *reply; | 1713 | struct sk_buff *reply; |
1714 | struct vport *vport; | 1714 | struct vport *vport; |
1715 | int err; | 1715 | int err; |
1716 | 1716 | ||
1717 | rtnl_lock(); | 1717 | rtnl_lock(); |
1718 | vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); | 1718 | vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); |
1719 | err = PTR_ERR(vport); | 1719 | err = PTR_ERR(vport); |
1720 | if (IS_ERR(vport)) | 1720 | if (IS_ERR(vport)) |
1721 | goto exit_unlock; | 1721 | goto exit_unlock; |
1722 | 1722 | ||
1723 | err = 0; | 1723 | err = 0; |
1724 | if (a[OVS_VPORT_ATTR_TYPE] && | 1724 | if (a[OVS_VPORT_ATTR_TYPE] && |
1725 | nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) | 1725 | nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) |
1726 | err = -EINVAL; | 1726 | err = -EINVAL; |
1727 | 1727 | ||
1728 | if (!err && a[OVS_VPORT_ATTR_OPTIONS]) | 1728 | if (!err && a[OVS_VPORT_ATTR_OPTIONS]) |
1729 | err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]); | 1729 | err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]); |
1730 | if (err) | 1730 | if (err) |
1731 | goto exit_unlock; | 1731 | goto exit_unlock; |
1732 | if (a[OVS_VPORT_ATTR_UPCALL_PID]) | 1732 | if (a[OVS_VPORT_ATTR_UPCALL_PID]) |
1733 | vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); | 1733 | vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); |
1734 | 1734 | ||
1735 | reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, | 1735 | reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, |
1736 | OVS_VPORT_CMD_NEW); | 1736 | OVS_VPORT_CMD_NEW); |
1737 | if (IS_ERR(reply)) { | 1737 | if (IS_ERR(reply)) { |
1738 | netlink_set_err(sock_net(skb->sk)->genl_sock, 0, | 1738 | netlink_set_err(sock_net(skb->sk)->genl_sock, 0, |
1739 | ovs_dp_vport_multicast_group.id, PTR_ERR(reply)); | 1739 | ovs_dp_vport_multicast_group.id, PTR_ERR(reply)); |
1740 | goto exit_unlock; | 1740 | goto exit_unlock; |
1741 | } | 1741 | } |
1742 | 1742 | ||
1743 | genl_notify(reply, genl_info_net(info), info->snd_portid, | 1743 | genl_notify(reply, genl_info_net(info), info->snd_portid, |
1744 | ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); | 1744 | ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); |
1745 | 1745 | ||
1746 | exit_unlock: | 1746 | exit_unlock: |
1747 | rtnl_unlock(); | 1747 | rtnl_unlock(); |
1748 | return err; | 1748 | return err; |
1749 | } | 1749 | } |
1750 | 1750 | ||
1751 | static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) | 1751 | static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) |
1752 | { | 1752 | { |
1753 | struct nlattr **a = info->attrs; | 1753 | struct nlattr **a = info->attrs; |
1754 | struct sk_buff *reply; | 1754 | struct sk_buff *reply; |
1755 | struct vport *vport; | 1755 | struct vport *vport; |
1756 | int err; | 1756 | int err; |
1757 | 1757 | ||
1758 | rtnl_lock(); | 1758 | rtnl_lock(); |
1759 | vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); | 1759 | vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); |
1760 | err = PTR_ERR(vport); | 1760 | err = PTR_ERR(vport); |
1761 | if (IS_ERR(vport)) | 1761 | if (IS_ERR(vport)) |
1762 | goto exit_unlock; | 1762 | goto exit_unlock; |
1763 | 1763 | ||
1764 | if (vport->port_no == OVSP_LOCAL) { | 1764 | if (vport->port_no == OVSP_LOCAL) { |
1765 | err = -EINVAL; | 1765 | err = -EINVAL; |
1766 | goto exit_unlock; | 1766 | goto exit_unlock; |
1767 | } | 1767 | } |
1768 | 1768 | ||
1769 | reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, | 1769 | reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, |
1770 | OVS_VPORT_CMD_DEL); | 1770 | OVS_VPORT_CMD_DEL); |
1771 | err = PTR_ERR(reply); | 1771 | err = PTR_ERR(reply); |
1772 | if (IS_ERR(reply)) | 1772 | if (IS_ERR(reply)) |
1773 | goto exit_unlock; | 1773 | goto exit_unlock; |
1774 | 1774 | ||
1775 | ovs_dp_detach_port(vport); | 1775 | ovs_dp_detach_port(vport); |
1776 | 1776 | ||
1777 | genl_notify(reply, genl_info_net(info), info->snd_portid, | 1777 | genl_notify(reply, genl_info_net(info), info->snd_portid, |
1778 | ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); | 1778 | ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); |
1779 | 1779 | ||
1780 | exit_unlock: | 1780 | exit_unlock: |
1781 | rtnl_unlock(); | 1781 | rtnl_unlock(); |
1782 | return err; | 1782 | return err; |
1783 | } | 1783 | } |
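lookup_vport() signals failure through the pointer itself: the `err = PTR_ERR(vport)` assignment is only meaningful once IS_ERR(vport) confirms the pointer lies in the reserved error range, which is why the handlers can set err unconditionally before testing. A minimal, hypothetical sketch of a lookup helper following the same ERR_PTR() convention (thing_table_find() is a placeholder):

#include <linux/err.h>
#include <linux/errno.h>

struct thing;
struct thing *thing_table_find(u32 id);	/* hypothetical; NULL if absent */

/* Encode -errno in the returned pointer instead of using a separate
 * status argument; callers test with IS_ERR() and decode with PTR_ERR().
 */
static struct thing *thing_lookup(u32 id)
{
	struct thing *t = thing_table_find(id);

	if (!t)
		return ERR_PTR(-ENODEV);	/* IS_ERR() true, PTR_ERR() == -ENODEV */
	return t;				/* ordinary pointer on success */
}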
1784 | 1784 | ||
1785 | static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) | 1785 | static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) |
1786 | { | 1786 | { |
1787 | struct nlattr **a = info->attrs; | 1787 | struct nlattr **a = info->attrs; |
1788 | struct ovs_header *ovs_header = info->userhdr; | 1788 | struct ovs_header *ovs_header = info->userhdr; |
1789 | struct sk_buff *reply; | 1789 | struct sk_buff *reply; |
1790 | struct vport *vport; | 1790 | struct vport *vport; |
1791 | int err; | 1791 | int err; |
1792 | 1792 | ||
1793 | rcu_read_lock(); | 1793 | rcu_read_lock(); |
1794 | vport = lookup_vport(sock_net(skb->sk), ovs_header, a); | 1794 | vport = lookup_vport(sock_net(skb->sk), ovs_header, a); |
1795 | err = PTR_ERR(vport); | 1795 | err = PTR_ERR(vport); |
1796 | if (IS_ERR(vport)) | 1796 | if (IS_ERR(vport)) |
1797 | goto exit_unlock; | 1797 | goto exit_unlock; |
1798 | 1798 | ||
1799 | reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, | 1799 | reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, |
1800 | OVS_VPORT_CMD_NEW); | 1800 | OVS_VPORT_CMD_NEW); |
1801 | err = PTR_ERR(reply); | 1801 | err = PTR_ERR(reply); |
1802 | if (IS_ERR(reply)) | 1802 | if (IS_ERR(reply)) |
1803 | goto exit_unlock; | 1803 | goto exit_unlock; |
1804 | 1804 | ||
1805 | rcu_read_unlock(); | 1805 | rcu_read_unlock(); |
1806 | 1806 | ||
1807 | return genlmsg_reply(reply, info); | 1807 | return genlmsg_reply(reply, info); |
1808 | 1808 | ||
1809 | exit_unlock: | 1809 | exit_unlock: |
1810 | rcu_read_unlock(); | 1810 | rcu_read_unlock(); |
1811 | return err; | 1811 | return err; |
1812 | } | 1812 | } |
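The GET path is read-only, so it takes rcu_read_lock() instead of rtnl_lock(): the vport tables are maintained with RCU-aware primitives, and a reader only has to stay inside the read-side critical section (without sleeping) while it uses the looked-up object. A minimal sketch of that discipline, with hypothetical thing_find_rcu()/thing_report() helpers:

#include <linux/rcupdate.h>
#include <linux/errno.h>

struct thing;
struct thing *thing_find_rcu(u32 id);		/* hypothetical RCU-safe lookup */
void thing_report(const struct thing *t);	/* hypothetical; must not sleep */

/* Read-only access under RCU: the object is guaranteed to stay valid
 * only between rcu_read_lock() and rcu_read_unlock().
 */
static int thing_query(u32 id)
{
	struct thing *t;
	int err = 0;

	rcu_read_lock();
	t = thing_find_rcu(id);
	if (t)
		thing_report(t);	/* no sleeping inside the critical section */
	else
		err = -ENODEV;
	rcu_read_unlock();
	return err;
}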
1813 | 1813 | ||
1814 | static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) | 1814 | static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) |
1815 | { | 1815 | { |
1816 | struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); | 1816 | struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); |
1817 | struct datapath *dp; | 1817 | struct datapath *dp; |
1818 | int bucket = cb->args[0], skip = cb->args[1]; | 1818 | int bucket = cb->args[0], skip = cb->args[1]; |
1819 | int i, j = 0; | 1819 | int i, j = 0; |
1820 | 1820 | ||
1821 | dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); | 1821 | dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); |
1822 | if (!dp) | 1822 | if (!dp) |
1823 | return -ENODEV; | 1823 | return -ENODEV; |
1824 | 1824 | ||
1825 | rcu_read_lock(); | 1825 | rcu_read_lock(); |
1826 | for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) { | 1826 | for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) { |
1827 | struct vport *vport; | 1827 | struct vport *vport; |
1828 | struct hlist_node *n; | 1828 | struct hlist_node *n; |
1829 | 1829 | ||
1830 | j = 0; | 1830 | j = 0; |
1831 | hlist_for_each_entry_rcu(vport, n, &dp->ports[i], dp_hash_node) { | 1831 | hlist_for_each_entry_rcu(vport, n, &dp->ports[i], dp_hash_node) { |
1832 | if (j >= skip && | 1832 | if (j >= skip && |
1833 | ovs_vport_cmd_fill_info(vport, skb, | 1833 | ovs_vport_cmd_fill_info(vport, skb, |
1834 | NETLINK_CB(cb->skb).portid, | 1834 | NETLINK_CB(cb->skb).portid, |
1835 | cb->nlh->nlmsg_seq, | 1835 | cb->nlh->nlmsg_seq, |
1836 | NLM_F_MULTI, | 1836 | NLM_F_MULTI, |
1837 | OVS_VPORT_CMD_NEW) < 0) | 1837 | OVS_VPORT_CMD_NEW) < 0) |
1838 | goto out; | 1838 | goto out; |
1839 | 1839 | ||
1840 | j++; | 1840 | j++; |
1841 | } | 1841 | } |
1842 | skip = 0; | 1842 | skip = 0; |
1843 | } | 1843 | } |
1844 | out: | 1844 | out: |
1845 | rcu_read_unlock(); | 1845 | rcu_read_unlock(); |
1846 | 1846 | ||
1847 | cb->args[0] = i; | 1847 | cb->args[0] = i; |
1848 | cb->args[1] = j; | 1848 | cb->args[1] = j; |
1849 | 1849 | ||
1850 | return skb->len; | 1850 | return skb->len; |
1851 | } | 1851 | } |
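A netlink dump handler is called repeatedly until it returns 0; the function above records the hash bucket in cb->args[0] and the per-bucket entry index in cb->args[1], so each invocation resumes exactly where the previous sk_buff filled up. A minimal sketch of the same bookkeeping over a flat table, using a hypothetical thing_fill_one():

#include <net/genetlink.h>

#define N_THINGS 128
int thing_fill_one(struct sk_buff *skb, int idx,
		   u32 portid, u32 seq);	/* hypothetical; <0 when skb is full */

/* Resumable dump: emit entries until the message is full, remember the
 * position in cb->args[0], and pick up from there on the next call.
 */
static int thing_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int i;

	for (i = cb->args[0]; i < N_THINGS; i++)
		if (thing_fill_one(skb, i, NETLINK_CB(cb->skb).portid,
				   cb->nlh->nlmsg_seq) < 0)
			break;			/* out of room; resume here later */

	cb->args[0] = i;
	return skb->len;	/* 0 (nothing added) ends the dump */
}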
1852 | 1852 | ||
1853 | static struct genl_ops dp_vport_genl_ops[] = { | 1853 | static struct genl_ops dp_vport_genl_ops[] = { |
1854 | { .cmd = OVS_VPORT_CMD_NEW, | 1854 | { .cmd = OVS_VPORT_CMD_NEW, |
1855 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ | 1855 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ |
1856 | .policy = vport_policy, | 1856 | .policy = vport_policy, |
1857 | .doit = ovs_vport_cmd_new | 1857 | .doit = ovs_vport_cmd_new |
1858 | }, | 1858 | }, |
1859 | { .cmd = OVS_VPORT_CMD_DEL, | 1859 | { .cmd = OVS_VPORT_CMD_DEL, |
1860 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ | 1860 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ |
1861 | .policy = vport_policy, | 1861 | .policy = vport_policy, |
1862 | .doit = ovs_vport_cmd_del | 1862 | .doit = ovs_vport_cmd_del |
1863 | }, | 1863 | }, |
1864 | { .cmd = OVS_VPORT_CMD_GET, | 1864 | { .cmd = OVS_VPORT_CMD_GET, |
1865 | .flags = 0, /* OK for unprivileged users. */ | 1865 | .flags = 0, /* OK for unprivileged users. */ |
1866 | .policy = vport_policy, | 1866 | .policy = vport_policy, |
1867 | .doit = ovs_vport_cmd_get, | 1867 | .doit = ovs_vport_cmd_get, |
1868 | .dumpit = ovs_vport_cmd_dump | 1868 | .dumpit = ovs_vport_cmd_dump |
1869 | }, | 1869 | }, |
1870 | { .cmd = OVS_VPORT_CMD_SET, | 1870 | { .cmd = OVS_VPORT_CMD_SET, |
1871 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ | 1871 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ |
1872 | .policy = vport_policy, | 1872 | .policy = vport_policy, |
1873 | .doit = ovs_vport_cmd_set, | 1873 | .doit = ovs_vport_cmd_set, |
1874 | }, | 1874 | }, |
1875 | }; | 1875 | }; |
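Each entry above binds one generic-netlink command to a validation policy plus a .doit (single request/reply) handler and, for GET, a .dumpit (multi-part dump) handler; GENL_ADMIN_PERM makes the core reject the command from callers lacking CAP_NET_ADMIN before the handler ever runs. A hypothetical single-command table in the same shape (all THING_* names are placeholders):

#include <net/genetlink.h>

/* Hypothetical command/attribute numbers and handler. */
enum { THING_ATTR_UNSPEC, THING_ATTR_ID, __THING_ATTR_MAX };
#define THING_ATTR_MAX (__THING_ATTR_MAX - 1)
enum { THING_CMD_UNSPEC, THING_CMD_SET };

static const struct nla_policy thing_policy[THING_ATTR_MAX + 1] = {
	[THING_ATTR_ID] = { .type = NLA_U32 },	/* checked before .doit runs */
};

static int thing_cmd_set(struct sk_buff *skb, struct genl_info *info);

static struct genl_ops thing_genl_ops[] = {
	{ .cmd    = THING_CMD_SET,
	  .flags  = GENL_ADMIN_PERM,	/* CAP_NET_ADMIN required */
	  .policy = thing_policy,
	  .doit   = thing_cmd_set,
	},
};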
1876 | 1876 | ||
1877 | struct genl_family_and_ops { | 1877 | struct genl_family_and_ops { |
1878 | struct genl_family *family; | 1878 | struct genl_family *family; |
1879 | struct genl_ops *ops; | 1879 | struct genl_ops *ops; |
1880 | int n_ops; | 1880 | int n_ops; |
1881 | struct genl_multicast_group *group; | 1881 | struct genl_multicast_group *group; |
1882 | }; | 1882 | }; |
1883 | 1883 | ||
1884 | static const struct genl_family_and_ops dp_genl_families[] = { | 1884 | static const struct genl_family_and_ops dp_genl_families[] = { |
1885 | { &dp_datapath_genl_family, | 1885 | { &dp_datapath_genl_family, |
1886 | dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops), | 1886 | dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops), |
1887 | &ovs_dp_datapath_multicast_group }, | 1887 | &ovs_dp_datapath_multicast_group }, |
1888 | { &dp_vport_genl_family, | 1888 | { &dp_vport_genl_family, |
1889 | dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops), | 1889 | dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops), |
1890 | &ovs_dp_vport_multicast_group }, | 1890 | &ovs_dp_vport_multicast_group }, |
1891 | { &dp_flow_genl_family, | 1891 | { &dp_flow_genl_family, |
1892 | dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops), | 1892 | dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops), |
1893 | &ovs_dp_flow_multicast_group }, | 1893 | &ovs_dp_flow_multicast_group }, |
1894 | { &dp_packet_genl_family, | 1894 | { &dp_packet_genl_family, |
1895 | dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops), | 1895 | dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops), |
1896 | NULL }, | 1896 | NULL }, |
1897 | }; | 1897 | }; |
1898 | 1898 | ||
1899 | static void dp_unregister_genl(int n_families) | 1899 | static void dp_unregister_genl(int n_families) |
1900 | { | 1900 | { |
1901 | int i; | 1901 | int i; |
1902 | 1902 | ||
1903 | for (i = 0; i < n_families; i++) | 1903 | for (i = 0; i < n_families; i++) |
1904 | genl_unregister_family(dp_genl_families[i].family); | 1904 | genl_unregister_family(dp_genl_families[i].family); |
1905 | } | 1905 | } |
1906 | 1906 | ||
1907 | static int dp_register_genl(void) | 1907 | static int dp_register_genl(void) |
1908 | { | 1908 | { |
1909 | int n_registered; | 1909 | int n_registered; |
1910 | int err; | 1910 | int err; |
1911 | int i; | 1911 | int i; |
1912 | 1912 | ||
1913 | n_registered = 0; | 1913 | n_registered = 0; |
1914 | for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) { | 1914 | for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) { |
1915 | const struct genl_family_and_ops *f = &dp_genl_families[i]; | 1915 | const struct genl_family_and_ops *f = &dp_genl_families[i]; |
1916 | 1916 | ||
1917 | err = genl_register_family_with_ops(f->family, f->ops, | 1917 | err = genl_register_family_with_ops(f->family, f->ops, |
1918 | f->n_ops); | 1918 | f->n_ops); |
1919 | if (err) | 1919 | if (err) |
1920 | goto error; | 1920 | goto error; |
1921 | n_registered++; | 1921 | n_registered++; |
1922 | 1922 | ||
1923 | if (f->group) { | 1923 | if (f->group) { |
1924 | err = genl_register_mc_group(f->family, f->group); | 1924 | err = genl_register_mc_group(f->family, f->group); |
1925 | if (err) | 1925 | if (err) |
1926 | goto error; | 1926 | goto error; |
1927 | } | 1927 | } |
1928 | } | 1928 | } |
1929 | 1929 | ||
1930 | return 0; | 1930 | return 0; |
1931 | 1931 | ||
1932 | error: | 1932 | error: |
1933 | dp_unregister_genl(n_registered); | 1933 | dp_unregister_genl(n_registered); |
1934 | return err; | 1934 | return err; |
1935 | } | 1935 | } |
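dp_register_genl() counts successful registrations so that, when one family (or its multicast group) fails, dp_unregister_genl(n_registered) tears down only what was actually registered. The same count-and-unwind shape, reduced to hypothetical register_one()/unregister_one() helpers:

#define N_ITEMS 4
int register_one(int i);	/* hypothetical */
void unregister_one(int i);	/* hypothetical */

/* Undo only the first n registrations, mirroring dp_unregister_genl(). */
static void unregister_first(int n)
{
	int i;

	for (i = 0; i < n; i++)
		unregister_one(i);
}

static int register_all(void)
{
	int i, err;

	for (i = 0; i < N_ITEMS; i++) {
		err = register_one(i);
		if (err) {
			unregister_first(i);	/* only the ones that succeeded */
			return err;
		}
	}
	return 0;
}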
1936 | 1936 | ||
1937 | static void rehash_flow_table(struct work_struct *work) | 1937 | static void rehash_flow_table(struct work_struct *work) |
1938 | { | 1938 | { |
1939 | struct datapath *dp; | 1939 | struct datapath *dp; |
1940 | struct net *net; | 1940 | struct net *net; |
1941 | 1941 | ||
1942 | genl_lock(); | 1942 | genl_lock(); |
1943 | rtnl_lock(); | 1943 | rtnl_lock(); |
1944 | for_each_net(net) { | 1944 | for_each_net(net) { |
1945 | struct ovs_net *ovs_net = net_generic(net, ovs_net_id); | 1945 | struct ovs_net *ovs_net = net_generic(net, ovs_net_id); |
1946 | 1946 | ||
1947 | list_for_each_entry(dp, &ovs_net->dps, list_node) { | 1947 | list_for_each_entry(dp, &ovs_net->dps, list_node) { |
1948 | struct flow_table *old_table = genl_dereference(dp->table); | 1948 | struct flow_table *old_table = genl_dereference(dp->table); |
1949 | struct flow_table *new_table; | 1949 | struct flow_table *new_table; |
1950 | 1950 | ||
1951 | new_table = ovs_flow_tbl_rehash(old_table); | 1951 | new_table = ovs_flow_tbl_rehash(old_table); |
1952 | if (!IS_ERR(new_table)) { | 1952 | if (!IS_ERR(new_table)) { |
1953 | rcu_assign_pointer(dp->table, new_table); | 1953 | rcu_assign_pointer(dp->table, new_table); |
1954 | ovs_flow_tbl_deferred_destroy(old_table); | 1954 | ovs_flow_tbl_deferred_destroy(old_table); |
1955 | } | 1955 | } |
1956 | } | 1956 | } |
1957 | } | 1957 | } |
1958 | rtnl_unlock(); | 1958 | rtnl_unlock(); |
1959 | genl_unlock(); | 1959 | genl_unlock(); |
1960 | 1960 | ||
1961 | schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL); | 1961 | schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL); |
1962 | } | 1962 | } |
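The rehash work swaps the flow table with rcu_assign_pointer() so readers always see either the complete old or the complete new table, frees the old one only through a deferred (RCU) destruction once all readers that might still hold it have finished, and then reschedules itself with schedule_delayed_work(). A minimal, hypothetical sketch of the publish-then-defer-free part (struct table, active_table and table_replace() are placeholders):

#include <linux/rcupdate.h>
#include <linux/slab.h>

/* Hypothetical table type; only the rcu_head matters for the sketch. */
struct table {
	struct rcu_head rcu;
	/* ... buckets ... */
};

static struct table __rcu *active_table;

static void table_free_rcu(struct rcu_head *rcu)
{
	kfree(container_of(rcu, struct table, rcu));
}

/* Publish the replacement, then defer freeing of the old table until
 * every RCU read-side critical section in flight has completed.
 */
static void table_replace(struct table *new_tbl)
{
	struct table *old = rcu_dereference_protected(active_table, 1);

	rcu_assign_pointer(active_table, new_tbl);	/* readers switch over */
	if (old)
		call_rcu(&old->rcu, table_free_rcu);	/* deferred destroy */
}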
1963 | 1963 | ||
1964 | static int __net_init ovs_init_net(struct net *net) | 1964 | static int __net_init ovs_init_net(struct net *net) |
1965 | { | 1965 | { |
1966 | struct ovs_net *ovs_net = net_generic(net, ovs_net_id); | 1966 | struct ovs_net *ovs_net = net_generic(net, ovs_net_id); |
1967 | 1967 | ||
1968 | INIT_LIST_HEAD(&ovs_net->dps); | 1968 | INIT_LIST_HEAD(&ovs_net->dps); |
1969 | return 0; | 1969 | return 0; |
1970 | } | 1970 | } |
1971 | 1971 | ||
1972 | static void __net_exit ovs_exit_net(struct net *net) | 1972 | static void __net_exit ovs_exit_net(struct net *net) |
1973 | { | 1973 | { |
1974 | struct ovs_net *ovs_net = net_generic(net, ovs_net_id); | 1974 | struct ovs_net *ovs_net = net_generic(net, ovs_net_id); |
1975 | struct datapath *dp, *dp_next; | 1975 | struct datapath *dp, *dp_next; |
1976 | 1976 | ||
1977 | genl_lock(); | 1977 | genl_lock(); |
1978 | list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) | 1978 | list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) |
1979 | __dp_destroy(dp); | 1979 | __dp_destroy(dp); |
1980 | genl_unlock(); | 1980 | genl_unlock(); |
1981 | } | 1981 | } |
1982 | 1982 | ||
1983 | static struct pernet_operations ovs_net_ops = { | 1983 | static struct pernet_operations ovs_net_ops = { |
1984 | .init = ovs_init_net, | 1984 | .init = ovs_init_net, |
1985 | .exit = ovs_exit_net, | 1985 | .exit = ovs_exit_net, |
1986 | .id = &ovs_net_id, | 1986 | .id = &ovs_net_id, |
1987 | .size = sizeof(struct ovs_net), | 1987 | .size = sizeof(struct ovs_net), |
1988 | }; | 1988 | }; |
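Because ovs_net_ops sets .id and .size, the pernet core allocates a zeroed struct ovs_net for every network namespace and hands it back through net_generic(net, ovs_net_id); .init/.exit then run once per namespace, including namespaces created after registration. A hypothetical module-private example of the same mechanism (struct thing_net and friends are placeholders):

#include <linux/list.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

/* Hypothetical per-namespace state, in the mould of struct ovs_net. */
struct thing_net {
	struct list_head items;
};

static int thing_net_id __read_mostly;

static int __net_init thing_init_net(struct net *net)
{
	struct thing_net *tn = net_generic(net, thing_net_id);

	INIT_LIST_HEAD(&tn->items);	/* storage was allocated via .size */
	return 0;
}

static struct pernet_operations thing_net_ops = {
	.init = thing_init_net,
	.id   = &thing_net_id,
	.size = sizeof(struct thing_net),
};
/* register_pernet_device(&thing_net_ops) would invoke .init for each
 * existing namespace and for every namespace created later.
 */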
1989 | 1989 | ||
1990 | static int __init dp_init(void) | 1990 | static int __init dp_init(void) |
1991 | { | 1991 | { |
1992 | int err; | 1992 | int err; |
1993 | 1993 | ||
1994 | BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb)); | 1994 | BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb)); |
1995 | 1995 | ||
1996 | pr_info("Open vSwitch switching datapath\n"); | 1996 | pr_info("Open vSwitch switching datapath\n"); |
1997 | 1997 | ||
1998 | err = ovs_flow_init(); | 1998 | err = ovs_flow_init(); |
1999 | if (err) | 1999 | if (err) |
2000 | goto error; | 2000 | goto error; |
2001 | 2001 | ||
2002 | err = ovs_vport_init(); | 2002 | err = ovs_vport_init(); |
2003 | if (err) | 2003 | if (err) |
2004 | goto error_flow_exit; | 2004 | goto error_flow_exit; |
2005 | 2005 | ||
2006 | err = register_pernet_device(&ovs_net_ops); | 2006 | err = register_pernet_device(&ovs_net_ops); |
2007 | if (err) | 2007 | if (err) |
2008 | goto error_vport_exit; | 2008 | goto error_vport_exit; |
2009 | 2009 | ||
2010 | err = register_netdevice_notifier(&ovs_dp_device_notifier); | 2010 | err = register_netdevice_notifier(&ovs_dp_device_notifier); |
2011 | if (err) | 2011 | if (err) |
2012 | goto error_netns_exit; | 2012 | goto error_netns_exit; |
2013 | 2013 | ||
2014 | err = dp_register_genl(); | 2014 | err = dp_register_genl(); |
2015 | if (err < 0) | 2015 | if (err < 0) |
2016 | goto error_unreg_notifier; | 2016 | goto error_unreg_notifier; |
2017 | 2017 | ||
2018 | schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL); | 2018 | schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL); |
2019 | 2019 | ||
2020 | return 0; | 2020 | return 0; |
2021 | 2021 | ||
2022 | error_unreg_notifier: | 2022 | error_unreg_notifier: |
2023 | unregister_netdevice_notifier(&ovs_dp_device_notifier); | 2023 | unregister_netdevice_notifier(&ovs_dp_device_notifier); |
2024 | error_netns_exit: | 2024 | error_netns_exit: |
2025 | unregister_pernet_device(&ovs_net_ops); | 2025 | unregister_pernet_device(&ovs_net_ops); |
2026 | error_vport_exit: | 2026 | error_vport_exit: |
2027 | ovs_vport_exit(); | 2027 | ovs_vport_exit(); |
2028 | error_flow_exit: | 2028 | error_flow_exit: |
2029 | ovs_flow_exit(); | 2029 | ovs_flow_exit(); |
2030 | error: | 2030 | error: |
2031 | return err; | 2031 | return err; |
2032 | } | 2032 | } |
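dp_init() builds a classic unwind ladder: every registration that can fail jumps to a label that undoes only the steps which already succeeded, in reverse order, so a failure at any point leaves the module fully torn down. The same shape reduced to three hypothetical steps:

#include <linux/init.h>

/* Hypothetical init/teardown helpers. */
int step_a(void); void undo_a(void);
int step_b(void); void undo_b(void);
int step_c(void);

static int __init thing_init(void)
{
	int err;

	err = step_a();
	if (err)
		goto fail;
	err = step_b();
	if (err)
		goto fail_undo_a;
	err = step_c();
	if (err)
		goto fail_undo_b;
	return 0;

fail_undo_b:
	undo_b();		/* undo in reverse order of setup */
fail_undo_a:
	undo_a();
fail:
	return err;
}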
2033 | 2033 | ||
2034 | static void dp_cleanup(void) | 2034 | static void dp_cleanup(void) |
2035 | { | 2035 | { |
2036 | cancel_delayed_work_sync(&rehash_flow_wq); | 2036 | cancel_delayed_work_sync(&rehash_flow_wq); |
2037 | dp_unregister_genl(ARRAY_SIZE(dp_genl_families)); | 2037 | dp_unregister_genl(ARRAY_SIZE(dp_genl_families)); |
2038 | unregister_netdevice_notifier(&ovs_dp_device_notifier); | 2038 | unregister_netdevice_notifier(&ovs_dp_device_notifier); |
2039 | unregister_pernet_device(&ovs_net_ops); | 2039 | unregister_pernet_device(&ovs_net_ops); |
2040 | rcu_barrier(); | 2040 | rcu_barrier(); |
2041 | ovs_vport_exit(); | 2041 | ovs_vport_exit(); |
2042 | ovs_flow_exit(); | 2042 | ovs_flow_exit(); |
2043 | } | 2043 | } |
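The rcu_barrier() in dp_cleanup() matters for ordering: it waits for every outstanding call_rcu() callback (such as the deferred flow-table destruction scheduled by the rehash work) to finish before ovs_flow_exit()/ovs_vport_exit() destroy the caches those callbacks free objects into. A hypothetical cache teardown using the same ordering:

#include <linux/rcupdate.h>
#include <linux/slab.h>

static struct kmem_cache *thing_cache;	/* hypothetical; objects freed via call_rcu() */

static void thing_cache_exit(void)
{
	rcu_barrier();				/* wait for all pending RCU callbacks */
	kmem_cache_destroy(thing_cache);	/* now nothing can free into it */
}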
2044 | 2044 | ||
2045 | module_init(dp_init); | 2045 | module_init(dp_init); |
2046 | module_exit(dp_cleanup); | 2046 | module_exit(dp_cleanup); |
2047 | 2047 | ||
2048 | MODULE_DESCRIPTION("Open vSwitch switching datapath"); | 2048 | MODULE_DESCRIPTION("Open vSwitch switching datapath"); |
2049 | MODULE_LICENSE("GPL"); | 2049 | MODULE_LICENSE("GPL"); |
2050 | 2050 |