Commit d83924d7681e7048d639debb614bdd1d9183d201

Authored by Eric Dumazet
Committed by Greg Kroah-Hartman
1 parent 2132c5ea87

net: gro: make sure skb->cb[] initial content has not to be zero

[ Upstream commit 29e98242783ed3ba569797846a606ba66f781625 ]

Starting from linux-3.13, GRO attempts to build full size skbs.

The problem is that the commit assumed one particular field in skb->cb[]
was already clean, but that is not the case on some stacked devices.

Timo reported a crash when traffic is decrypted before
reaching a GRE device.

Fix this by initializing NAPI_GRO_CB(skb)->last at the right place;
this also removes one conditional.

Thanks a lot to Timo for providing full reports and bisecting this.

Fixes: 8a29111c7ca6 ("net: gro: allow to build full sized skb")
Bisected-by: Timo Teras <timo.teras@iki.fi>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Tested-by: Timo Teräs <timo.teras@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
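
For orientation, the fix itself is tiny (the 3 additions and 2 deletions span net/core/dev.c and net/core/skbuff.c), and the changed hunks are not in the portion of net/core/dev.c excerpted below. A minimal sketch of the shape of the change, based on the description above rather than the verbatim hunks:

	/* net/core/dev.c, dev_gro_receive(): initialize ->last together with
	 * the other NAPI_GRO_CB(skb) fields instead of assuming skb->cb[]
	 * arrives zeroed; it is not on some stacked setups, e.g. traffic
	 * decrypted before reaching a GRE device.
	 */
	NAPI_GRO_CB(skb)->same_flow = 0;
	NAPI_GRO_CB(skb)->flush = 0;
	NAPI_GRO_CB(skb)->free = 0;
	NAPI_GRO_CB(skb)->last = skb;	/* the added initialization */

	/* net/core/skbuff.c, skb_gro_receive(): with ->last always valid,
	 * the "?: p" fallback (the removed conditional) is no longer needed.
	 */
	lp = NAPI_GRO_CB(p)->last;	/* was: NAPI_GRO_CB(p)->last ?: p */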

Showing 2 changed files with 3 additions and 2 deletions

net/core/dev.c:

/*
 *	NET3	Protocol independent device support routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 *	Derived from the non IP parts of dev.c 1.0.19
 *		Authors:	Ross Biro
 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 *	Additional Authors:
 *		Florian la Roche <rzsfl@rz.uni-sb.de>
 *		Alan Cox <gw4pts@gw4pts.ampr.org>
 *		David Hinds <dahinds@users.sourceforge.net>
 *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *		Adam Sulmicki <adam@cfar.umd.edu>
 *		Pekka Riikonen <priikone@poesidon.pspt.fi>
 *
 *	Changes:
 *		D.J. Barrow	:	Fixed bug where dev->refcnt gets set
 *					to 2 if register_netdev gets called
 *					before net_dev_init & also removed a
 *					few lines of code in the process.
 *		Alan Cox	:	device private ioctl copies fields back.
 *		Alan Cox	:	Transmit queue code does relevant
 *					stunts to keep the queue safe.
 *		Alan Cox	:	Fixed double lock.
 *		Alan Cox	:	Fixed promisc NULL pointer trap
 *		????????	:	Support the full private ioctl range
 *		Alan Cox	:	Moved ioctl permission check into
 *					drivers
 *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
 *		Alan Cox	:	100 backlog just doesn't cut it when
 *					you start doing multicast video 8)
 *		Alan Cox	:	Rewrote net_bh and list manager.
 *		Alan Cox	:	Fix ETH_P_ALL echoback lengths.
 *		Alan Cox	:	Took out transmit every packet pass
 *					Saved a few bytes in the ioctl handler
 *		Alan Cox	:	Network driver sets packet type before
 *					calling netif_rx. Saves a function
 *					call a packet.
 *		Alan Cox	:	Hashed net_bh()
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
 *		Alan Cox	:	Device lock protection.
 *		Alan Cox	:	Fixed nasty side effect of device close
 *					changes.
 *		Rudi Cilibrasi	:	Pass the right thing to
 *					set_mac_address()
 *		Dave Miller	:	32bit quantity for the device lock to
 *					make it work out on a Sparc.
 *		Bjorn Ekwall	:	Added KERNELD hack.
 *		Alan Cox	:	Cleaned up the backlog initialise.
 *		Craig Metz	:	SIOCGIFCONF fix if space for under
 *					1 device.
 *		Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
 *					is no device open function.
 *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
 *		Michael Chastain :	Fix signed/unsigned for SIOCGIFCONF
 *		Cyrus Durgin	:	Cleaned for KMOD
 *		Adam Sulmicki	:	Bug Fix : Network Device Unload
 *					A network device unload needs to purge
 *					the backlog queue.
 *		Paul Rusty Russell :	SIOCSIFNAME
 *		Pekka Riikonen	:	Netdev boot-time settings code
 *		Andrew Morton	:	Make unregister_netdevice wait
 *					indefinitely on dev->refcnt
 *		J Hadi Salim	:	- Backlog queue sampling
 *					- netif_rx() feedback
 */

#include <asm/uaccess.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/stat.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <net/xfrm.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <trace/events/napi.h>
#include <trace/events/net.h>
#include <trace/events/skb.h>
#include <linux/pci.h>
#include <linux/inetdevice.h>
#include <linux/cpu_rmap.h>
#include <linux/static_key.h>
#include <linux/hashtable.h>
#include <linux/vmalloc.h>
#include <linux/if_macvlan.h>

#include "net-sysfs.h"

/* Instead of increasing this, you should create a hash table. */
#define MAX_GRO_SKBS 8

/* This should be increased if a protocol with a bigger head is added. */
#define GRO_MAX_HEAD (MAX_HEADER + 128)

static DEFINE_SPINLOCK(ptype_lock);
static DEFINE_SPINLOCK(offload_lock);
struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
struct list_head ptype_all __read_mostly;	/* Taps */
static struct list_head offload_base __read_mostly;

static int netif_rx_internal(struct sk_buff *skb);

/*
 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base_head list, and hold dev_base_lock for writing when they do the
 * actual updates.  This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 *
 * See, for example usages, register_netdevice() and
 * unregister_netdevice(), which must be called with the rtnl
 * semaphore held.
 */
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);

/* protects napi_hash addition/deletion and napi_gen_id */
static DEFINE_SPINLOCK(napi_hash_lock);

static unsigned int napi_gen_id;
static DEFINE_HASHTABLE(napi_hash, 8);

static seqcount_t devnet_rename_seq;

static inline void dev_base_seq_inc(struct net *net)
{
	while (++net->dev_base_seq == 0);
}

static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
	unsigned int hash = full_name_hash(name, strnlen(name, IFNAMSIZ));

	return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
}

static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
{
	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
}

static inline void rps_lock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	spin_lock(&sd->input_pkt_queue.lock);
#endif
}

static inline void rps_unlock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	spin_unlock(&sd->input_pkt_queue.lock);
#endif
}

/* Device list insertion */
static void list_netdevice(struct net_device *dev)
{
	struct net *net = dev_net(dev);

	ASSERT_RTNL();

	write_lock_bh(&dev_base_lock);
	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
	hlist_add_head_rcu(&dev->index_hlist,
			   dev_index_hash(net, dev->ifindex));
	write_unlock_bh(&dev_base_lock);

	dev_base_seq_inc(net);
}

/* Device list removal
 * caller must respect a RCU grace period before freeing/reusing dev
 */
static void unlist_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	/* Unlink dev from the device chain */
	write_lock_bh(&dev_base_lock);
	list_del_rcu(&dev->dev_list);
	hlist_del_rcu(&dev->name_hlist);
	hlist_del_rcu(&dev->index_hlist);
	write_unlock_bh(&dev_base_lock);

	dev_base_seq_inc(dev_net(dev));
}

/*
 *	Our notifier list
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.
 */

DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
EXPORT_PER_CPU_SYMBOL(softnet_data);

#ifdef CONFIG_LOCKDEP
/*
 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 * according to dev->type
 */
static const unsigned short netdev_lock_type[] =
	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
	 ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
	 ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
	 ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};

static const char *const netdev_lock_name[] =
	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
	 "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
	 "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
	 "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};

static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];

static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
		if (netdev_lock_type[i] == dev_type)
			return i;
	/* the last key is used by default */
	return ARRAY_SIZE(netdev_lock_type) - 1;
}

static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
	int i;

	i = netdev_lock_pos(dev_type);
	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
				   netdev_lock_name[i]);
}

static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
	int i;

	i = netdev_lock_pos(dev->type);
	lockdep_set_class_and_name(&dev->addr_list_lock,
				   &netdev_addr_lock_key[i],
				   netdev_lock_name[i]);
}
#else
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
}
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
}
#endif

/*******************************************************************************

		Protocol management and registration routines

*******************************************************************************/

/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
 *
 *	BEWARE!!! Protocol handlers, mangling input packets,
 *	MUST BE last in hash buckets and checking protocol handlers
 *	MUST start from promiscuous ptype_all chain in net_bh.
 *	It is true now, do not change it.
 *	Explanation follows: if protocol handler, mangling packet, will
 *	be the first on list, it is not able to sense, that packet
 *	is cloned and should be copied-on-write, so that it will
 *	change it and subsequent readers will get broken packet.
 *							--ANK (980803)
 */

static inline struct list_head *ptype_head(const struct packet_type *pt)
{
	if (pt->type == htons(ETH_P_ALL))
		return &ptype_all;
	else
		return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
}

/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack. The passed &packet_type
 *	is linked into kernel lists and may not be freed until it has been
 *	removed from the kernel lists.
 *
 *	This call does not sleep therefore it can not
 *	guarantee all CPU's that are in middle of receiving packets
 *	will see the new packet type (until the next received packet).
 */

void dev_add_pack(struct packet_type *pt)
{
	struct list_head *head = ptype_head(pt);

	spin_lock(&ptype_lock);
	list_add_rcu(&pt->list, head);
	spin_unlock(&ptype_lock);
}
EXPORT_SYMBOL(dev_add_pack);

/**
 *	__dev_remove_pack - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	The packet type might still be in use by receivers
 *	and must not be freed until after all the CPU's have gone
 *	through a quiescent state.
 */
void __dev_remove_pack(struct packet_type *pt)
{
	struct list_head *head = ptype_head(pt);
	struct packet_type *pt1;

	spin_lock(&ptype_lock);

	list_for_each_entry(pt1, head, list) {
		if (pt == pt1) {
			list_del_rcu(&pt->list);
			goto out;
		}
	}

	pr_warn("dev_remove_pack: %p not found\n", pt);
out:
	spin_unlock(&ptype_lock);
}
EXPORT_SYMBOL(__dev_remove_pack);

/**
 *	dev_remove_pack - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);

	synchronize_net();
}
EXPORT_SYMBOL(dev_remove_pack);


/**
 *	dev_add_offload - register offload handlers
 *	@po: protocol offload declaration
 *
 *	Add protocol offload handlers to the networking stack. The passed
 *	&proto_offload is linked into kernel lists and may not be freed until
 *	it has been removed from the kernel lists.
 *
 *	This call does not sleep therefore it can not
 *	guarantee all CPU's that are in middle of receiving packets
 *	will see the new offload handlers (until the next received packet).
 */
void dev_add_offload(struct packet_offload *po)
{
	struct list_head *head = &offload_base;

	spin_lock(&offload_lock);
	list_add_rcu(&po->list, head);
	spin_unlock(&offload_lock);
}
EXPORT_SYMBOL(dev_add_offload);

/**
 *	__dev_remove_offload - remove offload handler
 *	@po: packet offload declaration
 *
 *	Remove a protocol offload handler that was previously added to the
 *	kernel offload handlers by dev_add_offload(). The passed &offload_type
 *	is removed from the kernel lists and can be freed or reused once this
 *	function returns.
 *
 *	The packet type might still be in use by receivers
 *	and must not be freed until after all the CPU's have gone
 *	through a quiescent state.
 */
static void __dev_remove_offload(struct packet_offload *po)
{
	struct list_head *head = &offload_base;
	struct packet_offload *po1;

	spin_lock(&offload_lock);

	list_for_each_entry(po1, head, list) {
		if (po == po1) {
			list_del_rcu(&po->list);
			goto out;
		}
	}

	pr_warn("dev_remove_offload: %p not found\n", po);
out:
	spin_unlock(&offload_lock);
}

/**
 *	dev_remove_offload - remove packet offload handler
 *	@po: packet offload declaration
 *
 *	Remove a packet offload handler that was previously added to the kernel
 *	offload handlers by dev_add_offload(). The passed &offload_type is
 *	removed from the kernel lists and can be freed or reused once this
 *	function returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_offload(struct packet_offload *po)
{
	__dev_remove_offload(po);

	synchronize_net();
}
EXPORT_SYMBOL(dev_remove_offload);

/******************************************************************************

		      Device Boot-time Settings Routines

*******************************************************************************/

/* Boot time configuration table */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];

/**
 *	netdev_boot_setup_add - add new setup entry
 *	@name: name of the device
 *	@map: configured settings for the device
 *
 *	Adds new setup entry to the dev_boot_setup list.  The function
 *	returns 0 on error and 1 on success.  This is a generic routine to
 *	all netdevices.
 */
static int netdev_boot_setup_add(char *name, struct ifmap *map)
{
	struct netdev_boot_setup *s;
	int i;

	s = dev_boot_setup;
	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
			memset(s[i].name, 0, sizeof(s[i].name));
			strlcpy(s[i].name, name, IFNAMSIZ);
			memcpy(&s[i].map, map, sizeof(s[i].map));
			break;
		}
	}

	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
}

/**
 *	netdev_boot_setup_check - check boot time settings
 *	@dev: the netdevice
 *
 *	Check boot time settings for the device.
 *	The found settings are set for the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings found, 1 if they are.
 */
int netdev_boot_setup_check(struct net_device *dev)
{
	struct netdev_boot_setup *s = dev_boot_setup;
	int i;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
		    !strcmp(dev->name, s[i].name)) {
			dev->irq = s[i].map.irq;
			dev->base_addr = s[i].map.base_addr;
			dev->mem_start = s[i].map.mem_start;
			dev->mem_end = s[i].map.mem_end;
			return 1;
		}
	}
	return 0;
}
EXPORT_SYMBOL(netdev_boot_setup_check);


/**
 *	netdev_boot_base - get address from boot time settings
 *	@prefix: prefix for network device
 *	@unit: id for network device
 *
 *	Check boot time settings for the base address of device.
 *	The found settings are set for the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings found.
 */
unsigned long netdev_boot_base(const char *prefix, int unit)
{
	const struct netdev_boot_setup *s = dev_boot_setup;
	char name[IFNAMSIZ];
	int i;

	sprintf(name, "%s%d", prefix, unit);

	/*
	 * If device already registered then return base of 1
	 * to indicate not to probe for this interface
	 */
	if (__dev_get_by_name(&init_net, name))
		return 1;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
		if (!strcmp(name, s[i].name))
			return s[i].map.base_addr;
	return 0;
}

/*
 * Saves at boot time configured settings for any netdevice.
 */
int __init netdev_boot_setup(char *str)
{
	int ints[5];
	struct ifmap map;

	str = get_options(str, ARRAY_SIZE(ints), ints);
	if (!str || !*str)
		return 0;

	/* Save settings */
	memset(&map, 0, sizeof(map));
	if (ints[0] > 0)
		map.irq = ints[1];
	if (ints[0] > 1)
		map.base_addr = ints[2];
	if (ints[0] > 2)
		map.mem_start = ints[3];
	if (ints[0] > 3)
		map.mem_end = ints[4];

	/* Add new entry to the list */
	return netdev_boot_setup_add(str, &map);
}

__setup("netdev=", netdev_boot_setup);

/*******************************************************************************

			    Device Interface Subroutines

*******************************************************************************/

/**
 *	__dev_get_by_name - find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. Must be called under RTNL semaphore
 *	or @dev_base_lock. If the name is found a pointer to the device
 *	is returned. If the name is not found then %NULL is returned. The
 *	reference counters are not incremented so the caller must be
 *	careful with locks.
 */

struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
	struct net_device *dev;
	struct hlist_head *head = dev_name_hash(net, name);

	hlist_for_each_entry(dev, head, name_hlist)
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_get_by_name);

/**
 *	dev_get_by_name_rcu - find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name.
 *	If the name is found a pointer to the device is returned.
 *	If the name is not found then %NULL is returned.
 *	The reference counters are not incremented so the caller must be
 *	careful with locks. The caller must hold RCU lock.
 */

struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
{
	struct net_device *dev;
	struct hlist_head *head = dev_name_hash(net, name);

	hlist_for_each_entry_rcu(dev, head, name_hlist)
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_get_by_name_rcu);

/**
 *	dev_get_by_name - find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. This can be called from any
 *	context and does its own locking. The returned handle has
 *	the usage count incremented and the caller must use dev_put() to
 *	release it when it is no longer needed. %NULL is returned if no
 *	matching device is found.
 */

struct net_device *dev_get_by_name(struct net *net, const char *name)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, name);
	if (dev)
		dev_hold(dev);
	rcu_read_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_get_by_name);

/**
 *	__dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold either the RTNL semaphore
 *	or @dev_base_lock.
 */

struct net_device *__dev_get_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;
	struct hlist_head *head = dev_index_hash(net, ifindex);

	hlist_for_each_entry(dev, head, index_hlist)
		if (dev->ifindex == ifindex)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_get_by_index);

/**
 *	dev_get_by_index_rcu - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold RCU lock.
 */

struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
{
	struct net_device *dev;
	struct hlist_head *head = dev_index_hash(net, ifindex);

	hlist_for_each_entry_rcu(dev, head, index_hlist)
		if (dev->ifindex == ifindex)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_get_by_index_rcu);


/**
 *	dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns NULL if the device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (dev)
		dev_hold(dev);
	rcu_read_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_get_by_index);

804 /** 804 /**
805 * netdev_get_name - get a netdevice name, knowing its ifindex. 805 * netdev_get_name - get a netdevice name, knowing its ifindex.
806 * @net: network namespace 806 * @net: network namespace
807 * @name: a pointer to the buffer where the name will be stored. 807 * @name: a pointer to the buffer where the name will be stored.
808 * @ifindex: the ifindex of the interface to get the name from. 808 * @ifindex: the ifindex of the interface to get the name from.
809 * 809 *
810 * The use of raw_seqcount_begin() and cond_resched() before 810 * The use of raw_seqcount_begin() and cond_resched() before
811 * retrying is required as we want to give the writers a chance 811 * retrying is required as we want to give the writers a chance
812 * to complete when CONFIG_PREEMPT is not set. 812 * to complete when CONFIG_PREEMPT is not set.
813 */ 813 */
814 int netdev_get_name(struct net *net, char *name, int ifindex) 814 int netdev_get_name(struct net *net, char *name, int ifindex)
815 { 815 {
816 struct net_device *dev; 816 struct net_device *dev;
817 unsigned int seq; 817 unsigned int seq;
818 818
819 retry: 819 retry:
820 seq = raw_seqcount_begin(&devnet_rename_seq); 820 seq = raw_seqcount_begin(&devnet_rename_seq);
821 rcu_read_lock(); 821 rcu_read_lock();
822 dev = dev_get_by_index_rcu(net, ifindex); 822 dev = dev_get_by_index_rcu(net, ifindex);
823 if (!dev) { 823 if (!dev) {
824 rcu_read_unlock(); 824 rcu_read_unlock();
825 return -ENODEV; 825 return -ENODEV;
826 } 826 }
827 827
828 strcpy(name, dev->name); 828 strcpy(name, dev->name);
829 rcu_read_unlock(); 829 rcu_read_unlock();
830 if (read_seqcount_retry(&devnet_rename_seq, seq)) { 830 if (read_seqcount_retry(&devnet_rename_seq, seq)) {
831 cond_resched(); 831 cond_resched();
832 goto retry; 832 goto retry;
833 } 833 }
834 834
835 return 0; 835 return 0;
836 } 836 }
837 837
838 /** 838 /**
839 * dev_getbyhwaddr_rcu - find a device by its hardware address 839 * dev_getbyhwaddr_rcu - find a device by its hardware address
840 * @net: the applicable net namespace 840 * @net: the applicable net namespace
841 * @type: media type of device 841 * @type: media type of device
842 * @ha: hardware address 842 * @ha: hardware address
843 * 843 *
844 * Search for an interface by MAC address. Returns NULL if the device 844 * Search for an interface by MAC address. Returns NULL if the device
845 * is not found or a pointer to the device. 845 * is not found or a pointer to the device.
846 * The caller must hold RCU or RTNL. 846 * The caller must hold RCU or RTNL.
847 * The returned device has not had its ref count increased 847 * The returned device has not had its ref count increased
848 * and the caller must therefore be careful about locking 848 * and the caller must therefore be careful about locking
849 * 849 *
850 */ 850 */
851 851
852 struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, 852 struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
853 const char *ha) 853 const char *ha)
854 { 854 {
855 struct net_device *dev; 855 struct net_device *dev;
856 856
857 for_each_netdev_rcu(net, dev) 857 for_each_netdev_rcu(net, dev)
858 if (dev->type == type && 858 if (dev->type == type &&
859 !memcmp(dev->dev_addr, ha, dev->addr_len)) 859 !memcmp(dev->dev_addr, ha, dev->addr_len))
860 return dev; 860 return dev;
861 861
862 return NULL; 862 return NULL;
863 } 863 }
864 EXPORT_SYMBOL(dev_getbyhwaddr_rcu); 864 EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
865 865
866 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) 866 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
867 { 867 {
868 struct net_device *dev; 868 struct net_device *dev;
869 869
870 ASSERT_RTNL(); 870 ASSERT_RTNL();
871 for_each_netdev(net, dev) 871 for_each_netdev(net, dev)
872 if (dev->type == type) 872 if (dev->type == type)
873 return dev; 873 return dev;
874 874
875 return NULL; 875 return NULL;
876 } 876 }
877 EXPORT_SYMBOL(__dev_getfirstbyhwtype); 877 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
878 878
879 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) 879 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
880 { 880 {
881 struct net_device *dev, *ret = NULL; 881 struct net_device *dev, *ret = NULL;
882 882
883 rcu_read_lock(); 883 rcu_read_lock();
884 for_each_netdev_rcu(net, dev) 884 for_each_netdev_rcu(net, dev)
885 if (dev->type == type) { 885 if (dev->type == type) {
886 dev_hold(dev); 886 dev_hold(dev);
887 ret = dev; 887 ret = dev;
888 break; 888 break;
889 } 889 }
890 rcu_read_unlock(); 890 rcu_read_unlock();
891 return ret; 891 return ret;
892 } 892 }
893 EXPORT_SYMBOL(dev_getfirstbyhwtype); 893 EXPORT_SYMBOL(dev_getfirstbyhwtype);
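By contrast, dev_getfirstbyhwtype() returns the device with a held reference, so its users must drop it. A hedged sketch; the helper name and the pr_info message are assumptions.

static void example_report_first_ether(void)
{
	struct net_device *dev;

	dev = dev_getfirstbyhwtype(&init_net, ARPHRD_ETHER);
	if (!dev)
		return;

	pr_info("first ethernet device: %s\n", dev->name);
	dev_put(dev);		/* balance the dev_hold() taken by the lookup */
}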
894 894
895 /** 895 /**
896 * dev_get_by_flags_rcu - find any device with given flags 896 * dev_get_by_flags_rcu - find any device with given flags
897 * @net: the applicable net namespace 897 * @net: the applicable net namespace
898 * @if_flags: IFF_* values 898 * @if_flags: IFF_* values
899 * @mask: bitmask of bits in if_flags to check 899 * @mask: bitmask of bits in if_flags to check
900 * 900 *
901 * Search for any interface with the given flags. Returns a pointer to the 901 * Search for any interface with the given flags. Returns a pointer to the
902 * first matching device, or NULL if none is found. Must be called inside 902 * first matching device, or NULL if none is found. Must be called inside
903 * rcu_read_lock(); the result's refcount is unchanged. 903 * rcu_read_lock(); the result's refcount is unchanged.
904 */ 904 */
905 905
906 struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags, 906 struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
907 unsigned short mask) 907 unsigned short mask)
908 { 908 {
909 struct net_device *dev, *ret; 909 struct net_device *dev, *ret;
910 910
911 ret = NULL; 911 ret = NULL;
912 for_each_netdev_rcu(net, dev) { 912 for_each_netdev_rcu(net, dev) {
913 if (((dev->flags ^ if_flags) & mask) == 0) { 913 if (((dev->flags ^ if_flags) & mask) == 0) {
914 ret = dev; 914 ret = dev;
915 break; 915 break;
916 } 916 }
917 } 917 }
918 return ret; 918 return ret;
919 } 919 }
920 EXPORT_SYMBOL(dev_get_by_flags_rcu); 920 EXPORT_SYMBOL(dev_get_by_flags_rcu);
921 921
922 /** 922 /**
923 * dev_valid_name - check if name is okay for network device 923 * dev_valid_name - check if name is okay for network device
924 * @name: name string 924 * @name: name string
925 * 925 *
926 * Network device names need to be valid file names to 926 * Network device names need to be valid file names to
927 * allow sysfs to work. We also disallow any kind of 927 * allow sysfs to work. We also disallow any kind of
928 * whitespace. 928 * whitespace.
929 */ 929 */
930 bool dev_valid_name(const char *name) 930 bool dev_valid_name(const char *name)
931 { 931 {
932 if (*name == '\0') 932 if (*name == '\0')
933 return false; 933 return false;
934 if (strlen(name) >= IFNAMSIZ) 934 if (strlen(name) >= IFNAMSIZ)
935 return false; 935 return false;
936 if (!strcmp(name, ".") || !strcmp(name, "..")) 936 if (!strcmp(name, ".") || !strcmp(name, ".."))
937 return false; 937 return false;
938 938
939 while (*name) { 939 while (*name) {
940 if (*name == '/' || isspace(*name)) 940 if (*name == '/' || isspace(*name))
941 return false; 941 return false;
942 name++; 942 name++;
943 } 943 }
944 return true; 944 return true;
945 } 945 }
946 EXPORT_SYMBOL(dev_valid_name); 946 EXPORT_SYMBOL(dev_valid_name);
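A short sketch of the intended use: vet a configuration-supplied name before handing it to the stack (the helper name is hypothetical). dev_valid_name() rejects "", ".", "..", names of IFNAMSIZ or more characters, and anything containing '/' or whitespace.

static int example_check_ifname(const char *name)
{
	if (!dev_valid_name(name))
		return -EINVAL;
	return 0;
}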
947 947
948 /** 948 /**
949 * __dev_alloc_name - allocate a name for a device 949 * __dev_alloc_name - allocate a name for a device
950 * @net: network namespace to allocate the device name in 950 * @net: network namespace to allocate the device name in
951 * @name: name format string 951 * @name: name format string
952 * @buf: scratch buffer and result name string 952 * @buf: scratch buffer and result name string
953 * 953 *
954 * Passed a format string - eg "lt%d" - it will try to find a suitable 954 * Passed a format string - eg "lt%d" - it will try to find a suitable
955 * id. It scans the list of devices to build up a free map, then chooses 955 * id. It scans the list of devices to build up a free map, then chooses
956 * the first empty slot. The caller must hold the dev_base or rtnl lock 956 * the first empty slot. The caller must hold the dev_base or rtnl lock
957 * while allocating the name and adding the device in order to avoid 957 * while allocating the name and adding the device in order to avoid
958 * duplicates. 958 * duplicates.
959 * Limited to bits_per_byte * page size devices (ie 32K on most platforms). 959 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
960 * Returns the number of the unit assigned or a negative errno code. 960 * Returns the number of the unit assigned or a negative errno code.
961 */ 961 */
962 962
963 static int __dev_alloc_name(struct net *net, const char *name, char *buf) 963 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
964 { 964 {
965 int i = 0; 965 int i = 0;
966 const char *p; 966 const char *p;
967 const int max_netdevices = 8*PAGE_SIZE; 967 const int max_netdevices = 8*PAGE_SIZE;
968 unsigned long *inuse; 968 unsigned long *inuse;
969 struct net_device *d; 969 struct net_device *d;
970 970
971 p = strnchr(name, IFNAMSIZ-1, '%'); 971 p = strnchr(name, IFNAMSIZ-1, '%');
972 if (p) { 972 if (p) {
973 /* 973 /*
974 * Verify the string as this thing may have come from 974 * Verify the string as this thing may have come from
975 * the user. There must be exactly one "%d" and no other "%" 975 * the user. There must be exactly one "%d" and no other "%"
976 * characters. 976 * characters.
977 */ 977 */
978 if (p[1] != 'd' || strchr(p + 2, '%')) 978 if (p[1] != 'd' || strchr(p + 2, '%'))
979 return -EINVAL; 979 return -EINVAL;
980 980
981 /* Use one page as a bit array of possible slots */ 981 /* Use one page as a bit array of possible slots */
982 inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC); 982 inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
983 if (!inuse) 983 if (!inuse)
984 return -ENOMEM; 984 return -ENOMEM;
985 985
986 for_each_netdev(net, d) { 986 for_each_netdev(net, d) {
987 if (!sscanf(d->name, name, &i)) 987 if (!sscanf(d->name, name, &i))
988 continue; 988 continue;
989 if (i < 0 || i >= max_netdevices) 989 if (i < 0 || i >= max_netdevices)
990 continue; 990 continue;
991 991
992 /* avoid cases where sscanf is not exact inverse of printf */ 992 /* avoid cases where sscanf is not exact inverse of printf */
993 snprintf(buf, IFNAMSIZ, name, i); 993 snprintf(buf, IFNAMSIZ, name, i);
994 if (!strncmp(buf, d->name, IFNAMSIZ)) 994 if (!strncmp(buf, d->name, IFNAMSIZ))
995 set_bit(i, inuse); 995 set_bit(i, inuse);
996 } 996 }
997 997
998 i = find_first_zero_bit(inuse, max_netdevices); 998 i = find_first_zero_bit(inuse, max_netdevices);
999 free_page((unsigned long) inuse); 999 free_page((unsigned long) inuse);
1000 } 1000 }
1001 1001
1002 if (buf != name) 1002 if (buf != name)
1003 snprintf(buf, IFNAMSIZ, name, i); 1003 snprintf(buf, IFNAMSIZ, name, i);
1004 if (!__dev_get_by_name(net, buf)) 1004 if (!__dev_get_by_name(net, buf))
1005 return i; 1005 return i;
1006 1006
1007 /* It is possible to run out of possible slots 1007 /* It is possible to run out of possible slots
1008 * when the name is long and there isn't enough space left 1008 * when the name is long and there isn't enough space left
1009 * for the digits, or if all bits are used. 1009 * for the digits, or if all bits are used.
1010 */ 1010 */
1011 return -ENFILE; 1011 return -ENFILE;
1012 } 1012 }
1013 1013
1014 /** 1014 /**
1015 * dev_alloc_name - allocate a name for a device 1015 * dev_alloc_name - allocate a name for a device
1016 * @dev: device 1016 * @dev: device
1017 * @name: name format string 1017 * @name: name format string
1018 * 1018 *
1019 * Passed a format string - eg "lt%d" - it will try to find a suitable 1019 * Passed a format string - eg "lt%d" - it will try to find a suitable
1020 * id. It scans the list of devices to build up a free map, then chooses 1020 * id. It scans the list of devices to build up a free map, then chooses
1021 * the first empty slot. The caller must hold the dev_base or rtnl lock 1021 * the first empty slot. The caller must hold the dev_base or rtnl lock
1022 * while allocating the name and adding the device in order to avoid 1022 * while allocating the name and adding the device in order to avoid
1023 * duplicates. 1023 * duplicates.
1024 * Limited to bits_per_byte * page size devices (ie 32K on most platforms). 1024 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
1025 * Returns the number of the unit assigned or a negative errno code. 1025 * Returns the number of the unit assigned or a negative errno code.
1026 */ 1026 */
1027 1027
1028 int dev_alloc_name(struct net_device *dev, const char *name) 1028 int dev_alloc_name(struct net_device *dev, const char *name)
1029 { 1029 {
1030 char buf[IFNAMSIZ]; 1030 char buf[IFNAMSIZ];
1031 struct net *net; 1031 struct net *net;
1032 int ret; 1032 int ret;
1033 1033
1034 BUG_ON(!dev_net(dev)); 1034 BUG_ON(!dev_net(dev));
1035 net = dev_net(dev); 1035 net = dev_net(dev);
1036 ret = __dev_alloc_name(net, name, buf); 1036 ret = __dev_alloc_name(net, name, buf);
1037 if (ret >= 0) 1037 if (ret >= 0)
1038 strlcpy(dev->name, buf, IFNAMSIZ); 1038 strlcpy(dev->name, buf, IFNAMSIZ);
1039 return ret; 1039 return ret;
1040 } 1040 }
1041 EXPORT_SYMBOL(dev_alloc_name); 1041 EXPORT_SYMBOL(dev_alloc_name);
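A sketch of the usual driver-side pattern, assuming a hypothetical "foo%d" prefix: pick a unit number under RTNL and then register, so the chosen name cannot be raced away.

static int example_name_and_register(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_alloc_name(dev, "foo%d");	/* returns the unit number or -errno */
	if (err >= 0)
		err = register_netdevice(dev);
	rtnl_unlock();

	return err < 0 ? err : 0;
}

Drivers that simply set dev->name to "foo%d" and call register_netdev() get the same allocation done for them via dev_get_valid_name().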
1042 1042
1043 static int dev_alloc_name_ns(struct net *net, 1043 static int dev_alloc_name_ns(struct net *net,
1044 struct net_device *dev, 1044 struct net_device *dev,
1045 const char *name) 1045 const char *name)
1046 { 1046 {
1047 char buf[IFNAMSIZ]; 1047 char buf[IFNAMSIZ];
1048 int ret; 1048 int ret;
1049 1049
1050 ret = __dev_alloc_name(net, name, buf); 1050 ret = __dev_alloc_name(net, name, buf);
1051 if (ret >= 0) 1051 if (ret >= 0)
1052 strlcpy(dev->name, buf, IFNAMSIZ); 1052 strlcpy(dev->name, buf, IFNAMSIZ);
1053 return ret; 1053 return ret;
1054 } 1054 }
1055 1055
1056 static int dev_get_valid_name(struct net *net, 1056 static int dev_get_valid_name(struct net *net,
1057 struct net_device *dev, 1057 struct net_device *dev,
1058 const char *name) 1058 const char *name)
1059 { 1059 {
1060 BUG_ON(!net); 1060 BUG_ON(!net);
1061 1061
1062 if (!dev_valid_name(name)) 1062 if (!dev_valid_name(name))
1063 return -EINVAL; 1063 return -EINVAL;
1064 1064
1065 if (strchr(name, '%')) 1065 if (strchr(name, '%'))
1066 return dev_alloc_name_ns(net, dev, name); 1066 return dev_alloc_name_ns(net, dev, name);
1067 else if (__dev_get_by_name(net, name)) 1067 else if (__dev_get_by_name(net, name))
1068 return -EEXIST; 1068 return -EEXIST;
1069 else if (dev->name != name) 1069 else if (dev->name != name)
1070 strlcpy(dev->name, name, IFNAMSIZ); 1070 strlcpy(dev->name, name, IFNAMSIZ);
1071 1071
1072 return 0; 1072 return 0;
1073 } 1073 }
1074 1074
1075 /** 1075 /**
1076 * dev_change_name - change name of a device 1076 * dev_change_name - change name of a device
1077 * @dev: device 1077 * @dev: device
1078 * @newname: name (or format string) must be at least IFNAMSIZ 1078 * @newname: name (or format string) must be at least IFNAMSIZ
1079 * 1079 *
1080 * Change name of a device; a format string such as "eth%d" may be 1080 * Change name of a device; a format string such as "eth%d" may be
1081 * passed for wildcarding. 1081 * passed for wildcarding.
1082 */ 1082 */
1083 int dev_change_name(struct net_device *dev, const char *newname) 1083 int dev_change_name(struct net_device *dev, const char *newname)
1084 { 1084 {
1085 char oldname[IFNAMSIZ]; 1085 char oldname[IFNAMSIZ];
1086 int err = 0; 1086 int err = 0;
1087 int ret; 1087 int ret;
1088 struct net *net; 1088 struct net *net;
1089 1089
1090 ASSERT_RTNL(); 1090 ASSERT_RTNL();
1091 BUG_ON(!dev_net(dev)); 1091 BUG_ON(!dev_net(dev));
1092 1092
1093 net = dev_net(dev); 1093 net = dev_net(dev);
1094 if (dev->flags & IFF_UP) 1094 if (dev->flags & IFF_UP)
1095 return -EBUSY; 1095 return -EBUSY;
1096 1096
1097 write_seqcount_begin(&devnet_rename_seq); 1097 write_seqcount_begin(&devnet_rename_seq);
1098 1098
1099 if (strncmp(newname, dev->name, IFNAMSIZ) == 0) { 1099 if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
1100 write_seqcount_end(&devnet_rename_seq); 1100 write_seqcount_end(&devnet_rename_seq);
1101 return 0; 1101 return 0;
1102 } 1102 }
1103 1103
1104 memcpy(oldname, dev->name, IFNAMSIZ); 1104 memcpy(oldname, dev->name, IFNAMSIZ);
1105 1105
1106 err = dev_get_valid_name(net, dev, newname); 1106 err = dev_get_valid_name(net, dev, newname);
1107 if (err < 0) { 1107 if (err < 0) {
1108 write_seqcount_end(&devnet_rename_seq); 1108 write_seqcount_end(&devnet_rename_seq);
1109 return err; 1109 return err;
1110 } 1110 }
1111 1111
1112 rollback: 1112 rollback:
1113 ret = device_rename(&dev->dev, dev->name); 1113 ret = device_rename(&dev->dev, dev->name);
1114 if (ret) { 1114 if (ret) {
1115 memcpy(dev->name, oldname, IFNAMSIZ); 1115 memcpy(dev->name, oldname, IFNAMSIZ);
1116 write_seqcount_end(&devnet_rename_seq); 1116 write_seqcount_end(&devnet_rename_seq);
1117 return ret; 1117 return ret;
1118 } 1118 }
1119 1119
1120 write_seqcount_end(&devnet_rename_seq); 1120 write_seqcount_end(&devnet_rename_seq);
1121 1121
1122 netdev_adjacent_rename_links(dev, oldname); 1122 netdev_adjacent_rename_links(dev, oldname);
1123 1123
1124 write_lock_bh(&dev_base_lock); 1124 write_lock_bh(&dev_base_lock);
1125 hlist_del_rcu(&dev->name_hlist); 1125 hlist_del_rcu(&dev->name_hlist);
1126 write_unlock_bh(&dev_base_lock); 1126 write_unlock_bh(&dev_base_lock);
1127 1127
1128 synchronize_rcu(); 1128 synchronize_rcu();
1129 1129
1130 write_lock_bh(&dev_base_lock); 1130 write_lock_bh(&dev_base_lock);
1131 hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name)); 1131 hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
1132 write_unlock_bh(&dev_base_lock); 1132 write_unlock_bh(&dev_base_lock);
1133 1133
1134 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev); 1134 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
1135 ret = notifier_to_errno(ret); 1135 ret = notifier_to_errno(ret);
1136 1136
1137 if (ret) { 1137 if (ret) {
1138 /* err >= 0 after dev_alloc_name() or stores the first errno */ 1138 /* err >= 0 after dev_alloc_name() or stores the first errno */
1139 if (err >= 0) { 1139 if (err >= 0) {
1140 err = ret; 1140 err = ret;
1141 write_seqcount_begin(&devnet_rename_seq); 1141 write_seqcount_begin(&devnet_rename_seq);
1142 memcpy(dev->name, oldname, IFNAMSIZ); 1142 memcpy(dev->name, oldname, IFNAMSIZ);
1143 memcpy(oldname, newname, IFNAMSIZ); 1143 memcpy(oldname, newname, IFNAMSIZ);
1144 goto rollback; 1144 goto rollback;
1145 } else { 1145 } else {
1146 pr_err("%s: name change rollback failed: %d\n", 1146 pr_err("%s: name change rollback failed: %d\n",
1147 dev->name, ret); 1147 dev->name, ret);
1148 } 1148 }
1149 } 1149 }
1150 1150
1151 return err; 1151 return err;
1152 } 1152 }
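netdev_get_name() above is the lockless reader paired with the devnet_rename_seq writes in dev_change_name(). A generic sketch of that seqcount pattern follows, with a hypothetical label protected the same way; seqcount_init(&example_seq) is assumed to run once at init time.

#include <linux/seqlock.h>
#include <linux/mutex.h>
#include <linux/string.h>

static seqcount_t example_seq;
static DEFINE_MUTEX(example_lock);	/* serializes writers */
static char example_label[32];

static void example_set_label(const char *new_label)
{
	mutex_lock(&example_lock);
	write_seqcount_begin(&example_seq);
	strlcpy(example_label, new_label, sizeof(example_label));
	write_seqcount_end(&example_seq);
	mutex_unlock(&example_lock);
}

static void example_get_label(char *buf, size_t len)
{
	unsigned int seq;

	do {				/* retry if a write overlapped the copy */
		seq = read_seqcount_begin(&example_seq);
		strlcpy(buf, example_label, len);
	} while (read_seqcount_retry(&example_seq, seq));
}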
1153 1153
1154 /** 1154 /**
1155 * dev_set_alias - change ifalias of a device 1155 * dev_set_alias - change ifalias of a device
1156 * @dev: device 1156 * @dev: device
1157 * @alias: name up to IFALIASZ 1157 * @alias: name up to IFALIASZ
1158 * @len: limit of bytes to copy from info 1158 * @len: limit of bytes to copy from info
1159 * 1159 *
1160 * Set the ifalias for a device. 1160 * Set the ifalias for a device.
1161 */ 1161 */
1162 int dev_set_alias(struct net_device *dev, const char *alias, size_t len) 1162 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
1163 { 1163 {
1164 char *new_ifalias; 1164 char *new_ifalias;
1165 1165
1166 ASSERT_RTNL(); 1166 ASSERT_RTNL();
1167 1167
1168 if (len >= IFALIASZ) 1168 if (len >= IFALIASZ)
1169 return -EINVAL; 1169 return -EINVAL;
1170 1170
1171 if (!len) { 1171 if (!len) {
1172 kfree(dev->ifalias); 1172 kfree(dev->ifalias);
1173 dev->ifalias = NULL; 1173 dev->ifalias = NULL;
1174 return 0; 1174 return 0;
1175 } 1175 }
1176 1176
1177 new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); 1177 new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
1178 if (!new_ifalias) 1178 if (!new_ifalias)
1179 return -ENOMEM; 1179 return -ENOMEM;
1180 dev->ifalias = new_ifalias; 1180 dev->ifalias = new_ifalias;
1181 1181
1182 strlcpy(dev->ifalias, alias, len+1); 1182 strlcpy(dev->ifalias, alias, len+1);
1183 return len; 1183 return len;
1184 } 1184 }
1185 1185
1186 1186
1187 /** 1187 /**
1188 * netdev_features_change - device changes features 1188 * netdev_features_change - device changes features
1189 * @dev: device to cause notification 1189 * @dev: device to cause notification
1190 * 1190 *
1191 * Called to indicate a device has changed features. 1191 * Called to indicate a device has changed features.
1192 */ 1192 */
1193 void netdev_features_change(struct net_device *dev) 1193 void netdev_features_change(struct net_device *dev)
1194 { 1194 {
1195 call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev); 1195 call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
1196 } 1196 }
1197 EXPORT_SYMBOL(netdev_features_change); 1197 EXPORT_SYMBOL(netdev_features_change);
1198 1198
1199 /** 1199 /**
1200 * netdev_state_change - device changes state 1200 * netdev_state_change - device changes state
1201 * @dev: device to cause notification 1201 * @dev: device to cause notification
1202 * 1202 *
1203 * Called to indicate a device has changed state. This function calls 1203 * Called to indicate a device has changed state. This function calls
1204 * the notifier chains for netdev_chain and sends a NEWLINK message 1204 * the notifier chains for netdev_chain and sends a NEWLINK message
1205 * to the routing socket. 1205 * to the routing socket.
1206 */ 1206 */
1207 void netdev_state_change(struct net_device *dev) 1207 void netdev_state_change(struct net_device *dev)
1208 { 1208 {
1209 if (dev->flags & IFF_UP) { 1209 if (dev->flags & IFF_UP) {
1210 call_netdevice_notifiers(NETDEV_CHANGE, dev); 1210 call_netdevice_notifiers(NETDEV_CHANGE, dev);
1211 rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); 1211 rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
1212 } 1212 }
1213 } 1213 }
1214 EXPORT_SYMBOL(netdev_state_change); 1214 EXPORT_SYMBOL(netdev_state_change);
1215 1215
1216 /** 1216 /**
1217 * netdev_notify_peers - notify network peers about existence of @dev 1217 * netdev_notify_peers - notify network peers about existence of @dev
1218 * @dev: network device 1218 * @dev: network device
1219 * 1219 *
1220 * Generate traffic such that interested network peers are aware of 1220 * Generate traffic such that interested network peers are aware of
1221 * @dev, such as by generating a gratuitous ARP. This may be used when 1221 * @dev, such as by generating a gratuitous ARP. This may be used when
1222 * a device wants to inform the rest of the network about some sort of 1222 * a device wants to inform the rest of the network about some sort of
1223 * reconfiguration such as a failover event or virtual machine 1223 * reconfiguration such as a failover event or virtual machine
1224 * migration. 1224 * migration.
1225 */ 1225 */
1226 void netdev_notify_peers(struct net_device *dev) 1226 void netdev_notify_peers(struct net_device *dev)
1227 { 1227 {
1228 rtnl_lock(); 1228 rtnl_lock();
1229 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev); 1229 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
1230 rtnl_unlock(); 1230 rtnl_unlock();
1231 } 1231 }
1232 EXPORT_SYMBOL(netdev_notify_peers); 1232 EXPORT_SYMBOL(netdev_notify_peers);
1233 1233
1234 static int __dev_open(struct net_device *dev) 1234 static int __dev_open(struct net_device *dev)
1235 { 1235 {
1236 const struct net_device_ops *ops = dev->netdev_ops; 1236 const struct net_device_ops *ops = dev->netdev_ops;
1237 int ret; 1237 int ret;
1238 1238
1239 ASSERT_RTNL(); 1239 ASSERT_RTNL();
1240 1240
1241 if (!netif_device_present(dev)) 1241 if (!netif_device_present(dev))
1242 return -ENODEV; 1242 return -ENODEV;
1243 1243
1244 /* Block netpoll from trying to do any rx path servicing. 1244 /* Block netpoll from trying to do any rx path servicing.
1245 * If we don't do this there is a chance ndo_poll_controller 1245 * If we don't do this there is a chance ndo_poll_controller
1246 * or ndo_poll may be running while we open the device 1246 * or ndo_poll may be running while we open the device
1247 */ 1247 */
1248 netpoll_rx_disable(dev); 1248 netpoll_rx_disable(dev);
1249 1249
1250 ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev); 1250 ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
1251 ret = notifier_to_errno(ret); 1251 ret = notifier_to_errno(ret);
1252 if (ret) 1252 if (ret)
1253 return ret; 1253 return ret;
1254 1254
1255 set_bit(__LINK_STATE_START, &dev->state); 1255 set_bit(__LINK_STATE_START, &dev->state);
1256 1256
1257 if (ops->ndo_validate_addr) 1257 if (ops->ndo_validate_addr)
1258 ret = ops->ndo_validate_addr(dev); 1258 ret = ops->ndo_validate_addr(dev);
1259 1259
1260 if (!ret && ops->ndo_open) 1260 if (!ret && ops->ndo_open)
1261 ret = ops->ndo_open(dev); 1261 ret = ops->ndo_open(dev);
1262 1262
1263 netpoll_rx_enable(dev); 1263 netpoll_rx_enable(dev);
1264 1264
1265 if (ret) 1265 if (ret)
1266 clear_bit(__LINK_STATE_START, &dev->state); 1266 clear_bit(__LINK_STATE_START, &dev->state);
1267 else { 1267 else {
1268 dev->flags |= IFF_UP; 1268 dev->flags |= IFF_UP;
1269 net_dmaengine_get(); 1269 net_dmaengine_get();
1270 dev_set_rx_mode(dev); 1270 dev_set_rx_mode(dev);
1271 dev_activate(dev); 1271 dev_activate(dev);
1272 add_device_randomness(dev->dev_addr, dev->addr_len); 1272 add_device_randomness(dev->dev_addr, dev->addr_len);
1273 } 1273 }
1274 1274
1275 return ret; 1275 return ret;
1276 } 1276 }
1277 1277
1278 /** 1278 /**
1279 * dev_open - prepare an interface for use. 1279 * dev_open - prepare an interface for use.
1280 * @dev: device to open 1280 * @dev: device to open
1281 * 1281 *
1282 * Takes a device from down to up state. The device's private open 1282 * Takes a device from down to up state. The device's private open
1283 * function is invoked and then the multicast lists are loaded. Finally 1283 * function is invoked and then the multicast lists are loaded. Finally
1284 * the device is moved into the up state and a %NETDEV_UP message is 1284 * the device is moved into the up state and a %NETDEV_UP message is
1285 * sent to the netdev notifier chain. 1285 * sent to the netdev notifier chain.
1286 * 1286 *
1287 * Calling this function on an active interface is a nop. On a failure 1287 * Calling this function on an active interface is a nop. On a failure
1288 * a negative errno code is returned. 1288 * a negative errno code is returned.
1289 */ 1289 */
1290 int dev_open(struct net_device *dev) 1290 int dev_open(struct net_device *dev)
1291 { 1291 {
1292 int ret; 1292 int ret;
1293 1293
1294 if (dev->flags & IFF_UP) 1294 if (dev->flags & IFF_UP)
1295 return 0; 1295 return 0;
1296 1296
1297 ret = __dev_open(dev); 1297 ret = __dev_open(dev);
1298 if (ret < 0) 1298 if (ret < 0)
1299 return ret; 1299 return ret;
1300 1300
1301 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL); 1301 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
1302 call_netdevice_notifiers(NETDEV_UP, dev); 1302 call_netdevice_notifiers(NETDEV_UP, dev);
1303 1303
1304 return ret; 1304 return ret;
1305 } 1305 }
1306 EXPORT_SYMBOL(dev_open); 1306 EXPORT_SYMBOL(dev_open);
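A sketch of bringing an interface up from kernel code; the name parameter and the use of init_net are assumptions. RTNL is taken because dev_open() (via __dev_open()) asserts it, and it also makes the __dev_get_by_name() lookup safe.

static int example_bring_up(const char *ifname)
{
	struct net_device *dev;
	int err = -ENODEV;

	rtnl_lock();
	dev = __dev_get_by_name(&init_net, ifname);
	if (dev)
		err = dev_open(dev);
	rtnl_unlock();

	return err;
}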
1307 1307
1308 static int __dev_close_many(struct list_head *head) 1308 static int __dev_close_many(struct list_head *head)
1309 { 1309 {
1310 struct net_device *dev; 1310 struct net_device *dev;
1311 1311
1312 ASSERT_RTNL(); 1312 ASSERT_RTNL();
1313 might_sleep(); 1313 might_sleep();
1314 1314
1315 list_for_each_entry(dev, head, close_list) { 1315 list_for_each_entry(dev, head, close_list) {
1316 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); 1316 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1317 1317
1318 clear_bit(__LINK_STATE_START, &dev->state); 1318 clear_bit(__LINK_STATE_START, &dev->state);
1319 1319
1320 /* Synchronize to scheduled poll. We cannot touch poll list, it 1320 /* Synchronize to scheduled poll. We cannot touch poll list, it
1321 * can even be on a different cpu. So just clear netif_running(). 1321 * can even be on a different cpu. So just clear netif_running().
1322 * 1322 *
1323 * dev->stop() will invoke napi_disable() on all of its 1323 * dev->stop() will invoke napi_disable() on all of its
1324 * napi_struct instances on this device. 1324 * napi_struct instances on this device.
1325 */ 1325 */
1326 smp_mb__after_clear_bit(); /* Commit netif_running(). */ 1326 smp_mb__after_clear_bit(); /* Commit netif_running(). */
1327 } 1327 }
1328 1328
1329 dev_deactivate_many(head); 1329 dev_deactivate_many(head);
1330 1330
1331 list_for_each_entry(dev, head, close_list) { 1331 list_for_each_entry(dev, head, close_list) {
1332 const struct net_device_ops *ops = dev->netdev_ops; 1332 const struct net_device_ops *ops = dev->netdev_ops;
1333 1333
1334 /* 1334 /*
1335 * Call the device specific close. This cannot fail. 1335 * Call the device specific close. This cannot fail.
1336 * It is only called if the device is UP. 1336 * It is only called if the device is UP.
1337 * 1337 *
1338 * We allow it to be called even after a DETACH hot-plug 1338 * We allow it to be called even after a DETACH hot-plug
1339 * event. 1339 * event.
1340 */ 1340 */
1341 if (ops->ndo_stop) 1341 if (ops->ndo_stop)
1342 ops->ndo_stop(dev); 1342 ops->ndo_stop(dev);
1343 1343
1344 dev->flags &= ~IFF_UP; 1344 dev->flags &= ~IFF_UP;
1345 net_dmaengine_put(); 1345 net_dmaengine_put();
1346 } 1346 }
1347 1347
1348 return 0; 1348 return 0;
1349 } 1349 }
1350 1350
1351 static int __dev_close(struct net_device *dev) 1351 static int __dev_close(struct net_device *dev)
1352 { 1352 {
1353 int retval; 1353 int retval;
1354 LIST_HEAD(single); 1354 LIST_HEAD(single);
1355 1355
1356 /* Temporarily disable netpoll until the interface is down */ 1356 /* Temporarily disable netpoll until the interface is down */
1357 netpoll_rx_disable(dev); 1357 netpoll_rx_disable(dev);
1358 1358
1359 list_add(&dev->close_list, &single); 1359 list_add(&dev->close_list, &single);
1360 retval = __dev_close_many(&single); 1360 retval = __dev_close_many(&single);
1361 list_del(&single); 1361 list_del(&single);
1362 1362
1363 netpoll_rx_enable(dev); 1363 netpoll_rx_enable(dev);
1364 return retval; 1364 return retval;
1365 } 1365 }
1366 1366
1367 static int dev_close_many(struct list_head *head) 1367 static int dev_close_many(struct list_head *head)
1368 { 1368 {
1369 struct net_device *dev, *tmp; 1369 struct net_device *dev, *tmp;
1370 1370
1371 /* Remove the devices that don't need to be closed */ 1371 /* Remove the devices that don't need to be closed */
1372 list_for_each_entry_safe(dev, tmp, head, close_list) 1372 list_for_each_entry_safe(dev, tmp, head, close_list)
1373 if (!(dev->flags & IFF_UP)) 1373 if (!(dev->flags & IFF_UP))
1374 list_del_init(&dev->close_list); 1374 list_del_init(&dev->close_list);
1375 1375
1376 __dev_close_many(head); 1376 __dev_close_many(head);
1377 1377
1378 list_for_each_entry_safe(dev, tmp, head, close_list) { 1378 list_for_each_entry_safe(dev, tmp, head, close_list) {
1379 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL); 1379 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
1380 call_netdevice_notifiers(NETDEV_DOWN, dev); 1380 call_netdevice_notifiers(NETDEV_DOWN, dev);
1381 list_del_init(&dev->close_list); 1381 list_del_init(&dev->close_list);
1382 } 1382 }
1383 1383
1384 return 0; 1384 return 0;
1385 } 1385 }
1386 1386
1387 /** 1387 /**
1388 * dev_close - shutdown an interface. 1388 * dev_close - shutdown an interface.
1389 * @dev: device to shutdown 1389 * @dev: device to shutdown
1390 * 1390 *
1391 * This function moves an active device into down state. A 1391 * This function moves an active device into down state. A
1392 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device 1392 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1393 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier 1393 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1394 * chain. 1394 * chain.
1395 */ 1395 */
1396 int dev_close(struct net_device *dev) 1396 int dev_close(struct net_device *dev)
1397 { 1397 {
1398 if (dev->flags & IFF_UP) { 1398 if (dev->flags & IFF_UP) {
1399 LIST_HEAD(single); 1399 LIST_HEAD(single);
1400 1400
1401 /* Block netpoll rx while the interface is going down */ 1401 /* Block netpoll rx while the interface is going down */
1402 netpoll_rx_disable(dev); 1402 netpoll_rx_disable(dev);
1403 1403
1404 list_add(&dev->close_list, &single); 1404 list_add(&dev->close_list, &single);
1405 dev_close_many(&single); 1405 dev_close_many(&single);
1406 list_del(&single); 1406 list_del(&single);
1407 1407
1408 netpoll_rx_enable(dev); 1408 netpoll_rx_enable(dev);
1409 } 1409 }
1410 return 0; 1410 return 0;
1411 } 1411 }
1412 EXPORT_SYMBOL(dev_close); 1412 EXPORT_SYMBOL(dev_close);
1413 1413
1414 1414
1415 /** 1415 /**
1416 * dev_disable_lro - disable Large Receive Offload on a device 1416 * dev_disable_lro - disable Large Receive Offload on a device
1417 * @dev: device 1417 * @dev: device
1418 * 1418 *
1419 * Disable Large Receive Offload (LRO) on a net device. Must be 1419 * Disable Large Receive Offload (LRO) on a net device. Must be
1420 * called under RTNL. This is needed if received packets may be 1420 * called under RTNL. This is needed if received packets may be
1421 * forwarded to another interface. 1421 * forwarded to another interface.
1422 */ 1422 */
1423 void dev_disable_lro(struct net_device *dev) 1423 void dev_disable_lro(struct net_device *dev)
1424 { 1424 {
1425 /* 1425 /*
1426 * If we're trying to disable lro on a vlan device 1426 * If we're trying to disable lro on a vlan device
1427 * use the underlying physical device instead 1427 * use the underlying physical device instead
1428 */ 1428 */
1429 if (is_vlan_dev(dev)) 1429 if (is_vlan_dev(dev))
1430 dev = vlan_dev_real_dev(dev); 1430 dev = vlan_dev_real_dev(dev);
1431 1431
1432 /* the same for macvlan devices */ 1432 /* the same for macvlan devices */
1433 if (netif_is_macvlan(dev)) 1433 if (netif_is_macvlan(dev))
1434 dev = macvlan_dev_real_dev(dev); 1434 dev = macvlan_dev_real_dev(dev);
1435 1435
1436 dev->wanted_features &= ~NETIF_F_LRO; 1436 dev->wanted_features &= ~NETIF_F_LRO;
1437 netdev_update_features(dev); 1437 netdev_update_features(dev);
1438 1438
1439 if (unlikely(dev->features & NETIF_F_LRO)) 1439 if (unlikely(dev->features & NETIF_F_LRO))
1440 netdev_WARN(dev, "failed to disable LRO!\n"); 1440 netdev_WARN(dev, "failed to disable LRO!\n");
1441 } 1441 }
1442 EXPORT_SYMBOL(dev_disable_lro); 1442 EXPORT_SYMBOL(dev_disable_lro);
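A minimal sketch of the documented rule: a setup path that will forward frames received on a lower device disables LRO on it while holding RTNL (the helper name is an assumption).

static void example_prepare_lower_for_forwarding(struct net_device *lower)
{
	ASSERT_RTNL();		/* dev_disable_lro() must be called under RTNL */
	dev_disable_lro(lower);
}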
1443 1443
1444 static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val, 1444 static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
1445 struct net_device *dev) 1445 struct net_device *dev)
1446 { 1446 {
1447 struct netdev_notifier_info info; 1447 struct netdev_notifier_info info;
1448 1448
1449 netdev_notifier_info_init(&info, dev); 1449 netdev_notifier_info_init(&info, dev);
1450 return nb->notifier_call(nb, val, &info); 1450 return nb->notifier_call(nb, val, &info);
1451 } 1451 }
1452 1452
1453 static int dev_boot_phase = 1; 1453 static int dev_boot_phase = 1;
1454 1454
1455 /** 1455 /**
1456 * register_netdevice_notifier - register a network notifier block 1456 * register_netdevice_notifier - register a network notifier block
1457 * @nb: notifier 1457 * @nb: notifier
1458 * 1458 *
1459 * Register a notifier to be called when network device events occur. 1459 * Register a notifier to be called when network device events occur.
1460 * The notifier passed is linked into the kernel structures and must 1460 * The notifier passed is linked into the kernel structures and must
1461 * not be reused until it has been unregistered. A negative errno code 1461 * not be reused until it has been unregistered. A negative errno code
1462 * is returned on a failure. 1462 * is returned on a failure.
1463 * 1463 *
1464 * When registered, all registration and up events are replayed 1464 * When registered, all registration and up events are replayed
1465 * to the new notifier so that it gets a race-free 1465 * to the new notifier so that it gets a race-free
1466 * view of the network device list. 1466 * view of the network device list.
1467 */ 1467 */
1468 1468
1469 int register_netdevice_notifier(struct notifier_block *nb) 1469 int register_netdevice_notifier(struct notifier_block *nb)
1470 { 1470 {
1471 struct net_device *dev; 1471 struct net_device *dev;
1472 struct net_device *last; 1472 struct net_device *last;
1473 struct net *net; 1473 struct net *net;
1474 int err; 1474 int err;
1475 1475
1476 rtnl_lock(); 1476 rtnl_lock();
1477 err = raw_notifier_chain_register(&netdev_chain, nb); 1477 err = raw_notifier_chain_register(&netdev_chain, nb);
1478 if (err) 1478 if (err)
1479 goto unlock; 1479 goto unlock;
1480 if (dev_boot_phase) 1480 if (dev_boot_phase)
1481 goto unlock; 1481 goto unlock;
1482 for_each_net(net) { 1482 for_each_net(net) {
1483 for_each_netdev(net, dev) { 1483 for_each_netdev(net, dev) {
1484 err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev); 1484 err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
1485 err = notifier_to_errno(err); 1485 err = notifier_to_errno(err);
1486 if (err) 1486 if (err)
1487 goto rollback; 1487 goto rollback;
1488 1488
1489 if (!(dev->flags & IFF_UP)) 1489 if (!(dev->flags & IFF_UP))
1490 continue; 1490 continue;
1491 1491
1492 call_netdevice_notifier(nb, NETDEV_UP, dev); 1492 call_netdevice_notifier(nb, NETDEV_UP, dev);
1493 } 1493 }
1494 } 1494 }
1495 1495
1496 unlock: 1496 unlock:
1497 rtnl_unlock(); 1497 rtnl_unlock();
1498 return err; 1498 return err;
1499 1499
1500 rollback: 1500 rollback:
1501 last = dev; 1501 last = dev;
1502 for_each_net(net) { 1502 for_each_net(net) {
1503 for_each_netdev(net, dev) { 1503 for_each_netdev(net, dev) {
1504 if (dev == last) 1504 if (dev == last)
1505 goto outroll; 1505 goto outroll;
1506 1506
1507 if (dev->flags & IFF_UP) { 1507 if (dev->flags & IFF_UP) {
1508 call_netdevice_notifier(nb, NETDEV_GOING_DOWN, 1508 call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
1509 dev); 1509 dev);
1510 call_netdevice_notifier(nb, NETDEV_DOWN, dev); 1510 call_netdevice_notifier(nb, NETDEV_DOWN, dev);
1511 } 1511 }
1512 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); 1512 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
1513 } 1513 }
1514 } 1514 }
1515 1515
1516 outroll: 1516 outroll:
1517 raw_notifier_chain_unregister(&netdev_chain, nb); 1517 raw_notifier_chain_unregister(&netdev_chain, nb);
1518 goto unlock; 1518 goto unlock;
1519 } 1519 }
1520 EXPORT_SYMBOL(register_netdevice_notifier); 1520 EXPORT_SYMBOL(register_netdevice_notifier);
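A sketch of a typical consumer of this chain; the handler, the messages and the chosen events are assumptions. netdev_notifier_info_to_dev() recovers the device from the opaque pointer passed to the chain.

#include <linux/netdevice.h>
#include <linux/notifier.h>

static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	switch (event) {
	case NETDEV_UP:
		pr_info("%s is up\n", dev->name);
		break;
	case NETDEV_GOING_DOWN:
		pr_info("%s is going down\n", dev->name);
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block example_nb = {
	.notifier_call = example_netdev_event,
};

/* register_netdevice_notifier(&example_nb) at module init;
 * unregister_netdevice_notifier(&example_nb) at module exit.
 */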
1521 1521
1522 /** 1522 /**
1523 * unregister_netdevice_notifier - unregister a network notifier block 1523 * unregister_netdevice_notifier - unregister a network notifier block
1524 * @nb: notifier 1524 * @nb: notifier
1525 * 1525 *
1526 * Unregister a notifier previously registered by 1526 * Unregister a notifier previously registered by
1527 * register_netdevice_notifier(). The notifier is unlinked from the 1527 * register_netdevice_notifier(). The notifier is unlinked from the
1528 * kernel structures and may then be reused. A negative errno code 1528 * kernel structures and may then be reused. A negative errno code
1529 * is returned on a failure. 1529 * is returned on a failure.
1530 * 1530 *
1531 * After unregistering, unregister and down device events are synthesized 1531 * After unregistering, unregister and down device events are synthesized
1532 * for all devices on the device list to the removed notifier to remove 1532 * for all devices on the device list to the removed notifier to remove
1533 * the need for special case cleanup code. 1533 * the need for special case cleanup code.
1534 */ 1534 */
1535 1535
1536 int unregister_netdevice_notifier(struct notifier_block *nb) 1536 int unregister_netdevice_notifier(struct notifier_block *nb)
1537 { 1537 {
1538 struct net_device *dev; 1538 struct net_device *dev;
1539 struct net *net; 1539 struct net *net;
1540 int err; 1540 int err;
1541 1541
1542 rtnl_lock(); 1542 rtnl_lock();
1543 err = raw_notifier_chain_unregister(&netdev_chain, nb); 1543 err = raw_notifier_chain_unregister(&netdev_chain, nb);
1544 if (err) 1544 if (err)
1545 goto unlock; 1545 goto unlock;
1546 1546
1547 for_each_net(net) { 1547 for_each_net(net) {
1548 for_each_netdev(net, dev) { 1548 for_each_netdev(net, dev) {
1549 if (dev->flags & IFF_UP) { 1549 if (dev->flags & IFF_UP) {
1550 call_netdevice_notifier(nb, NETDEV_GOING_DOWN, 1550 call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
1551 dev); 1551 dev);
1552 call_netdevice_notifier(nb, NETDEV_DOWN, dev); 1552 call_netdevice_notifier(nb, NETDEV_DOWN, dev);
1553 } 1553 }
1554 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); 1554 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
1555 } 1555 }
1556 } 1556 }
1557 unlock: 1557 unlock:
1558 rtnl_unlock(); 1558 rtnl_unlock();
1559 return err; 1559 return err;
1560 } 1560 }
1561 EXPORT_SYMBOL(unregister_netdevice_notifier); 1561 EXPORT_SYMBOL(unregister_netdevice_notifier);
1562 1562
1563 /** 1563 /**
1564 * call_netdevice_notifiers_info - call all network notifier blocks 1564 * call_netdevice_notifiers_info - call all network notifier blocks
1565 * @val: value passed unmodified to notifier function 1565 * @val: value passed unmodified to notifier function
1566 * @dev: net_device pointer passed unmodified to notifier function 1566 * @dev: net_device pointer passed unmodified to notifier function
1567 * @info: notifier information data 1567 * @info: notifier information data
1568 * 1568 *
1569 * Call all network notifier blocks. Parameters and return value 1569 * Call all network notifier blocks. Parameters and return value
1570 * are as for raw_notifier_call_chain(). 1570 * are as for raw_notifier_call_chain().
1571 */ 1571 */
1572 1572
1573 static int call_netdevice_notifiers_info(unsigned long val, 1573 static int call_netdevice_notifiers_info(unsigned long val,
1574 struct net_device *dev, 1574 struct net_device *dev,
1575 struct netdev_notifier_info *info) 1575 struct netdev_notifier_info *info)
1576 { 1576 {
1577 ASSERT_RTNL(); 1577 ASSERT_RTNL();
1578 netdev_notifier_info_init(info, dev); 1578 netdev_notifier_info_init(info, dev);
1579 return raw_notifier_call_chain(&netdev_chain, val, info); 1579 return raw_notifier_call_chain(&netdev_chain, val, info);
1580 } 1580 }
1581 1581
1582 /** 1582 /**
1583 * call_netdevice_notifiers - call all network notifier blocks 1583 * call_netdevice_notifiers - call all network notifier blocks
1584 * @val: value passed unmodified to notifier function 1584 * @val: value passed unmodified to notifier function
1585 * @dev: net_device pointer passed unmodified to notifier function 1585 * @dev: net_device pointer passed unmodified to notifier function
1586 * 1586 *
1587 * Call all network notifier blocks. Parameters and return value 1587 * Call all network notifier blocks. Parameters and return value
1588 * are as for raw_notifier_call_chain(). 1588 * are as for raw_notifier_call_chain().
1589 */ 1589 */
1590 1590
1591 int call_netdevice_notifiers(unsigned long val, struct net_device *dev) 1591 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1592 { 1592 {
1593 struct netdev_notifier_info info; 1593 struct netdev_notifier_info info;
1594 1594
1595 return call_netdevice_notifiers_info(val, dev, &info); 1595 return call_netdevice_notifiers_info(val, dev, &info);
1596 } 1596 }
1597 EXPORT_SYMBOL(call_netdevice_notifiers); 1597 EXPORT_SYMBOL(call_netdevice_notifiers);
1598 1598
1599 static struct static_key netstamp_needed __read_mostly; 1599 static struct static_key netstamp_needed __read_mostly;
1600 #ifdef HAVE_JUMP_LABEL 1600 #ifdef HAVE_JUMP_LABEL
1601 /* We are not allowed to call static_key_slow_dec() from irq context 1601 /* We are not allowed to call static_key_slow_dec() from irq context
1602 * If net_disable_timestamp() is called from irq context, defer the 1602 * If net_disable_timestamp() is called from irq context, defer the
1603 * static_key_slow_dec() calls. 1603 * static_key_slow_dec() calls.
1604 */ 1604 */
1605 static atomic_t netstamp_needed_deferred; 1605 static atomic_t netstamp_needed_deferred;
1606 #endif 1606 #endif
1607 1607
1608 void net_enable_timestamp(void) 1608 void net_enable_timestamp(void)
1609 { 1609 {
1610 #ifdef HAVE_JUMP_LABEL 1610 #ifdef HAVE_JUMP_LABEL
1611 int deferred = atomic_xchg(&netstamp_needed_deferred, 0); 1611 int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
1612 1612
1613 if (deferred) { 1613 if (deferred) {
1614 while (--deferred) 1614 while (--deferred)
1615 static_key_slow_dec(&netstamp_needed); 1615 static_key_slow_dec(&netstamp_needed);
1616 return; 1616 return;
1617 } 1617 }
1618 #endif 1618 #endif
1619 static_key_slow_inc(&netstamp_needed); 1619 static_key_slow_inc(&netstamp_needed);
1620 } 1620 }
1621 EXPORT_SYMBOL(net_enable_timestamp); 1621 EXPORT_SYMBOL(net_enable_timestamp);
1622 1622
1623 void net_disable_timestamp(void) 1623 void net_disable_timestamp(void)
1624 { 1624 {
1625 #ifdef HAVE_JUMP_LABEL 1625 #ifdef HAVE_JUMP_LABEL
1626 if (in_interrupt()) { 1626 if (in_interrupt()) {
1627 atomic_inc(&netstamp_needed_deferred); 1627 atomic_inc(&netstamp_needed_deferred);
1628 return; 1628 return;
1629 } 1629 }
1630 #endif 1630 #endif
1631 static_key_slow_dec(&netstamp_needed); 1631 static_key_slow_dec(&netstamp_needed);
1632 } 1632 }
1633 EXPORT_SYMBOL(net_disable_timestamp); 1633 EXPORT_SYMBOL(net_disable_timestamp);
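A generic sketch of the static_key pattern used by netstamp_needed: the hot path tests a patched branch that is off by default, while slow control paths increment and decrement the key. The key and helpers below are hypothetical.

#include <linux/jump_label.h>
#include <linux/skbuff.h>

static struct static_key example_feature_key __read_mostly;

static inline void example_hot_path(struct sk_buff *skb)
{
	if (static_key_false(&example_feature_key))	/* patched no-op while disabled */
		__net_timestamp(skb);
}

static void example_feature_enable(void)
{
	static_key_slow_inc(&example_feature_key);
}

static void example_feature_disable(void)
{
	/* as above, static_key_slow_dec() must not be called from irq context */
	static_key_slow_dec(&example_feature_key);
}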
1634 1634
1635 static inline void net_timestamp_set(struct sk_buff *skb) 1635 static inline void net_timestamp_set(struct sk_buff *skb)
1636 { 1636 {
1637 skb->tstamp.tv64 = 0; 1637 skb->tstamp.tv64 = 0;
1638 if (static_key_false(&netstamp_needed)) 1638 if (static_key_false(&netstamp_needed))
1639 __net_timestamp(skb); 1639 __net_timestamp(skb);
1640 } 1640 }
1641 1641
1642 #define net_timestamp_check(COND, SKB) \ 1642 #define net_timestamp_check(COND, SKB) \
1643 if (static_key_false(&netstamp_needed)) { \ 1643 if (static_key_false(&netstamp_needed)) { \
1644 if ((COND) && !(SKB)->tstamp.tv64) \ 1644 if ((COND) && !(SKB)->tstamp.tv64) \
1645 __net_timestamp(SKB); \ 1645 __net_timestamp(SKB); \
1646 } \ 1646 } \
1647 1647
1648 static inline bool is_skb_forwardable(struct net_device *dev, 1648 static inline bool is_skb_forwardable(struct net_device *dev,
1649 struct sk_buff *skb) 1649 struct sk_buff *skb)
1650 { 1650 {
1651 unsigned int len; 1651 unsigned int len;
1652 1652
1653 if (!(dev->flags & IFF_UP)) 1653 if (!(dev->flags & IFF_UP))
1654 return false; 1654 return false;
1655 1655
1656 len = dev->mtu + dev->hard_header_len + VLAN_HLEN; 1656 len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
1657 if (skb->len <= len) 1657 if (skb->len <= len)
1658 return true; 1658 return true;
1659 1659
1660 /* if TSO is enabled, we don't care about the length as the packet 1660 /* if TSO is enabled, we don't care about the length as the packet
1661 * could be forwarded without having been segmented beforehand 1661 * could be forwarded without having been segmented beforehand
1662 */ 1662 */
1663 if (skb_is_gso(skb)) 1663 if (skb_is_gso(skb))
1664 return true; 1664 return true;
1665 1665
1666 return false; 1666 return false;
1667 } 1667 }
1668 1668
1669 /** 1669 /**
1670 * dev_forward_skb - loopback an skb to another netif 1670 * dev_forward_skb - loopback an skb to another netif
1671 * 1671 *
1672 * @dev: destination network device 1672 * @dev: destination network device
1673 * @skb: buffer to forward 1673 * @skb: buffer to forward
1674 * 1674 *
1675 * return values: 1675 * return values:
1676 * NET_RX_SUCCESS (no congestion) 1676 * NET_RX_SUCCESS (no congestion)
1677 * NET_RX_DROP (packet was dropped, but freed) 1677 * NET_RX_DROP (packet was dropped, but freed)
1678 * 1678 *
1679 * dev_forward_skb can be used for injecting an skb from the 1679 * dev_forward_skb can be used for injecting an skb from the
1680 * start_xmit function of one device into the receive queue 1680 * start_xmit function of one device into the receive queue
1681 * of another device. 1681 * of another device.
1682 * 1682 *
1683 * The receiving device may be in another namespace, so 1683 * The receiving device may be in another namespace, so
1684 * we have to clear all information in the skb that could 1684 * we have to clear all information in the skb that could
1685 * impact namespace isolation. 1685 * impact namespace isolation.
1686 */ 1686 */
1687 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) 1687 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1688 { 1688 {
1689 if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { 1689 if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
1690 if (skb_copy_ubufs(skb, GFP_ATOMIC)) { 1690 if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
1691 atomic_long_inc(&dev->rx_dropped); 1691 atomic_long_inc(&dev->rx_dropped);
1692 kfree_skb(skb); 1692 kfree_skb(skb);
1693 return NET_RX_DROP; 1693 return NET_RX_DROP;
1694 } 1694 }
1695 } 1695 }
1696 1696
1697 if (unlikely(!is_skb_forwardable(dev, skb))) { 1697 if (unlikely(!is_skb_forwardable(dev, skb))) {
1698 atomic_long_inc(&dev->rx_dropped); 1698 atomic_long_inc(&dev->rx_dropped);
1699 kfree_skb(skb); 1699 kfree_skb(skb);
1700 return NET_RX_DROP; 1700 return NET_RX_DROP;
1701 } 1701 }
1702 1702
1703 skb_scrub_packet(skb, true); 1703 skb_scrub_packet(skb, true);
1704 skb->protocol = eth_type_trans(skb, dev); 1704 skb->protocol = eth_type_trans(skb, dev);
1705 1705
1706 return netif_rx_internal(skb); 1706 return netif_rx_internal(skb);
1707 } 1707 }
1708 EXPORT_SYMBOL_GPL(dev_forward_skb); 1708 EXPORT_SYMBOL_GPL(dev_forward_skb);
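A sketch of the classic dev_forward_skb() caller, a veth-style pair whose transmit handler injects the frame into the peer's receive path; the private structure and peer pointer are assumptions for illustration.

struct example_priv {
	struct net_device __rcu *peer;
};

static netdev_tx_t example_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct example_priv *priv = netdev_priv(dev);
	struct net_device *peer;

	rcu_read_lock();
	peer = rcu_dereference(priv->peer);
	if (likely(peer))
		dev_forward_skb(peer, skb);	/* consumes skb on both success and drop */
	else
		kfree_skb(skb);
	rcu_read_unlock();

	return NETDEV_TX_OK;
}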
1709 1709
1710 static inline int deliver_skb(struct sk_buff *skb, 1710 static inline int deliver_skb(struct sk_buff *skb,
1711 struct packet_type *pt_prev, 1711 struct packet_type *pt_prev,
1712 struct net_device *orig_dev) 1712 struct net_device *orig_dev)
1713 { 1713 {
1714 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) 1714 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
1715 return -ENOMEM; 1715 return -ENOMEM;
1716 atomic_inc(&skb->users); 1716 atomic_inc(&skb->users);
1717 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); 1717 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1718 } 1718 }
1719 1719
1720 static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb) 1720 static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
1721 { 1721 {
1722 if (!ptype->af_packet_priv || !skb->sk) 1722 if (!ptype->af_packet_priv || !skb->sk)
1723 return false; 1723 return false;
1724 1724
1725 if (ptype->id_match) 1725 if (ptype->id_match)
1726 return ptype->id_match(ptype, skb->sk); 1726 return ptype->id_match(ptype, skb->sk);
1727 else if ((struct sock *)ptype->af_packet_priv == skb->sk) 1727 else if ((struct sock *)ptype->af_packet_priv == skb->sk)
1728 return true; 1728 return true;
1729 1729
1730 return false; 1730 return false;
1731 } 1731 }
1732 1732
1733 /* 1733 /*
1734 * Support routine. Sends outgoing frames to any network 1734 * Support routine. Sends outgoing frames to any network
1735 * taps currently in use. 1735 * taps currently in use.
1736 */ 1736 */
1737 1737
1738 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) 1738 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1739 { 1739 {
1740 struct packet_type *ptype; 1740 struct packet_type *ptype;
1741 struct sk_buff *skb2 = NULL; 1741 struct sk_buff *skb2 = NULL;
1742 struct packet_type *pt_prev = NULL; 1742 struct packet_type *pt_prev = NULL;
1743 1743
1744 rcu_read_lock(); 1744 rcu_read_lock();
1745 list_for_each_entry_rcu(ptype, &ptype_all, list) { 1745 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1746 /* Never send packets back to the socket 1746 /* Never send packets back to the socket
1747 * they originated from - MvS (miquels@drinkel.ow.org) 1747 * they originated from - MvS (miquels@drinkel.ow.org)
1748 */ 1748 */
1749 if ((ptype->dev == dev || !ptype->dev) && 1749 if ((ptype->dev == dev || !ptype->dev) &&
1750 (!skb_loop_sk(ptype, skb))) { 1750 (!skb_loop_sk(ptype, skb))) {
1751 if (pt_prev) { 1751 if (pt_prev) {
1752 deliver_skb(skb2, pt_prev, skb->dev); 1752 deliver_skb(skb2, pt_prev, skb->dev);
1753 pt_prev = ptype; 1753 pt_prev = ptype;
1754 continue; 1754 continue;
1755 } 1755 }
1756 1756
1757 skb2 = skb_clone(skb, GFP_ATOMIC); 1757 skb2 = skb_clone(skb, GFP_ATOMIC);
1758 if (!skb2) 1758 if (!skb2)
1759 break; 1759 break;
1760 1760
1761 net_timestamp_set(skb2); 1761 net_timestamp_set(skb2);
1762 1762
1763 /* skb->nh should be correctly 1763 /* skb->nh should be correctly
1764 set by sender, so that the second statement is 1764 set by sender, so that the second statement is
1765 just protection against buggy protocols. 1765 just protection against buggy protocols.
1766 */ 1766 */
1767 skb_reset_mac_header(skb2); 1767 skb_reset_mac_header(skb2);
1768 1768
1769 if (skb_network_header(skb2) < skb2->data || 1769 if (skb_network_header(skb2) < skb2->data ||
1770 skb_network_header(skb2) > skb_tail_pointer(skb2)) { 1770 skb_network_header(skb2) > skb_tail_pointer(skb2)) {
1771 net_crit_ratelimited("protocol %04x is buggy, dev %s\n", 1771 net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
1772 ntohs(skb2->protocol), 1772 ntohs(skb2->protocol),
1773 dev->name); 1773 dev->name);
1774 skb_reset_network_header(skb2); 1774 skb_reset_network_header(skb2);
1775 } 1775 }
1776 1776
1777 skb2->transport_header = skb2->network_header; 1777 skb2->transport_header = skb2->network_header;
1778 skb2->pkt_type = PACKET_OUTGOING; 1778 skb2->pkt_type = PACKET_OUTGOING;
1779 pt_prev = ptype; 1779 pt_prev = ptype;
1780 } 1780 }
1781 } 1781 }
1782 if (pt_prev) 1782 if (pt_prev)
1783 pt_prev->func(skb2, skb->dev, pt_prev, skb->dev); 1783 pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
1784 rcu_read_unlock(); 1784 rcu_read_unlock();
1785 } 1785 }
1786 1786
1787 /** 1787 /**
1788 * netif_setup_tc - Handle tc mappings on real_num_tx_queues change 1788 * netif_setup_tc - Handle tc mappings on real_num_tx_queues change
1789 * @dev: Network device 1789 * @dev: Network device
1790 * @txq: number of queues available 1790 * @txq: number of queues available
1791 * 1791 *
1792 * If real_num_tx_queues is changed the tc mappings may no longer be 1792 * If real_num_tx_queues is changed the tc mappings may no longer be
1793 * valid. To resolve this, verify the tc mapping remains valid and, if 1793 * valid. To resolve this, verify the tc mapping remains valid and, if
1794 * not, NULL the mapping. With no priorities mapping to this 1794 * not, NULL the mapping. With no priorities mapping to this
1795 * offset/count pair it will no longer be used. In the worst case, when TC0 1795 * offset/count pair it will no longer be used. In the worst case, when TC0
1796 * is invalid, nothing can be done, so priority mappings are disabled. It is 1796 * is invalid, nothing can be done, so priority mappings are disabled. It is
1797 * expected that drivers will fix this mapping if they can before 1797 * expected that drivers will fix this mapping if they can before
1798 * calling netif_set_real_num_tx_queues. 1798 * calling netif_set_real_num_tx_queues.
1799 */ 1799 */
1800 static void netif_setup_tc(struct net_device *dev, unsigned int txq) 1800 static void netif_setup_tc(struct net_device *dev, unsigned int txq)
1801 { 1801 {
1802 int i; 1802 int i;
1803 struct netdev_tc_txq *tc = &dev->tc_to_txq[0]; 1803 struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
1804 1804
1805 /* If TC0 is invalidated disable TC mapping */ 1805 /* If TC0 is invalidated disable TC mapping */
1806 if (tc->offset + tc->count > txq) { 1806 if (tc->offset + tc->count > txq) {
1807 pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n"); 1807 pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
1808 dev->num_tc = 0; 1808 dev->num_tc = 0;
1809 return; 1809 return;
1810 } 1810 }
1811 1811
1812 /* Invalidated prio to tc mappings set to TC0 */ 1812 /* Invalidated prio to tc mappings set to TC0 */
1813 for (i = 1; i < TC_BITMASK + 1; i++) { 1813 for (i = 1; i < TC_BITMASK + 1; i++) {
1814 int q = netdev_get_prio_tc_map(dev, i); 1814 int q = netdev_get_prio_tc_map(dev, i);
1815 1815
1816 tc = &dev->tc_to_txq[q]; 1816 tc = &dev->tc_to_txq[q];
1817 if (tc->offset + tc->count > txq) { 1817 if (tc->offset + tc->count > txq) {
1818 pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n", 1818 pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
1819 i, q); 1819 i, q);
1820 netdev_set_prio_tc_map(dev, i, 0); 1820 netdev_set_prio_tc_map(dev, i, 0);
1821 } 1821 }
1822 } 1822 }
1823 } 1823 }
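A sketch of the driver-side mapping that netif_setup_tc() later has to revalidate: two traffic classes, each backed by a contiguous block of tx queues, with two priorities mapped explicitly. The queue counts and offsets are assumptions.

static int example_setup_tc(struct net_device *dev)
{
	int err;

	err = netdev_set_num_tc(dev, 2);
	if (err)
		return err;

	netdev_set_tc_queue(dev, 0, 4, 0);	/* TC0 -> tx queues 0-3 */
	netdev_set_tc_queue(dev, 1, 4, 4);	/* TC1 -> tx queues 4-7 */
	netdev_set_prio_tc_map(dev, 0, 0);	/* priority 0 -> TC0 */
	netdev_set_prio_tc_map(dev, 1, 1);	/* priority 1 -> TC1 */

	return 0;
}

If the driver later shrinks real_num_tx_queues below what these blocks cover, netif_setup_tc() above resets any mapping that no longer fits.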
1824 1824
1825 #ifdef CONFIG_XPS 1825 #ifdef CONFIG_XPS
1826 static DEFINE_MUTEX(xps_map_mutex); 1826 static DEFINE_MUTEX(xps_map_mutex);
1827 #define xmap_dereference(P) \ 1827 #define xmap_dereference(P) \
1828 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) 1828 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
1829 1829
1830 static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps, 1830 static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps,
1831 int cpu, u16 index) 1831 int cpu, u16 index)
1832 { 1832 {
1833 struct xps_map *map = NULL; 1833 struct xps_map *map = NULL;
1834 int pos; 1834 int pos;
1835 1835
1836 if (dev_maps) 1836 if (dev_maps)
1837 map = xmap_dereference(dev_maps->cpu_map[cpu]); 1837 map = xmap_dereference(dev_maps->cpu_map[cpu]);
1838 1838
1839 for (pos = 0; map && pos < map->len; pos++) { 1839 for (pos = 0; map && pos < map->len; pos++) {
1840 if (map->queues[pos] == index) { 1840 if (map->queues[pos] == index) {
1841 if (map->len > 1) { 1841 if (map->len > 1) {
1842 map->queues[pos] = map->queues[--map->len]; 1842 map->queues[pos] = map->queues[--map->len];
1843 } else { 1843 } else {
1844 RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL); 1844 RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL);
1845 kfree_rcu(map, rcu); 1845 kfree_rcu(map, rcu);
1846 map = NULL; 1846 map = NULL;
1847 } 1847 }
1848 break; 1848 break;
1849 } 1849 }
1850 } 1850 }
1851 1851
1852 return map; 1852 return map;
1853 } 1853 }
1854 1854
1855 static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index) 1855 static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
1856 { 1856 {
1857 struct xps_dev_maps *dev_maps; 1857 struct xps_dev_maps *dev_maps;
1858 int cpu, i; 1858 int cpu, i;
1859 bool active = false; 1859 bool active = false;
1860 1860
1861 mutex_lock(&xps_map_mutex); 1861 mutex_lock(&xps_map_mutex);
1862 dev_maps = xmap_dereference(dev->xps_maps); 1862 dev_maps = xmap_dereference(dev->xps_maps);
1863 1863
1864 if (!dev_maps) 1864 if (!dev_maps)
1865 goto out_no_maps; 1865 goto out_no_maps;
1866 1866
1867 for_each_possible_cpu(cpu) { 1867 for_each_possible_cpu(cpu) {
1868 for (i = index; i < dev->num_tx_queues; i++) { 1868 for (i = index; i < dev->num_tx_queues; i++) {
1869 if (!remove_xps_queue(dev_maps, cpu, i)) 1869 if (!remove_xps_queue(dev_maps, cpu, i))
1870 break; 1870 break;
1871 } 1871 }
1872 if (i == dev->num_tx_queues) 1872 if (i == dev->num_tx_queues)
1873 active = true; 1873 active = true;
1874 } 1874 }
1875 1875
1876 if (!active) { 1876 if (!active) {
1877 RCU_INIT_POINTER(dev->xps_maps, NULL); 1877 RCU_INIT_POINTER(dev->xps_maps, NULL);
1878 kfree_rcu(dev_maps, rcu); 1878 kfree_rcu(dev_maps, rcu);
1879 } 1879 }
1880 1880
1881 for (i = index; i < dev->num_tx_queues; i++) 1881 for (i = index; i < dev->num_tx_queues; i++)
1882 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i), 1882 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
1883 NUMA_NO_NODE); 1883 NUMA_NO_NODE);
1884 1884
1885 out_no_maps: 1885 out_no_maps:
1886 mutex_unlock(&xps_map_mutex); 1886 mutex_unlock(&xps_map_mutex);
1887 } 1887 }
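The maps torn down above are built by netif_set_xps_queue(), defined further below. A sketch of driver-side use, pinning one tx queue to one CPU; the queue/CPU pairing and the helper are assumptions.

static int example_set_xps(struct net_device *dev, u16 qid, int cpu)
{
	cpumask_var_t mask;
	int err;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	cpumask_clear(mask);
	cpumask_set_cpu(cpu, mask);
	err = netif_set_xps_queue(dev, mask, qid);	/* copies the mask */
	free_cpumask_var(mask);

	return err;
}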
1888 1888
1889 static struct xps_map *expand_xps_map(struct xps_map *map, 1889 static struct xps_map *expand_xps_map(struct xps_map *map,
1890 int cpu, u16 index) 1890 int cpu, u16 index)
1891 { 1891 {
1892 struct xps_map *new_map; 1892 struct xps_map *new_map;
1893 int alloc_len = XPS_MIN_MAP_ALLOC; 1893 int alloc_len = XPS_MIN_MAP_ALLOC;
1894 int i, pos; 1894 int i, pos;
1895 1895
1896 for (pos = 0; map && pos < map->len; pos++) { 1896 for (pos = 0; map && pos < map->len; pos++) {
1897 if (map->queues[pos] != index) 1897 if (map->queues[pos] != index)
1898 continue; 1898 continue;
1899 return map; 1899 return map;
1900 } 1900 }
1901 1901
1902 /* Need to add queue to this CPU's existing map */ 1902 /* Need to add queue to this CPU's existing map */
1903 if (map) { 1903 if (map) {
1904 if (pos < map->alloc_len) 1904 if (pos < map->alloc_len)
1905 return map; 1905 return map;
1906 1906
1907 alloc_len = map->alloc_len * 2; 1907 alloc_len = map->alloc_len * 2;
1908 } 1908 }
1909 1909
1910 /* Need to allocate new map to store queue on this CPU's map */ 1910 /* Need to allocate new map to store queue on this CPU's map */
1911 new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL, 1911 new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
1912 cpu_to_node(cpu)); 1912 cpu_to_node(cpu));
1913 if (!new_map) 1913 if (!new_map)
1914 return NULL; 1914 return NULL;
1915 1915
1916 for (i = 0; i < pos; i++) 1916 for (i = 0; i < pos; i++)
1917 new_map->queues[i] = map->queues[i]; 1917 new_map->queues[i] = map->queues[i];
1918 new_map->alloc_len = alloc_len; 1918 new_map->alloc_len = alloc_len;
1919 new_map->len = pos; 1919 new_map->len = pos;
1920 1920
1921 return new_map; 1921 return new_map;
1922 } 1922 }
1923 1923
1924 int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, 1924 int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
1925 u16 index) 1925 u16 index)
1926 { 1926 {
1927 struct xps_dev_maps *dev_maps, *new_dev_maps = NULL; 1927 struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
1928 struct xps_map *map, *new_map; 1928 struct xps_map *map, *new_map;
1929 int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES); 1929 int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES);
1930 int cpu, numa_node_id = -2; 1930 int cpu, numa_node_id = -2;
1931 bool active = false; 1931 bool active = false;
1932 1932
1933 mutex_lock(&xps_map_mutex); 1933 mutex_lock(&xps_map_mutex);
1934 1934
1935 dev_maps = xmap_dereference(dev->xps_maps); 1935 dev_maps = xmap_dereference(dev->xps_maps);
1936 1936
1937 /* allocate memory for queue storage */ 1937 /* allocate memory for queue storage */
1938 for_each_online_cpu(cpu) { 1938 for_each_online_cpu(cpu) {
1939 if (!cpumask_test_cpu(cpu, mask)) 1939 if (!cpumask_test_cpu(cpu, mask))
1940 continue; 1940 continue;
1941 1941
1942 if (!new_dev_maps) 1942 if (!new_dev_maps)
1943 new_dev_maps = kzalloc(maps_sz, GFP_KERNEL); 1943 new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
1944 if (!new_dev_maps) { 1944 if (!new_dev_maps) {
1945 mutex_unlock(&xps_map_mutex); 1945 mutex_unlock(&xps_map_mutex);
1946 return -ENOMEM; 1946 return -ENOMEM;
1947 } 1947 }
1948 1948
1949 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : 1949 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
1950 NULL; 1950 NULL;
1951 1951
1952 map = expand_xps_map(map, cpu, index); 1952 map = expand_xps_map(map, cpu, index);
1953 if (!map) 1953 if (!map)
1954 goto error; 1954 goto error;
1955 1955
1956 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); 1956 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
1957 } 1957 }
1958 1958
1959 if (!new_dev_maps) 1959 if (!new_dev_maps)
1960 goto out_no_new_maps; 1960 goto out_no_new_maps;
1961 1961
1962 for_each_possible_cpu(cpu) { 1962 for_each_possible_cpu(cpu) {
1963 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) { 1963 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
1964 /* add queue to CPU maps */ 1964 /* add queue to CPU maps */
1965 int pos = 0; 1965 int pos = 0;
1966 1966
1967 map = xmap_dereference(new_dev_maps->cpu_map[cpu]); 1967 map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
1968 while ((pos < map->len) && (map->queues[pos] != index)) 1968 while ((pos < map->len) && (map->queues[pos] != index))
1969 pos++; 1969 pos++;
1970 1970
1971 if (pos == map->len) 1971 if (pos == map->len)
1972 map->queues[map->len++] = index; 1972 map->queues[map->len++] = index;
1973 #ifdef CONFIG_NUMA 1973 #ifdef CONFIG_NUMA
1974 if (numa_node_id == -2) 1974 if (numa_node_id == -2)
1975 numa_node_id = cpu_to_node(cpu); 1975 numa_node_id = cpu_to_node(cpu);
1976 else if (numa_node_id != cpu_to_node(cpu)) 1976 else if (numa_node_id != cpu_to_node(cpu))
1977 numa_node_id = -1; 1977 numa_node_id = -1;
1978 #endif 1978 #endif
1979 } else if (dev_maps) { 1979 } else if (dev_maps) {
1980 /* fill in the new device map from the old device map */ 1980 /* fill in the new device map from the old device map */
1981 map = xmap_dereference(dev_maps->cpu_map[cpu]); 1981 map = xmap_dereference(dev_maps->cpu_map[cpu]);
1982 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); 1982 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
1983 } 1983 }
1984 1984
1985 } 1985 }
1986 1986
1987 rcu_assign_pointer(dev->xps_maps, new_dev_maps); 1987 rcu_assign_pointer(dev->xps_maps, new_dev_maps);
1988 1988
1989 /* Cleanup old maps */ 1989 /* Cleanup old maps */
1990 if (dev_maps) { 1990 if (dev_maps) {
1991 for_each_possible_cpu(cpu) { 1991 for_each_possible_cpu(cpu) {
1992 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); 1992 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
1993 map = xmap_dereference(dev_maps->cpu_map[cpu]); 1993 map = xmap_dereference(dev_maps->cpu_map[cpu]);
1994 if (map && map != new_map) 1994 if (map && map != new_map)
1995 kfree_rcu(map, rcu); 1995 kfree_rcu(map, rcu);
1996 } 1996 }
1997 1997
1998 kfree_rcu(dev_maps, rcu); 1998 kfree_rcu(dev_maps, rcu);
1999 } 1999 }
2000 2000
2001 dev_maps = new_dev_maps; 2001 dev_maps = new_dev_maps;
2002 active = true; 2002 active = true;
2003 2003
2004 out_no_new_maps: 2004 out_no_new_maps:
2005 /* update Tx queue numa node */ 2005 /* update Tx queue numa node */
2006 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index), 2006 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
2007 (numa_node_id >= 0) ? numa_node_id : 2007 (numa_node_id >= 0) ? numa_node_id :
2008 NUMA_NO_NODE); 2008 NUMA_NO_NODE);
2009 2009
2010 if (!dev_maps) 2010 if (!dev_maps)
2011 goto out_no_maps; 2011 goto out_no_maps;
2012 2012
2013 /* removes queue from unused CPUs */ 2013 /* removes queue from unused CPUs */
2014 for_each_possible_cpu(cpu) { 2014 for_each_possible_cpu(cpu) {
2015 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) 2015 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu))
2016 continue; 2016 continue;
2017 2017
2018 if (remove_xps_queue(dev_maps, cpu, index)) 2018 if (remove_xps_queue(dev_maps, cpu, index))
2019 active = true; 2019 active = true;
2020 } 2020 }
2021 2021
2022 /* free map if not active */ 2022 /* free map if not active */
2023 if (!active) { 2023 if (!active) {
2024 RCU_INIT_POINTER(dev->xps_maps, NULL); 2024 RCU_INIT_POINTER(dev->xps_maps, NULL);
2025 kfree_rcu(dev_maps, rcu); 2025 kfree_rcu(dev_maps, rcu);
2026 } 2026 }
2027 2027
2028 out_no_maps: 2028 out_no_maps:
2029 mutex_unlock(&xps_map_mutex); 2029 mutex_unlock(&xps_map_mutex);
2030 2030
2031 return 0; 2031 return 0;
2032 error: 2032 error:
2033 /* remove any maps that we added */ 2033 /* remove any maps that we added */
2034 for_each_possible_cpu(cpu) { 2034 for_each_possible_cpu(cpu) {
2035 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); 2035 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
2036 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : 2036 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
2037 NULL; 2037 NULL;
2038 if (new_map && new_map != map) 2038 if (new_map && new_map != map)
2039 kfree(new_map); 2039 kfree(new_map);
2040 } 2040 }
2041 2041
2042 mutex_unlock(&xps_map_mutex); 2042 mutex_unlock(&xps_map_mutex);
2043 2043
2044 kfree(new_dev_maps); 2044 kfree(new_dev_maps);
2045 return -ENOMEM; 2045 return -ENOMEM;
2046 } 2046 }
2047 EXPORT_SYMBOL(netif_set_xps_queue); 2047 EXPORT_SYMBOL(netif_set_xps_queue);
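
/*
 * Editor's illustration (not part of this file): a minimal sketch of how a
 * multiqueue driver might use netif_set_xps_queue() to pin each TX queue to
 * one CPU. foo_setup_xps() is a hypothetical name; netif_set_xps_queue()
 * and the cpumask helpers are the real kernel APIs used above.
 */
static void foo_setup_xps(struct net_device *dev)
{
	cpumask_var_t mask;
	int qid;

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
		return;

	/* Map TX queue qid to one CPU, wrapping around the online CPUs. */
	for (qid = 0; qid < dev->real_num_tx_queues; qid++) {
		cpumask_clear(mask);
		cpumask_set_cpu(qid % num_online_cpus(), mask);
		netif_set_xps_queue(dev, mask, qid);
	}

	free_cpumask_var(mask);
}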

#endif
/*
 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
 * greater than real_num_tx_queues, stale skbs on the qdisc must be flushed.
 */
int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
{
	int rc;

	if (txq < 1 || txq > dev->num_tx_queues)
		return -EINVAL;

	if (dev->reg_state == NETREG_REGISTERED ||
	    dev->reg_state == NETREG_UNREGISTERING) {
		ASSERT_RTNL();

		rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
						  txq);
		if (rc)
			return rc;

		if (dev->num_tc)
			netif_setup_tc(dev, txq);

		if (txq < dev->real_num_tx_queues) {
			qdisc_reset_all_tx_gt(dev, txq);
#ifdef CONFIG_XPS
			netif_reset_xps_queues_gt(dev, txq);
#endif
		}
	}

	dev->real_num_tx_queues = txq;
	return 0;
}
EXPORT_SYMBOL(netif_set_real_num_tx_queues);

#ifdef CONFIG_SYSFS
/**
 * netif_set_real_num_rx_queues - set actual number of RX queues used
 * @dev: Network device
 * @rxq: Actual number of RX queues
 *
 * This must be called either with the rtnl_lock held or before
 * registration of the net device. Returns 0 on success, or a
 * negative error code. If called before registration, it always
 * succeeds.
 */
int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
{
	int rc;

	if (rxq < 1 || rxq > dev->num_rx_queues)
		return -EINVAL;

	if (dev->reg_state == NETREG_REGISTERED) {
		ASSERT_RTNL();

		rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
						  rxq);
		if (rc)
			return rc;
	}

	dev->real_num_rx_queues = rxq;
	return 0;
}
EXPORT_SYMBOL(netif_set_real_num_rx_queues);
#endif
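
/*
 * Editor's illustration (not part of this file): a hedged sketch of how a
 * driver's ethtool ->set_channels() handler might resize its active queue
 * set. ethtool operations run under rtnl_lock, which is what the
 * ASSERT_RTNL() calls above expect. foo_set_channels() is a hypothetical
 * name; the netif_set_real_num_*_queues() helpers are the real APIs.
 */
static int foo_set_channels(struct net_device *dev,
			    struct ethtool_channels *ch)
{
	int err;

	if (!ch->combined_count || ch->combined_count > dev->num_tx_queues)
		return -EINVAL;

	err = netif_set_real_num_tx_queues(dev, ch->combined_count);
	if (err)
		return err;

	/* Validates against num_rx_queues internally. */
	return netif_set_real_num_rx_queues(dev, ch->combined_count);
}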

/**
 * netif_get_num_default_rss_queues - default number of RSS queues
 *
 * This routine should set an upper limit on the number of RSS queues
 * used by default by multiqueue devices.
 */
int netif_get_num_default_rss_queues(void)
{
	return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
}
EXPORT_SYMBOL(netif_get_num_default_rss_queues);
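
/*
 * Editor's illustration (not part of this file): drivers typically clamp
 * their hardware queue count with this helper at probe time. The names
 * foo_pick_queue_count and FOO_MAX_HW_QUEUES are hypothetical.
 */
static unsigned int foo_pick_queue_count(void)
{
	/* Never ask for more queues than the default RSS policy suggests. */
	return min_t(unsigned int, FOO_MAX_HW_QUEUES,
		     netif_get_num_default_rss_queues());
}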

static inline void __netif_reschedule(struct Qdisc *q)
{
	struct softnet_data *sd;
	unsigned long flags;

	local_irq_save(flags);
	sd = &__get_cpu_var(softnet_data);
	q->next_sched = NULL;
	*sd->output_queue_tailp = q;
	sd->output_queue_tailp = &q->next_sched;
	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_restore(flags);
}

void __netif_schedule(struct Qdisc *q)
{
	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
		__netif_reschedule(q);
}
EXPORT_SYMBOL(__netif_schedule);

struct dev_kfree_skb_cb {
	enum skb_free_reason reason;
};

static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
{
	return (struct dev_kfree_skb_cb *)skb->cb;
}

void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
{
	unsigned long flags;

	if (likely(atomic_read(&skb->users) == 1)) {
		smp_rmb();
		atomic_set(&skb->users, 0);
	} else if (likely(!atomic_dec_and_test(&skb->users))) {
		return;
	}
	get_kfree_skb_cb(skb)->reason = reason;
	local_irq_save(flags);
	skb->next = __this_cpu_read(softnet_data.completion_queue);
	__this_cpu_write(softnet_data.completion_queue, skb);
	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(__dev_kfree_skb_irq);

void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason)
{
	if (in_irq() || irqs_disabled())
		__dev_kfree_skb_irq(skb, reason);
	else
		dev_kfree_skb(skb);
}
EXPORT_SYMBOL(__dev_kfree_skb_any);
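
/*
 * Editor's illustration (not part of this file): a TX completion path may
 * run in hard-IRQ context, so it uses the _any helpers, which land in
 * __dev_kfree_skb_irq() above when IRQs are off. foo_tx_complete() is a
 * hypothetical name.
 */
static void foo_tx_complete(struct sk_buff *skb, bool ok)
{
	/*
	 * dev_consume_skb_any() marks a successful transmit, while
	 * dev_kfree_skb_any() marks a drop; both are safe in hard-IRQ and
	 * process context, unlike plain dev_kfree_skb().
	 */
	if (ok)
		dev_consume_skb_any(skb);
	else
		dev_kfree_skb_any(skb);
}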


/**
 * netif_device_detach - mark device as removed
 * @dev: network device
 *
 * Mark device as removed from the system and therefore no longer available.
 */
void netif_device_detach(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_tx_stop_all_queues(dev);
	}
}
EXPORT_SYMBOL(netif_device_detach);

/**
 * netif_device_attach - mark device as attached
 * @dev: network device
 *
 * Mark device as attached to the system and restart it if needed.
 */
void netif_device_attach(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_tx_wake_all_queues(dev);
		__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_device_attach);
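
/*
 * Editor's illustration (not part of this file): the usual pairing of
 * netif_device_detach()/netif_device_attach() in a PCI driver's legacy
 * suspend/resume callbacks, assuming the net_device was stored with
 * pci_set_drvdata(). foo_suspend/foo_resume are hypothetical names.
 */
static int foo_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct net_device *dev = pci_get_drvdata(pdev);

	netif_device_detach(dev);	/* stops all TX queues if running */
	/* ... stop DMA, save registers, enter low-power state ... */
	return 0;
}

static int foo_resume(struct pci_dev *pdev)
{
	struct net_device *dev = pci_get_drvdata(pdev);

	/* ... restore registers, re-enable DMA ... */
	netif_device_attach(dev);	/* wakes queues and the watchdog */
	return 0;
}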

static void skb_warn_bad_offload(const struct sk_buff *skb)
{
	static const netdev_features_t null_features = 0;
	struct net_device *dev = skb->dev;
	const char *driver = "";

	if (!net_ratelimit())
		return;

	if (dev && dev->dev.parent)
		driver = dev_driver_string(dev->dev.parent);

	WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
	     "gso_type=%d ip_summed=%d\n",
	     driver, dev ? &dev->features : &null_features,
	     skb->sk ? &skb->sk->sk_route_caps : &null_features,
	     skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
	     skb_shinfo(skb)->gso_type, skb->ip_summed);
}

/*
 * Invalidate hardware checksum when packet is to be mangled, and
 * complete checksum manually on outgoing path.
 */
int skb_checksum_help(struct sk_buff *skb)
{
	__wsum csum;
	int ret = 0, offset;

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		goto out_set_summed;

	if (unlikely(skb_shinfo(skb)->gso_size)) {
		skb_warn_bad_offload(skb);
		return -EINVAL;
	}

	/* Before computing a checksum, we should make sure no frag could
	 * be modified by an external entity: the checksum could be wrong
	 * otherwise.
	 */
	if (skb_has_shared_frag(skb)) {
		ret = __skb_linearize(skb);
		if (ret)
			goto out;
	}

	offset = skb_checksum_start_offset(skb);
	BUG_ON(offset >= skb_headlen(skb));
	csum = skb_checksum(skb, offset, skb->len - offset, 0);

	offset += skb->csum_offset;
	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));

	if (skb_cloned(skb) &&
	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
		if (ret)
			goto out;
	}

	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
out_set_summed:
	skb->ip_summed = CHECKSUM_NONE;
out:
	return ret;
}
EXPORT_SYMBOL(skb_checksum_help);
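
/*
 * Editor's illustration (not part of this file): a minimal sketch of the
 * pattern a driver's ndo_start_xmit can use when its hardware cannot
 * checksum a given packet. foo_hw_can_csum() is a hypothetical capability
 * check; skb_checksum_help() and CHECKSUM_PARTIAL are the real kernel bits.
 */
static netdev_tx_t foo_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	if (skb->ip_summed == CHECKSUM_PARTIAL && !foo_hw_can_csum(skb)) {
		/* Fall back to a software checksum before handing to HW. */
		if (skb_checksum_help(skb))
			goto drop;
	}

	/* ... post the skb to the hardware TX ring ... */
	return NETDEV_TX_OK;

drop:
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}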

__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
{
	__be16 type = skb->protocol;
	int vlan_depth = skb->mac_len;

	/* Tunnel gso handlers can set protocol to ethernet. */
	if (type == htons(ETH_P_TEB)) {
		struct ethhdr *eth;

		if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
			return 0;

		eth = (struct ethhdr *)skb_mac_header(skb);
		type = eth->h_proto;
	}

	while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
		struct vlan_hdr *vh;

		if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
			return 0;

		vh = (struct vlan_hdr *)(skb->data + vlan_depth);
		type = vh->h_vlan_encapsulated_proto;
		vlan_depth += VLAN_HLEN;
	}

	*depth = vlan_depth;

	return type;
}

/**
 * skb_mac_gso_segment - mac layer segmentation handler.
 * @skb: buffer to segment
 * @features: features for the output path (see dev->features)
 */
struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
				    netdev_features_t features)
{
	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
	struct packet_offload *ptype;
	int vlan_depth = skb->mac_len;
	__be16 type = skb_network_protocol(skb, &vlan_depth);

	if (unlikely(!type))
		return ERR_PTR(-EINVAL);

	__skb_pull(skb, vlan_depth);

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &offload_base, list) {
		if (ptype->type == type && ptype->callbacks.gso_segment) {
			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
				int err;

				err = ptype->callbacks.gso_send_check(skb);
				segs = ERR_PTR(err);
				if (err || skb_gso_ok(skb, features))
					break;
				__skb_push(skb, (skb->data -
						 skb_network_header(skb)));
			}
			segs = ptype->callbacks.gso_segment(skb, features);
			break;
		}
	}
	rcu_read_unlock();

	__skb_push(skb, skb->data - skb_mac_header(skb));

	return segs;
}
EXPORT_SYMBOL(skb_mac_gso_segment);


/* openvswitch calls this on rx path, so we need a different check.
 */
static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
{
	if (tx_path)
		return skb->ip_summed != CHECKSUM_PARTIAL;
	else
		return skb->ip_summed == CHECKSUM_NONE;
}

/**
 * __skb_gso_segment - Perform segmentation on skb.
 * @skb: buffer to segment
 * @features: features for the output path (see dev->features)
 * @tx_path: whether it is called in TX path
 *
 * This function segments the given skb and returns a list of segments.
 *
 * It may return NULL if the skb requires no segmentation. This is
 * only possible when GSO is used for verifying header integrity.
 */
struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
				  netdev_features_t features, bool tx_path)
{
	if (unlikely(skb_needs_check(skb, tx_path))) {
		int err;

		skb_warn_bad_offload(skb);

		if (skb_header_cloned(skb) &&
		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
			return ERR_PTR(err);
	}

	SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
	SKB_GSO_CB(skb)->encap_level = 0;

	skb_reset_mac_header(skb);
	skb_reset_mac_len(skb);

	return skb_mac_gso_segment(skb, features);
}
EXPORT_SYMBOL(__skb_gso_segment);
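
/*
 * Editor's illustration (not part of this file): a hedged sketch of how a
 * caller consumes the segment list returned by skb_gso_segment(), the
 * tx_path wrapper around __skb_gso_segment() above. foo_xmit_one() is a
 * hypothetical per-segment transmit helper.
 */
static int foo_segment_and_xmit(struct sk_buff *skb, netdev_features_t features)
{
	struct sk_buff *segs, *nskb;

	segs = skb_gso_segment(skb, features);
	if (IS_ERR(segs))
		return PTR_ERR(segs);
	if (!segs)
		return foo_xmit_one(skb);	/* no segmentation needed */

	consume_skb(skb);			/* original is now redundant */
	while (segs) {
		nskb = segs;
		segs = segs->next;
		nskb->next = NULL;
		foo_xmit_one(nskb);
	}
	return 0;
}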

/* Take action when hardware reception checksum errors are detected. */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
	if (net_ratelimit()) {
		pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
		dump_stack();
	}
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif

/* Actually, we should eliminate this check as soon as we know that:
 * 1. The IOMMU is present and allows mapping all of the memory.
 * 2. No high memory really exists on this machine.
 */

static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	int i;
	if (!(dev->features & NETIF_F_HIGHDMA)) {
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
			if (PageHighMem(skb_frag_page(frag)))
				return 1;
		}
	}

	if (PCI_DMA_BUS_IS_PHYS) {
		struct device *pdev = dev->dev.parent;

		if (!pdev)
			return 0;
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
			dma_addr_t addr = page_to_phys(skb_frag_page(frag));
			if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
				return 1;
		}
	}
#endif
	return 0;
}

struct dev_gso_cb {
	void (*destructor)(struct sk_buff *skb);
};

#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)

static void dev_gso_skb_destructor(struct sk_buff *skb)
{
	struct dev_gso_cb *cb;

	kfree_skb_list(skb->next);
	skb->next = NULL;

	cb = DEV_GSO_CB(skb);
	if (cb->destructor)
		cb->destructor(skb);
}

/**
 * dev_gso_segment - Perform emulated hardware segmentation on skb.
 * @skb: buffer to segment
 * @features: device features as applicable to this skb
 *
 * This function segments the given skb and stores the list of segments
 * in skb->next.
 */
static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
{
	struct sk_buff *segs;

	segs = skb_gso_segment(skb, features);

	/* Verifying header integrity only. */
	if (!segs)
		return 0;

	if (IS_ERR(segs))
		return PTR_ERR(segs);

	skb->next = segs;
	DEV_GSO_CB(skb)->destructor = skb->destructor;
	skb->destructor = dev_gso_skb_destructor;

	return 0;
}

static netdev_features_t harmonize_features(struct sk_buff *skb,
					    const struct net_device *dev,
					    netdev_features_t features)
{
	int tmp;

	if (skb->ip_summed != CHECKSUM_NONE &&
	    !can_checksum_protocol(features, skb_network_protocol(skb, &tmp))) {
		features &= ~NETIF_F_ALL_CSUM;
	} else if (illegal_highdma(dev, skb)) {
		features &= ~NETIF_F_SG;
	}

	return features;
}

netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
					 const struct net_device *dev)
{
	__be16 protocol = skb->protocol;
	netdev_features_t features = dev->features;

	if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs)
		features &= ~NETIF_F_GSO_MASK;

	if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
		protocol = veh->h_vlan_encapsulated_proto;
	} else if (!vlan_tx_tag_present(skb)) {
		return harmonize_features(skb, dev, features);
	}

	features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
		     NETIF_F_HW_VLAN_STAG_TX);

	if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
		features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
			    NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
			    NETIF_F_HW_VLAN_STAG_TX;

	return harmonize_features(skb, dev, features);
}
EXPORT_SYMBOL(netif_skb_dev_features);
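
/*
 * Editor's illustration (not part of this file): netif_skb_features(), the
 * skb->dev wrapper around netif_skb_dev_features() above, yields the
 * feature set that applies to one particular skb. A hedged sketch of how a
 * transmit path might use it to decide on software fallbacks, reusing the
 * hypothetical foo_segment_and_xmit() sketched earlier:
 */
static int foo_prep_skb(struct sk_buff *skb)
{
	netdev_features_t features = netif_skb_features(skb);

	/* Segment in software if the device cannot GSO this skb. */
	if (netif_needs_gso(skb, features))
		return foo_segment_and_xmit(skb, features);

	/* Linearize if scatter-gather was masked off for this skb. */
	if (skb_needs_linearize(skb, features) && __skb_linearize(skb))
		return -ENOMEM;

	return 0;
}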

int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
			struct netdev_queue *txq)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int rc = NETDEV_TX_OK;
	unsigned int skb_len;

	if (likely(!skb->next)) {
		netdev_features_t features;

		/*
		 * If the device doesn't need skb->dst, release it right now
		 * while it's hot in this CPU's cache.
		 */
		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
			skb_dst_drop(skb);

		features = netif_skb_features(skb);

		if (vlan_tx_tag_present(skb) &&
		    !vlan_hw_offload_capable(features, skb->vlan_proto)) {
			skb = __vlan_put_tag(skb, skb->vlan_proto,
					     vlan_tx_tag_get(skb));
			if (unlikely(!skb))
				goto out;

			skb->vlan_tci = 0;
		}

		/* If this is an encapsulation offload request, verify we are
		 * testing hardware encapsulation features instead of the
		 * standard features for the netdev.
		 */
		if (skb->encapsulation)
			features &= dev->hw_enc_features;

		if (netif_needs_gso(skb, features)) {
			if (unlikely(dev_gso_segment(skb, features)))
				goto out_kfree_skb;
			if (skb->next)
				goto gso;
		} else {
			if (skb_needs_linearize(skb, features) &&
			    __skb_linearize(skb))
				goto out_kfree_skb;

			/* If packet is not checksummed and device does not
			 * support checksumming for this protocol, complete
			 * checksumming here.
			 */
			if (skb->ip_summed == CHECKSUM_PARTIAL) {
				if (skb->encapsulation)
					skb_set_inner_transport_header(skb,
						skb_checksum_start_offset(skb));
				else
					skb_set_transport_header(skb,
						skb_checksum_start_offset(skb));
				if (!(features & NETIF_F_ALL_CSUM) &&
				    skb_checksum_help(skb))
					goto out_kfree_skb;
			}
		}

		if (!list_empty(&ptype_all))
			dev_queue_xmit_nit(skb, dev);

		skb_len = skb->len;
		trace_net_dev_start_xmit(skb, dev);
		rc = ops->ndo_start_xmit(skb, dev);
		trace_net_dev_xmit(skb, rc, dev, skb_len);
		if (rc == NETDEV_TX_OK)
			txq_trans_update(txq);
		return rc;
	}

gso:
	do {
		struct sk_buff *nskb = skb->next;

		skb->next = nskb->next;
		nskb->next = NULL;

		if (!list_empty(&ptype_all))
			dev_queue_xmit_nit(nskb, dev);

		skb_len = nskb->len;
		trace_net_dev_start_xmit(nskb, dev);
		rc = ops->ndo_start_xmit(nskb, dev);
		trace_net_dev_xmit(nskb, rc, dev, skb_len);
		if (unlikely(rc != NETDEV_TX_OK)) {
			if (rc & ~NETDEV_TX_MASK)
				goto out_kfree_gso_skb;
			nskb->next = skb->next;
			skb->next = nskb;
			return rc;
		}
		txq_trans_update(txq);
		if (unlikely(netif_xmit_stopped(txq) && skb->next))
			return NETDEV_TX_BUSY;
	} while (skb->next);

out_kfree_gso_skb:
	if (likely(skb->next == NULL)) {
		skb->destructor = DEV_GSO_CB(skb)->destructor;
		consume_skb(skb);
		return rc;
	}
out_kfree_skb:
	kfree_skb(skb);
out:
	return rc;
}
EXPORT_SYMBOL_GPL(dev_hard_start_xmit);

static void qdisc_pkt_len_init(struct sk_buff *skb)
{
	const struct skb_shared_info *shinfo = skb_shinfo(skb);

	qdisc_skb_cb(skb)->pkt_len = skb->len;

	/* To get a more precise estimate of the bytes sent on the wire,
	 * we add the header size of all segments to pkt_len.
	 */
	if (shinfo->gso_size) {
		unsigned int hdr_len;
		u16 gso_segs = shinfo->gso_segs;

		/* mac layer + network layer */
		hdr_len = skb_transport_header(skb) - skb_mac_header(skb);

		/* + transport layer */
		if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
			hdr_len += tcp_hdrlen(skb);
		else
			hdr_len += sizeof(struct udphdr);

		if (shinfo->gso_type & SKB_GSO_DODGY)
			gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
						shinfo->gso_size);

		qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len;
	}
}
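
/*
 * Editor's worked example (not part of this file), assuming a TSO skb with
 * 66 bytes of Ethernet + IPv4 + TCP headers, gso_size = 1448 and
 * gso_segs = 45, so skb->len = 66 + 45 * 1448 = 65226:
 *
 *	pkt_len = 65226 + (45 - 1) * 66 = 68130
 *
 * i.e. the qdisc accounts for the headers that each of the 45 on-wire
 * segments will carry, not just the single header present on the
 * aggregated skb.
 */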

static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
				 struct net_device *dev,
				 struct netdev_queue *txq)
{
	spinlock_t *root_lock = qdisc_lock(q);
	bool contended;
	int rc;

	qdisc_pkt_len_init(skb);
	qdisc_calculate_pkt_len(skb, q);
	/*
	 * Heuristic to force contended enqueues to serialize on a
	 * separate lock before trying to get the qdisc main lock.
	 * This permits the __QDISC_STATE_RUNNING owner to get the lock more
	 * often and dequeue packets faster.
	 */
	contended = qdisc_is_running(q);
	if (unlikely(contended))
		spin_lock(&q->busylock);

	spin_lock(root_lock);
	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
		kfree_skb(skb);
		rc = NET_XMIT_DROP;
	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
		   qdisc_run_begin(q)) {
		/*
		 * This is a work-conserving queue; there are no old skbs
		 * waiting to be sent out; and the qdisc is not running -
		 * xmit the skb directly.
		 */
		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
			skb_dst_force(skb);

		qdisc_bstats_update(q, skb);

		if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
			if (unlikely(contended)) {
				spin_unlock(&q->busylock);
				contended = false;
			}
			__qdisc_run(q);
		} else
			qdisc_run_end(q);

		rc = NET_XMIT_SUCCESS;
	} else {
		skb_dst_force(skb);
		rc = q->enqueue(skb, q) & NET_XMIT_MASK;
		if (qdisc_run_begin(q)) {
			if (unlikely(contended)) {
				spin_unlock(&q->busylock);
				contended = false;
			}
			__qdisc_run(q);
		}
	}
	spin_unlock(root_lock);
	if (unlikely(contended))
		spin_unlock(&q->busylock);
	return rc;
}

#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
static void skb_update_prio(struct sk_buff *skb)
{
	struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);

	if (!skb->priority && skb->sk && map) {
		unsigned int prioidx = skb->sk->sk_cgrp_prioidx;

		if (prioidx < map->priomap_len)
			skb->priority = map->priomap[prioidx];
	}
}
#else
#define skb_update_prio(skb)
#endif

static DEFINE_PER_CPU(int, xmit_recursion);
#define RECURSION_LIMIT 10

/**
 * dev_loopback_xmit - loop back @skb
 * @skb: buffer to transmit
 */
int dev_loopback_xmit(struct sk_buff *skb)
{
	skb_reset_mac_header(skb);
	__skb_pull(skb, skb_network_offset(skb));
	skb->pkt_type = PACKET_LOOPBACK;
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	WARN_ON(!skb_dst(skb));
	skb_dst_force(skb);
	netif_rx_ni(skb);
	return 0;
}
EXPORT_SYMBOL(dev_loopback_xmit);

/**
 * __dev_queue_xmit - transmit a buffer
 * @skb: buffer to transmit
 * @accel_priv: private data used for L2 forwarding offload
 *
 * Queue a buffer for transmission to a network device. The caller must
 * have set the device and priority and built the buffer before calling
 * this function. The function can be called from an interrupt.
 *
 * A negative errno code is returned on a failure. A success does not
 * guarantee the frame will be transmitted as it may be dropped due
 * to congestion or traffic shaping.
 *
 * -----------------------------------------------------------------------------------
 * I notice this method can also return errors from the queue disciplines,
 * including NET_XMIT_DROP, which is a positive value. So, errors can also
 * be positive.
 *
 * Regardless of the return value, the skb is consumed, so it is currently
 * difficult to retry a send to this method. (You can bump the ref count
 * before sending to hold a reference for retry if you are careful.)
 *
 * When calling this method, interrupts MUST be enabled. This is because
 * the BH enable code must have IRQs enabled so that it will not deadlock.
 * --BLG
 */
static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
{
	struct net_device *dev = skb->dev;
	struct netdev_queue *txq;
	struct Qdisc *q;
	int rc = -ENOMEM;

	skb_reset_mac_header(skb);

	/* Disable soft irqs for various locks below. Also
	 * stops preemption for RCU.
	 */
	rcu_read_lock_bh();

	skb_update_prio(skb);

	txq = netdev_pick_tx(dev, skb, accel_priv);
	q = rcu_dereference_bh(txq->qdisc);

#ifdef CONFIG_NET_CLS_ACT
	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
#endif
	trace_net_dev_queue(skb);
	if (q->enqueue) {
		rc = __dev_xmit_skb(skb, q, dev, txq);
		goto out;
	}

	/* The device has no queue. Common case for software devices:
	 * loopback, all the sorts of tunnels...
	 *
	 * Really, it is unlikely that netif_tx_lock protection is necessary
	 * here. (f.e. loopback and IP tunnels are clean ignoring statistics
	 * counters.)
	 * However, it is possible that they rely on the protection
	 * made by us here.
	 *
	 * Check this and take the lock here anyway. It is not prone to
	 * deadlocks. The noqueue qdisc case is even simpler 8)
	 */
	if (dev->flags & IFF_UP) {
		int cpu = smp_processor_id(); /* ok because BHs are off */

		if (txq->xmit_lock_owner != cpu) {

			if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
				goto recursion_alert;

			HARD_TX_LOCK(dev, txq, cpu);

			if (!netif_xmit_stopped(txq)) {
				__this_cpu_inc(xmit_recursion);
				rc = dev_hard_start_xmit(skb, dev, txq);
				__this_cpu_dec(xmit_recursion);
				if (dev_xmit_complete(rc)) {
					HARD_TX_UNLOCK(dev, txq);
					goto out;
				}
			}
			HARD_TX_UNLOCK(dev, txq);
			net_crit_ratelimited("Virtual device %s asks to queue packet!\n",
					     dev->name);
		} else {
			/* Recursion is detected! It is possible,
			 * unfortunately
			 */
recursion_alert:
			net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
					     dev->name);
		}
	}

	rc = -ENETDOWN;
	rcu_read_unlock_bh();

	kfree_skb(skb);
	return rc;
out:
	rcu_read_unlock_bh();
	return rc;
}

int dev_queue_xmit(struct sk_buff *skb)
{
	return __dev_queue_xmit(skb, NULL);
}
EXPORT_SYMBOL(dev_queue_xmit);

int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
{
	return __dev_queue_xmit(skb, accel_priv);
}
EXPORT_SYMBOL(dev_queue_xmit_accel);
2906 2906
2907 2907
2908 /*======================================================================= 2908 /*=======================================================================
2909 Receiver routines 2909 Receiver routines
2910 =======================================================================*/ 2910 =======================================================================*/
2911 2911
2912 int netdev_max_backlog __read_mostly = 1000; 2912 int netdev_max_backlog __read_mostly = 1000;
2913 EXPORT_SYMBOL(netdev_max_backlog); 2913 EXPORT_SYMBOL(netdev_max_backlog);
2914 2914
2915 int netdev_tstamp_prequeue __read_mostly = 1; 2915 int netdev_tstamp_prequeue __read_mostly = 1;
2916 int netdev_budget __read_mostly = 300; 2916 int netdev_budget __read_mostly = 300;
2917 int weight_p __read_mostly = 64; /* old backlog weight */ 2917 int weight_p __read_mostly = 64; /* old backlog weight */
2918 2918
2919 /* Called with irq disabled */ 2919 /* Called with irq disabled */
2920 static inline void ____napi_schedule(struct softnet_data *sd, 2920 static inline void ____napi_schedule(struct softnet_data *sd,
2921 struct napi_struct *napi) 2921 struct napi_struct *napi)
2922 { 2922 {
2923 list_add_tail(&napi->poll_list, &sd->poll_list); 2923 list_add_tail(&napi->poll_list, &sd->poll_list);
2924 __raise_softirq_irqoff(NET_RX_SOFTIRQ); 2924 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2925 } 2925 }
2926 2926
2927 #ifdef CONFIG_RPS 2927 #ifdef CONFIG_RPS
2928 2928
2929 /* One global table that all flow-based protocols share. */ 2929 /* One global table that all flow-based protocols share. */
2930 struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; 2930 struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
2931 EXPORT_SYMBOL(rps_sock_flow_table); 2931 EXPORT_SYMBOL(rps_sock_flow_table);
2932 2932
2933 struct static_key rps_needed __read_mostly; 2933 struct static_key rps_needed __read_mostly;
2934 2934
2935 static struct rps_dev_flow * 2935 static struct rps_dev_flow *
2936 set_rps_cpu(struct net_device *dev, struct sk_buff *skb, 2936 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2937 struct rps_dev_flow *rflow, u16 next_cpu) 2937 struct rps_dev_flow *rflow, u16 next_cpu)
2938 { 2938 {
2939 if (next_cpu != RPS_NO_CPU) { 2939 if (next_cpu != RPS_NO_CPU) {
2940 #ifdef CONFIG_RFS_ACCEL 2940 #ifdef CONFIG_RFS_ACCEL
2941 struct netdev_rx_queue *rxqueue; 2941 struct netdev_rx_queue *rxqueue;
2942 struct rps_dev_flow_table *flow_table; 2942 struct rps_dev_flow_table *flow_table;
2943 struct rps_dev_flow *old_rflow; 2943 struct rps_dev_flow *old_rflow;
2944 u32 flow_id; 2944 u32 flow_id;
2945 u16 rxq_index; 2945 u16 rxq_index;
2946 int rc; 2946 int rc;
2947 2947
2948 /* Should we steer this flow to a different hardware queue? */ 2948 /* Should we steer this flow to a different hardware queue? */
2949 if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap || 2949 if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
2950 !(dev->features & NETIF_F_NTUPLE)) 2950 !(dev->features & NETIF_F_NTUPLE))
2951 goto out; 2951 goto out;
2952 rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu); 2952 rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
2953 if (rxq_index == skb_get_rx_queue(skb)) 2953 if (rxq_index == skb_get_rx_queue(skb))
2954 goto out; 2954 goto out;
2955 2955
2956 rxqueue = dev->_rx + rxq_index; 2956 rxqueue = dev->_rx + rxq_index;
2957 flow_table = rcu_dereference(rxqueue->rps_flow_table); 2957 flow_table = rcu_dereference(rxqueue->rps_flow_table);
2958 if (!flow_table) 2958 if (!flow_table)
2959 goto out; 2959 goto out;
2960 flow_id = skb->rxhash & flow_table->mask; 2960 flow_id = skb->rxhash & flow_table->mask;
2961 rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, 2961 rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
2962 rxq_index, flow_id); 2962 rxq_index, flow_id);
2963 if (rc < 0) 2963 if (rc < 0)
2964 goto out; 2964 goto out;
2965 old_rflow = rflow; 2965 old_rflow = rflow;
2966 rflow = &flow_table->flows[flow_id]; 2966 rflow = &flow_table->flows[flow_id];
2967 rflow->filter = rc; 2967 rflow->filter = rc;
2968 if (old_rflow->filter == rflow->filter) 2968 if (old_rflow->filter == rflow->filter)
2969 old_rflow->filter = RPS_NO_FILTER; 2969 old_rflow->filter = RPS_NO_FILTER;
2970 out: 2970 out:
2971 #endif 2971 #endif
2972 rflow->last_qtail = 2972 rflow->last_qtail =
2973 per_cpu(softnet_data, next_cpu).input_queue_head; 2973 per_cpu(softnet_data, next_cpu).input_queue_head;
2974 } 2974 }
2975 2975
2976 rflow->cpu = next_cpu; 2976 rflow->cpu = next_cpu;
2977 return rflow; 2977 return rflow;
2978 } 2978 }
2979 2979
2980 /* 2980 /*
2981 * get_rps_cpu is called from netif_receive_skb and returns the target 2981 * get_rps_cpu is called from netif_receive_skb and returns the target
2982 * CPU from the RPS map of the receiving queue for a given skb. 2982 * CPU from the RPS map of the receiving queue for a given skb.
2983 * rcu_read_lock must be held on entry. 2983 * rcu_read_lock must be held on entry.
2984 */ 2984 */
2985 static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, 2985 static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2986 struct rps_dev_flow **rflowp) 2986 struct rps_dev_flow **rflowp)
2987 { 2987 {
2988 struct netdev_rx_queue *rxqueue; 2988 struct netdev_rx_queue *rxqueue;
2989 struct rps_map *map; 2989 struct rps_map *map;
2990 struct rps_dev_flow_table *flow_table; 2990 struct rps_dev_flow_table *flow_table;
2991 struct rps_sock_flow_table *sock_flow_table; 2991 struct rps_sock_flow_table *sock_flow_table;
2992 int cpu = -1; 2992 int cpu = -1;
2993 u16 tcpu; 2993 u16 tcpu;
2994 2994
2995 if (skb_rx_queue_recorded(skb)) { 2995 if (skb_rx_queue_recorded(skb)) {
2996 u16 index = skb_get_rx_queue(skb); 2996 u16 index = skb_get_rx_queue(skb);
2997 if (unlikely(index >= dev->real_num_rx_queues)) { 2997 if (unlikely(index >= dev->real_num_rx_queues)) {
2998 WARN_ONCE(dev->real_num_rx_queues > 1, 2998 WARN_ONCE(dev->real_num_rx_queues > 1,
2999 "%s received packet on queue %u, but number " 2999 "%s received packet on queue %u, but number "
3000 "of RX queues is %u\n", 3000 "of RX queues is %u\n",
3001 dev->name, index, dev->real_num_rx_queues); 3001 dev->name, index, dev->real_num_rx_queues);
3002 goto done; 3002 goto done;
3003 } 3003 }
3004 rxqueue = dev->_rx + index; 3004 rxqueue = dev->_rx + index;
3005 } else 3005 } else
3006 rxqueue = dev->_rx; 3006 rxqueue = dev->_rx;
3007 3007
3008 map = rcu_dereference(rxqueue->rps_map); 3008 map = rcu_dereference(rxqueue->rps_map);
3009 if (map) { 3009 if (map) {
3010 if (map->len == 1 && 3010 if (map->len == 1 &&
3011 !rcu_access_pointer(rxqueue->rps_flow_table)) { 3011 !rcu_access_pointer(rxqueue->rps_flow_table)) {
3012 tcpu = map->cpus[0]; 3012 tcpu = map->cpus[0];
3013 if (cpu_online(tcpu)) 3013 if (cpu_online(tcpu))
3014 cpu = tcpu; 3014 cpu = tcpu;
3015 goto done; 3015 goto done;
3016 } 3016 }
3017 } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) { 3017 } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) {
3018 goto done; 3018 goto done;
3019 } 3019 }
3020 3020
3021 skb_reset_network_header(skb); 3021 skb_reset_network_header(skb);
3022 if (!skb_get_hash(skb)) 3022 if (!skb_get_hash(skb))
3023 goto done; 3023 goto done;
3024 3024
3025 flow_table = rcu_dereference(rxqueue->rps_flow_table); 3025 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3026 sock_flow_table = rcu_dereference(rps_sock_flow_table); 3026 sock_flow_table = rcu_dereference(rps_sock_flow_table);
3027 if (flow_table && sock_flow_table) { 3027 if (flow_table && sock_flow_table) {
3028 u16 next_cpu; 3028 u16 next_cpu;
3029 struct rps_dev_flow *rflow; 3029 struct rps_dev_flow *rflow;
3030 3030
3031 rflow = &flow_table->flows[skb->rxhash & flow_table->mask]; 3031 rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
3032 tcpu = rflow->cpu; 3032 tcpu = rflow->cpu;
3033 3033
3034 next_cpu = sock_flow_table->ents[skb->rxhash & 3034 next_cpu = sock_flow_table->ents[skb->rxhash &
3035 sock_flow_table->mask]; 3035 sock_flow_table->mask];
3036 3036
3037 /* 3037 /*
3038 * If the desired CPU (where last recvmsg was done) is 3038 * If the desired CPU (where last recvmsg was done) is
3039 * different from current CPU (one in the rx-queue flow 3039 * different from current CPU (one in the rx-queue flow
3040 * table entry), switch if one of the following holds: 3040 * table entry), switch if one of the following holds:
3041 * - Current CPU is unset (equal to RPS_NO_CPU). 3041 * - Current CPU is unset (equal to RPS_NO_CPU).
3042 * - Current CPU is offline. 3042 * - Current CPU is offline.
3043 * - The current CPU's queue tail has advanced beyond the 3043 * - The current CPU's queue tail has advanced beyond the
3044 * last packet that was enqueued using this table entry. 3044 * last packet that was enqueued using this table entry.
3045 * This guarantees that all previous packets for the flow 3045 * This guarantees that all previous packets for the flow
3046 * have been dequeued, thus preserving in order delivery. 3046 * have been dequeued, thus preserving in order delivery.
3047 */ 3047 */
3048 if (unlikely(tcpu != next_cpu) && 3048 if (unlikely(tcpu != next_cpu) &&
3049 (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || 3049 (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
3050 ((int)(per_cpu(softnet_data, tcpu).input_queue_head - 3050 ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
3051 rflow->last_qtail)) >= 0)) { 3051 rflow->last_qtail)) >= 0)) {
3052 tcpu = next_cpu; 3052 tcpu = next_cpu;
3053 rflow = set_rps_cpu(dev, skb, rflow, next_cpu); 3053 rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
3054 } 3054 }
3055 3055
3056 if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { 3056 if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
3057 *rflowp = rflow; 3057 *rflowp = rflow;
3058 cpu = tcpu; 3058 cpu = tcpu;
3059 goto done; 3059 goto done;
3060 } 3060 }
3061 } 3061 }
3062 3062
3063 if (map) { 3063 if (map) {
3064 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; 3064 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
3065 3065
3066 if (cpu_online(tcpu)) { 3066 if (cpu_online(tcpu)) {
3067 cpu = tcpu; 3067 cpu = tcpu;
3068 goto done; 3068 goto done;
3069 } 3069 }
3070 } 3070 }
3071 3071
3072 done: 3072 done:
3073 return cpu; 3073 return cpu;
3074 } 3074 }
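/*
 * Minimal stand-alone sketch of the hash-to-index step used by
 * get_rps_cpu() above: a 32-bit flow hash is scaled into [0, len)
 * with a 64-bit multiply and shift, avoiding a modulo.  The helper
 * name and the example map below are illustrative, not kernel code.
 */
#include <stdint.h>

static unsigned int rps_pick_index(uint32_t hash, unsigned int len)
{
        /* floor(hash / 2^32 * len), computed in fixed point */
        return (unsigned int)(((uint64_t)hash * len) >> 32);
}

/*
 * Example: with a 4-entry CPU map { 0, 2, 4, 6 }, a hash whose top
 * two bits are 10b (e.g. 0x9e3779b9) yields index 2, i.e. CPU 4.
 */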
3075 3075
3076 #ifdef CONFIG_RFS_ACCEL 3076 #ifdef CONFIG_RFS_ACCEL
3077 3077
3078 /** 3078 /**
3079 * rps_may_expire_flow - check whether an RFS hardware filter may be removed 3079 * rps_may_expire_flow - check whether an RFS hardware filter may be removed
3080 * @dev: Device on which the filter was set 3080 * @dev: Device on which the filter was set
3081 * @rxq_index: RX queue index 3081 * @rxq_index: RX queue index
3082 * @flow_id: Flow ID passed to ndo_rx_flow_steer() 3082 * @flow_id: Flow ID passed to ndo_rx_flow_steer()
3083 * @filter_id: Filter ID returned by ndo_rx_flow_steer() 3083 * @filter_id: Filter ID returned by ndo_rx_flow_steer()
3084 * 3084 *
3085 * Drivers that implement ndo_rx_flow_steer() should periodically call 3085 * Drivers that implement ndo_rx_flow_steer() should periodically call
3086 * this function for each installed filter and remove the filters for 3086 * this function for each installed filter and remove the filters for
3087 * which it returns %true. 3087 * which it returns %true.
3088 */ 3088 */
3089 bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, 3089 bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
3090 u32 flow_id, u16 filter_id) 3090 u32 flow_id, u16 filter_id)
3091 { 3091 {
3092 struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index; 3092 struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
3093 struct rps_dev_flow_table *flow_table; 3093 struct rps_dev_flow_table *flow_table;
3094 struct rps_dev_flow *rflow; 3094 struct rps_dev_flow *rflow;
3095 bool expire = true; 3095 bool expire = true;
3096 int cpu; 3096 int cpu;
3097 3097
3098 rcu_read_lock(); 3098 rcu_read_lock();
3099 flow_table = rcu_dereference(rxqueue->rps_flow_table); 3099 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3100 if (flow_table && flow_id <= flow_table->mask) { 3100 if (flow_table && flow_id <= flow_table->mask) {
3101 rflow = &flow_table->flows[flow_id]; 3101 rflow = &flow_table->flows[flow_id];
3102 cpu = ACCESS_ONCE(rflow->cpu); 3102 cpu = ACCESS_ONCE(rflow->cpu);
3103 if (rflow->filter == filter_id && cpu != RPS_NO_CPU && 3103 if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
3104 ((int)(per_cpu(softnet_data, cpu).input_queue_head - 3104 ((int)(per_cpu(softnet_data, cpu).input_queue_head -
3105 rflow->last_qtail) < 3105 rflow->last_qtail) <
3106 (int)(10 * flow_table->mask))) 3106 (int)(10 * flow_table->mask)))
3107 expire = false; 3107 expire = false;
3108 } 3108 }
3109 rcu_read_unlock(); 3109 rcu_read_unlock();
3110 return expire; 3110 return expire;
3111 } 3111 }
3112 EXPORT_SYMBOL(rps_may_expire_flow); 3112 EXPORT_SYMBOL(rps_may_expire_flow);
3113 3113
3114 #endif /* CONFIG_RFS_ACCEL */ 3114 #endif /* CONFIG_RFS_ACCEL */
3115 3115
3116 /* Called from hardirq (IPI) context */ 3116 /* Called from hardirq (IPI) context */
3117 static void rps_trigger_softirq(void *data) 3117 static void rps_trigger_softirq(void *data)
3118 { 3118 {
3119 struct softnet_data *sd = data; 3119 struct softnet_data *sd = data;
3120 3120
3121 ____napi_schedule(sd, &sd->backlog); 3121 ____napi_schedule(sd, &sd->backlog);
3122 sd->received_rps++; 3122 sd->received_rps++;
3123 } 3123 }
3124 3124
3125 #endif /* CONFIG_RPS */ 3125 #endif /* CONFIG_RPS */
3126 3126
3127 /* 3127 /*
3128 * Check if this softnet_data structure is another cpu one 3128 * Check if this softnet_data structure is another cpu one
3129 * If yes, queue it to our IPI list and return 1 3129 * If yes, queue it to our IPI list and return 1
3130 * If no, return 0 3130 * If no, return 0
3131 */ 3131 */
3132 static int rps_ipi_queued(struct softnet_data *sd) 3132 static int rps_ipi_queued(struct softnet_data *sd)
3133 { 3133 {
3134 #ifdef CONFIG_RPS 3134 #ifdef CONFIG_RPS
3135 struct softnet_data *mysd = &__get_cpu_var(softnet_data); 3135 struct softnet_data *mysd = &__get_cpu_var(softnet_data);
3136 3136
3137 if (sd != mysd) { 3137 if (sd != mysd) {
3138 sd->rps_ipi_next = mysd->rps_ipi_list; 3138 sd->rps_ipi_next = mysd->rps_ipi_list;
3139 mysd->rps_ipi_list = sd; 3139 mysd->rps_ipi_list = sd;
3140 3140
3141 __raise_softirq_irqoff(NET_RX_SOFTIRQ); 3141 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
3142 return 1; 3142 return 1;
3143 } 3143 }
3144 #endif /* CONFIG_RPS */ 3144 #endif /* CONFIG_RPS */
3145 return 0; 3145 return 0;
3146 } 3146 }
3147 3147
3148 #ifdef CONFIG_NET_FLOW_LIMIT 3148 #ifdef CONFIG_NET_FLOW_LIMIT
3149 int netdev_flow_limit_table_len __read_mostly = (1 << 12); 3149 int netdev_flow_limit_table_len __read_mostly = (1 << 12);
3150 #endif 3150 #endif
3151 3151
3152 static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen) 3152 static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
3153 { 3153 {
3154 #ifdef CONFIG_NET_FLOW_LIMIT 3154 #ifdef CONFIG_NET_FLOW_LIMIT
3155 struct sd_flow_limit *fl; 3155 struct sd_flow_limit *fl;
3156 struct softnet_data *sd; 3156 struct softnet_data *sd;
3157 unsigned int old_flow, new_flow; 3157 unsigned int old_flow, new_flow;
3158 3158
3159 if (qlen < (netdev_max_backlog >> 1)) 3159 if (qlen < (netdev_max_backlog >> 1))
3160 return false; 3160 return false;
3161 3161
3162 sd = &__get_cpu_var(softnet_data); 3162 sd = &__get_cpu_var(softnet_data);
3163 3163
3164 rcu_read_lock(); 3164 rcu_read_lock();
3165 fl = rcu_dereference(sd->flow_limit); 3165 fl = rcu_dereference(sd->flow_limit);
3166 if (fl) { 3166 if (fl) {
3167 new_flow = skb_get_hash(skb) & (fl->num_buckets - 1); 3167 new_flow = skb_get_hash(skb) & (fl->num_buckets - 1);
3168 old_flow = fl->history[fl->history_head]; 3168 old_flow = fl->history[fl->history_head];
3169 fl->history[fl->history_head] = new_flow; 3169 fl->history[fl->history_head] = new_flow;
3170 3170
3171 fl->history_head++; 3171 fl->history_head++;
3172 fl->history_head &= FLOW_LIMIT_HISTORY - 1; 3172 fl->history_head &= FLOW_LIMIT_HISTORY - 1;
3173 3173
3174 if (likely(fl->buckets[old_flow])) 3174 if (likely(fl->buckets[old_flow]))
3175 fl->buckets[old_flow]--; 3175 fl->buckets[old_flow]--;
3176 3176
3177 if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) { 3177 if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) {
3178 fl->count++; 3178 fl->count++;
3179 rcu_read_unlock(); 3179 rcu_read_unlock();
3180 return true; 3180 return true;
3181 } 3181 }
3182 } 3182 }
3183 rcu_read_unlock(); 3183 rcu_read_unlock();
3184 #endif 3184 #endif
3185 return false; 3185 return false;
3186 } 3186 }
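/*
 * Stand-alone sketch of the sliding-window accounting done in
 * skb_flow_limit() above: a circular history of the buckets of recent
 * packets plus per-bucket counts; a flow whose bucket holds more than
 * half of the window is flagged as dominating.  Sizes, names and the
 * zero-initialised struct are illustrative, not the kernel's.
 */
#include <stdbool.h>
#include <stdint.h>

#define HISTORY_LEN   128u            /* must be a power of two */
#define NUM_BUCKETS   32u             /* must be a power of two */

struct flow_limit {                   /* caller zero-initialises this */
        uint32_t history[HISTORY_LEN];  /* bucket of each recent packet */
        uint32_t buckets[NUM_BUCKETS];  /* packets per bucket in window */
        uint32_t head;
};

static bool flow_over_limit(struct flow_limit *fl, uint32_t flow_hash)
{
        uint32_t new_flow = flow_hash & (NUM_BUCKETS - 1);
        uint32_t old_flow = fl->history[fl->head];

        /* slide the window: evict the oldest sample, record the new one */
        fl->history[fl->head] = new_flow;
        fl->head = (fl->head + 1) & (HISTORY_LEN - 1);

        if (fl->buckets[old_flow])
                fl->buckets[old_flow]--;

        /* a single bucket holding more than half the window dominates */
        return ++fl->buckets[new_flow] > (HISTORY_LEN >> 1);
}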
3187 3187
3188 /* 3188 /*
3189 * enqueue_to_backlog is called to queue an skb to a per CPU backlog 3189 * enqueue_to_backlog is called to queue an skb to a per CPU backlog
3190 * queue (may be a remote CPU queue). 3190 * queue (may be a remote CPU queue).
3191 */ 3191 */
3192 static int enqueue_to_backlog(struct sk_buff *skb, int cpu, 3192 static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
3193 unsigned int *qtail) 3193 unsigned int *qtail)
3194 { 3194 {
3195 struct softnet_data *sd; 3195 struct softnet_data *sd;
3196 unsigned long flags; 3196 unsigned long flags;
3197 unsigned int qlen; 3197 unsigned int qlen;
3198 3198
3199 sd = &per_cpu(softnet_data, cpu); 3199 sd = &per_cpu(softnet_data, cpu);
3200 3200
3201 local_irq_save(flags); 3201 local_irq_save(flags);
3202 3202
3203 rps_lock(sd); 3203 rps_lock(sd);
3204 qlen = skb_queue_len(&sd->input_pkt_queue); 3204 qlen = skb_queue_len(&sd->input_pkt_queue);
3205 if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) { 3205 if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
3206 if (skb_queue_len(&sd->input_pkt_queue)) { 3206 if (skb_queue_len(&sd->input_pkt_queue)) {
3207 enqueue: 3207 enqueue:
3208 __skb_queue_tail(&sd->input_pkt_queue, skb); 3208 __skb_queue_tail(&sd->input_pkt_queue, skb);
3209 input_queue_tail_incr_save(sd, qtail); 3209 input_queue_tail_incr_save(sd, qtail);
3210 rps_unlock(sd); 3210 rps_unlock(sd);
3211 local_irq_restore(flags); 3211 local_irq_restore(flags);
3212 return NET_RX_SUCCESS; 3212 return NET_RX_SUCCESS;
3213 } 3213 }
3214 3214
3215 /* Schedule NAPI for backlog device 3215 /* Schedule NAPI for backlog device
3216 * We can use non atomic operation since we own the queue lock 3216 * We can use non atomic operation since we own the queue lock
3217 */ 3217 */
3218 if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) { 3218 if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
3219 if (!rps_ipi_queued(sd)) 3219 if (!rps_ipi_queued(sd))
3220 ____napi_schedule(sd, &sd->backlog); 3220 ____napi_schedule(sd, &sd->backlog);
3221 } 3221 }
3222 goto enqueue; 3222 goto enqueue;
3223 } 3223 }
3224 3224
3225 sd->dropped++; 3225 sd->dropped++;
3226 rps_unlock(sd); 3226 rps_unlock(sd);
3227 3227
3228 local_irq_restore(flags); 3228 local_irq_restore(flags);
3229 3229
3230 atomic_long_inc(&skb->dev->rx_dropped); 3230 atomic_long_inc(&skb->dev->rx_dropped);
3231 kfree_skb(skb); 3231 kfree_skb(skb);
3232 return NET_RX_DROP; 3232 return NET_RX_DROP;
3233 } 3233 }
3234 3234
3235 static int netif_rx_internal(struct sk_buff *skb) 3235 static int netif_rx_internal(struct sk_buff *skb)
3236 { 3236 {
3237 int ret; 3237 int ret;
3238 3238
3239 /* if netpoll wants it, pretend we never saw it */ 3239 /* if netpoll wants it, pretend we never saw it */
3240 if (netpoll_rx(skb)) 3240 if (netpoll_rx(skb))
3241 return NET_RX_DROP; 3241 return NET_RX_DROP;
3242 3242
3243 net_timestamp_check(netdev_tstamp_prequeue, skb); 3243 net_timestamp_check(netdev_tstamp_prequeue, skb);
3244 3244
3245 trace_netif_rx(skb); 3245 trace_netif_rx(skb);
3246 #ifdef CONFIG_RPS 3246 #ifdef CONFIG_RPS
3247 if (static_key_false(&rps_needed)) { 3247 if (static_key_false(&rps_needed)) {
3248 struct rps_dev_flow voidflow, *rflow = &voidflow; 3248 struct rps_dev_flow voidflow, *rflow = &voidflow;
3249 int cpu; 3249 int cpu;
3250 3250
3251 preempt_disable(); 3251 preempt_disable();
3252 rcu_read_lock(); 3252 rcu_read_lock();
3253 3253
3254 cpu = get_rps_cpu(skb->dev, skb, &rflow); 3254 cpu = get_rps_cpu(skb->dev, skb, &rflow);
3255 if (cpu < 0) 3255 if (cpu < 0)
3256 cpu = smp_processor_id(); 3256 cpu = smp_processor_id();
3257 3257
3258 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); 3258 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3259 3259
3260 rcu_read_unlock(); 3260 rcu_read_unlock();
3261 preempt_enable(); 3261 preempt_enable();
3262 } else 3262 } else
3263 #endif 3263 #endif
3264 { 3264 {
3265 unsigned int qtail; 3265 unsigned int qtail;
3266 ret = enqueue_to_backlog(skb, get_cpu(), &qtail); 3266 ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
3267 put_cpu(); 3267 put_cpu();
3268 } 3268 }
3269 return ret; 3269 return ret;
3270 } 3270 }
3271 3271
3272 /** 3272 /**
3273 * netif_rx - post buffer to the network code 3273 * netif_rx - post buffer to the network code
3274 * @skb: buffer to post 3274 * @skb: buffer to post
3275 * 3275 *
3276 * This function receives a packet from a device driver and queues it for 3276 * This function receives a packet from a device driver and queues it for
3277 * the upper (protocol) levels to process. It always succeeds. The buffer 3277 * the upper (protocol) levels to process. It always succeeds. The buffer
3278 * may be dropped during processing for congestion control or by the 3278 * may be dropped during processing for congestion control or by the
3279 * protocol layers. 3279 * protocol layers.
3280 * 3280 *
3281 * return values: 3281 * return values:
3282 * NET_RX_SUCCESS (no congestion) 3282 * NET_RX_SUCCESS (no congestion)
3283 * NET_RX_DROP (packet was dropped) 3283 * NET_RX_DROP (packet was dropped)
3284 * 3284 *
3285 */ 3285 */
3286 3286
3287 int netif_rx(struct sk_buff *skb) 3287 int netif_rx(struct sk_buff *skb)
3288 { 3288 {
3289 trace_netif_rx_entry(skb); 3289 trace_netif_rx_entry(skb);
3290 3290
3291 return netif_rx_internal(skb); 3291 return netif_rx_internal(skb);
3292 } 3292 }
3293 EXPORT_SYMBOL(netif_rx); 3293 EXPORT_SYMBOL(netif_rx);
3294 3294
3295 int netif_rx_ni(struct sk_buff *skb) 3295 int netif_rx_ni(struct sk_buff *skb)
3296 { 3296 {
3297 int err; 3297 int err;
3298 3298
3299 trace_netif_rx_ni_entry(skb); 3299 trace_netif_rx_ni_entry(skb);
3300 3300
3301 preempt_disable(); 3301 preempt_disable();
3302 err = netif_rx_internal(skb); 3302 err = netif_rx_internal(skb);
3303 if (local_softirq_pending()) 3303 if (local_softirq_pending())
3304 do_softirq(); 3304 do_softirq();
3305 preempt_enable(); 3305 preempt_enable();
3306 3306
3307 return err; 3307 return err;
3308 } 3308 }
3309 EXPORT_SYMBOL(netif_rx_ni); 3309 EXPORT_SYMBOL(netif_rx_ni);
3310 3310
3311 static void net_tx_action(struct softirq_action *h) 3311 static void net_tx_action(struct softirq_action *h)
3312 { 3312 {
3313 struct softnet_data *sd = &__get_cpu_var(softnet_data); 3313 struct softnet_data *sd = &__get_cpu_var(softnet_data);
3314 3314
3315 if (sd->completion_queue) { 3315 if (sd->completion_queue) {
3316 struct sk_buff *clist; 3316 struct sk_buff *clist;
3317 3317
3318 local_irq_disable(); 3318 local_irq_disable();
3319 clist = sd->completion_queue; 3319 clist = sd->completion_queue;
3320 sd->completion_queue = NULL; 3320 sd->completion_queue = NULL;
3321 local_irq_enable(); 3321 local_irq_enable();
3322 3322
3323 while (clist) { 3323 while (clist) {
3324 struct sk_buff *skb = clist; 3324 struct sk_buff *skb = clist;
3325 clist = clist->next; 3325 clist = clist->next;
3326 3326
3327 WARN_ON(atomic_read(&skb->users)); 3327 WARN_ON(atomic_read(&skb->users));
3328 if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED)) 3328 if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
3329 trace_consume_skb(skb); 3329 trace_consume_skb(skb);
3330 else 3330 else
3331 trace_kfree_skb(skb, net_tx_action); 3331 trace_kfree_skb(skb, net_tx_action);
3332 __kfree_skb(skb); 3332 __kfree_skb(skb);
3333 } 3333 }
3334 } 3334 }
3335 3335
3336 if (sd->output_queue) { 3336 if (sd->output_queue) {
3337 struct Qdisc *head; 3337 struct Qdisc *head;
3338 3338
3339 local_irq_disable(); 3339 local_irq_disable();
3340 head = sd->output_queue; 3340 head = sd->output_queue;
3341 sd->output_queue = NULL; 3341 sd->output_queue = NULL;
3342 sd->output_queue_tailp = &sd->output_queue; 3342 sd->output_queue_tailp = &sd->output_queue;
3343 local_irq_enable(); 3343 local_irq_enable();
3344 3344
3345 while (head) { 3345 while (head) {
3346 struct Qdisc *q = head; 3346 struct Qdisc *q = head;
3347 spinlock_t *root_lock; 3347 spinlock_t *root_lock;
3348 3348
3349 head = head->next_sched; 3349 head = head->next_sched;
3350 3350
3351 root_lock = qdisc_lock(q); 3351 root_lock = qdisc_lock(q);
3352 if (spin_trylock(root_lock)) { 3352 if (spin_trylock(root_lock)) {
3353 smp_mb__before_clear_bit(); 3353 smp_mb__before_clear_bit();
3354 clear_bit(__QDISC_STATE_SCHED, 3354 clear_bit(__QDISC_STATE_SCHED,
3355 &q->state); 3355 &q->state);
3356 qdisc_run(q); 3356 qdisc_run(q);
3357 spin_unlock(root_lock); 3357 spin_unlock(root_lock);
3358 } else { 3358 } else {
3359 if (!test_bit(__QDISC_STATE_DEACTIVATED, 3359 if (!test_bit(__QDISC_STATE_DEACTIVATED,
3360 &q->state)) { 3360 &q->state)) {
3361 __netif_reschedule(q); 3361 __netif_reschedule(q);
3362 } else { 3362 } else {
3363 smp_mb__before_clear_bit(); 3363 smp_mb__before_clear_bit();
3364 clear_bit(__QDISC_STATE_SCHED, 3364 clear_bit(__QDISC_STATE_SCHED,
3365 &q->state); 3365 &q->state);
3366 } 3366 }
3367 } 3367 }
3368 } 3368 }
3369 } 3369 }
3370 } 3370 }
3371 3371
3372 #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \ 3372 #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
3373 (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)) 3373 (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
3374 /* This hook is defined here for ATM LANE */ 3374 /* This hook is defined here for ATM LANE */
3375 int (*br_fdb_test_addr_hook)(struct net_device *dev, 3375 int (*br_fdb_test_addr_hook)(struct net_device *dev,
3376 unsigned char *addr) __read_mostly; 3376 unsigned char *addr) __read_mostly;
3377 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); 3377 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
3378 #endif 3378 #endif
3379 3379
3380 #ifdef CONFIG_NET_CLS_ACT 3380 #ifdef CONFIG_NET_CLS_ACT
3381 /* TODO: Maybe we should just force sch_ingress to be compiled in 3381 /* TODO: Maybe we should just force sch_ingress to be compiled in
3382 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions 3382 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
3383 * a compare and 2 stores extra right now if we dont have it on 3383 * a compare and 2 stores extra right now if we dont have it on
3384 * but have CONFIG_NET_CLS_ACT 3384 * but have CONFIG_NET_CLS_ACT
3385 * NOTE: This doesn't stop any functionality; if you dont have 3385 * NOTE: This doesn't stop any functionality; if you dont have
3386 * the ingress scheduler, you just can't add policies on ingress. 3386 * the ingress scheduler, you just can't add policies on ingress.
3387 * 3387 *
3388 */ 3388 */
3389 static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq) 3389 static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
3390 { 3390 {
3391 struct net_device *dev = skb->dev; 3391 struct net_device *dev = skb->dev;
3392 u32 ttl = G_TC_RTTL(skb->tc_verd); 3392 u32 ttl = G_TC_RTTL(skb->tc_verd);
3393 int result = TC_ACT_OK; 3393 int result = TC_ACT_OK;
3394 struct Qdisc *q; 3394 struct Qdisc *q;
3395 3395
3396 if (unlikely(MAX_RED_LOOP < ttl++)) { 3396 if (unlikely(MAX_RED_LOOP < ttl++)) {
3397 net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n", 3397 net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n",
3398 skb->skb_iif, dev->ifindex); 3398 skb->skb_iif, dev->ifindex);
3399 return TC_ACT_SHOT; 3399 return TC_ACT_SHOT;
3400 } 3400 }
3401 3401
3402 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); 3402 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
3403 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); 3403 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
3404 3404
3405 q = rxq->qdisc; 3405 q = rxq->qdisc;
3406 if (q != &noop_qdisc) { 3406 if (q != &noop_qdisc) {
3407 spin_lock(qdisc_lock(q)); 3407 spin_lock(qdisc_lock(q));
3408 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) 3408 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
3409 result = qdisc_enqueue_root(skb, q); 3409 result = qdisc_enqueue_root(skb, q);
3410 spin_unlock(qdisc_lock(q)); 3410 spin_unlock(qdisc_lock(q));
3411 } 3411 }
3412 3412
3413 return result; 3413 return result;
3414 } 3414 }
3415 3415
3416 static inline struct sk_buff *handle_ing(struct sk_buff *skb, 3416 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
3417 struct packet_type **pt_prev, 3417 struct packet_type **pt_prev,
3418 int *ret, struct net_device *orig_dev) 3418 int *ret, struct net_device *orig_dev)
3419 { 3419 {
3420 struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue); 3420 struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
3421 3421
3422 if (!rxq || rxq->qdisc == &noop_qdisc) 3422 if (!rxq || rxq->qdisc == &noop_qdisc)
3423 goto out; 3423 goto out;
3424 3424
3425 if (*pt_prev) { 3425 if (*pt_prev) {
3426 *ret = deliver_skb(skb, *pt_prev, orig_dev); 3426 *ret = deliver_skb(skb, *pt_prev, orig_dev);
3427 *pt_prev = NULL; 3427 *pt_prev = NULL;
3428 } 3428 }
3429 3429
3430 switch (ing_filter(skb, rxq)) { 3430 switch (ing_filter(skb, rxq)) {
3431 case TC_ACT_SHOT: 3431 case TC_ACT_SHOT:
3432 case TC_ACT_STOLEN: 3432 case TC_ACT_STOLEN:
3433 kfree_skb(skb); 3433 kfree_skb(skb);
3434 return NULL; 3434 return NULL;
3435 } 3435 }
3436 3436
3437 out: 3437 out:
3438 skb->tc_verd = 0; 3438 skb->tc_verd = 0;
3439 return skb; 3439 return skb;
3440 } 3440 }
3441 #endif 3441 #endif
3442 3442
3443 /** 3443 /**
3444 * netdev_rx_handler_register - register receive handler 3444 * netdev_rx_handler_register - register receive handler
3445 * @dev: device to register a handler for 3445 * @dev: device to register a handler for
3446 * @rx_handler: receive handler to register 3446 * @rx_handler: receive handler to register
3447 * @rx_handler_data: data pointer that is used by rx handler 3447 * @rx_handler_data: data pointer that is used by rx handler
3448 * 3448 *
3449 * Register a receive hander for a device. This handler will then be 3449 * Register a receive hander for a device. This handler will then be
3450 * called from __netif_receive_skb. A negative errno code is returned 3450 * called from __netif_receive_skb. A negative errno code is returned
3451 * on a failure. 3451 * on a failure.
3452 * 3452 *
3453 * The caller must hold the rtnl_mutex. 3453 * The caller must hold the rtnl_mutex.
3454 * 3454 *
3455 * For a general description of rx_handler, see enum rx_handler_result. 3455 * For a general description of rx_handler, see enum rx_handler_result.
3456 */ 3456 */
3457 int netdev_rx_handler_register(struct net_device *dev, 3457 int netdev_rx_handler_register(struct net_device *dev,
3458 rx_handler_func_t *rx_handler, 3458 rx_handler_func_t *rx_handler,
3459 void *rx_handler_data) 3459 void *rx_handler_data)
3460 { 3460 {
3461 ASSERT_RTNL(); 3461 ASSERT_RTNL();
3462 3462
3463 if (dev->rx_handler) 3463 if (dev->rx_handler)
3464 return -EBUSY; 3464 return -EBUSY;
3465 3465
3466 /* Note: rx_handler_data must be set before rx_handler */ 3466 /* Note: rx_handler_data must be set before rx_handler */
3467 rcu_assign_pointer(dev->rx_handler_data, rx_handler_data); 3467 rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
3468 rcu_assign_pointer(dev->rx_handler, rx_handler); 3468 rcu_assign_pointer(dev->rx_handler, rx_handler);
3469 3469
3470 return 0; 3470 return 0;
3471 } 3471 }
3472 EXPORT_SYMBOL_GPL(netdev_rx_handler_register); 3472 EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
3473 3473
3474 /** 3474 /**
3475 * netdev_rx_handler_unregister - unregister receive handler 3475 * netdev_rx_handler_unregister - unregister receive handler
3476 * @dev: device to unregister a handler from 3476 * @dev: device to unregister a handler from
3477 * 3477 *
3478 * Unregister a receive handler from a device. 3478 * Unregister a receive handler from a device.
3479 * 3479 *
3480 * The caller must hold the rtnl_mutex. 3480 * The caller must hold the rtnl_mutex.
3481 */ 3481 */
3482 void netdev_rx_handler_unregister(struct net_device *dev) 3482 void netdev_rx_handler_unregister(struct net_device *dev)
3483 { 3483 {
3484 3484
3485 ASSERT_RTNL(); 3485 ASSERT_RTNL();
3486 RCU_INIT_POINTER(dev->rx_handler, NULL); 3486 RCU_INIT_POINTER(dev->rx_handler, NULL);
3487 /* a reader seeing a non NULL rx_handler in a rcu_read_lock() 3487 /* a reader seeing a non NULL rx_handler in a rcu_read_lock()
3488 * section has a guarantee to see a non NULL rx_handler_data 3488 * section has a guarantee to see a non NULL rx_handler_data
3489 * as well. 3489 * as well.
3490 */ 3490 */
3491 synchronize_net(); 3491 synchronize_net();
3492 RCU_INIT_POINTER(dev->rx_handler_data, NULL); 3492 RCU_INIT_POINTER(dev->rx_handler_data, NULL);
3493 } 3493 }
3494 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); 3494 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
3495 3495
3496 /* 3496 /*
3497 * Limit the use of PFMEMALLOC reserves to those protocols that implement 3497 * Limit the use of PFMEMALLOC reserves to those protocols that implement
3498 * the special handling of PFMEMALLOC skbs. 3498 * the special handling of PFMEMALLOC skbs.
3499 */ 3499 */
3500 static bool skb_pfmemalloc_protocol(struct sk_buff *skb) 3500 static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
3501 { 3501 {
3502 switch (skb->protocol) { 3502 switch (skb->protocol) {
3503 case __constant_htons(ETH_P_ARP): 3503 case __constant_htons(ETH_P_ARP):
3504 case __constant_htons(ETH_P_IP): 3504 case __constant_htons(ETH_P_IP):
3505 case __constant_htons(ETH_P_IPV6): 3505 case __constant_htons(ETH_P_IPV6):
3506 case __constant_htons(ETH_P_8021Q): 3506 case __constant_htons(ETH_P_8021Q):
3507 case __constant_htons(ETH_P_8021AD): 3507 case __constant_htons(ETH_P_8021AD):
3508 return true; 3508 return true;
3509 default: 3509 default:
3510 return false; 3510 return false;
3511 } 3511 }
3512 } 3512 }
3513 3513
3514 static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) 3514 static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
3515 { 3515 {
3516 struct packet_type *ptype, *pt_prev; 3516 struct packet_type *ptype, *pt_prev;
3517 rx_handler_func_t *rx_handler; 3517 rx_handler_func_t *rx_handler;
3518 struct net_device *orig_dev; 3518 struct net_device *orig_dev;
3519 struct net_device *null_or_dev; 3519 struct net_device *null_or_dev;
3520 bool deliver_exact = false; 3520 bool deliver_exact = false;
3521 int ret = NET_RX_DROP; 3521 int ret = NET_RX_DROP;
3522 __be16 type; 3522 __be16 type;
3523 3523
3524 net_timestamp_check(!netdev_tstamp_prequeue, skb); 3524 net_timestamp_check(!netdev_tstamp_prequeue, skb);
3525 3525
3526 trace_netif_receive_skb(skb); 3526 trace_netif_receive_skb(skb);
3527 3527
3528 /* if we've gotten here through NAPI, check netpoll */ 3528 /* if we've gotten here through NAPI, check netpoll */
3529 if (netpoll_receive_skb(skb)) 3529 if (netpoll_receive_skb(skb))
3530 goto out; 3530 goto out;
3531 3531
3532 orig_dev = skb->dev; 3532 orig_dev = skb->dev;
3533 3533
3534 skb_reset_network_header(skb); 3534 skb_reset_network_header(skb);
3535 if (!skb_transport_header_was_set(skb)) 3535 if (!skb_transport_header_was_set(skb))
3536 skb_reset_transport_header(skb); 3536 skb_reset_transport_header(skb);
3537 skb_reset_mac_len(skb); 3537 skb_reset_mac_len(skb);
3538 3538
3539 pt_prev = NULL; 3539 pt_prev = NULL;
3540 3540
3541 rcu_read_lock(); 3541 rcu_read_lock();
3542 3542
3543 another_round: 3543 another_round:
3544 skb->skb_iif = skb->dev->ifindex; 3544 skb->skb_iif = skb->dev->ifindex;
3545 3545
3546 __this_cpu_inc(softnet_data.processed); 3546 __this_cpu_inc(softnet_data.processed);
3547 3547
3548 if (skb->protocol == cpu_to_be16(ETH_P_8021Q) || 3548 if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
3549 skb->protocol == cpu_to_be16(ETH_P_8021AD)) { 3549 skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
3550 skb = vlan_untag(skb); 3550 skb = vlan_untag(skb);
3551 if (unlikely(!skb)) 3551 if (unlikely(!skb))
3552 goto unlock; 3552 goto unlock;
3553 } 3553 }
3554 3554
3555 #ifdef CONFIG_NET_CLS_ACT 3555 #ifdef CONFIG_NET_CLS_ACT
3556 if (skb->tc_verd & TC_NCLS) { 3556 if (skb->tc_verd & TC_NCLS) {
3557 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); 3557 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
3558 goto ncls; 3558 goto ncls;
3559 } 3559 }
3560 #endif 3560 #endif
3561 3561
3562 if (pfmemalloc) 3562 if (pfmemalloc)
3563 goto skip_taps; 3563 goto skip_taps;
3564 3564
3565 list_for_each_entry_rcu(ptype, &ptype_all, list) { 3565 list_for_each_entry_rcu(ptype, &ptype_all, list) {
3566 if (!ptype->dev || ptype->dev == skb->dev) { 3566 if (!ptype->dev || ptype->dev == skb->dev) {
3567 if (pt_prev) 3567 if (pt_prev)
3568 ret = deliver_skb(skb, pt_prev, orig_dev); 3568 ret = deliver_skb(skb, pt_prev, orig_dev);
3569 pt_prev = ptype; 3569 pt_prev = ptype;
3570 } 3570 }
3571 } 3571 }
3572 3572
3573 skip_taps: 3573 skip_taps:
3574 #ifdef CONFIG_NET_CLS_ACT 3574 #ifdef CONFIG_NET_CLS_ACT
3575 skb = handle_ing(skb, &pt_prev, &ret, orig_dev); 3575 skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
3576 if (!skb) 3576 if (!skb)
3577 goto unlock; 3577 goto unlock;
3578 ncls: 3578 ncls:
3579 #endif 3579 #endif
3580 3580
3581 if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) 3581 if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
3582 goto drop; 3582 goto drop;
3583 3583
3584 if (vlan_tx_tag_present(skb)) { 3584 if (vlan_tx_tag_present(skb)) {
3585 if (pt_prev) { 3585 if (pt_prev) {
3586 ret = deliver_skb(skb, pt_prev, orig_dev); 3586 ret = deliver_skb(skb, pt_prev, orig_dev);
3587 pt_prev = NULL; 3587 pt_prev = NULL;
3588 } 3588 }
3589 if (vlan_do_receive(&skb)) 3589 if (vlan_do_receive(&skb))
3590 goto another_round; 3590 goto another_round;
3591 else if (unlikely(!skb)) 3591 else if (unlikely(!skb))
3592 goto unlock; 3592 goto unlock;
3593 } 3593 }
3594 3594
3595 rx_handler = rcu_dereference(skb->dev->rx_handler); 3595 rx_handler = rcu_dereference(skb->dev->rx_handler);
3596 if (rx_handler) { 3596 if (rx_handler) {
3597 if (pt_prev) { 3597 if (pt_prev) {
3598 ret = deliver_skb(skb, pt_prev, orig_dev); 3598 ret = deliver_skb(skb, pt_prev, orig_dev);
3599 pt_prev = NULL; 3599 pt_prev = NULL;
3600 } 3600 }
3601 switch (rx_handler(&skb)) { 3601 switch (rx_handler(&skb)) {
3602 case RX_HANDLER_CONSUMED: 3602 case RX_HANDLER_CONSUMED:
3603 ret = NET_RX_SUCCESS; 3603 ret = NET_RX_SUCCESS;
3604 goto unlock; 3604 goto unlock;
3605 case RX_HANDLER_ANOTHER: 3605 case RX_HANDLER_ANOTHER:
3606 goto another_round; 3606 goto another_round;
3607 case RX_HANDLER_EXACT: 3607 case RX_HANDLER_EXACT:
3608 deliver_exact = true; 3608 deliver_exact = true;
3609 case RX_HANDLER_PASS: 3609 case RX_HANDLER_PASS:
3610 break; 3610 break;
3611 default: 3611 default:
3612 BUG(); 3612 BUG();
3613 } 3613 }
3614 } 3614 }
3615 3615
3616 if (unlikely(vlan_tx_tag_present(skb))) { 3616 if (unlikely(vlan_tx_tag_present(skb))) {
3617 if (vlan_tx_tag_get_id(skb)) 3617 if (vlan_tx_tag_get_id(skb))
3618 skb->pkt_type = PACKET_OTHERHOST; 3618 skb->pkt_type = PACKET_OTHERHOST;
3619 /* Note: we might in the future use prio bits 3619 /* Note: we might in the future use prio bits
3620 * and set skb->priority like in vlan_do_receive() 3620 * and set skb->priority like in vlan_do_receive()
3621 * For the time being, just ignore Priority Code Point 3621 * For the time being, just ignore Priority Code Point
3622 */ 3622 */
3623 skb->vlan_tci = 0; 3623 skb->vlan_tci = 0;
3624 } 3624 }
3625 3625
3626 /* deliver only exact match when indicated */ 3626 /* deliver only exact match when indicated */
3627 null_or_dev = deliver_exact ? skb->dev : NULL; 3627 null_or_dev = deliver_exact ? skb->dev : NULL;
3628 3628
3629 type = skb->protocol; 3629 type = skb->protocol;
3630 list_for_each_entry_rcu(ptype, 3630 list_for_each_entry_rcu(ptype,
3631 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { 3631 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
3632 if (ptype->type == type && 3632 if (ptype->type == type &&
3633 (ptype->dev == null_or_dev || ptype->dev == skb->dev || 3633 (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
3634 ptype->dev == orig_dev)) { 3634 ptype->dev == orig_dev)) {
3635 if (pt_prev) 3635 if (pt_prev)
3636 ret = deliver_skb(skb, pt_prev, orig_dev); 3636 ret = deliver_skb(skb, pt_prev, orig_dev);
3637 pt_prev = ptype; 3637 pt_prev = ptype;
3638 } 3638 }
3639 } 3639 }
3640 3640
3641 if (pt_prev) { 3641 if (pt_prev) {
3642 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) 3642 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
3643 goto drop; 3643 goto drop;
3644 else 3644 else
3645 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); 3645 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
3646 } else { 3646 } else {
3647 drop: 3647 drop:
3648 atomic_long_inc(&skb->dev->rx_dropped); 3648 atomic_long_inc(&skb->dev->rx_dropped);
3649 kfree_skb(skb); 3649 kfree_skb(skb);
3650 /* Jamal, now you will not able to escape explaining 3650 /* Jamal, now you will not able to escape explaining
3651 * me how you were going to use this. :-) 3651 * me how you were going to use this. :-)
3652 */ 3652 */
3653 ret = NET_RX_DROP; 3653 ret = NET_RX_DROP;
3654 } 3654 }
3655 3655
3656 unlock: 3656 unlock:
3657 rcu_read_unlock(); 3657 rcu_read_unlock();
3658 out: 3658 out:
3659 return ret; 3659 return ret;
3660 } 3660 }
3661 3661
3662 static int __netif_receive_skb(struct sk_buff *skb) 3662 static int __netif_receive_skb(struct sk_buff *skb)
3663 { 3663 {
3664 int ret; 3664 int ret;
3665 3665
3666 if (sk_memalloc_socks() && skb_pfmemalloc(skb)) { 3666 if (sk_memalloc_socks() && skb_pfmemalloc(skb)) {
3667 unsigned long pflags = current->flags; 3667 unsigned long pflags = current->flags;
3668 3668
3669 /* 3669 /*
3670 * PFMEMALLOC skbs are special, they should 3670 * PFMEMALLOC skbs are special, they should
3671 * - be delivered to SOCK_MEMALLOC sockets only 3671 * - be delivered to SOCK_MEMALLOC sockets only
3672 * - stay away from userspace 3672 * - stay away from userspace
3673 * - have bounded memory usage 3673 * - have bounded memory usage
3674 * 3674 *
3675 * Use PF_MEMALLOC as this saves us from propagating the allocation 3675 * Use PF_MEMALLOC as this saves us from propagating the allocation
3676 * context down to all allocation sites. 3676 * context down to all allocation sites.
3677 */ 3677 */
3678 current->flags |= PF_MEMALLOC; 3678 current->flags |= PF_MEMALLOC;
3679 ret = __netif_receive_skb_core(skb, true); 3679 ret = __netif_receive_skb_core(skb, true);
3680 tsk_restore_flags(current, pflags, PF_MEMALLOC); 3680 tsk_restore_flags(current, pflags, PF_MEMALLOC);
3681 } else 3681 } else
3682 ret = __netif_receive_skb_core(skb, false); 3682 ret = __netif_receive_skb_core(skb, false);
3683 3683
3684 return ret; 3684 return ret;
3685 } 3685 }
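/*
 * The PF_MEMALLOC handling above follows a common save/set/restore
 * pattern: remember the caller's flag, force it on for the critical
 * section, then put back only that one bit.  Reduced sketch with a
 * plain flags word; names are made up and this is not the kernel's
 * tsk_restore_flags() implementation.
 */
#define F_MEMALLOC 0x1u

static unsigned int task_flags;

static int with_memalloc(int (*fn)(void *), void *arg)
{
        unsigned int saved = task_flags;
        int ret;

        task_flags |= F_MEMALLOC;        /* allow dipping into reserves */
        ret = fn(arg);
        /* restore only the F_MEMALLOC bit, leave other bits untouched */
        task_flags = (task_flags & ~F_MEMALLOC) | (saved & F_MEMALLOC);
        return ret;
}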
3686 3686
3687 static int netif_receive_skb_internal(struct sk_buff *skb) 3687 static int netif_receive_skb_internal(struct sk_buff *skb)
3688 { 3688 {
3689 net_timestamp_check(netdev_tstamp_prequeue, skb); 3689 net_timestamp_check(netdev_tstamp_prequeue, skb);
3690 3690
3691 if (skb_defer_rx_timestamp(skb)) 3691 if (skb_defer_rx_timestamp(skb))
3692 return NET_RX_SUCCESS; 3692 return NET_RX_SUCCESS;
3693 3693
3694 #ifdef CONFIG_RPS 3694 #ifdef CONFIG_RPS
3695 if (static_key_false(&rps_needed)) { 3695 if (static_key_false(&rps_needed)) {
3696 struct rps_dev_flow voidflow, *rflow = &voidflow; 3696 struct rps_dev_flow voidflow, *rflow = &voidflow;
3697 int cpu, ret; 3697 int cpu, ret;
3698 3698
3699 rcu_read_lock(); 3699 rcu_read_lock();
3700 3700
3701 cpu = get_rps_cpu(skb->dev, skb, &rflow); 3701 cpu = get_rps_cpu(skb->dev, skb, &rflow);
3702 3702
3703 if (cpu >= 0) { 3703 if (cpu >= 0) {
3704 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); 3704 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3705 rcu_read_unlock(); 3705 rcu_read_unlock();
3706 return ret; 3706 return ret;
3707 } 3707 }
3708 rcu_read_unlock(); 3708 rcu_read_unlock();
3709 } 3709 }
3710 #endif 3710 #endif
3711 return __netif_receive_skb(skb); 3711 return __netif_receive_skb(skb);
3712 } 3712 }
3713 3713
3714 /** 3714 /**
3715 * netif_receive_skb - process receive buffer from network 3715 * netif_receive_skb - process receive buffer from network
3716 * @skb: buffer to process 3716 * @skb: buffer to process
3717 * 3717 *
3718 * netif_receive_skb() is the main receive data processing function. 3718 * netif_receive_skb() is the main receive data processing function.
3719 * It always succeeds. The buffer may be dropped during processing 3719 * It always succeeds. The buffer may be dropped during processing
3720 * for congestion control or by the protocol layers. 3720 * for congestion control or by the protocol layers.
3721 * 3721 *
3722 * This function may only be called from softirq context and interrupts 3722 * This function may only be called from softirq context and interrupts
3723 * should be enabled. 3723 * should be enabled.
3724 * 3724 *
3725 * Return values (usually ignored): 3725 * Return values (usually ignored):
3726 * NET_RX_SUCCESS: no congestion 3726 * NET_RX_SUCCESS: no congestion
3727 * NET_RX_DROP: packet was dropped 3727 * NET_RX_DROP: packet was dropped
3728 */ 3728 */
3729 int netif_receive_skb(struct sk_buff *skb) 3729 int netif_receive_skb(struct sk_buff *skb)
3730 { 3730 {
3731 trace_netif_receive_skb_entry(skb); 3731 trace_netif_receive_skb_entry(skb);
3732 3732
3733 return netif_receive_skb_internal(skb); 3733 return netif_receive_skb_internal(skb);
3734 } 3734 }
3735 EXPORT_SYMBOL(netif_receive_skb); 3735 EXPORT_SYMBOL(netif_receive_skb);
3736 3736
3737 /* Network device is going away, flush any packets still pending 3737 /* Network device is going away, flush any packets still pending
3738 * Called with irqs disabled. 3738 * Called with irqs disabled.
3739 */ 3739 */
3740 static void flush_backlog(void *arg) 3740 static void flush_backlog(void *arg)
3741 { 3741 {
3742 struct net_device *dev = arg; 3742 struct net_device *dev = arg;
3743 struct softnet_data *sd = &__get_cpu_var(softnet_data); 3743 struct softnet_data *sd = &__get_cpu_var(softnet_data);
3744 struct sk_buff *skb, *tmp; 3744 struct sk_buff *skb, *tmp;
3745 3745
3746 rps_lock(sd); 3746 rps_lock(sd);
3747 skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { 3747 skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
3748 if (skb->dev == dev) { 3748 if (skb->dev == dev) {
3749 __skb_unlink(skb, &sd->input_pkt_queue); 3749 __skb_unlink(skb, &sd->input_pkt_queue);
3750 kfree_skb(skb); 3750 kfree_skb(skb);
3751 input_queue_head_incr(sd); 3751 input_queue_head_incr(sd);
3752 } 3752 }
3753 } 3753 }
3754 rps_unlock(sd); 3754 rps_unlock(sd);
3755 3755
3756 skb_queue_walk_safe(&sd->process_queue, skb, tmp) { 3756 skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
3757 if (skb->dev == dev) { 3757 if (skb->dev == dev) {
3758 __skb_unlink(skb, &sd->process_queue); 3758 __skb_unlink(skb, &sd->process_queue);
3759 kfree_skb(skb); 3759 kfree_skb(skb);
3760 input_queue_head_incr(sd); 3760 input_queue_head_incr(sd);
3761 } 3761 }
3762 } 3762 }
3763 } 3763 }
3764 3764
3765 static int napi_gro_complete(struct sk_buff *skb) 3765 static int napi_gro_complete(struct sk_buff *skb)
3766 { 3766 {
3767 struct packet_offload *ptype; 3767 struct packet_offload *ptype;
3768 __be16 type = skb->protocol; 3768 __be16 type = skb->protocol;
3769 struct list_head *head = &offload_base; 3769 struct list_head *head = &offload_base;
3770 int err = -ENOENT; 3770 int err = -ENOENT;
3771 3771
3772 BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb)); 3772 BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
3773 3773
3774 if (NAPI_GRO_CB(skb)->count == 1) { 3774 if (NAPI_GRO_CB(skb)->count == 1) {
3775 skb_shinfo(skb)->gso_size = 0; 3775 skb_shinfo(skb)->gso_size = 0;
3776 goto out; 3776 goto out;
3777 } 3777 }
3778 3778
3779 rcu_read_lock(); 3779 rcu_read_lock();
3780 list_for_each_entry_rcu(ptype, head, list) { 3780 list_for_each_entry_rcu(ptype, head, list) {
3781 if (ptype->type != type || !ptype->callbacks.gro_complete) 3781 if (ptype->type != type || !ptype->callbacks.gro_complete)
3782 continue; 3782 continue;
3783 3783
3784 err = ptype->callbacks.gro_complete(skb, 0); 3784 err = ptype->callbacks.gro_complete(skb, 0);
3785 break; 3785 break;
3786 } 3786 }
3787 rcu_read_unlock(); 3787 rcu_read_unlock();
3788 3788
3789 if (err) { 3789 if (err) {
3790 WARN_ON(&ptype->list == head); 3790 WARN_ON(&ptype->list == head);
3791 kfree_skb(skb); 3791 kfree_skb(skb);
3792 return NET_RX_SUCCESS; 3792 return NET_RX_SUCCESS;
3793 } 3793 }
3794 3794
3795 out: 3795 out:
3796 return netif_receive_skb_internal(skb); 3796 return netif_receive_skb_internal(skb);
3797 } 3797 }
3798 3798
3799 /* napi->gro_list contains packets ordered by age. 3799 /* napi->gro_list contains packets ordered by age.
3800 * youngest packets at the head of it. 3800 * youngest packets at the head of it.
3801 * Complete skbs in reverse order to reduce latencies. 3801 * Complete skbs in reverse order to reduce latencies.
3802 */ 3802 */
3803 void napi_gro_flush(struct napi_struct *napi, bool flush_old) 3803 void napi_gro_flush(struct napi_struct *napi, bool flush_old)
3804 { 3804 {
3805 struct sk_buff *skb, *prev = NULL; 3805 struct sk_buff *skb, *prev = NULL;
3806 3806
3807 /* scan list and build reverse chain */ 3807 /* scan list and build reverse chain */
3808 for (skb = napi->gro_list; skb != NULL; skb = skb->next) { 3808 for (skb = napi->gro_list; skb != NULL; skb = skb->next) {
3809 skb->prev = prev; 3809 skb->prev = prev;
3810 prev = skb; 3810 prev = skb;
3811 } 3811 }
3812 3812
3813 for (skb = prev; skb; skb = prev) { 3813 for (skb = prev; skb; skb = prev) {
3814 skb->next = NULL; 3814 skb->next = NULL;
3815 3815
3816 if (flush_old && NAPI_GRO_CB(skb)->age == jiffies) 3816 if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
3817 return; 3817 return;
3818 3818
3819 prev = skb->prev; 3819 prev = skb->prev;
3820 napi_gro_complete(skb); 3820 napi_gro_complete(skb);
3821 napi->gro_count--; 3821 napi->gro_count--;
3822 } 3822 }
3823 3823
3824 napi->gro_list = NULL; 3824 napi->gro_list = NULL;
3825 } 3825 }
3826 EXPORT_SYMBOL(napi_gro_flush); 3826 EXPORT_SYMBOL(napi_gro_flush);
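/*
 * Stand-alone sketch of the two-pass walk used by napi_gro_flush()
 * above: thread a back pointer through a singly linked list, then
 * visit it from the tail so the oldest entries are handled first.
 * The node type and visit() callback are illustrative only.
 */
struct node {
        struct node *next;
        struct node *prev;      /* filled in by the first pass */
};

static void visit_oldest_first(struct node *head,
                               void (*visit)(struct node *))
{
        struct node *n, *prev = NULL;

        /* pass 1: record each node's predecessor */
        for (n = head; n; n = n->next) {
                n->prev = prev;
                prev = n;
        }

        /* pass 2: walk back from the tail, detaching as we go */
        for (n = prev; n; n = prev) {
                prev = n->prev;
                n->next = NULL;
                visit(n);
        }
}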
3827 3827
3828 static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) 3828 static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
3829 { 3829 {
3830 struct sk_buff *p; 3830 struct sk_buff *p;
3831 unsigned int maclen = skb->dev->hard_header_len; 3831 unsigned int maclen = skb->dev->hard_header_len;
3832 u32 hash = skb_get_hash_raw(skb); 3832 u32 hash = skb_get_hash_raw(skb);
3833 3833
3834 for (p = napi->gro_list; p; p = p->next) { 3834 for (p = napi->gro_list; p; p = p->next) {
3835 unsigned long diffs; 3835 unsigned long diffs;
3836 3836
3837 NAPI_GRO_CB(p)->flush = 0; 3837 NAPI_GRO_CB(p)->flush = 0;
3838 3838
3839 if (hash != skb_get_hash_raw(p)) { 3839 if (hash != skb_get_hash_raw(p)) {
3840 NAPI_GRO_CB(p)->same_flow = 0; 3840 NAPI_GRO_CB(p)->same_flow = 0;
3841 continue; 3841 continue;
3842 } 3842 }
3843 3843
3844 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; 3844 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
3845 diffs |= p->vlan_tci ^ skb->vlan_tci; 3845 diffs |= p->vlan_tci ^ skb->vlan_tci;
3846 if (maclen == ETH_HLEN) 3846 if (maclen == ETH_HLEN)
3847 diffs |= compare_ether_header(skb_mac_header(p), 3847 diffs |= compare_ether_header(skb_mac_header(p),
3848 skb_gro_mac_header(skb)); 3848 skb_gro_mac_header(skb));
3849 else if (!diffs) 3849 else if (!diffs)
3850 diffs = memcmp(skb_mac_header(p), 3850 diffs = memcmp(skb_mac_header(p),
3851 skb_gro_mac_header(skb), 3851 skb_gro_mac_header(skb),
3852 maclen); 3852 maclen);
3853 NAPI_GRO_CB(p)->same_flow = !diffs; 3853 NAPI_GRO_CB(p)->same_flow = !diffs;
3854 } 3854 }
3855 } 3855 }
3856 3856
3857 static void skb_gro_reset_offset(struct sk_buff *skb) 3857 static void skb_gro_reset_offset(struct sk_buff *skb)
3858 { 3858 {
3859 const struct skb_shared_info *pinfo = skb_shinfo(skb); 3859 const struct skb_shared_info *pinfo = skb_shinfo(skb);
3860 const skb_frag_t *frag0 = &pinfo->frags[0]; 3860 const skb_frag_t *frag0 = &pinfo->frags[0];
3861 3861
3862 NAPI_GRO_CB(skb)->data_offset = 0; 3862 NAPI_GRO_CB(skb)->data_offset = 0;
3863 NAPI_GRO_CB(skb)->frag0 = NULL; 3863 NAPI_GRO_CB(skb)->frag0 = NULL;
3864 NAPI_GRO_CB(skb)->frag0_len = 0; 3864 NAPI_GRO_CB(skb)->frag0_len = 0;
3865 3865
3866 if (skb_mac_header(skb) == skb_tail_pointer(skb) && 3866 if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
3867 pinfo->nr_frags && 3867 pinfo->nr_frags &&
3868 !PageHighMem(skb_frag_page(frag0))) { 3868 !PageHighMem(skb_frag_page(frag0))) {
3869 NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); 3869 NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
3870 NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0); 3870 NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
3871 } 3871 }
3872 } 3872 }
3873 3873
3874 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 3874 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3875 { 3875 {
3876 struct sk_buff **pp = NULL; 3876 struct sk_buff **pp = NULL;
3877 struct packet_offload *ptype; 3877 struct packet_offload *ptype;
3878 __be16 type = skb->protocol; 3878 __be16 type = skb->protocol;
3879 struct list_head *head = &offload_base; 3879 struct list_head *head = &offload_base;
3880 int same_flow; 3880 int same_flow;
3881 enum gro_result ret; 3881 enum gro_result ret;
3882 3882
3883 if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) 3883 if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
3884 goto normal; 3884 goto normal;
3885 3885
3886 if (skb_is_gso(skb) || skb_has_frag_list(skb)) 3886 if (skb_is_gso(skb) || skb_has_frag_list(skb))
3887 goto normal; 3887 goto normal;
3888 3888
3889 skb_gro_reset_offset(skb); 3889 skb_gro_reset_offset(skb);
3890 gro_list_prepare(napi, skb); 3890 gro_list_prepare(napi, skb);
3891 NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */ 3891 NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */
3892 3892
3893 rcu_read_lock(); 3893 rcu_read_lock();
3894 list_for_each_entry_rcu(ptype, head, list) { 3894 list_for_each_entry_rcu(ptype, head, list) {
3895 if (ptype->type != type || !ptype->callbacks.gro_receive) 3895 if (ptype->type != type || !ptype->callbacks.gro_receive)
3896 continue; 3896 continue;
3897 3897
3898 skb_set_network_header(skb, skb_gro_offset(skb)); 3898 skb_set_network_header(skb, skb_gro_offset(skb));
3899 skb_reset_mac_len(skb); 3899 skb_reset_mac_len(skb);
3900 NAPI_GRO_CB(skb)->same_flow = 0; 3900 NAPI_GRO_CB(skb)->same_flow = 0;
3901 NAPI_GRO_CB(skb)->flush = 0; 3901 NAPI_GRO_CB(skb)->flush = 0;
3902 NAPI_GRO_CB(skb)->free = 0; 3902 NAPI_GRO_CB(skb)->free = 0;
3903 NAPI_GRO_CB(skb)->udp_mark = 0; 3903 NAPI_GRO_CB(skb)->udp_mark = 0;
3904 3904
3905 pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); 3905 pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
3906 break; 3906 break;
3907 } 3907 }
3908 rcu_read_unlock(); 3908 rcu_read_unlock();
3909 3909
3910 if (&ptype->list == head) 3910 if (&ptype->list == head)
3911 goto normal; 3911 goto normal;
3912 3912
3913 same_flow = NAPI_GRO_CB(skb)->same_flow; 3913 same_flow = NAPI_GRO_CB(skb)->same_flow;
3914 ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED; 3914 ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
3915 3915
3916 if (pp) { 3916 if (pp) {
3917 struct sk_buff *nskb = *pp; 3917 struct sk_buff *nskb = *pp;
3918 3918
3919 *pp = nskb->next; 3919 *pp = nskb->next;
3920 nskb->next = NULL; 3920 nskb->next = NULL;
3921 napi_gro_complete(nskb); 3921 napi_gro_complete(nskb);
3922 napi->gro_count--; 3922 napi->gro_count--;
3923 } 3923 }
3924 3924
3925 if (same_flow) 3925 if (same_flow)
3926 goto ok; 3926 goto ok;
3927 3927
3928 if (NAPI_GRO_CB(skb)->flush) 3928 if (NAPI_GRO_CB(skb)->flush)
3929 goto normal; 3929 goto normal;
3930 3930
3931 if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) { 3931 if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) {
3932 struct sk_buff *nskb = napi->gro_list; 3932 struct sk_buff *nskb = napi->gro_list;
3933 3933
3934 /* locate the end of the list to select the 'oldest' flow */ 3934 /* locate the end of the list to select the 'oldest' flow */
3935 while (nskb->next) { 3935 while (nskb->next) {
3936 pp = &nskb->next; 3936 pp = &nskb->next;
3937 nskb = *pp; 3937 nskb = *pp;
3938 } 3938 }
3939 *pp = NULL; 3939 *pp = NULL;
3940 nskb->next = NULL; 3940 nskb->next = NULL;
3941 napi_gro_complete(nskb); 3941 napi_gro_complete(nskb);
3942 } else { 3942 } else {
3943 napi->gro_count++; 3943 napi->gro_count++;
3944 } 3944 }
3945 NAPI_GRO_CB(skb)->count = 1; 3945 NAPI_GRO_CB(skb)->count = 1;
3946 NAPI_GRO_CB(skb)->age = jiffies; 3946 NAPI_GRO_CB(skb)->age = jiffies;
3947 NAPI_GRO_CB(skb)->last = skb;
3947 skb_shinfo(skb)->gso_size = skb_gro_len(skb); 3948 skb_shinfo(skb)->gso_size = skb_gro_len(skb);
3948 skb->next = napi->gro_list; 3949 skb->next = napi->gro_list;
3949 napi->gro_list = skb; 3950 napi->gro_list = skb;
3950 ret = GRO_HELD; 3951 ret = GRO_HELD;
3951 3952
3952 pull: 3953 pull:
3953 if (skb_headlen(skb) < skb_gro_offset(skb)) { 3954 if (skb_headlen(skb) < skb_gro_offset(skb)) {
3954 int grow = skb_gro_offset(skb) - skb_headlen(skb); 3955 int grow = skb_gro_offset(skb) - skb_headlen(skb);
3955 3956
3956 BUG_ON(skb->end - skb->tail < grow); 3957 BUG_ON(skb->end - skb->tail < grow);
3957 3958
3958 memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow); 3959 memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
3959 3960
3960 skb->tail += grow; 3961 skb->tail += grow;
3961 skb->data_len -= grow; 3962 skb->data_len -= grow;
3962 3963
3963 skb_shinfo(skb)->frags[0].page_offset += grow; 3964 skb_shinfo(skb)->frags[0].page_offset += grow;
3964 skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow); 3965 skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow);
3965 3966
3966 if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) { 3967 if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) {
3967 skb_frag_unref(skb, 0); 3968 skb_frag_unref(skb, 0);
3968 memmove(skb_shinfo(skb)->frags, 3969 memmove(skb_shinfo(skb)->frags,
3969 skb_shinfo(skb)->frags + 1, 3970 skb_shinfo(skb)->frags + 1,
3970 --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); 3971 --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
3971 } 3972 }
3972 } 3973 }
3973 3974
3974 ok: 3975 ok:
3975 return ret; 3976 return ret;
3976 3977
3977 normal: 3978 normal:
3978 ret = GRO_NORMAL; 3979 ret = GRO_NORMAL;
3979 goto pull; 3980 goto pull;
3980 } 3981 }
3981 3982
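Note on the hunk above: the single added line (new line 3947) initializes NAPI_GRO_CB(skb)->last to the skb itself at the moment the packet is held on napi->gro_list, instead of relying on skb->cb[] still being zero when a later same-flow packet is merged. Below is a minimal stand-alone sketch of that tail-pointer idea; the struct and function names (seg, gro_hold, gro_merge) are illustrative only and are not kernel code.

/*
 * Stand-alone illustration (not kernel code) of the "->last" tail pointer
 * initialized by the added line above: the head of a GRO flow caches the
 * tail of its segment chain so every merge is O(1) and never dereferences
 * stale cb[] contents.
 */
#include <stdio.h>

struct seg {
	int len;
	struct seg *next;	/* models the frag_list chain of the held skb */
	struct seg *last;	/* models NAPI_GRO_CB(skb)->last */
};

/* Holding a new flow on gro_list: ->last must point at the head itself. */
static void gro_hold(struct seg *head)
{
	head->next = NULL;
	head->last = head;	/* the initialization this patch adds */
}

/* Merging a same-flow segment: append through the cached tail. */
static void gro_merge(struct seg *head, struct seg *p)
{
	p->next = NULL;
	head->last->next = p;	/* would chase garbage if ->last were stale */
	head->last = p;
	head->len += p->len;
}

int main(void)
{
	struct seg head = { .len = 100 }, s1 = { .len = 200 }, s2 = { .len = 300 };
	struct seg *p;
	int n = 0;

	gro_hold(&head);
	gro_merge(&head, &s1);
	gro_merge(&head, &s2);
	for (p = &head; p; p = p->next)
		n++;
	printf("merged len=%d across %d segments\n", head.len, n);
	return 0;
}
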
3982 struct packet_offload *gro_find_receive_by_type(__be16 type) 3983 struct packet_offload *gro_find_receive_by_type(__be16 type)
3983 { 3984 {
3984 struct list_head *offload_head = &offload_base; 3985 struct list_head *offload_head = &offload_base;
3985 struct packet_offload *ptype; 3986 struct packet_offload *ptype;
3986 3987
3987 list_for_each_entry_rcu(ptype, offload_head, list) { 3988 list_for_each_entry_rcu(ptype, offload_head, list) {
3988 if (ptype->type != type || !ptype->callbacks.gro_receive) 3989 if (ptype->type != type || !ptype->callbacks.gro_receive)
3989 continue; 3990 continue;
3990 return ptype; 3991 return ptype;
3991 } 3992 }
3992 return NULL; 3993 return NULL;
3993 } 3994 }
3994 EXPORT_SYMBOL(gro_find_receive_by_type); 3995 EXPORT_SYMBOL(gro_find_receive_by_type);
3995 3996
3996 struct packet_offload *gro_find_complete_by_type(__be16 type) 3997 struct packet_offload *gro_find_complete_by_type(__be16 type)
3997 { 3998 {
3998 struct list_head *offload_head = &offload_base; 3999 struct list_head *offload_head = &offload_base;
3999 struct packet_offload *ptype; 4000 struct packet_offload *ptype;
4000 4001
4001 list_for_each_entry_rcu(ptype, offload_head, list) { 4002 list_for_each_entry_rcu(ptype, offload_head, list) {
4002 if (ptype->type != type || !ptype->callbacks.gro_complete) 4003 if (ptype->type != type || !ptype->callbacks.gro_complete)
4003 continue; 4004 continue;
4004 return ptype; 4005 return ptype;
4005 } 4006 }
4006 return NULL; 4007 return NULL;
4007 } 4008 }
4008 EXPORT_SYMBOL(gro_find_complete_by_type); 4009 EXPORT_SYMBOL(gro_find_complete_by_type);
4009 4010
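The two lookup helpers above let encapsulation offloads find the inner protocol's GRO callbacks by packet type under RCU. A hypothetical tunnel gro_receive using that pattern might look like the sketch below; the function name, the ETH_P_TEB inner type and the omitted header handling are assumptions for illustration, not part of this file.

/*
 * Hypothetical tunnel gro_receive (illustrative, not from this patch):
 * look up the inner protocol's offload by type and delegate to it.
 */
static struct sk_buff **example_tunnel_gro_receive(struct sk_buff **head,
						   struct sk_buff *skb)
{
	struct packet_offload *ptype;
	struct sk_buff **pp = NULL;
	__be16 type = htons(ETH_P_TEB);		/* inner type, illustrative */

	rcu_read_lock();
	ptype = gro_find_receive_by_type(type);
	if (!ptype)
		goto out_unlock;

	/* Real code would skb_gro_pull() the tunnel header and adjust the
	 * checksum here before delegating to the inner handler.
	 */
	pp = ptype->callbacks.gro_receive(head, skb);

out_unlock:
	rcu_read_unlock();
	return pp;
}
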
4010 static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) 4011 static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
4011 { 4012 {
4012 switch (ret) { 4013 switch (ret) {
4013 case GRO_NORMAL: 4014 case GRO_NORMAL:
4014 if (netif_receive_skb_internal(skb)) 4015 if (netif_receive_skb_internal(skb))
4015 ret = GRO_DROP; 4016 ret = GRO_DROP;
4016 break; 4017 break;
4017 4018
4018 case GRO_DROP: 4019 case GRO_DROP:
4019 kfree_skb(skb); 4020 kfree_skb(skb);
4020 break; 4021 break;
4021 4022
4022 case GRO_MERGED_FREE: 4023 case GRO_MERGED_FREE:
4023 if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) 4024 if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
4024 kmem_cache_free(skbuff_head_cache, skb); 4025 kmem_cache_free(skbuff_head_cache, skb);
4025 else 4026 else
4026 __kfree_skb(skb); 4027 __kfree_skb(skb);
4027 break; 4028 break;
4028 4029
4029 case GRO_HELD: 4030 case GRO_HELD:
4030 case GRO_MERGED: 4031 case GRO_MERGED:
4031 break; 4032 break;
4032 } 4033 }
4033 4034
4034 return ret; 4035 return ret;
4035 } 4036 }
4036 4037
4037 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 4038 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
4038 { 4039 {
4039 trace_napi_gro_receive_entry(skb); 4040 trace_napi_gro_receive_entry(skb);
4040 4041
4041 return napi_skb_finish(dev_gro_receive(napi, skb), skb); 4042 return napi_skb_finish(dev_gro_receive(napi, skb), skb);
4042 } 4043 }
4043 EXPORT_SYMBOL(napi_gro_receive); 4044 EXPORT_SYMBOL(napi_gro_receive);
4044 4045
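napi_gro_receive() is the entry point drivers call from their NAPI poll handler instead of netif_receive_skb() when they want GRO. A hedged driver-side sketch follows; example_rx_pending() and example_build_skb() are placeholders for device-specific ring handling.

/*
 * Hedged driver-side sketch of the usual call site (not from this patch);
 * the example_* helpers are placeholders, everything else is the real API.
 */
static int example_poll(struct napi_struct *napi, int budget)
{
	int work = 0;

	while (work < budget && example_rx_pending(napi->dev)) {
		struct sk_buff *skb = example_build_skb(napi->dev);

		if (!skb)
			break;
		skb->protocol = eth_type_trans(skb, napi->dev);
		napi_gro_receive(napi, skb);	/* merge, hold or deliver */
		work++;
	}

	if (work < budget)
		napi_complete(napi);
	return work;
}
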
4045 static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) 4046 static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
4046 { 4047 {
4047 __skb_pull(skb, skb_headlen(skb)); 4048 __skb_pull(skb, skb_headlen(skb));
4048 /* restore the reserve we had after netdev_alloc_skb_ip_align() */ 4049 /* restore the reserve we had after netdev_alloc_skb_ip_align() */
4049 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb)); 4050 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
4050 skb->vlan_tci = 0; 4051 skb->vlan_tci = 0;
4051 skb->dev = napi->dev; 4052 skb->dev = napi->dev;
4052 skb->skb_iif = 0; 4053 skb->skb_iif = 0;
4053 4054
4054 napi->skb = skb; 4055 napi->skb = skb;
4055 } 4056 }
4056 4057
4057 struct sk_buff *napi_get_frags(struct napi_struct *napi) 4058 struct sk_buff *napi_get_frags(struct napi_struct *napi)
4058 { 4059 {
4059 struct sk_buff *skb = napi->skb; 4060 struct sk_buff *skb = napi->skb;
4060 4061
4061 if (!skb) { 4062 if (!skb) {
4062 skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD); 4063 skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
4063 napi->skb = skb; 4064 napi->skb = skb;
4064 } 4065 }
4065 return skb; 4066 return skb;
4066 } 4067 }
4067 EXPORT_SYMBOL(napi_get_frags); 4068 EXPORT_SYMBOL(napi_get_frags);
4068 4069
4069 static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, 4070 static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
4070 gro_result_t ret) 4071 gro_result_t ret)
4071 { 4072 {
4072 switch (ret) { 4073 switch (ret) {
4073 case GRO_NORMAL: 4074 case GRO_NORMAL:
4074 if (netif_receive_skb_internal(skb)) 4075 if (netif_receive_skb_internal(skb))
4075 ret = GRO_DROP; 4076 ret = GRO_DROP;
4076 break; 4077 break;
4077 4078
4078 case GRO_DROP: 4079 case GRO_DROP:
4079 case GRO_MERGED_FREE: 4080 case GRO_MERGED_FREE:
4080 napi_reuse_skb(napi, skb); 4081 napi_reuse_skb(napi, skb);
4081 break; 4082 break;
4082 4083
4083 case GRO_HELD: 4084 case GRO_HELD:
4084 case GRO_MERGED: 4085 case GRO_MERGED:
4085 break; 4086 break;
4086 } 4087 }
4087 4088
4088 return ret; 4089 return ret;
4089 } 4090 }
4090 4091
4091 static struct sk_buff *napi_frags_skb(struct napi_struct *napi) 4092 static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
4092 { 4093 {
4093 struct sk_buff *skb = napi->skb; 4094 struct sk_buff *skb = napi->skb;
4094 4095
4095 napi->skb = NULL; 4096 napi->skb = NULL;
4096 4097
4097 if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) { 4098 if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) {
4098 napi_reuse_skb(napi, skb); 4099 napi_reuse_skb(napi, skb);
4099 return NULL; 4100 return NULL;
4100 } 4101 }
4101 skb->protocol = eth_type_trans(skb, skb->dev); 4102 skb->protocol = eth_type_trans(skb, skb->dev);
4102 4103
4103 return skb; 4104 return skb;
4104 } 4105 }
4105 4106
4106 gro_result_t napi_gro_frags(struct napi_struct *napi) 4107 gro_result_t napi_gro_frags(struct napi_struct *napi)
4107 { 4108 {
4108 struct sk_buff *skb = napi_frags_skb(napi); 4109 struct sk_buff *skb = napi_frags_skb(napi);
4109 4110
4110 if (!skb) 4111 if (!skb)
4111 return GRO_DROP; 4112 return GRO_DROP;
4112 4113
4113 trace_napi_gro_frags_entry(skb); 4114 trace_napi_gro_frags_entry(skb);
4114 4115
4115 return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb)); 4116 return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
4116 } 4117 }
4117 EXPORT_SYMBOL(napi_gro_frags); 4118 EXPORT_SYMBOL(napi_gro_frags);
4118 4119
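napi_get_frags()/napi_gro_frags() are the page-fragment variant: the driver attaches fragments to the skb cached in napi->skb, napi_frags_skb() pulls the Ethernet header and sets skb->protocol itself, and napi_frags_finish() recycles the skb via napi_reuse_skb() on GRO_DROP/GRO_MERGED_FREE. A hedged sketch of a driver feeding one fragment, with placeholder page/len arguments:

/*
 * Hedged sketch of the frags-based receive path (not from this patch):
 * the driver only attaches fragments, the core recovers the MAC header.
 */
static void example_rx_one_frag(struct napi_struct *napi,
				struct page *page, unsigned int len)
{
	struct sk_buff *skb = napi_get_frags(napi);

	if (unlikely(!skb))
		return;		/* allocation failed; drop or retry later */

	skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, page, 0, len);
	skb->len += len;
	skb->data_len += len;
	skb->truesize += PAGE_SIZE;

	napi_gro_frags(napi);	/* consumes or recycles napi->skb */
}
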
4119 /* 4120 /*
4120 * net_rps_action_and_irq_enable sends any pending IPI's for rps. 4121 * net_rps_action_and_irq_enable sends any pending IPI's for rps.
4121 * Note: called with local irq disabled, but exits with local irq enabled. 4122 * Note: called with local irq disabled, but exits with local irq enabled.
4122 */ 4123 */
4123 static void net_rps_action_and_irq_enable(struct softnet_data *sd) 4124 static void net_rps_action_and_irq_enable(struct softnet_data *sd)
4124 { 4125 {
4125 #ifdef CONFIG_RPS 4126 #ifdef CONFIG_RPS
4126 struct softnet_data *remsd = sd->rps_ipi_list; 4127 struct softnet_data *remsd = sd->rps_ipi_list;
4127 4128
4128 if (remsd) { 4129 if (remsd) {
4129 sd->rps_ipi_list = NULL; 4130 sd->rps_ipi_list = NULL;
4130 4131
4131 local_irq_enable(); 4132 local_irq_enable();
4132 4133
4133 /* Send pending IPI's to kick RPS processing on remote cpus. */ 4134 /* Send pending IPI's to kick RPS processing on remote cpus. */
4134 while (remsd) { 4135 while (remsd) {
4135 struct softnet_data *next = remsd->rps_ipi_next; 4136 struct softnet_data *next = remsd->rps_ipi_next;
4136 4137
4137 if (cpu_online(remsd->cpu)) 4138 if (cpu_online(remsd->cpu))
4138 __smp_call_function_single(remsd->cpu, 4139 __smp_call_function_single(remsd->cpu,
4139 &remsd->csd, 0); 4140 &remsd->csd, 0);
4140 remsd = next; 4141 remsd = next;
4141 } 4142 }
4142 } else 4143 } else
4143 #endif 4144 #endif
4144 local_irq_enable(); 4145 local_irq_enable();
4145 } 4146 }
4146 4147
4147 static int process_backlog(struct napi_struct *napi, int quota) 4148 static int process_backlog(struct napi_struct *napi, int quota)
4148 { 4149 {
4149 int work = 0; 4150 int work = 0;
4150 struct softnet_data *sd = container_of(napi, struct softnet_data, backlog); 4151 struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
4151 4152
4152 #ifdef CONFIG_RPS 4153 #ifdef CONFIG_RPS
4153 /* Check if we have pending ipi, it's better to send them now, 4154 /* Check if we have pending ipi, it's better to send them now,
4154 * rather than waiting for net_rx_action() to end. 4155 * rather than waiting for net_rx_action() to end.
4155 */ 4156 */
4156 if (sd->rps_ipi_list) { 4157 if (sd->rps_ipi_list) {
4157 local_irq_disable(); 4158 local_irq_disable();
4158 net_rps_action_and_irq_enable(sd); 4159 net_rps_action_and_irq_enable(sd);
4159 } 4160 }
4160 #endif 4161 #endif
4161 napi->weight = weight_p; 4162 napi->weight = weight_p;
4162 local_irq_disable(); 4163 local_irq_disable();
4163 while (work < quota) { 4164 while (work < quota) {
4164 struct sk_buff *skb; 4165 struct sk_buff *skb;
4165 unsigned int qlen; 4166 unsigned int qlen;
4166 4167
4167 while ((skb = __skb_dequeue(&sd->process_queue))) { 4168 while ((skb = __skb_dequeue(&sd->process_queue))) {
4168 local_irq_enable(); 4169 local_irq_enable();
4169 __netif_receive_skb(skb); 4170 __netif_receive_skb(skb);
4170 local_irq_disable(); 4171 local_irq_disable();
4171 input_queue_head_incr(sd); 4172 input_queue_head_incr(sd);
4172 if (++work >= quota) { 4173 if (++work >= quota) {
4173 local_irq_enable(); 4174 local_irq_enable();
4174 return work; 4175 return work;
4175 } 4176 }
4176 } 4177 }
4177 4178
4178 rps_lock(sd); 4179 rps_lock(sd);
4179 qlen = skb_queue_len(&sd->input_pkt_queue); 4180 qlen = skb_queue_len(&sd->input_pkt_queue);
4180 if (qlen) 4181 if (qlen)
4181 skb_queue_splice_tail_init(&sd->input_pkt_queue, 4182 skb_queue_splice_tail_init(&sd->input_pkt_queue,
4182 &sd->process_queue); 4183 &sd->process_queue);
4183 4184
4184 if (qlen < quota - work) { 4185 if (qlen < quota - work) {
4185 /* 4186 /*
4186 * Inline a custom version of __napi_complete(). 4187 * Inline a custom version of __napi_complete().
4187 * Only the current cpu owns and manipulates this napi, 4188 * Only the current cpu owns and manipulates this napi,
4188 * and NAPI_STATE_SCHED is the only possible flag set on backlog. 4189 * and NAPI_STATE_SCHED is the only possible flag set on backlog.
4189 * We can use a plain write instead of clear_bit(), 4190 * We can use a plain write instead of clear_bit(),
4190 * and we don't need an smp_mb() memory barrier. 4191 * and we don't need an smp_mb() memory barrier.
4191 */ 4192 */
4192 list_del(&napi->poll_list); 4193 list_del(&napi->poll_list);
4193 napi->state = 0; 4194 napi->state = 0;
4194 4195
4195 quota = work + qlen; 4196 quota = work + qlen;
4196 } 4197 }
4197 rps_unlock(sd); 4198 rps_unlock(sd);
4198 } 4199 }
4199 local_irq_enable(); 4200 local_irq_enable();
4200 4201
4201 return work; 4202 return work;
4202 } 4203 }
4203 4204
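process_backlog() drains at most `quota` packets per run, splicing input_pkt_queue into process_queue under rps_lock() so the hot dequeue loop runs without the lock. The stand-alone sketch below models just that two-queue, quota-bounded drain; the names are illustrative and none of it is kernel code.

/*
 * Stand-alone illustration of the two-queue drain in process_backlog():
 * producers fill "input" under a lock, the consumer splices it into a
 * private "process" list and works lock-free up to its quota.
 */
#include <stdio.h>

struct pkt { struct pkt *next; };
struct queue { struct pkt *head, *tail; };

static void q_splice_tail_init(struct queue *from, struct queue *to)
{
	if (!from->head)
		return;
	if (to->tail)
		to->tail->next = from->head;
	else
		to->head = from->head;
	to->tail = from->tail;
	from->head = from->tail = NULL;
}

static struct pkt *q_dequeue(struct queue *q)
{
	struct pkt *p = q->head;

	if (p) {
		q->head = p->next;
		if (!q->head)
			q->tail = NULL;
	}
	return p;
}

static int backlog_poll(struct queue *input, struct queue *process, int quota)
{
	int work = 0;

	while (work < quota) {
		struct pkt *p;

		while ((p = q_dequeue(process))) {
			/* __netif_receive_skb(p) would run here, lock-free */
			if (++work >= quota)
				return work;
		}
		/* rps_lock(sd) would protect this splice in the kernel */
		if (!input->head)
			break;		/* nothing left: napi would complete */
		q_splice_tail_init(input, process);
	}
	return work;
}

int main(void)
{
	struct pkt p[5] = { { &p[1] }, { &p[2] }, { &p[3] }, { &p[4] }, { NULL } };
	struct queue input = { &p[0], &p[4] }, process = { NULL, NULL };

	printf("processed %d packets\n", backlog_poll(&input, &process, 3));
	return 0;
}
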
4204 /** 4205 /**
4205 * __napi_schedule - schedule for receive 4206 * __napi_schedule - schedule for receive
4206 * @n: entry to schedule 4207 * @n: entry to schedule
4207 * 4208 *
4208 * The entry's receive function will be scheduled to run 4209 * The entry's receive function will be scheduled to run
4209 */ 4210 */
4210 void __napi_schedule(struct napi_struct *n) 4211 void __napi_schedule(struct napi_struct *n)
4211 { 4212 {
4212 unsigned long flags; 4213 unsigned long flags;
4213 4214
4214 local_irq_save(flags); 4215 local_irq_save(flags);
4215 ____napi_schedule(&__get_cpu_var(softnet_data), n); 4216 ____napi_schedule(&__get_cpu_var(softnet_data), n);
4216 local_irq_restore(flags); 4217 local_irq_restore(flags);
4217 } 4218 }
4218 EXPORT_SYMBOL(__napi_schedule); 4219 EXPORT_SYMBOL(__napi_schedule);
4219 4220
4220 void __napi_complete(struct napi_struct *n) 4221 void __napi_complete(struct napi_struct *n)
4221 { 4222 {
4222 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); 4223 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
4223 BUG_ON(n->gro_list); 4224 BUG_ON(n->gro_list);
4224 4225
4225 list_del(&n->poll_list); 4226 list_del(&n->poll_list);
4226 smp_mb__before_clear_bit(); 4227 smp_mb__before_clear_bit();
4227 clear_bit(NAPI_STATE_SCHED, &n->state); 4228 clear_bit(NAPI_STATE_SCHED, &n->state);
4228 } 4229 }
4229 EXPORT_SYMBOL(__napi_complete); 4230 EXPORT_SYMBOL(__napi_complete);
4230 4231
4231 void napi_complete(struct napi_struct *n) 4232 void napi_complete(struct napi_struct *n)
4232 { 4233 {
4233 unsigned long flags; 4234 unsigned long flags;
4234 4235
4235 /* 4236 /*
4236 * don't let napi dequeue from the cpu poll list 4237 * don't let napi dequeue from the cpu poll list
4237 * just in case its running on a different cpu 4238 * just in case its running on a different cpu
4238 */ 4239 */
4239 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state))) 4240 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
4240 return; 4241 return;
4241 4242
4242 napi_gro_flush(n, false); 4243 napi_gro_flush(n, false);
4243 local_irq_save(flags); 4244 local_irq_save(flags);
4244 __napi_complete(n); 4245 __napi_complete(n);
4245 local_irq_restore(flags); 4246 local_irq_restore(flags);
4246 } 4247 }
4247 EXPORT_SYMBOL(napi_complete); 4248 EXPORT_SYMBOL(napi_complete);
4248 4249
4249 /* must be called under rcu_read_lock(), as we don't take a reference */ 4250 /* must be called under rcu_read_lock(), as we don't take a reference */
4250 struct napi_struct *napi_by_id(unsigned int napi_id) 4251 struct napi_struct *napi_by_id(unsigned int napi_id)
4251 { 4252 {
4252 unsigned int hash = napi_id % HASH_SIZE(napi_hash); 4253 unsigned int hash = napi_id % HASH_SIZE(napi_hash);
4253 struct napi_struct *napi; 4254 struct napi_struct *napi;
4254 4255
4255 hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node) 4256 hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node)
4256 if (napi->napi_id == napi_id) 4257 if (napi->napi_id == napi_id)
4257 return napi; 4258 return napi;
4258 4259
4259 return NULL; 4260 return NULL;
4260 } 4261 }
4261 EXPORT_SYMBOL_GPL(napi_by_id); 4262 EXPORT_SYMBOL_GPL(napi_by_id);
4262 4263
4263 void napi_hash_add(struct napi_struct *napi) 4264 void napi_hash_add(struct napi_struct *napi)
4264 { 4265 {
4265 if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) { 4266 if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) {
4266 4267
4267 spin_lock(&napi_hash_lock); 4268 spin_lock(&napi_hash_lock);
4268 4269
4269 /* 0 is not a valid id; we also skip an id that is already taken. 4270 /* 0 is not a valid id; we also skip an id that is already taken.
4270 * We expect both events to be extremely rare. 4271 * We expect both events to be extremely rare.
4271 */ 4272 */
4272 napi->napi_id = 0; 4273 napi->napi_id = 0;
4273 while (!napi->napi_id) { 4274 while (!napi->napi_id) {
4274 napi->napi_id = ++napi_gen_id; 4275 napi->napi_id = ++napi_gen_id;
4275 if (napi_by_id(napi->napi_id)) 4276 if (napi_by_id(napi->napi_id))
4276 napi->napi_id = 0; 4277 napi->napi_id = 0;
4277 } 4278 }
4278 4279
4279 hlist_add_head_rcu(&napi->napi_hash_node, 4280 hlist_add_head_rcu(&napi->napi_hash_node,
4280 &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); 4281 &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
4281 4282
4282 spin_unlock(&napi_hash_lock); 4283 spin_unlock(&napi_hash_lock);
4283 } 4284 }
4284 } 4285 }
4285 EXPORT_SYMBOL_GPL(napi_hash_add); 4286 EXPORT_SYMBOL_GPL(napi_hash_add);
4286 4287
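napi_hash_add() allocates a non-zero napi_id by bumping napi_gen_id and re-rolling on zero or collision before hashing the instance for napi_by_id() lookups. A tiny stand-alone sketch of that "skip 0 and taken ids" allocation, with illustrative names only:

/*
 * Stand-alone sketch (not kernel code): 0 is reserved as "no id" and a
 * taken id is simply skipped, mirroring the loop in napi_hash_add().
 */
#include <stdbool.h>
#include <stdio.h>

#define ID_SPACE 8

static bool id_taken[ID_SPACE];
static unsigned int id_gen;

static unsigned int alloc_id(void)
{
	unsigned int id = 0;

	while (!id) {
		id = ++id_gen % ID_SPACE;	/* wraps, like ++napi_gen_id */
		if (id_taken[id])
			id = 0;			/* collision: keep searching */
	}
	id_taken[id] = true;
	return id;
}

int main(void)
{
	id_taken[2] = true;			/* pretend id 2 is in use */
	printf("%u %u %u\n", alloc_id(), alloc_id(), alloc_id());
	return 0;
}
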
4287 /* Warning : the caller is responsible for making sure an rcu grace period 4288 /* Warning : the caller is responsible for making sure an rcu grace period
4288 * has elapsed before freeing memory containing @napi 4289 * has elapsed before freeing memory containing @napi
4289 */ 4290 */
4290 void napi_hash_del(struct napi_struct *napi) 4291 void napi_hash_del(struct napi_struct *napi)
4291 { 4292 {
4292 spin_lock(&napi_hash_lock); 4293 spin_lock(&napi_hash_lock);
4293 4294
4294 if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) 4295 if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state))
4295 hlist_del_rcu(&napi->napi_hash_node); 4296 hlist_del_rcu(&napi->napi_hash_node);
4296 4297
4297 spin_unlock(&napi_hash_lock); 4298 spin_unlock(&napi_hash_lock);
4298 } 4299 }
4299 EXPORT_SYMBOL_GPL(napi_hash_del); 4300 EXPORT_SYMBOL_GPL(napi_hash_del);
4300 4301
4301 void netif_napi_add(struct net_device *dev, struct napi_struct *napi, 4302 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
4302 int (*poll)(struct napi_struct *, int), int weight) 4303 int (*poll)(struct napi_struct *, int), int weight)
4303 { 4304 {
4304 INIT_LIST_HEAD(&napi->poll_list); 4305 INIT_LIST_HEAD(&napi->poll_list);
4305 napi->gro_count = 0; 4306 napi->gro_count = 0;
4306 napi->gro_list = NULL; 4307 napi->gro_list = NULL;
4307 napi->skb = NULL; 4308 napi->skb = NULL;
4308 napi->poll = poll; 4309 napi->poll = poll;
4309 if (weight > NAPI_POLL_WEIGHT) 4310 if (weight > NAPI_POLL_WEIGHT)
4310 pr_err_once("netif_napi_add() called with weight %d on device %s\n", 4311 pr_err_once("netif_napi_add() called with weight %d on device %s\n",
4311 weight, dev->name); 4312 weight, dev->name);
4312 napi->weight = weight; 4313 napi->weight = weight;
4313 list_add(&napi->dev_list, &dev->napi_list); 4314 list_add(&napi->dev_list, &dev->napi_list);
4314 napi->dev = dev; 4315 napi->dev = dev;
4315 #ifdef CONFIG_NETPOLL 4316 #ifdef CONFIG_NETPOLL
4316 spin_lock_init(&napi->poll_lock); 4317 spin_lock_init(&napi->poll_lock);
4317 napi->poll_owner = -1; 4318 napi->poll_owner = -1;
4318 #endif 4319 #endif
4319 set_bit(NAPI_STATE_SCHED, &napi->state); 4320 set_bit(NAPI_STATE_SCHED, &napi->state);
4320 } 4321 }
4321 EXPORT_SYMBOL(netif_napi_add); 4322 EXPORT_SYMBOL(netif_napi_add);
4322 4323
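netif_napi_add() registers the poll handler with NAPI_STATE_SCHED already set, so the instance cannot be polled until the driver calls napi_enable(). A hedged sketch of the usual driver lifecycle around it follows; example_priv/example_isr are placeholders and example_poll stands for a poll handler like the sketch shown after napi_gro_receive() above.

/*
 * Hedged sketch of the usual driver lifecycle around netif_napi_add()
 * (placeholder example_* names, not from this patch).
 */
struct example_priv {
	struct net_device *dev;
	struct napi_struct napi;
};

static int example_probe_napi(struct example_priv *priv)
{
	/* Register the poll handler; NAPI_POLL_WEIGHT (64) is the default. */
	netif_napi_add(priv->dev, &priv->napi, example_poll, NAPI_POLL_WEIGHT);
	napi_enable(&priv->napi);	/* clears NAPI_STATE_SCHED set above */
	return 0;
}

static irqreturn_t example_isr(int irq, void *data)
{
	struct example_priv *priv = data;

	/* Mask device RX interrupts here, then defer work to softirq context. */
	if (napi_schedule_prep(&priv->napi))
		__napi_schedule(&priv->napi);
	return IRQ_HANDLED;
}
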
4323 void netif_napi_del(struct napi_struct *napi) 4324 void netif_napi_del(struct napi_struct *napi)
4324 { 4325 {
4325 list_del_init(&napi->dev_list); 4326 list_del_init(&napi->dev_list);
4326 napi_free_frags(napi); 4327 napi_free_frags(napi);
4327 4328
4328 kfree_skb_list(napi->gro_list); 4329 kfree_skb_list(napi->gro_list);
4329 napi->gro_list = NULL; 4330 napi->gro_list = NULL;
4330 napi->gro_count = 0; 4331 napi->gro_count = 0;
4331 } 4332 }
4332 EXPORT_SYMBOL(netif_napi_del); 4333 EXPORT_SYMBOL(netif_napi_del);
4333 4334
4334 static void net_rx_action(struct softirq_action *h) 4335 static void net_rx_action(struct softirq_action *h)
4335 { 4336 {
4336 struct softnet_data *sd = &__get_cpu_var(softnet_data); 4337 struct softnet_data *sd = &__get_cpu_var(softnet_data);
4337 unsigned long time_limit = jiffies + 2; 4338 unsigned long time_limit = jiffies + 2;
4338 int budget = netdev_budget; 4339 int budget = netdev_budget;
4339 void *have; 4340 void *have;
4340 4341
4341 local_irq_disable(); 4342 local_irq_disable();
4342 4343
4343 while (!list_empty(&sd->poll_list)) { 4344 while (!list_empty(&sd->poll_list)) {
4344 struct napi_struct *n; 4345 struct napi_struct *n;
4345 int work, weight; 4346 int work, weight;
4346 4347
4347 /* If the softirq window is exhausted then punt. 4348 /* If the softirq window is exhausted then punt.
4348 * Allow this to run for 2 jiffies, which will allow 4349 * Allow this to run for 2 jiffies, which will allow
4349 * an average latency of 1.5/HZ. 4350 * an average latency of 1.5/HZ.
4350 */ 4351 */
4351 if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit))) 4352 if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit)))
4352 goto softnet_break; 4353 goto softnet_break;
4353 4354
4354 local_irq_enable(); 4355 local_irq_enable();
4355 4356
4356 /* Even though interrupts have been re-enabled, this 4357 /* Even though interrupts have been re-enabled, this
4357 * access is safe because interrupts can only add new 4358 * access is safe because interrupts can only add new
4358 * entries to the tail of this list, and only ->poll() 4359 * entries to the tail of this list, and only ->poll()
4359 * calls can remove this head entry from the list. 4360 * calls can remove this head entry from the list.
4360 */ 4361 */
4361 n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list); 4362 n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
4362 4363
4363 have = netpoll_poll_lock(n); 4364 have = netpoll_poll_lock(n);
4364 4365
4365 weight = n->weight; 4366 weight = n->weight;
4366 4367
4367 /* This NAPI_STATE_SCHED test is for avoiding a race 4368 /* This NAPI_STATE_SCHED test is for avoiding a race
4368 * with netpoll's poll_napi(). Only the entity which 4369 * with netpoll's poll_napi(). Only the entity which
4369 * obtains the lock and sees NAPI_STATE_SCHED set will 4370 * obtains the lock and sees NAPI_STATE_SCHED set will
4370 * actually make the ->poll() call. Therefore we avoid 4371 * actually make the ->poll() call. Therefore we avoid
4371 * accidentally calling ->poll() when NAPI is not scheduled. 4372 * accidentally calling ->poll() when NAPI is not scheduled.
4372 */ 4373 */
4373 work = 0; 4374 work = 0;
4374 if (test_bit(NAPI_STATE_SCHED, &n->state)) { 4375 if (test_bit(NAPI_STATE_SCHED, &n->state)) {
4375 work = n->poll(n, weight); 4376 work = n->poll(n, weight);
4376 trace_napi_poll(n); 4377 trace_napi_poll(n);
4377 } 4378 }
4378 4379
4379 WARN_ON_ONCE(work > weight); 4380 WARN_ON_ONCE(work > weight);
4380 4381
4381 budget -= work; 4382 budget -= work;
4382 4383
4383 local_irq_disable(); 4384 local_irq_disable();
4384 4385
4385 /* Drivers must not modify the NAPI state if they 4386 /* Drivers must not modify the NAPI state if they
4386 * consume the entire weight. In such cases this code 4387 * consume the entire weight. In such cases this code
4387 * still "owns" the NAPI instance and therefore can 4388 * still "owns" the NAPI instance and therefore can
4388 * move the instance around on the list at-will. 4389 * move the instance around on the list at-will.
4389 */ 4390 */
4390 if (unlikely(work == weight)) { 4391 if (unlikely(work == weight)) {
4391 if (unlikely(napi_disable_pending(n))) { 4392 if (unlikely(napi_disable_pending(n))) {
4392 local_irq_enable(); 4393 local_irq_enable();
4393 napi_complete(n); 4394 napi_complete(n);
4394 local_irq_disable(); 4395 local_irq_disable();
4395 } else { 4396 } else {
4396 if (n->gro_list) { 4397 if (n->gro_list) {
4397 /* flush too old packets 4398 /* flush too old packets
4398 * If HZ < 1000, flush all packets. 4399 * If HZ < 1000, flush all packets.
4399 */ 4400 */
4400 local_irq_enable(); 4401 local_irq_enable();
4401 napi_gro_flush(n, HZ >= 1000); 4402 napi_gro_flush(n, HZ >= 1000);
4402 local_irq_disable(); 4403 local_irq_disable();
4403 } 4404 }
4404 list_move_tail(&n->poll_list, &sd->poll_list); 4405 list_move_tail(&n->poll_list, &sd->poll_list);
4405 } 4406 }
4406 } 4407 }
4407 4408
4408 netpoll_poll_unlock(have); 4409 netpoll_poll_unlock(have);
4409 } 4410 }
4410 out: 4411 out:
4411 net_rps_action_and_irq_enable(sd); 4412 net_rps_action_and_irq_enable(sd);
4412 4413
4413 #ifdef CONFIG_NET_DMA 4414 #ifdef CONFIG_NET_DMA
4414 /* 4415 /*
4415 * There may not be any more sk_buffs coming right now, so push 4416 * There may not be any more sk_buffs coming right now, so push
4416 * any pending DMA copies to hardware 4417 * any pending DMA copies to hardware
4417 */ 4418 */
4418 dma_issue_pending_all(); 4419 dma_issue_pending_all();
4419 #endif 4420 #endif
4420 4421
4421 return; 4422 return;
4422 4423
4423 softnet_break: 4424 softnet_break:
4424 sd->time_squeeze++; 4425 sd->time_squeeze++;
4425 __raise_softirq_irqoff(NET_RX_SOFTIRQ); 4426 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
4426 goto out; 4427 goto out;
4427 } 4428 }
4428 4429
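net_rx_action() walks the per-cpu poll list under two limits, the netdev_budget packet budget and a 2-jiffy deadline, and rotates any instance that consumed its whole weight to the tail of the list. The stand-alone sketch below models only that scheduling shape, with illustrative names and an integer counter standing in for jiffies.

/*
 * Stand-alone illustration (not kernel code) of net_rx_action(): a FIFO of
 * scheduled pollers, a global budget, and rotation of any poller that used
 * its full weight.
 */
#include <stdio.h>

struct poller {
	const char *name;
	int weight;
	int backlog;
	struct poller *next;
};

static int poll_one(struct poller *p)
{
	int work = p->backlog < p->weight ? p->backlog : p->weight;

	p->backlog -= work;
	return work;
}

static void rx_action(struct poller *head, int budget, int time_left)
{
	struct poller *tail = head;

	while (tail && tail->next)
		tail = tail->next;

	while (head) {
		struct poller *p = head;
		int work;

		if (budget <= 0 || time_left-- <= 0) {
			printf("softnet_break (work left on %s)\n", p->name);
			return;			/* kernel: re-raise NET_RX_SOFTIRQ */
		}

		head = p->next;
		p->next = NULL;
		work = poll_one(p);
		budget -= work;

		if (work == p->weight) {	/* not done: move to list tail */
			if (head)
				tail->next = p;
			else
				head = p;
			tail = p;
		}				/* else: poller called napi_complete() */
	}
	printf("poll list drained\n");
}

int main(void)
{
	struct poller b = { "ethB", 64, 10, NULL };
	struct poller a = { "ethA", 64, 100, &b };

	rx_action(&a, 300, 16);
	return 0;
}
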
4429 struct netdev_adjacent { 4430 struct netdev_adjacent {
4430 struct net_device *dev; 4431 struct net_device *dev;
4431 4432
4432 /* upper master flag, there can only be one master device per list */ 4433 /* upper master flag, there can only be one master device per list */
4433 bool master; 4434 bool master;
4434 4435
4435 /* counter for the number of times this device was added to us */ 4436 /* counter for the number of times this device was added to us */
4436 u16 ref_nr; 4437 u16 ref_nr;
4437 4438
4438 /* private field for the users */ 4439 /* private field for the users */
4439 void *private; 4440 void *private;
4440 4441
4441 struct list_head list; 4442 struct list_head list;
4442 struct rcu_head rcu; 4443 struct rcu_head rcu;
4443 }; 4444 };
4444 4445
4445 static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev, 4446 static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
4446 struct net_device *adj_dev, 4447 struct net_device *adj_dev,
4447 struct list_head *adj_list) 4448 struct list_head *adj_list)
4448 { 4449 {
4449 struct netdev_adjacent *adj; 4450 struct netdev_adjacent *adj;
4450 4451
4451 list_for_each_entry(adj, adj_list, list) { 4452 list_for_each_entry(adj, adj_list, list) {
4452 if (adj->dev == adj_dev) 4453 if (adj->dev == adj_dev)
4453 return adj; 4454 return adj;
4454 } 4455 }
4455 return NULL; 4456 return NULL;
4456 } 4457 }
4457 4458
4458 /** 4459 /**
4459 * netdev_has_upper_dev - Check if device is linked to an upper device 4460 * netdev_has_upper_dev - Check if device is linked to an upper device
4460 * @dev: device 4461 * @dev: device
4461 * @upper_dev: upper device to check 4462 * @upper_dev: upper device to check
4462 * 4463 *
4463 * Find out if a device is linked to specified upper device and return true 4464 * Find out if a device is linked to specified upper device and return true
4464 * in case it is. Note that this checks only immediate upper device, 4465 * in case it is. Note that this checks only immediate upper device,
4465 * not through a complete stack of devices. The caller must hold the RTNL lock. 4466 * not through a complete stack of devices. The caller must hold the RTNL lock.
4466 */ 4467 */
4467 bool netdev_has_upper_dev(struct net_device *dev, 4468 bool netdev_has_upper_dev(struct net_device *dev,
4468 struct net_device *upper_dev) 4469 struct net_device *upper_dev)
4469 { 4470 {
4470 ASSERT_RTNL(); 4471 ASSERT_RTNL();
4471 4472
4472 return __netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper); 4473 return __netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper);
4473 } 4474 }
4474 EXPORT_SYMBOL(netdev_has_upper_dev); 4475 EXPORT_SYMBOL(netdev_has_upper_dev);
4475 4476
4476 /** 4477 /**
4477 * netdev_has_any_upper_dev - Check if device is linked to some device 4478 * netdev_has_any_upper_dev - Check if device is linked to some device
4478 * @dev: device 4479 * @dev: device
4479 * 4480 *
4480 * Find out if a device is linked to an upper device and return true in case 4481 * Find out if a device is linked to an upper device and return true in case
4481 * it is. The caller must hold the RTNL lock. 4482 * it is. The caller must hold the RTNL lock.
4482 */ 4483 */
4483 static bool netdev_has_any_upper_dev(struct net_device *dev) 4484 static bool netdev_has_any_upper_dev(struct net_device *dev)
4484 { 4485 {
4485 ASSERT_RTNL(); 4486 ASSERT_RTNL();
4486 4487
4487 return !list_empty(&dev->all_adj_list.upper); 4488 return !list_empty(&dev->all_adj_list.upper);
4488 } 4489 }
4489 4490
4490 /** 4491 /**
4491 * netdev_master_upper_dev_get - Get master upper device 4492 * netdev_master_upper_dev_get - Get master upper device
4492 * @dev: device 4493 * @dev: device
4493 * 4494 *
4494 * Find a master upper device and return pointer to it or NULL in case 4495 * Find a master upper device and return pointer to it or NULL in case
4495 * it's not there. The caller must hold the RTNL lock. 4496 * it's not there. The caller must hold the RTNL lock.
4496 */ 4497 */
4497 struct net_device *netdev_master_upper_dev_get(struct net_device *dev) 4498 struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
4498 { 4499 {
4499 struct netdev_adjacent *upper; 4500 struct netdev_adjacent *upper;
4500 4501
4501 ASSERT_RTNL(); 4502 ASSERT_RTNL();
4502 4503
4503 if (list_empty(&dev->adj_list.upper)) 4504 if (list_empty(&dev->adj_list.upper))
4504 return NULL; 4505 return NULL;
4505 4506
4506 upper = list_first_entry(&dev->adj_list.upper, 4507 upper = list_first_entry(&dev->adj_list.upper,
4507 struct netdev_adjacent, list); 4508 struct netdev_adjacent, list);
4508 if (likely(upper->master)) 4509 if (likely(upper->master))
4509 return upper->dev; 4510 return upper->dev;
4510 return NULL; 4511 return NULL;
4511 } 4512 }
4512 EXPORT_SYMBOL(netdev_master_upper_dev_get); 4513 EXPORT_SYMBOL(netdev_master_upper_dev_get);
4513 4514
4514 void *netdev_adjacent_get_private(struct list_head *adj_list) 4515 void *netdev_adjacent_get_private(struct list_head *adj_list)
4515 { 4516 {
4516 struct netdev_adjacent *adj; 4517 struct netdev_adjacent *adj;
4517 4518
4518 adj = list_entry(adj_list, struct netdev_adjacent, list); 4519 adj = list_entry(adj_list, struct netdev_adjacent, list);
4519 4520
4520 return adj->private; 4521 return adj->private;
4521 } 4522 }
4522 EXPORT_SYMBOL(netdev_adjacent_get_private); 4523 EXPORT_SYMBOL(netdev_adjacent_get_private);
4523 4524
4524 /** 4525 /**
4525 * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list 4526 * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list
4526 * @dev: device 4527 * @dev: device
4527 * @iter: list_head ** of the current position 4528 * @iter: list_head ** of the current position
4528 * 4529 *
4529 * Gets the next device from the dev's upper list, starting from iter 4530 * Gets the next device from the dev's upper list, starting from iter
4530 * position. The caller must hold RCU read lock. 4531 * position. The caller must hold RCU read lock.
4531 */ 4532 */
4532 struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, 4533 struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
4533 struct list_head **iter) 4534 struct list_head **iter)
4534 { 4535 {
4535 struct netdev_adjacent *upper; 4536 struct netdev_adjacent *upper;
4536 4537
4537 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held()); 4538 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
4538 4539
4539 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); 4540 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
4540 4541
4541 if (&upper->list == &dev->all_adj_list.upper) 4542 if (&upper->list == &dev->all_adj_list.upper)
4542 return NULL; 4543 return NULL;
4543 4544
4544 *iter = &upper->list; 4545 *iter = &upper->list;
4545 4546
4546 return upper->dev; 4547 return upper->dev;
4547 } 4548 }
4548 EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu); 4549 EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu);
4549 4550
4550 /** 4551 /**
4551 * netdev_lower_get_next_private - Get the next ->private from the 4552 * netdev_lower_get_next_private - Get the next ->private from the
4552 * lower neighbour list 4553 * lower neighbour list
4553 * @dev: device 4554 * @dev: device
4554 * @iter: list_head ** of the current position 4555 * @iter: list_head ** of the current position
4555 * 4556 *
4556 * Gets the next netdev_adjacent->private from the dev's lower neighbour 4557 * Gets the next netdev_adjacent->private from the dev's lower neighbour
4557 * list, starting from iter position. The caller must either hold the 4558 * list, starting from iter position. The caller must either hold the
4558 * RTNL lock or its own locking that guarantees that the neighbour lower 4559 * RTNL lock or its own locking that guarantees that the neighbour lower
4559 * list will remain unchanged. 4560 * list will remain unchanged.
4560 */ 4561 */
4561 void *netdev_lower_get_next_private(struct net_device *dev, 4562 void *netdev_lower_get_next_private(struct net_device *dev,
4562 struct list_head **iter) 4563 struct list_head **iter)
4563 { 4564 {
4564 struct netdev_adjacent *lower; 4565 struct netdev_adjacent *lower;
4565 4566
4566 lower = list_entry(*iter, struct netdev_adjacent, list); 4567 lower = list_entry(*iter, struct netdev_adjacent, list);
4567 4568
4568 if (&lower->list == &dev->adj_list.lower) 4569 if (&lower->list == &dev->adj_list.lower)
4569 return NULL; 4570 return NULL;
4570 4571
4571 if (iter) 4572 if (iter)
4572 *iter = lower->list.next; 4573 *iter = lower->list.next;
4573 4574
4574 return lower->private; 4575 return lower->private;
4575 } 4576 }
4576 EXPORT_SYMBOL(netdev_lower_get_next_private); 4577 EXPORT_SYMBOL(netdev_lower_get_next_private);
4577 4578
4578 /** 4579 /**
4579 * netdev_lower_get_next_private_rcu - Get the next ->private from the 4580 * netdev_lower_get_next_private_rcu - Get the next ->private from the
4580 * lower neighbour list, RCU 4581 * lower neighbour list, RCU
4581 * variant 4582 * variant
4582 * @dev: device 4583 * @dev: device
4583 * @iter: list_head ** of the current position 4584 * @iter: list_head ** of the current position
4584 * 4585 *
4585 * Gets the next netdev_adjacent->private from the dev's lower neighbour 4586 * Gets the next netdev_adjacent->private from the dev's lower neighbour
4586 * list, starting from iter position. The caller must hold RCU read lock. 4587 * list, starting from iter position. The caller must hold RCU read lock.
4587 */ 4588 */
4588 void *netdev_lower_get_next_private_rcu(struct net_device *dev, 4589 void *netdev_lower_get_next_private_rcu(struct net_device *dev,
4589 struct list_head **iter) 4590 struct list_head **iter)
4590 { 4591 {
4591 struct netdev_adjacent *lower; 4592 struct netdev_adjacent *lower;
4592 4593
4593 WARN_ON_ONCE(!rcu_read_lock_held()); 4594 WARN_ON_ONCE(!rcu_read_lock_held());
4594 4595
4595 lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); 4596 lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
4596 4597
4597 if (&lower->list == &dev->adj_list.lower) 4598 if (&lower->list == &dev->adj_list.lower)
4598 return NULL; 4599 return NULL;
4599 4600
4600 if (iter) 4601 if (iter)
4601 *iter = &lower->list; 4602 *iter = &lower->list;
4602 4603
4603 return lower->private; 4604 return lower->private;
4604 } 4605 }
4605 EXPORT_SYMBOL(netdev_lower_get_next_private_rcu); 4606 EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
4606 4607
4607 /** 4608 /**
4608 * netdev_lower_get_next - Get the next device from the lower neighbour 4609 * netdev_lower_get_next - Get the next device from the lower neighbour
4609 * list 4610 * list
4610 * @dev: device 4611 * @dev: device
4611 * @iter: list_head ** of the current position 4612 * @iter: list_head ** of the current position
4612 * 4613 *
4613 * Gets the next netdev_adjacent from the dev's lower neighbour 4614 * Gets the next netdev_adjacent from the dev's lower neighbour
4614 * list, starting from iter position. The caller must hold RTNL lock or 4615 * list, starting from iter position. The caller must hold RTNL lock or
4615 * its own locking that guarantees that the neighbour lower 4616 * its own locking that guarantees that the neighbour lower
4616 * list will remain unchanged. 4617 * list will remain unchanged.
4617 */ 4618 */
4618 void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter) 4619 void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
4619 { 4620 {
4620 struct netdev_adjacent *lower; 4621 struct netdev_adjacent *lower;
4621 4622
4622 lower = list_entry((*iter)->next, struct netdev_adjacent, list); 4623 lower = list_entry((*iter)->next, struct netdev_adjacent, list);
4623 4624
4624 if (&lower->list == &dev->adj_list.lower) 4625 if (&lower->list == &dev->adj_list.lower)
4625 return NULL; 4626 return NULL;
4626 4627
4627 *iter = &lower->list; 4628 *iter = &lower->list;
4628 4629
4629 return lower->dev; 4630 return lower->dev;
4630 } 4631 }
4631 EXPORT_SYMBOL(netdev_lower_get_next); 4632 EXPORT_SYMBOL(netdev_lower_get_next);
4632 4633
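All of these iterators take a list_head ** cursor that starts at the list head and is advanced on each call, returning NULL once it wraps back around. A hedged sketch of walking a device's immediate lower devices with netdev_lower_get_next() follows; the pr_debug() body is illustrative only.

/*
 * Hedged sketch (not from this patch) of iterating the immediate lower
 * devices; must run under rtnl_lock() or equivalent, per the comment above.
 */
static void example_walk_lowers(struct net_device *dev)
{
	struct list_head *iter = &dev->adj_list.lower;
	struct net_device *lower;

	while ((lower = netdev_lower_get_next(dev, &iter)) != NULL)
		pr_debug("%s is directly below %s\n", lower->name, dev->name);
}
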
4633 /** 4634 /**
4634 * netdev_lower_get_first_private_rcu - Get the first ->private from the 4635 * netdev_lower_get_first_private_rcu - Get the first ->private from the
4635 * lower neighbour list, RCU 4636 * lower neighbour list, RCU
4636 * variant 4637 * variant
4637 * @dev: device 4638 * @dev: device
4638 * 4639 *
4639 * Gets the first netdev_adjacent->private from the dev's lower neighbour 4640 * Gets the first netdev_adjacent->private from the dev's lower neighbour
4640 * list. The caller must hold RCU read lock. 4641 * list. The caller must hold RCU read lock.
4641 */ 4642 */
4642 void *netdev_lower_get_first_private_rcu(struct net_device *dev) 4643 void *netdev_lower_get_first_private_rcu(struct net_device *dev)
4643 { 4644 {
4644 struct netdev_adjacent *lower; 4645 struct netdev_adjacent *lower;
4645 4646
4646 lower = list_first_or_null_rcu(&dev->adj_list.lower, 4647 lower = list_first_or_null_rcu(&dev->adj_list.lower,
4647 struct netdev_adjacent, list); 4648 struct netdev_adjacent, list);
4648 if (lower) 4649 if (lower)
4649 return lower->private; 4650 return lower->private;
4650 return NULL; 4651 return NULL;
4651 } 4652 }
4652 EXPORT_SYMBOL(netdev_lower_get_first_private_rcu); 4653 EXPORT_SYMBOL(netdev_lower_get_first_private_rcu);
4653 4654
4654 /** 4655 /**
4655 * netdev_master_upper_dev_get_rcu - Get master upper device 4656 * netdev_master_upper_dev_get_rcu - Get master upper device
4656 * @dev: device 4657 * @dev: device
4657 * 4658 *
4658 * Find a master upper device and return pointer to it or NULL in case 4659 * Find a master upper device and return pointer to it or NULL in case
4659 * it's not there. The caller must hold the RCU read lock. 4660 * it's not there. The caller must hold the RCU read lock.
4660 */ 4661 */
4661 struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev) 4662 struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
4662 { 4663 {
4663 struct netdev_adjacent *upper; 4664 struct netdev_adjacent *upper;
4664 4665
4665 upper = list_first_or_null_rcu(&dev->adj_list.upper, 4666 upper = list_first_or_null_rcu(&dev->adj_list.upper,
4666 struct netdev_adjacent, list); 4667 struct netdev_adjacent, list);
4667 if (upper && likely(upper->master)) 4668 if (upper && likely(upper->master))
4668 return upper->dev; 4669 return upper->dev;
4669 return NULL; 4670 return NULL;
4670 } 4671 }
4671 EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu); 4672 EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
4672 4673
4673 static int netdev_adjacent_sysfs_add(struct net_device *dev, 4674 static int netdev_adjacent_sysfs_add(struct net_device *dev,
4674 struct net_device *adj_dev, 4675 struct net_device *adj_dev,
4675 struct list_head *dev_list) 4676 struct list_head *dev_list)
4676 { 4677 {
4677 char linkname[IFNAMSIZ+7]; 4678 char linkname[IFNAMSIZ+7];
4678 sprintf(linkname, dev_list == &dev->adj_list.upper ? 4679 sprintf(linkname, dev_list == &dev->adj_list.upper ?
4679 "upper_%s" : "lower_%s", adj_dev->name); 4680 "upper_%s" : "lower_%s", adj_dev->name);
4680 return sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj), 4681 return sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj),
4681 linkname); 4682 linkname);
4682 } 4683 }
4683 static void netdev_adjacent_sysfs_del(struct net_device *dev, 4684 static void netdev_adjacent_sysfs_del(struct net_device *dev,
4684 char *name, 4685 char *name,
4685 struct list_head *dev_list) 4686 struct list_head *dev_list)
4686 { 4687 {
4687 char linkname[IFNAMSIZ+7]; 4688 char linkname[IFNAMSIZ+7];
4688 sprintf(linkname, dev_list == &dev->adj_list.upper ? 4689 sprintf(linkname, dev_list == &dev->adj_list.upper ?
4689 "upper_%s" : "lower_%s", name); 4690 "upper_%s" : "lower_%s", name);
4690 sysfs_remove_link(&(dev->dev.kobj), linkname); 4691 sysfs_remove_link(&(dev->dev.kobj), linkname);
4691 } 4692 }
4692 4693
4693 #define netdev_adjacent_is_neigh_list(dev, dev_list) \ 4694 #define netdev_adjacent_is_neigh_list(dev, dev_list) \
4694 (dev_list == &dev->adj_list.upper || \ 4695 (dev_list == &dev->adj_list.upper || \
4695 dev_list == &dev->adj_list.lower) 4696 dev_list == &dev->adj_list.lower)
4696 4697
4697 static int __netdev_adjacent_dev_insert(struct net_device *dev, 4698 static int __netdev_adjacent_dev_insert(struct net_device *dev,
4698 struct net_device *adj_dev, 4699 struct net_device *adj_dev,
4699 struct list_head *dev_list, 4700 struct list_head *dev_list,
4700 void *private, bool master) 4701 void *private, bool master)
4701 { 4702 {
4702 struct netdev_adjacent *adj; 4703 struct netdev_adjacent *adj;
4703 int ret; 4704 int ret;
4704 4705
4705 adj = __netdev_find_adj(dev, adj_dev, dev_list); 4706 adj = __netdev_find_adj(dev, adj_dev, dev_list);
4706 4707
4707 if (adj) { 4708 if (adj) {
4708 adj->ref_nr++; 4709 adj->ref_nr++;
4709 return 0; 4710 return 0;
4710 } 4711 }
4711 4712
4712 adj = kmalloc(sizeof(*adj), GFP_KERNEL); 4713 adj = kmalloc(sizeof(*adj), GFP_KERNEL);
4713 if (!adj) 4714 if (!adj)
4714 return -ENOMEM; 4715 return -ENOMEM;
4715 4716
4716 adj->dev = adj_dev; 4717 adj->dev = adj_dev;
4717 adj->master = master; 4718 adj->master = master;
4718 adj->ref_nr = 1; 4719 adj->ref_nr = 1;
4719 adj->private = private; 4720 adj->private = private;
4720 dev_hold(adj_dev); 4721 dev_hold(adj_dev);
4721 4722
4722 pr_debug("dev_hold for %s, because of link added from %s to %s\n", 4723 pr_debug("dev_hold for %s, because of link added from %s to %s\n",
4723 adj_dev->name, dev->name, adj_dev->name); 4724 adj_dev->name, dev->name, adj_dev->name);
4724 4725
4725 if (netdev_adjacent_is_neigh_list(dev, dev_list)) { 4726 if (netdev_adjacent_is_neigh_list(dev, dev_list)) {
4726 ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list); 4727 ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list);
4727 if (ret) 4728 if (ret)
4728 goto free_adj; 4729 goto free_adj;
4729 } 4730 }
4730 4731
4731 /* Ensure that master link is always the first item in list. */ 4732 /* Ensure that master link is always the first item in list. */
4732 if (master) { 4733 if (master) {
4733 ret = sysfs_create_link(&(dev->dev.kobj), 4734 ret = sysfs_create_link(&(dev->dev.kobj),
4734 &(adj_dev->dev.kobj), "master"); 4735 &(adj_dev->dev.kobj), "master");
4735 if (ret) 4736 if (ret)
4736 goto remove_symlinks; 4737 goto remove_symlinks;
4737 4738
4738 list_add_rcu(&adj->list, dev_list); 4739 list_add_rcu(&adj->list, dev_list);
4739 } else { 4740 } else {
4740 list_add_tail_rcu(&adj->list, dev_list); 4741 list_add_tail_rcu(&adj->list, dev_list);
4741 } 4742 }
4742 4743
4743 return 0; 4744 return 0;
4744 4745
4745 remove_symlinks: 4746 remove_symlinks:
4746 if (netdev_adjacent_is_neigh_list(dev, dev_list)) 4747 if (netdev_adjacent_is_neigh_list(dev, dev_list))
4747 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); 4748 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
4748 free_adj: 4749 free_adj:
4749 kfree(adj); 4750 kfree(adj);
4750 dev_put(adj_dev); 4751 dev_put(adj_dev);
4751 4752
4752 return ret; 4753 return ret;
4753 } 4754 }
4754 4755
4755 static void __netdev_adjacent_dev_remove(struct net_device *dev, 4756 static void __netdev_adjacent_dev_remove(struct net_device *dev,
4756 struct net_device *adj_dev, 4757 struct net_device *adj_dev,
4757 struct list_head *dev_list) 4758 struct list_head *dev_list)
4758 { 4759 {
4759 struct netdev_adjacent *adj; 4760 struct netdev_adjacent *adj;
4760 4761
4761 adj = __netdev_find_adj(dev, adj_dev, dev_list); 4762 adj = __netdev_find_adj(dev, adj_dev, dev_list);
4762 4763
4763 if (!adj) { 4764 if (!adj) {
4764 pr_err("tried to remove device %s from %s\n", 4765 pr_err("tried to remove device %s from %s\n",
4765 dev->name, adj_dev->name); 4766 dev->name, adj_dev->name);
4766 BUG(); 4767 BUG();
4767 } 4768 }
4768 4769
4769 if (adj->ref_nr > 1) { 4770 if (adj->ref_nr > 1) {
4770 pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name, 4771 pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name,
4771 adj->ref_nr-1); 4772 adj->ref_nr-1);
4772 adj->ref_nr--; 4773 adj->ref_nr--;
4773 return; 4774 return;
4774 } 4775 }
4775 4776
4776 if (adj->master) 4777 if (adj->master)
4777 sysfs_remove_link(&(dev->dev.kobj), "master"); 4778 sysfs_remove_link(&(dev->dev.kobj), "master");
4778 4779
4779 if (netdev_adjacent_is_neigh_list(dev, dev_list)) 4780 if (netdev_adjacent_is_neigh_list(dev, dev_list))
4780 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); 4781 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
4781 4782
4782 list_del_rcu(&adj->list); 4783 list_del_rcu(&adj->list);
4783 pr_debug("dev_put for %s, because link removed from %s to %s\n", 4784 pr_debug("dev_put for %s, because link removed from %s to %s\n",
4784 adj_dev->name, dev->name, adj_dev->name); 4785 adj_dev->name, dev->name, adj_dev->name);
4785 dev_put(adj_dev); 4786 dev_put(adj_dev);
4786 kfree_rcu(adj, rcu); 4787 kfree_rcu(adj, rcu);
4787 } 4788 }
4788 4789
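The insert/remove pair above refcounts each adjacency through ref_nr, so a device pair linked via several paths in the all_adj_list mesh allocates one entry on the first link and frees it only on the last unlink. The stand-alone sketch below models just that refcounted insert/remove, with illustrative names and plain malloc/free instead of dev_hold()/kfree_rcu().

/*
 * Stand-alone illustration (not kernel code) of the ref_nr idea used by
 * __netdev_adjacent_dev_insert()/_remove().
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct adj {
	char name[16];
	unsigned int ref_nr;
	struct adj *next;
};

static struct adj *adj_find(struct adj *list, const char *name)
{
	for (; list; list = list->next)
		if (!strcmp(list->name, name))
			return list;
	return NULL;
}

static int adj_insert(struct adj **list, const char *name)
{
	struct adj *a = adj_find(*list, name);

	if (a) {
		a->ref_nr++;		/* second path to an existing link */
		return 0;
	}
	a = calloc(1, sizeof(*a));
	if (!a)
		return -1;
	snprintf(a->name, sizeof(a->name), "%s", name);
	a->ref_nr = 1;
	a->next = *list;
	*list = a;
	return 0;
}

static void adj_remove(struct adj **list, const char *name)
{
	struct adj *a = adj_find(*list, name);

	if (!a)
		return;			/* the kernel version BUG()s here */
	if (--a->ref_nr)
		return;			/* still referenced via another path */
	while (*list != a)
		list = &(*list)->next;
	*list = a->next;
	free(a);
}

int main(void)
{
	struct adj *uppers = NULL;

	adj_insert(&uppers, "bond0");
	adj_insert(&uppers, "bond0");	/* second path to the same upper */
	adj_remove(&uppers, "bond0");
	printf("bond0 ref_nr now %u\n", uppers ? uppers->ref_nr : 0);
	adj_remove(&uppers, "bond0");
	printf("list %s\n", uppers ? "non-empty" : "empty");
	return 0;
}
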
4789 static int __netdev_adjacent_dev_link_lists(struct net_device *dev, 4790 static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
4790 struct net_device *upper_dev, 4791 struct net_device *upper_dev,
4791 struct list_head *up_list, 4792 struct list_head *up_list,
4792 struct list_head *down_list, 4793 struct list_head *down_list,
4793 void *private, bool master) 4794 void *private, bool master)
4794 { 4795 {
4795 int ret; 4796 int ret;
4796 4797
4797 ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private, 4798 ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private,
4798 master); 4799 master);
4799 if (ret) 4800 if (ret)
4800 return ret; 4801 return ret;
4801 4802
4802 ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private, 4803 ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private,
4803 false); 4804 false);
4804 if (ret) { 4805 if (ret) {
4805 __netdev_adjacent_dev_remove(dev, upper_dev, up_list); 4806 __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
4806 return ret; 4807 return ret;
4807 } 4808 }
4808 4809
4809 return 0; 4810 return 0;
4810 } 4811 }
4811 4812
4812 static int __netdev_adjacent_dev_link(struct net_device *dev, 4813 static int __netdev_adjacent_dev_link(struct net_device *dev,
4813 struct net_device *upper_dev) 4814 struct net_device *upper_dev)
4814 { 4815 {
4815 return __netdev_adjacent_dev_link_lists(dev, upper_dev, 4816 return __netdev_adjacent_dev_link_lists(dev, upper_dev,
4816 &dev->all_adj_list.upper, 4817 &dev->all_adj_list.upper,
4817 &upper_dev->all_adj_list.lower, 4818 &upper_dev->all_adj_list.lower,
4818 NULL, false); 4819 NULL, false);
4819 } 4820 }
4820 4821
4821 static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, 4822 static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
4822 struct net_device *upper_dev, 4823 struct net_device *upper_dev,
4823 struct list_head *up_list, 4824 struct list_head *up_list,
4824 struct list_head *down_list) 4825 struct list_head *down_list)
4825 { 4826 {
4826 __netdev_adjacent_dev_remove(dev, upper_dev, up_list); 4827 __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
4827 __netdev_adjacent_dev_remove(upper_dev, dev, down_list); 4828 __netdev_adjacent_dev_remove(upper_dev, dev, down_list);
4828 } 4829 }
4829 4830
4830 static void __netdev_adjacent_dev_unlink(struct net_device *dev, 4831 static void __netdev_adjacent_dev_unlink(struct net_device *dev,
4831 struct net_device *upper_dev) 4832 struct net_device *upper_dev)
4832 { 4833 {
4833 __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 4834 __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
4834 &dev->all_adj_list.upper, 4835 &dev->all_adj_list.upper,
4835 &upper_dev->all_adj_list.lower); 4836 &upper_dev->all_adj_list.lower);
4836 } 4837 }
4837 4838
4838 static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, 4839 static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
4839 struct net_device *upper_dev, 4840 struct net_device *upper_dev,
4840 void *private, bool master) 4841 void *private, bool master)
4841 { 4842 {
4842 int ret = __netdev_adjacent_dev_link(dev, upper_dev); 4843 int ret = __netdev_adjacent_dev_link(dev, upper_dev);
4843 4844
4844 if (ret) 4845 if (ret)
4845 return ret; 4846 return ret;
4846 4847
4847 ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, 4848 ret = __netdev_adjacent_dev_link_lists(dev, upper_dev,
4848 &dev->adj_list.upper, 4849 &dev->adj_list.upper,
4849 &upper_dev->adj_list.lower, 4850 &upper_dev->adj_list.lower,
4850 private, master); 4851 private, master);
4851 if (ret) { 4852 if (ret) {
4852 __netdev_adjacent_dev_unlink(dev, upper_dev); 4853 __netdev_adjacent_dev_unlink(dev, upper_dev);
4853 return ret; 4854 return ret;
4854 } 4855 }
4855 4856
4856 return 0; 4857 return 0;
4857 } 4858 }
4858 4859
4859 static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, 4860 static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
4860 struct net_device *upper_dev) 4861 struct net_device *upper_dev)
4861 { 4862 {
4862 __netdev_adjacent_dev_unlink(dev, upper_dev); 4863 __netdev_adjacent_dev_unlink(dev, upper_dev);
4863 __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 4864 __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
4864 &dev->adj_list.upper, 4865 &dev->adj_list.upper,
4865 &upper_dev->adj_list.lower); 4866 &upper_dev->adj_list.lower);
4866 } 4867 }
4867 4868
4868 static int __netdev_upper_dev_link(struct net_device *dev, 4869 static int __netdev_upper_dev_link(struct net_device *dev,
4869 struct net_device *upper_dev, bool master, 4870 struct net_device *upper_dev, bool master,
4870 void *private) 4871 void *private)
4871 { 4872 {
4872 struct netdev_adjacent *i, *j, *to_i, *to_j; 4873 struct netdev_adjacent *i, *j, *to_i, *to_j;
4873 int ret = 0; 4874 int ret = 0;
4874 4875
4875 ASSERT_RTNL(); 4876 ASSERT_RTNL();
4876 4877
4877 if (dev == upper_dev) 4878 if (dev == upper_dev)
4878 return -EBUSY; 4879 return -EBUSY;
4879 4880
4880 /* To prevent loops, check if dev is not upper device to upper_dev. */ 4881 /* To prevent loops, check if dev is not upper device to upper_dev. */
4881 if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper)) 4882 if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper))
4882 return -EBUSY; 4883 return -EBUSY;
4883 4884
4884 if (__netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper)) 4885 if (__netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper))
4885 return -EEXIST; 4886 return -EEXIST;
4886 4887
4887 if (master && netdev_master_upper_dev_get(dev)) 4888 if (master && netdev_master_upper_dev_get(dev))
4888 return -EBUSY; 4889 return -EBUSY;
4889 4890
4890 ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private, 4891 ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private,
4891 master); 4892 master);
4892 if (ret) 4893 if (ret)
4893 return ret; 4894 return ret;
4894 4895
4895 /* Now that we linked these devs, make all the upper_dev's 4896 /* Now that we linked these devs, make all the upper_dev's
4896 * all_adj_list.upper visible to every dev's all_adj_list.lower and vice 4897 * all_adj_list.upper visible to every dev's all_adj_list.lower and vice
4897 * versa, and don't forget the devices themselves. All of these 4898 * versa, and don't forget the devices themselves. All of these
4898 * links are non-neighbours. 4899 * links are non-neighbours.
4899 */ 4900 */
4900 list_for_each_entry(i, &dev->all_adj_list.lower, list) { 4901 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
4901 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { 4902 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
4902 pr_debug("Interlinking %s with %s, non-neighbour\n", 4903 pr_debug("Interlinking %s with %s, non-neighbour\n",
4903 i->dev->name, j->dev->name); 4904 i->dev->name, j->dev->name);
4904 ret = __netdev_adjacent_dev_link(i->dev, j->dev); 4905 ret = __netdev_adjacent_dev_link(i->dev, j->dev);
4905 if (ret) 4906 if (ret)
4906 goto rollback_mesh; 4907 goto rollback_mesh;
4907 } 4908 }
4908 } 4909 }
4909 4910
4910 /* add dev to every upper_dev's upper device */ 4911 /* add dev to every upper_dev's upper device */
4911 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { 4912 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
4912 pr_debug("linking %s's upper device %s with %s\n", 4913 pr_debug("linking %s's upper device %s with %s\n",
4913 upper_dev->name, i->dev->name, dev->name); 4914 upper_dev->name, i->dev->name, dev->name);
4914 ret = __netdev_adjacent_dev_link(dev, i->dev); 4915 ret = __netdev_adjacent_dev_link(dev, i->dev);
4915 if (ret) 4916 if (ret)
4916 goto rollback_upper_mesh; 4917 goto rollback_upper_mesh;
4917 } 4918 }
4918 4919
4919 /* add upper_dev to every dev's lower device */ 4920 /* add upper_dev to every dev's lower device */
4920 list_for_each_entry(i, &dev->all_adj_list.lower, list) { 4921 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
4921 pr_debug("linking %s's lower device %s with %s\n", dev->name, 4922 pr_debug("linking %s's lower device %s with %s\n", dev->name,
4922 i->dev->name, upper_dev->name); 4923 i->dev->name, upper_dev->name);
4923 ret = __netdev_adjacent_dev_link(i->dev, upper_dev); 4924 ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
4924 if (ret) 4925 if (ret)
4925 goto rollback_lower_mesh; 4926 goto rollback_lower_mesh;
4926 } 4927 }
4927 4928
4928 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); 4929 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
4929 return 0; 4930 return 0;
4930 4931
4931 rollback_lower_mesh: 4932 rollback_lower_mesh:
4932 to_i = i; 4933 to_i = i;
4933 list_for_each_entry(i, &dev->all_adj_list.lower, list) { 4934 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
4934 if (i == to_i) 4935 if (i == to_i)
4935 break; 4936 break;
4936 __netdev_adjacent_dev_unlink(i->dev, upper_dev); 4937 __netdev_adjacent_dev_unlink(i->dev, upper_dev);
4937 } 4938 }
4938 4939
4939 i = NULL; 4940 i = NULL;
4940 4941
4941 rollback_upper_mesh: 4942 rollback_upper_mesh:
4942 to_i = i; 4943 to_i = i;
4943 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { 4944 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
4944 if (i == to_i) 4945 if (i == to_i)
4945 break; 4946 break;
4946 __netdev_adjacent_dev_unlink(dev, i->dev); 4947 __netdev_adjacent_dev_unlink(dev, i->dev);
4947 } 4948 }
4948 4949
4949 i = j = NULL; 4950 i = j = NULL;
4950 4951
4951 rollback_mesh: 4952 rollback_mesh:
4952 to_i = i; 4953 to_i = i;
4953 to_j = j; 4954 to_j = j;
4954 list_for_each_entry(i, &dev->all_adj_list.lower, list) { 4955 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
4955 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { 4956 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
4956 if (i == to_i && j == to_j) 4957 if (i == to_i && j == to_j)
4957 break; 4958 break;
4958 __netdev_adjacent_dev_unlink(i->dev, j->dev); 4959 __netdev_adjacent_dev_unlink(i->dev, j->dev);
4959 } 4960 }
4960 if (i == to_i) 4961 if (i == to_i)
4961 break; 4962 break;
4962 } 4963 }
4963 4964
4964 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); 4965 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
4965 4966
4966 return ret; 4967 return ret;
4967 } 4968 }
4968 4969
4969 /** 4970 /**
4970 * netdev_upper_dev_link - Add a link to the upper device 4971 * netdev_upper_dev_link - Add a link to the upper device
4971 * @dev: device 4972 * @dev: device
4972 * @upper_dev: new upper device 4973 * @upper_dev: new upper device
4973 * 4974 *
4974 * Adds a link to device which is upper to this one. The caller must hold 4975 * Adds a link to device which is upper to this one. The caller must hold
4975 * the RTNL lock. On a failure a negative errno code is returned. 4976 * the RTNL lock. On a failure a negative errno code is returned.
4976 * On success the reference counts are adjusted and the function 4977 * On success the reference counts are adjusted and the function
4977 * returns zero. 4978 * returns zero.
4978 */ 4979 */
4979 int netdev_upper_dev_link(struct net_device *dev, 4980 int netdev_upper_dev_link(struct net_device *dev,
4980 struct net_device *upper_dev) 4981 struct net_device *upper_dev)
4981 { 4982 {
4982 return __netdev_upper_dev_link(dev, upper_dev, false, NULL); 4983 return __netdev_upper_dev_link(dev, upper_dev, false, NULL);
4983 } 4984 }
4984 EXPORT_SYMBOL(netdev_upper_dev_link); 4985 EXPORT_SYMBOL(netdev_upper_dev_link);
4985 4986
4986 /** 4987 /**
4987 * netdev_master_upper_dev_link - Add a master link to the upper device 4988 * netdev_master_upper_dev_link - Add a master link to the upper device
4988 * @dev: device 4989 * @dev: device
4989 * @upper_dev: new upper device 4990 * @upper_dev: new upper device
4990 * 4991 *
4991 * Adds a link to device which is upper to this one. In this case, only 4992 * Adds a link to device which is upper to this one. In this case, only
4992 * one master upper device can be linked, although other non-master devices 4993 * one master upper device can be linked, although other non-master devices
4993 * might be linked as well. The caller must hold the RTNL lock. 4994 * might be linked as well. The caller must hold the RTNL lock.
4994 * On a failure a negative errno code is returned. On success the reference 4995 * On a failure a negative errno code is returned. On success the reference
4995 * counts are adjusted and the function returns zero. 4996 * counts are adjusted and the function returns zero.
4996 */ 4997 */
4997 int netdev_master_upper_dev_link(struct net_device *dev, 4998 int netdev_master_upper_dev_link(struct net_device *dev,
4998 struct net_device *upper_dev) 4999 struct net_device *upper_dev)
4999 { 5000 {
5000 return __netdev_upper_dev_link(dev, upper_dev, true, NULL); 5001 return __netdev_upper_dev_link(dev, upper_dev, true, NULL);
5001 } 5002 }
5002 EXPORT_SYMBOL(netdev_master_upper_dev_link); 5003 EXPORT_SYMBOL(netdev_master_upper_dev_link);
5003 5004
5004 int netdev_master_upper_dev_link_private(struct net_device *dev, 5005 int netdev_master_upper_dev_link_private(struct net_device *dev,
5005 struct net_device *upper_dev, 5006 struct net_device *upper_dev,
5006 void *private) 5007 void *private)
5007 { 5008 {
5008 return __netdev_upper_dev_link(dev, upper_dev, true, private); 5009 return __netdev_upper_dev_link(dev, upper_dev, true, private);
5009 } 5010 }
5010 EXPORT_SYMBOL(netdev_master_upper_dev_link_private); 5011 EXPORT_SYMBOL(netdev_master_upper_dev_link_private);
5011 5012
5012 /** 5013 /**
5013 * netdev_upper_dev_unlink - Removes a link to upper device 5014 * netdev_upper_dev_unlink - Removes a link to upper device
5014 * @dev: device 5015 * @dev: device
5015 * @upper_dev: new upper device 5016 * @upper_dev: new upper device
5016 * 5017 *
5017 * Removes a link to device which is upper to this one. The caller must hold 5018 * Removes a link to device which is upper to this one. The caller must hold
5018 * the RTNL lock. 5019 * the RTNL lock.
5019 */ 5020 */
5020 void netdev_upper_dev_unlink(struct net_device *dev, 5021 void netdev_upper_dev_unlink(struct net_device *dev,
5021 struct net_device *upper_dev) 5022 struct net_device *upper_dev)
5022 { 5023 {
5023 struct netdev_adjacent *i, *j; 5024 struct netdev_adjacent *i, *j;
5024 ASSERT_RTNL(); 5025 ASSERT_RTNL();
5025 5026
5026 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); 5027 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
5027 5028
5028 /* Here is the tricky part. We must remove all dev's lower 5029 /* Here is the tricky part. We must remove all dev's lower
5029 * devices from all upper_dev's upper devices and vice 5030 * devices from all upper_dev's upper devices and vice
5030 * versa, to maintain the graph relationship. 5031 * versa, to maintain the graph relationship.
5031 */ 5032 */
5032 list_for_each_entry(i, &dev->all_adj_list.lower, list) 5033 list_for_each_entry(i, &dev->all_adj_list.lower, list)
5033 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) 5034 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list)
5034 __netdev_adjacent_dev_unlink(i->dev, j->dev); 5035 __netdev_adjacent_dev_unlink(i->dev, j->dev);
5035 5036
5036 /* remove also the devices itself from lower/upper device 5037 /* remove also the devices itself from lower/upper device
5037 * list 5038 * list
5038 */ 5039 */
5039 list_for_each_entry(i, &dev->all_adj_list.lower, list) 5040 list_for_each_entry(i, &dev->all_adj_list.lower, list)
5040 __netdev_adjacent_dev_unlink(i->dev, upper_dev); 5041 __netdev_adjacent_dev_unlink(i->dev, upper_dev);
5041 5042
5042 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) 5043 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
5043 __netdev_adjacent_dev_unlink(dev, i->dev); 5044 __netdev_adjacent_dev_unlink(dev, i->dev);
5044 5045
5045 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); 5046 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
5046 } 5047 }
5047 EXPORT_SYMBOL(netdev_upper_dev_unlink); 5048 EXPORT_SYMBOL(netdev_upper_dev_unlink);
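A minimal usage sketch for the link/unlink pair above (assuming the usual <linux/netdevice.h> and <linux/rtnetlink.h> includes): a hypothetical aggregation-style driver enslaves a lower device by linking itself as the master upper device, then reverses the link on release. Both calls must run with the RTNL lock held, and both raise NETDEV_CHANGEUPPER internally.

static int myagg_enslave(struct net_device *master, struct net_device *slave)
{
        ASSERT_RTNL();
        /* slave gains 'master' as its (only) master upper device */
        return netdev_master_upper_dev_link(slave, master);
}

static void myagg_release(struct net_device *master, struct net_device *slave)
{
        ASSERT_RTNL();
        /* tears down the adjacency lists built by the link above */
        netdev_upper_dev_unlink(slave, master);
}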
5048 5049
5049 void netdev_adjacent_rename_links(struct net_device *dev, char *oldname) 5050 void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
5050 { 5051 {
5051 struct netdev_adjacent *iter; 5052 struct netdev_adjacent *iter;
5052 5053
5053 list_for_each_entry(iter, &dev->adj_list.upper, list) { 5054 list_for_each_entry(iter, &dev->adj_list.upper, list) {
5054 netdev_adjacent_sysfs_del(iter->dev, oldname, 5055 netdev_adjacent_sysfs_del(iter->dev, oldname,
5055 &iter->dev->adj_list.lower); 5056 &iter->dev->adj_list.lower);
5056 netdev_adjacent_sysfs_add(iter->dev, dev, 5057 netdev_adjacent_sysfs_add(iter->dev, dev,
5057 &iter->dev->adj_list.lower); 5058 &iter->dev->adj_list.lower);
5058 } 5059 }
5059 5060
5060 list_for_each_entry(iter, &dev->adj_list.lower, list) { 5061 list_for_each_entry(iter, &dev->adj_list.lower, list) {
5061 netdev_adjacent_sysfs_del(iter->dev, oldname, 5062 netdev_adjacent_sysfs_del(iter->dev, oldname,
5062 &iter->dev->adj_list.upper); 5063 &iter->dev->adj_list.upper);
5063 netdev_adjacent_sysfs_add(iter->dev, dev, 5064 netdev_adjacent_sysfs_add(iter->dev, dev,
5064 &iter->dev->adj_list.upper); 5065 &iter->dev->adj_list.upper);
5065 } 5066 }
5066 } 5067 }
5067 5068
5068 void *netdev_lower_dev_get_private(struct net_device *dev, 5069 void *netdev_lower_dev_get_private(struct net_device *dev,
5069 struct net_device *lower_dev) 5070 struct net_device *lower_dev)
5070 { 5071 {
5071 struct netdev_adjacent *lower; 5072 struct netdev_adjacent *lower;
5072 5073
5073 if (!lower_dev) 5074 if (!lower_dev)
5074 return NULL; 5075 return NULL;
5075 lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower); 5076 lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower);
5076 if (!lower) 5077 if (!lower)
5077 return NULL; 5078 return NULL;
5078 5079
5079 return lower->private; 5080 return lower->private;
5080 } 5081 }
5081 EXPORT_SYMBOL(netdev_lower_dev_get_private); 5082 EXPORT_SYMBOL(netdev_lower_dev_get_private);
5082 5083
5083 5084
5084 int dev_get_nest_level(struct net_device *dev, 5085 int dev_get_nest_level(struct net_device *dev,
5085 bool (*type_check)(struct net_device *dev)) 5086 bool (*type_check)(struct net_device *dev))
5086 { 5087 {
5087 struct net_device *lower = NULL; 5088 struct net_device *lower = NULL;
5088 struct list_head *iter; 5089 struct list_head *iter;
5089 int max_nest = -1; 5090 int max_nest = -1;
5090 int nest; 5091 int nest;
5091 5092
5092 ASSERT_RTNL(); 5093 ASSERT_RTNL();
5093 5094
5094 netdev_for_each_lower_dev(dev, lower, iter) { 5095 netdev_for_each_lower_dev(dev, lower, iter) {
5095 nest = dev_get_nest_level(lower, type_check); 5096 nest = dev_get_nest_level(lower, type_check);
5096 if (max_nest < nest) 5097 if (max_nest < nest)
5097 max_nest = nest; 5098 max_nest = nest;
5098 } 5099 }
5099 5100
5100 if (type_check(dev)) 5101 if (type_check(dev))
5101 max_nest++; 5102 max_nest++;
5102 5103
5103 return max_nest; 5104 return max_nest;
5104 } 5105 }
5105 EXPORT_SYMBOL(dev_get_nest_level); 5106 EXPORT_SYMBOL(dev_get_nest_level);
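A minimal sketch of how a stacking driver might use dev_get_nest_level(), with mydrv_netdev_ops as a hypothetical ops table: the type_check callback identifies devices of the driver's own kind, and the returned depth is typically fed into a lockdep subclass for the device's locks.

static bool is_mydrv_dev(struct net_device *dev)
{
        return dev->netdev_ops == &mydrv_netdev_ops;    /* hypothetical ops table */
}

static void mydrv_update_nesting(struct net_device *dev)
{
        int nest;

        ASSERT_RTNL();
        /* how deeply dev is stacked over other mydrv devices */
        nest = dev_get_nest_level(dev, is_mydrv_dev);
        netdev_dbg(dev, "nested %d level(s) over mydrv devices\n", nest);
        /* a real driver would pass 'nest' to lockdep_set_class_and_subclass() */
}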
5106 5107
5107 static void dev_change_rx_flags(struct net_device *dev, int flags) 5108 static void dev_change_rx_flags(struct net_device *dev, int flags)
5108 { 5109 {
5109 const struct net_device_ops *ops = dev->netdev_ops; 5110 const struct net_device_ops *ops = dev->netdev_ops;
5110 5111
5111 if (ops->ndo_change_rx_flags) 5112 if (ops->ndo_change_rx_flags)
5112 ops->ndo_change_rx_flags(dev, flags); 5113 ops->ndo_change_rx_flags(dev, flags);
5113 } 5114 }
5114 5115
5115 static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) 5116 static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
5116 { 5117 {
5117 unsigned int old_flags = dev->flags; 5118 unsigned int old_flags = dev->flags;
5118 kuid_t uid; 5119 kuid_t uid;
5119 kgid_t gid; 5120 kgid_t gid;
5120 5121
5121 ASSERT_RTNL(); 5122 ASSERT_RTNL();
5122 5123
5123 dev->flags |= IFF_PROMISC; 5124 dev->flags |= IFF_PROMISC;
5124 dev->promiscuity += inc; 5125 dev->promiscuity += inc;
5125 if (dev->promiscuity == 0) { 5126 if (dev->promiscuity == 0) {
5126 /* 5127 /*
5127 * Avoid overflow. 5128 * Avoid overflow.
5128 * If inc causes overflow, untouch promisc and return error. 5129 * If inc causes overflow, untouch promisc and return error.
5129 */ 5130 */
5130 if (inc < 0) 5131 if (inc < 0)
5131 dev->flags &= ~IFF_PROMISC; 5132 dev->flags &= ~IFF_PROMISC;
5132 else { 5133 else {
5133 dev->promiscuity -= inc; 5134 dev->promiscuity -= inc;
5134 pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n", 5135 pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n",
5135 dev->name); 5136 dev->name);
5136 return -EOVERFLOW; 5137 return -EOVERFLOW;
5137 } 5138 }
5138 } 5139 }
5139 if (dev->flags != old_flags) { 5140 if (dev->flags != old_flags) {
5140 pr_info("device %s %s promiscuous mode\n", 5141 pr_info("device %s %s promiscuous mode\n",
5141 dev->name, 5142 dev->name,
5142 dev->flags & IFF_PROMISC ? "entered" : "left"); 5143 dev->flags & IFF_PROMISC ? "entered" : "left");
5143 if (audit_enabled) { 5144 if (audit_enabled) {
5144 current_uid_gid(&uid, &gid); 5145 current_uid_gid(&uid, &gid);
5145 audit_log(current->audit_context, GFP_ATOMIC, 5146 audit_log(current->audit_context, GFP_ATOMIC,
5146 AUDIT_ANOM_PROMISCUOUS, 5147 AUDIT_ANOM_PROMISCUOUS,
5147 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u", 5148 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
5148 dev->name, (dev->flags & IFF_PROMISC), 5149 dev->name, (dev->flags & IFF_PROMISC),
5149 (old_flags & IFF_PROMISC), 5150 (old_flags & IFF_PROMISC),
5150 from_kuid(&init_user_ns, audit_get_loginuid(current)), 5151 from_kuid(&init_user_ns, audit_get_loginuid(current)),
5151 from_kuid(&init_user_ns, uid), 5152 from_kuid(&init_user_ns, uid),
5152 from_kgid(&init_user_ns, gid), 5153 from_kgid(&init_user_ns, gid),
5153 audit_get_sessionid(current)); 5154 audit_get_sessionid(current));
5154 } 5155 }
5155 5156
5156 dev_change_rx_flags(dev, IFF_PROMISC); 5157 dev_change_rx_flags(dev, IFF_PROMISC);
5157 } 5158 }
5158 if (notify) 5159 if (notify)
5159 __dev_notify_flags(dev, old_flags, IFF_PROMISC); 5160 __dev_notify_flags(dev, old_flags, IFF_PROMISC);
5160 return 0; 5161 return 0;
5161 } 5162 }
5162 5163
5163 /** 5164 /**
5164 * dev_set_promiscuity - update promiscuity count on a device 5165 * dev_set_promiscuity - update promiscuity count on a device
5165 * @dev: device 5166 * @dev: device
5166 * @inc: modifier 5167 * @inc: modifier
5167 * 5168 *
5168 * Add or remove promiscuity from a device. While the count in the device 5169 * Add or remove promiscuity from a device. While the count in the device
5169 * remains above zero the interface remains promiscuous. Once it hits zero 5170 * remains above zero the interface remains promiscuous. Once it hits zero
5170 * the device reverts back to normal filtering operation. A negative inc 5171 * the device reverts back to normal filtering operation. A negative inc
5171 * value is used to drop promiscuity on the device. 5172 * value is used to drop promiscuity on the device.
5172 * Return 0 if successful or a negative errno code on error. 5173 * Return 0 if successful or a negative errno code on error.
5173 */ 5174 */
5174 int dev_set_promiscuity(struct net_device *dev, int inc) 5175 int dev_set_promiscuity(struct net_device *dev, int inc)
5175 { 5176 {
5176 unsigned int old_flags = dev->flags; 5177 unsigned int old_flags = dev->flags;
5177 int err; 5178 int err;
5178 5179
5179 err = __dev_set_promiscuity(dev, inc, true); 5180 err = __dev_set_promiscuity(dev, inc, true);
5180 if (err < 0) 5181 if (err < 0)
5181 return err; 5182 return err;
5182 if (dev->flags != old_flags) 5183 if (dev->flags != old_flags)
5183 dev_set_rx_mode(dev); 5184 dev_set_rx_mode(dev);
5184 return err; 5185 return err;
5185 } 5186 }
5186 EXPORT_SYMBOL(dev_set_promiscuity); 5187 EXPORT_SYMBOL(dev_set_promiscuity);
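A minimal sketch of the counted interface (hypothetical mytap feature): each user takes one promiscuity reference while it needs to see all traffic and drops it when done, so nested users keep the device promiscuous until the last one releases it.

static int mytap_start(struct net_device *dev)
{
        ASSERT_RTNL();
        return dev_set_promiscuity(dev, 1);     /* +1; may fail with -EOVERFLOW */
}

static void mytap_stop(struct net_device *dev)
{
        ASSERT_RTNL();
        dev_set_promiscuity(dev, -1);           /* -1; promisc ends at count 0 */
}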
5187 5188
5188 static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify) 5189 static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify)
5189 { 5190 {
5190 unsigned int old_flags = dev->flags, old_gflags = dev->gflags; 5191 unsigned int old_flags = dev->flags, old_gflags = dev->gflags;
5191 5192
5192 ASSERT_RTNL(); 5193 ASSERT_RTNL();
5193 5194
5194 dev->flags |= IFF_ALLMULTI; 5195 dev->flags |= IFF_ALLMULTI;
5195 dev->allmulti += inc; 5196 dev->allmulti += inc;
5196 if (dev->allmulti == 0) { 5197 if (dev->allmulti == 0) {
5197 /* 5198 /*
5198 * Avoid overflow. 5199 * Avoid overflow.
5199 * If inc causes overflow, untouch allmulti and return error. 5200 * If inc causes overflow, untouch allmulti and return error.
5200 */ 5201 */
5201 if (inc < 0) 5202 if (inc < 0)
5202 dev->flags &= ~IFF_ALLMULTI; 5203 dev->flags &= ~IFF_ALLMULTI;
5203 else { 5204 else {
5204 dev->allmulti -= inc; 5205 dev->allmulti -= inc;
5205 pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n", 5206 pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n",
5206 dev->name); 5207 dev->name);
5207 return -EOVERFLOW; 5208 return -EOVERFLOW;
5208 } 5209 }
5209 } 5210 }
5210 if (dev->flags ^ old_flags) { 5211 if (dev->flags ^ old_flags) {
5211 dev_change_rx_flags(dev, IFF_ALLMULTI); 5212 dev_change_rx_flags(dev, IFF_ALLMULTI);
5212 dev_set_rx_mode(dev); 5213 dev_set_rx_mode(dev);
5213 if (notify) 5214 if (notify)
5214 __dev_notify_flags(dev, old_flags, 5215 __dev_notify_flags(dev, old_flags,
5215 dev->gflags ^ old_gflags); 5216 dev->gflags ^ old_gflags);
5216 } 5217 }
5217 return 0; 5218 return 0;
5218 } 5219 }
5219 5220
5220 /** 5221 /**
5221 * dev_set_allmulti - update allmulti count on a device 5222 * dev_set_allmulti - update allmulti count on a device
5222 * @dev: device 5223 * @dev: device
5223 * @inc: modifier 5224 * @inc: modifier
5224 * 5225 *
5225 * Add or remove reception of all multicast frames to a device. While the 5226 * Add or remove reception of all multicast frames to a device. While the
5226 * count in the device remains above zero the interface remains listening 5227 * count in the device remains above zero the interface remains listening
5227 * to all interfaces. Once it hits zero the device reverts back to normal 5228 * to all interfaces. Once it hits zero the device reverts back to normal
5228 * filtering operation. A negative @inc value is used to drop the counter 5229 * filtering operation. A negative @inc value is used to drop the counter
5229 * when releasing a resource needing all multicasts. 5230 * when releasing a resource needing all multicasts.
5230 * Return 0 if successful or a negative errno code on error. 5231 * Return 0 if successful or a negative errno code on error.
5231 */ 5232 */
5232 5233
5233 int dev_set_allmulti(struct net_device *dev, int inc) 5234 int dev_set_allmulti(struct net_device *dev, int inc)
5234 { 5235 {
5235 return __dev_set_allmulti(dev, inc, true); 5236 return __dev_set_allmulti(dev, inc, true);
5236 } 5237 }
5237 EXPORT_SYMBOL(dev_set_allmulti); 5238 EXPORT_SYMBOL(dev_set_allmulti);
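The same counting pattern applies to allmulti; a hypothetical multicast routing module could hold one reference per interface it is active on.

static int mymr_attach(struct net_device *dev)
{
        ASSERT_RTNL();
        return dev_set_allmulti(dev, 1);        /* receive all multicast frames */
}

static void mymr_detach(struct net_device *dev)
{
        ASSERT_RTNL();
        dev_set_allmulti(dev, -1);              /* drop back to normal filtering */
}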
5238 5239
5239 /* 5240 /*
5240 * Upload unicast and multicast address lists to device and 5241 * Upload unicast and multicast address lists to device and
5241 * configure RX filtering. When the device doesn't support unicast 5242 * configure RX filtering. When the device doesn't support unicast
5242 * filtering it is put in promiscuous mode while unicast addresses 5243 * filtering it is put in promiscuous mode while unicast addresses
5243 * are present. 5244 * are present.
5244 */ 5245 */
5245 void __dev_set_rx_mode(struct net_device *dev) 5246 void __dev_set_rx_mode(struct net_device *dev)
5246 { 5247 {
5247 const struct net_device_ops *ops = dev->netdev_ops; 5248 const struct net_device_ops *ops = dev->netdev_ops;
5248 5249
5249 /* dev_open will call this function so the list will stay sane. */ 5250 /* dev_open will call this function so the list will stay sane. */
5250 if (!(dev->flags&IFF_UP)) 5251 if (!(dev->flags&IFF_UP))
5251 return; 5252 return;
5252 5253
5253 if (!netif_device_present(dev)) 5254 if (!netif_device_present(dev))
5254 return; 5255 return;
5255 5256
5256 if (!(dev->priv_flags & IFF_UNICAST_FLT)) { 5257 if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
5257 /* Unicast addresses changes may only happen under the rtnl, 5258 /* Unicast addresses changes may only happen under the rtnl,
5258 * therefore calling __dev_set_promiscuity here is safe. 5259 * therefore calling __dev_set_promiscuity here is safe.
5259 */ 5260 */
5260 if (!netdev_uc_empty(dev) && !dev->uc_promisc) { 5261 if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
5261 __dev_set_promiscuity(dev, 1, false); 5262 __dev_set_promiscuity(dev, 1, false);
5262 dev->uc_promisc = true; 5263 dev->uc_promisc = true;
5263 } else if (netdev_uc_empty(dev) && dev->uc_promisc) { 5264 } else if (netdev_uc_empty(dev) && dev->uc_promisc) {
5264 __dev_set_promiscuity(dev, -1, false); 5265 __dev_set_promiscuity(dev, -1, false);
5265 dev->uc_promisc = false; 5266 dev->uc_promisc = false;
5266 } 5267 }
5267 } 5268 }
5268 5269
5269 if (ops->ndo_set_rx_mode) 5270 if (ops->ndo_set_rx_mode)
5270 ops->ndo_set_rx_mode(dev); 5271 ops->ndo_set_rx_mode(dev);
5271 } 5272 }
5272 5273
5273 void dev_set_rx_mode(struct net_device *dev) 5274 void dev_set_rx_mode(struct net_device *dev)
5274 { 5275 {
5275 netif_addr_lock_bh(dev); 5276 netif_addr_lock_bh(dev);
5276 __dev_set_rx_mode(dev); 5277 __dev_set_rx_mode(dev);
5277 netif_addr_unlock_bh(dev); 5278 netif_addr_unlock_bh(dev);
5278 } 5279 }
5279 5280
5280 /** 5281 /**
5281 * dev_get_flags - get flags reported to userspace 5282 * dev_get_flags - get flags reported to userspace
5282 * @dev: device 5283 * @dev: device
5283 * 5284 *
5284 * Get the combination of flag bits exported through APIs to userspace. 5285 * Get the combination of flag bits exported through APIs to userspace.
5285 */ 5286 */
5286 unsigned int dev_get_flags(const struct net_device *dev) 5287 unsigned int dev_get_flags(const struct net_device *dev)
5287 { 5288 {
5288 unsigned int flags; 5289 unsigned int flags;
5289 5290
5290 flags = (dev->flags & ~(IFF_PROMISC | 5291 flags = (dev->flags & ~(IFF_PROMISC |
5291 IFF_ALLMULTI | 5292 IFF_ALLMULTI |
5292 IFF_RUNNING | 5293 IFF_RUNNING |
5293 IFF_LOWER_UP | 5294 IFF_LOWER_UP |
5294 IFF_DORMANT)) | 5295 IFF_DORMANT)) |
5295 (dev->gflags & (IFF_PROMISC | 5296 (dev->gflags & (IFF_PROMISC |
5296 IFF_ALLMULTI)); 5297 IFF_ALLMULTI));
5297 5298
5298 if (netif_running(dev)) { 5299 if (netif_running(dev)) {
5299 if (netif_oper_up(dev)) 5300 if (netif_oper_up(dev))
5300 flags |= IFF_RUNNING; 5301 flags |= IFF_RUNNING;
5301 if (netif_carrier_ok(dev)) 5302 if (netif_carrier_ok(dev))
5302 flags |= IFF_LOWER_UP; 5303 flags |= IFF_LOWER_UP;
5303 if (netif_dormant(dev)) 5304 if (netif_dormant(dev))
5304 flags |= IFF_DORMANT; 5305 flags |= IFF_DORMANT;
5305 } 5306 }
5306 5307
5307 return flags; 5308 return flags;
5308 } 5309 }
5309 EXPORT_SYMBOL(dev_get_flags); 5310 EXPORT_SYMBOL(dev_get_flags);
5310 5311
5311 int __dev_change_flags(struct net_device *dev, unsigned int flags) 5312 int __dev_change_flags(struct net_device *dev, unsigned int flags)
5312 { 5313 {
5313 unsigned int old_flags = dev->flags; 5314 unsigned int old_flags = dev->flags;
5314 int ret; 5315 int ret;
5315 5316
5316 ASSERT_RTNL(); 5317 ASSERT_RTNL();
5317 5318
5318 /* 5319 /*
5319 * Set the flags on our device. 5320 * Set the flags on our device.
5320 */ 5321 */
5321 5322
5322 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP | 5323 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
5323 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL | 5324 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
5324 IFF_AUTOMEDIA)) | 5325 IFF_AUTOMEDIA)) |
5325 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC | 5326 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
5326 IFF_ALLMULTI)); 5327 IFF_ALLMULTI));
5327 5328
5328 /* 5329 /*
5329 * Load in the correct multicast list now the flags have changed. 5330 * Load in the correct multicast list now the flags have changed.
5330 */ 5331 */
5331 5332
5332 if ((old_flags ^ flags) & IFF_MULTICAST) 5333 if ((old_flags ^ flags) & IFF_MULTICAST)
5333 dev_change_rx_flags(dev, IFF_MULTICAST); 5334 dev_change_rx_flags(dev, IFF_MULTICAST);
5334 5335
5335 dev_set_rx_mode(dev); 5336 dev_set_rx_mode(dev);
5336 5337
5337 /* 5338 /*
5338 * Have we downed the interface. We handle IFF_UP ourselves 5339 * Have we downed the interface. We handle IFF_UP ourselves
5339 * according to user attempts to set it, rather than blindly 5340 * according to user attempts to set it, rather than blindly
5340 * setting it. 5341 * setting it.
5341 */ 5342 */
5342 5343
5343 ret = 0; 5344 ret = 0;
5344 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */ 5345 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
5345 ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev); 5346 ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
5346 5347
5347 if (!ret) 5348 if (!ret)
5348 dev_set_rx_mode(dev); 5349 dev_set_rx_mode(dev);
5349 } 5350 }
5350 5351
5351 if ((flags ^ dev->gflags) & IFF_PROMISC) { 5352 if ((flags ^ dev->gflags) & IFF_PROMISC) {
5352 int inc = (flags & IFF_PROMISC) ? 1 : -1; 5353 int inc = (flags & IFF_PROMISC) ? 1 : -1;
5353 unsigned int old_flags = dev->flags; 5354 unsigned int old_flags = dev->flags;
5354 5355
5355 dev->gflags ^= IFF_PROMISC; 5356 dev->gflags ^= IFF_PROMISC;
5356 5357
5357 if (__dev_set_promiscuity(dev, inc, false) >= 0) 5358 if (__dev_set_promiscuity(dev, inc, false) >= 0)
5358 if (dev->flags != old_flags) 5359 if (dev->flags != old_flags)
5359 dev_set_rx_mode(dev); 5360 dev_set_rx_mode(dev);
5360 } 5361 }
5361 5362
5362 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI 5363 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
5363 is important. Some (broken) drivers set IFF_PROMISC, when 5364 is important. Some (broken) drivers set IFF_PROMISC, when
5364 IFF_ALLMULTI is requested not asking us and not reporting. 5365 IFF_ALLMULTI is requested not asking us and not reporting.
5365 */ 5366 */
5366 if ((flags ^ dev->gflags) & IFF_ALLMULTI) { 5367 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
5367 int inc = (flags & IFF_ALLMULTI) ? 1 : -1; 5368 int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
5368 5369
5369 dev->gflags ^= IFF_ALLMULTI; 5370 dev->gflags ^= IFF_ALLMULTI;
5370 __dev_set_allmulti(dev, inc, false); 5371 __dev_set_allmulti(dev, inc, false);
5371 } 5372 }
5372 5373
5373 return ret; 5374 return ret;
5374 } 5375 }
5375 5376
5376 void __dev_notify_flags(struct net_device *dev, unsigned int old_flags, 5377 void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
5377 unsigned int gchanges) 5378 unsigned int gchanges)
5378 { 5379 {
5379 unsigned int changes = dev->flags ^ old_flags; 5380 unsigned int changes = dev->flags ^ old_flags;
5380 5381
5381 if (gchanges) 5382 if (gchanges)
5382 rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC); 5383 rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC);
5383 5384
5384 if (changes & IFF_UP) { 5385 if (changes & IFF_UP) {
5385 if (dev->flags & IFF_UP) 5386 if (dev->flags & IFF_UP)
5386 call_netdevice_notifiers(NETDEV_UP, dev); 5387 call_netdevice_notifiers(NETDEV_UP, dev);
5387 else 5388 else
5388 call_netdevice_notifiers(NETDEV_DOWN, dev); 5389 call_netdevice_notifiers(NETDEV_DOWN, dev);
5389 } 5390 }
5390 5391
5391 if (dev->flags & IFF_UP && 5392 if (dev->flags & IFF_UP &&
5392 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) { 5393 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
5393 struct netdev_notifier_change_info change_info; 5394 struct netdev_notifier_change_info change_info;
5394 5395
5395 change_info.flags_changed = changes; 5396 change_info.flags_changed = changes;
5396 call_netdevice_notifiers_info(NETDEV_CHANGE, dev, 5397 call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
5397 &change_info.info); 5398 &change_info.info);
5398 } 5399 }
5399 } 5400 }
5400 5401
5401 /** 5402 /**
5402 * dev_change_flags - change device settings 5403 * dev_change_flags - change device settings
5403 * @dev: device 5404 * @dev: device
5404 * @flags: device state flags 5405 * @flags: device state flags
5405 * 5406 *
5406 * Change settings on device based state flags. The flags are 5407 * Change settings on device based state flags. The flags are
5407 * in the userspace exported format. 5408 * in the userspace exported format.
5408 */ 5409 */
5409 int dev_change_flags(struct net_device *dev, unsigned int flags) 5410 int dev_change_flags(struct net_device *dev, unsigned int flags)
5410 { 5411 {
5411 int ret; 5412 int ret;
5412 unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags; 5413 unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags;
5413 5414
5414 ret = __dev_change_flags(dev, flags); 5415 ret = __dev_change_flags(dev, flags);
5415 if (ret < 0) 5416 if (ret < 0)
5416 return ret; 5417 return ret;
5417 5418
5418 changes = (old_flags ^ dev->flags) | (old_gflags ^ dev->gflags); 5419 changes = (old_flags ^ dev->flags) | (old_gflags ^ dev->gflags);
5419 __dev_notify_flags(dev, old_flags, changes); 5420 __dev_notify_flags(dev, old_flags, changes);
5420 return ret; 5421 return ret;
5421 } 5422 }
5422 EXPORT_SYMBOL(dev_change_flags); 5423 EXPORT_SYMBOL(dev_change_flags);
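A minimal sketch of in-kernel use (the SIOCSIFFLAGS ioctl path does essentially this): pass the full flag word in userspace format; dev_change_flags() applies it and sends the matching notifications.

static int mydrv_admin_up(struct net_device *dev)
{
        int err;

        rtnl_lock();
        err = dev_change_flags(dev, dev->flags | IFF_UP);       /* opens dev if needed */
        rtnl_unlock();
        return err;
}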
5423 5424
5424 static int __dev_set_mtu(struct net_device *dev, int new_mtu) 5425 static int __dev_set_mtu(struct net_device *dev, int new_mtu)
5425 { 5426 {
5426 const struct net_device_ops *ops = dev->netdev_ops; 5427 const struct net_device_ops *ops = dev->netdev_ops;
5427 5428
5428 if (ops->ndo_change_mtu) 5429 if (ops->ndo_change_mtu)
5429 return ops->ndo_change_mtu(dev, new_mtu); 5430 return ops->ndo_change_mtu(dev, new_mtu);
5430 5431
5431 dev->mtu = new_mtu; 5432 dev->mtu = new_mtu;
5432 return 0; 5433 return 0;
5433 } 5434 }
5434 5435
5435 /** 5436 /**
5436 * dev_set_mtu - Change maximum transfer unit 5437 * dev_set_mtu - Change maximum transfer unit
5437 * @dev: device 5438 * @dev: device
5438 * @new_mtu: new transfer unit 5439 * @new_mtu: new transfer unit
5439 * 5440 *
5440 * Change the maximum transfer size of the network device. 5441 * Change the maximum transfer size of the network device.
5441 */ 5442 */
5442 int dev_set_mtu(struct net_device *dev, int new_mtu) 5443 int dev_set_mtu(struct net_device *dev, int new_mtu)
5443 { 5444 {
5444 int err, orig_mtu; 5445 int err, orig_mtu;
5445 5446
5446 if (new_mtu == dev->mtu) 5447 if (new_mtu == dev->mtu)
5447 return 0; 5448 return 0;
5448 5449
5449 /* MTU must be positive. */ 5450 /* MTU must be positive. */
5450 if (new_mtu < 0) 5451 if (new_mtu < 0)
5451 return -EINVAL; 5452 return -EINVAL;
5452 5453
5453 if (!netif_device_present(dev)) 5454 if (!netif_device_present(dev))
5454 return -ENODEV; 5455 return -ENODEV;
5455 5456
5456 err = call_netdevice_notifiers(NETDEV_PRECHANGEMTU, dev); 5457 err = call_netdevice_notifiers(NETDEV_PRECHANGEMTU, dev);
5457 err = notifier_to_errno(err); 5458 err = notifier_to_errno(err);
5458 if (err) 5459 if (err)
5459 return err; 5460 return err;
5460 5461
5461 orig_mtu = dev->mtu; 5462 orig_mtu = dev->mtu;
5462 err = __dev_set_mtu(dev, new_mtu); 5463 err = __dev_set_mtu(dev, new_mtu);
5463 5464
5464 if (!err) { 5465 if (!err) {
5465 err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); 5466 err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
5466 err = notifier_to_errno(err); 5467 err = notifier_to_errno(err);
5467 if (err) { 5468 if (err) {
5468 /* setting mtu back and notifying everyone again, 5469 /* setting mtu back and notifying everyone again,
5469 * so that they have a chance to revert changes. 5470 * so that they have a chance to revert changes.
5470 */ 5471 */
5471 __dev_set_mtu(dev, orig_mtu); 5472 __dev_set_mtu(dev, orig_mtu);
5472 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); 5473 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
5473 } 5474 }
5474 } 5475 }
5475 return err; 5476 return err;
5476 } 5477 }
5477 EXPORT_SYMBOL(dev_set_mtu); 5478 EXPORT_SYMBOL(dev_set_mtu);
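A minimal sketch (hypothetical tunnel driver): shrink the MTU to leave room for encapsulation overhead. dev_set_mtu() runs NETDEV_PRECHANGEMTU first, and if a CHANGEMTU listener objects afterwards it restores the original value, so the caller only has to check the return code.

static int mytun_fit_mtu(struct net_device *dev, int overhead)
{
        int err;

        rtnl_lock();
        err = dev_set_mtu(dev, dev->mtu - overhead);
        if (err)
                netdev_err(dev, "cannot lower MTU by %d: %d\n", overhead, err);
        rtnl_unlock();
        return err;
}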
5478 5479
5479 /** 5480 /**
5480 * dev_set_group - Change group this device belongs to 5481 * dev_set_group - Change group this device belongs to
5481 * @dev: device 5482 * @dev: device
5482 * @new_group: group this device should belong to 5483 * @new_group: group this device should belong to
5483 */ 5484 */
5484 void dev_set_group(struct net_device *dev, int new_group) 5485 void dev_set_group(struct net_device *dev, int new_group)
5485 { 5486 {
5486 dev->group = new_group; 5487 dev->group = new_group;
5487 } 5488 }
5488 EXPORT_SYMBOL(dev_set_group); 5489 EXPORT_SYMBOL(dev_set_group);
5489 5490
5490 /** 5491 /**
5491 * dev_set_mac_address - Change Media Access Control Address 5492 * dev_set_mac_address - Change Media Access Control Address
5492 * @dev: device 5493 * @dev: device
5493 * @sa: new address 5494 * @sa: new address
5494 * 5495 *
5495 * Change the hardware (MAC) address of the device 5496 * Change the hardware (MAC) address of the device
5496 */ 5497 */
5497 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) 5498 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
5498 { 5499 {
5499 const struct net_device_ops *ops = dev->netdev_ops; 5500 const struct net_device_ops *ops = dev->netdev_ops;
5500 int err; 5501 int err;
5501 5502
5502 if (!ops->ndo_set_mac_address) 5503 if (!ops->ndo_set_mac_address)
5503 return -EOPNOTSUPP; 5504 return -EOPNOTSUPP;
5504 if (sa->sa_family != dev->type) 5505 if (sa->sa_family != dev->type)
5505 return -EINVAL; 5506 return -EINVAL;
5506 if (!netif_device_present(dev)) 5507 if (!netif_device_present(dev))
5507 return -ENODEV; 5508 return -ENODEV;
5508 err = ops->ndo_set_mac_address(dev, sa); 5509 err = ops->ndo_set_mac_address(dev, sa);
5509 if (err) 5510 if (err)
5510 return err; 5511 return err;
5511 dev->addr_assign_type = NET_ADDR_SET; 5512 dev->addr_assign_type = NET_ADDR_SET;
5512 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); 5513 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
5513 add_device_randomness(dev->dev_addr, dev->addr_len); 5514 add_device_randomness(dev->dev_addr, dev->addr_len);
5514 return 0; 5515 return 0;
5515 } 5516 }
5516 EXPORT_SYMBOL(dev_set_mac_address); 5517 EXPORT_SYMBOL(dev_set_mac_address);
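A minimal sketch: build a sockaddr whose family matches dev->type (ARPHRD_ETHER for Ethernet) and call under RTNL; on success the core marks the address NET_ADDR_SET and emits NETDEV_CHANGEADDR.

static int mydrv_set_lladdr(struct net_device *dev, const u8 *addr)
{
        struct sockaddr sa;
        int err;

        sa.sa_family = dev->type;               /* must match, else -EINVAL */
        memcpy(sa.sa_data, addr, dev->addr_len);

        rtnl_lock();
        err = dev_set_mac_address(dev, &sa);
        rtnl_unlock();
        return err;
}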
5517 5518
5518 /** 5519 /**
5519 * dev_change_carrier - Change device carrier 5520 * dev_change_carrier - Change device carrier
5520 * @dev: device 5521 * @dev: device
5521 * @new_carrier: new value 5522 * @new_carrier: new value
5522 * 5523 *
5523 * Change device carrier 5524 * Change device carrier
5524 */ 5525 */
5525 int dev_change_carrier(struct net_device *dev, bool new_carrier) 5526 int dev_change_carrier(struct net_device *dev, bool new_carrier)
5526 { 5527 {
5527 const struct net_device_ops *ops = dev->netdev_ops; 5528 const struct net_device_ops *ops = dev->netdev_ops;
5528 5529
5529 if (!ops->ndo_change_carrier) 5530 if (!ops->ndo_change_carrier)
5530 return -EOPNOTSUPP; 5531 return -EOPNOTSUPP;
5531 if (!netif_device_present(dev)) 5532 if (!netif_device_present(dev))
5532 return -ENODEV; 5533 return -ENODEV;
5533 return ops->ndo_change_carrier(dev, new_carrier); 5534 return ops->ndo_change_carrier(dev, new_carrier);
5534 } 5535 }
5535 EXPORT_SYMBOL(dev_change_carrier); 5536 EXPORT_SYMBOL(dev_change_carrier);
5536 5537
5537 /** 5538 /**
5538 * dev_get_phys_port_id - Get device physical port ID 5539 * dev_get_phys_port_id - Get device physical port ID
5539 * @dev: device 5540 * @dev: device
5540 * @ppid: port ID 5541 * @ppid: port ID
5541 * 5542 *
5542 * Get device physical port ID 5543 * Get device physical port ID
5543 */ 5544 */
5544 int dev_get_phys_port_id(struct net_device *dev, 5545 int dev_get_phys_port_id(struct net_device *dev,
5545 struct netdev_phys_port_id *ppid) 5546 struct netdev_phys_port_id *ppid)
5546 { 5547 {
5547 const struct net_device_ops *ops = dev->netdev_ops; 5548 const struct net_device_ops *ops = dev->netdev_ops;
5548 5549
5549 if (!ops->ndo_get_phys_port_id) 5550 if (!ops->ndo_get_phys_port_id)
5550 return -EOPNOTSUPP; 5551 return -EOPNOTSUPP;
5551 return ops->ndo_get_phys_port_id(dev, ppid); 5552 return ops->ndo_get_phys_port_id(dev, ppid);
5552 } 5553 }
5553 EXPORT_SYMBOL(dev_get_phys_port_id); 5554 EXPORT_SYMBOL(dev_get_phys_port_id);
5554 5555
5555 /** 5556 /**
5556 * dev_new_index - allocate an ifindex 5557 * dev_new_index - allocate an ifindex
5557 * @net: the applicable net namespace 5558 * @net: the applicable net namespace
5558 * 5559 *
5559 * Returns a suitable unique value for a new device interface 5560 * Returns a suitable unique value for a new device interface
5560 * number. The caller must hold the rtnl semaphore or the 5561 * number. The caller must hold the rtnl semaphore or the
5561 * dev_base_lock to be sure it remains unique. 5562 * dev_base_lock to be sure it remains unique.
5562 */ 5563 */
5563 static int dev_new_index(struct net *net) 5564 static int dev_new_index(struct net *net)
5564 { 5565 {
5565 int ifindex = net->ifindex; 5566 int ifindex = net->ifindex;
5566 for (;;) { 5567 for (;;) {
5567 if (++ifindex <= 0) 5568 if (++ifindex <= 0)
5568 ifindex = 1; 5569 ifindex = 1;
5569 if (!__dev_get_by_index(net, ifindex)) 5570 if (!__dev_get_by_index(net, ifindex))
5570 return net->ifindex = ifindex; 5571 return net->ifindex = ifindex;
5571 } 5572 }
5572 } 5573 }
5573 5574
5574 /* Delayed registration/unregisteration */ 5575 /* Delayed registration/unregisteration */
5575 static LIST_HEAD(net_todo_list); 5576 static LIST_HEAD(net_todo_list);
5576 DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq); 5577 DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
5577 5578
5578 static void net_set_todo(struct net_device *dev) 5579 static void net_set_todo(struct net_device *dev)
5579 { 5580 {
5580 list_add_tail(&dev->todo_list, &net_todo_list); 5581 list_add_tail(&dev->todo_list, &net_todo_list);
5581 dev_net(dev)->dev_unreg_count++; 5582 dev_net(dev)->dev_unreg_count++;
5582 } 5583 }
5583 5584
5584 static void rollback_registered_many(struct list_head *head) 5585 static void rollback_registered_many(struct list_head *head)
5585 { 5586 {
5586 struct net_device *dev, *tmp; 5587 struct net_device *dev, *tmp;
5587 LIST_HEAD(close_head); 5588 LIST_HEAD(close_head);
5588 5589
5589 BUG_ON(dev_boot_phase); 5590 BUG_ON(dev_boot_phase);
5590 ASSERT_RTNL(); 5591 ASSERT_RTNL();
5591 5592
5592 list_for_each_entry_safe(dev, tmp, head, unreg_list) { 5593 list_for_each_entry_safe(dev, tmp, head, unreg_list) {
5593 /* Some devices call without registering 5594 /* Some devices call without registering
5594 * for initialization unwind. Remove those 5595 * for initialization unwind. Remove those
5595 * devices and proceed with the remaining. 5596 * devices and proceed with the remaining.
5596 */ 5597 */
5597 if (dev->reg_state == NETREG_UNINITIALIZED) { 5598 if (dev->reg_state == NETREG_UNINITIALIZED) {
5598 pr_debug("unregister_netdevice: device %s/%p never was registered\n", 5599 pr_debug("unregister_netdevice: device %s/%p never was registered\n",
5599 dev->name, dev); 5600 dev->name, dev);
5600 5601
5601 WARN_ON(1); 5602 WARN_ON(1);
5602 list_del(&dev->unreg_list); 5603 list_del(&dev->unreg_list);
5603 continue; 5604 continue;
5604 } 5605 }
5605 dev->dismantle = true; 5606 dev->dismantle = true;
5606 BUG_ON(dev->reg_state != NETREG_REGISTERED); 5607 BUG_ON(dev->reg_state != NETREG_REGISTERED);
5607 } 5608 }
5608 5609
5609 /* If device is running, close it first. */ 5610 /* If device is running, close it first. */
5610 list_for_each_entry(dev, head, unreg_list) 5611 list_for_each_entry(dev, head, unreg_list)
5611 list_add_tail(&dev->close_list, &close_head); 5612 list_add_tail(&dev->close_list, &close_head);
5612 dev_close_many(&close_head); 5613 dev_close_many(&close_head);
5613 5614
5614 list_for_each_entry(dev, head, unreg_list) { 5615 list_for_each_entry(dev, head, unreg_list) {
5615 /* And unlink it from device chain. */ 5616 /* And unlink it from device chain. */
5616 unlist_netdevice(dev); 5617 unlist_netdevice(dev);
5617 5618
5618 dev->reg_state = NETREG_UNREGISTERING; 5619 dev->reg_state = NETREG_UNREGISTERING;
5619 } 5620 }
5620 5621
5621 synchronize_net(); 5622 synchronize_net();
5622 5623
5623 list_for_each_entry(dev, head, unreg_list) { 5624 list_for_each_entry(dev, head, unreg_list) {
5624 /* Shutdown queueing discipline. */ 5625 /* Shutdown queueing discipline. */
5625 dev_shutdown(dev); 5626 dev_shutdown(dev);
5626 5627
5627 5628
5628 /* Notify protocols, that we are about to destroy 5629 /* Notify protocols, that we are about to destroy
5629 this device. They should clean all the things. 5630 this device. They should clean all the things.
5630 */ 5631 */
5631 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 5632 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5632 5633
5633 if (!dev->rtnl_link_ops || 5634 if (!dev->rtnl_link_ops ||
5634 dev->rtnl_link_state == RTNL_LINK_INITIALIZED) 5635 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5635 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL); 5636 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
5636 5637
5637 /* 5638 /*
5638 * Flush the unicast and multicast chains 5639 * Flush the unicast and multicast chains
5639 */ 5640 */
5640 dev_uc_flush(dev); 5641 dev_uc_flush(dev);
5641 dev_mc_flush(dev); 5642 dev_mc_flush(dev);
5642 5643
5643 if (dev->netdev_ops->ndo_uninit) 5644 if (dev->netdev_ops->ndo_uninit)
5644 dev->netdev_ops->ndo_uninit(dev); 5645 dev->netdev_ops->ndo_uninit(dev);
5645 5646
5646 /* Notifier chain MUST detach us all upper devices. */ 5647 /* Notifier chain MUST detach us all upper devices. */
5647 WARN_ON(netdev_has_any_upper_dev(dev)); 5648 WARN_ON(netdev_has_any_upper_dev(dev));
5648 5649
5649 /* Remove entries from kobject tree */ 5650 /* Remove entries from kobject tree */
5650 netdev_unregister_kobject(dev); 5651 netdev_unregister_kobject(dev);
5651 #ifdef CONFIG_XPS 5652 #ifdef CONFIG_XPS
5652 /* Remove XPS queueing entries */ 5653 /* Remove XPS queueing entries */
5653 netif_reset_xps_queues_gt(dev, 0); 5654 netif_reset_xps_queues_gt(dev, 0);
5654 #endif 5655 #endif
5655 } 5656 }
5656 5657
5657 synchronize_net(); 5658 synchronize_net();
5658 5659
5659 list_for_each_entry(dev, head, unreg_list) 5660 list_for_each_entry(dev, head, unreg_list)
5660 dev_put(dev); 5661 dev_put(dev);
5661 } 5662 }
5662 5663
5663 static void rollback_registered(struct net_device *dev) 5664 static void rollback_registered(struct net_device *dev)
5664 { 5665 {
5665 LIST_HEAD(single); 5666 LIST_HEAD(single);
5666 5667
5667 list_add(&dev->unreg_list, &single); 5668 list_add(&dev->unreg_list, &single);
5668 rollback_registered_many(&single); 5669 rollback_registered_many(&single);
5669 list_del(&single); 5670 list_del(&single);
5670 } 5671 }
5671 5672
5672 static netdev_features_t netdev_fix_features(struct net_device *dev, 5673 static netdev_features_t netdev_fix_features(struct net_device *dev,
5673 netdev_features_t features) 5674 netdev_features_t features)
5674 { 5675 {
5675 /* Fix illegal checksum combinations */ 5676 /* Fix illegal checksum combinations */
5676 if ((features & NETIF_F_HW_CSUM) && 5677 if ((features & NETIF_F_HW_CSUM) &&
5677 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { 5678 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5678 netdev_warn(dev, "mixed HW and IP checksum settings.\n"); 5679 netdev_warn(dev, "mixed HW and IP checksum settings.\n");
5679 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); 5680 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
5680 } 5681 }
5681 5682
5682 /* TSO requires that SG is present as well. */ 5683 /* TSO requires that SG is present as well. */
5683 if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) { 5684 if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
5684 netdev_dbg(dev, "Dropping TSO features since no SG feature.\n"); 5685 netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
5685 features &= ~NETIF_F_ALL_TSO; 5686 features &= ~NETIF_F_ALL_TSO;
5686 } 5687 }
5687 5688
5688 if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) && 5689 if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) &&
5689 !(features & NETIF_F_IP_CSUM)) { 5690 !(features & NETIF_F_IP_CSUM)) {
5690 netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n"); 5691 netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n");
5691 features &= ~NETIF_F_TSO; 5692 features &= ~NETIF_F_TSO;
5692 features &= ~NETIF_F_TSO_ECN; 5693 features &= ~NETIF_F_TSO_ECN;
5693 } 5694 }
5694 5695
5695 if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) && 5696 if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) &&
5696 !(features & NETIF_F_IPV6_CSUM)) { 5697 !(features & NETIF_F_IPV6_CSUM)) {
5697 netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n"); 5698 netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n");
5698 features &= ~NETIF_F_TSO6; 5699 features &= ~NETIF_F_TSO6;
5699 } 5700 }
5700 5701
5701 /* TSO ECN requires that TSO is present as well. */ 5702 /* TSO ECN requires that TSO is present as well. */
5702 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN) 5703 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
5703 features &= ~NETIF_F_TSO_ECN; 5704 features &= ~NETIF_F_TSO_ECN;
5704 5705
5705 /* Software GSO depends on SG. */ 5706 /* Software GSO depends on SG. */
5706 if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) { 5707 if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
5707 netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n"); 5708 netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
5708 features &= ~NETIF_F_GSO; 5709 features &= ~NETIF_F_GSO;
5709 } 5710 }
5710 5711
5711 /* UFO needs SG and checksumming */ 5712 /* UFO needs SG and checksumming */
5712 if (features & NETIF_F_UFO) { 5713 if (features & NETIF_F_UFO) {
5713 /* maybe split UFO into V4 and V6? */ 5714 /* maybe split UFO into V4 and V6? */
5714 if (!((features & NETIF_F_GEN_CSUM) || 5715 if (!((features & NETIF_F_GEN_CSUM) ||
5715 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) 5716 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
5716 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { 5717 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5717 netdev_dbg(dev, 5718 netdev_dbg(dev,
5718 "Dropping NETIF_F_UFO since no checksum offload features.\n"); 5719 "Dropping NETIF_F_UFO since no checksum offload features.\n");
5719 features &= ~NETIF_F_UFO; 5720 features &= ~NETIF_F_UFO;
5720 } 5721 }
5721 5722
5722 if (!(features & NETIF_F_SG)) { 5723 if (!(features & NETIF_F_SG)) {
5723 netdev_dbg(dev, 5724 netdev_dbg(dev,
5724 "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n"); 5725 "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
5725 features &= ~NETIF_F_UFO; 5726 features &= ~NETIF_F_UFO;
5726 } 5727 }
5727 } 5728 }
5728 5729
5729 return features; 5730 return features;
5730 } 5731 }
5731 5732
5732 int __netdev_update_features(struct net_device *dev) 5733 int __netdev_update_features(struct net_device *dev)
5733 { 5734 {
5734 netdev_features_t features; 5735 netdev_features_t features;
5735 int err = 0; 5736 int err = 0;
5736 5737
5737 ASSERT_RTNL(); 5738 ASSERT_RTNL();
5738 5739
5739 features = netdev_get_wanted_features(dev); 5740 features = netdev_get_wanted_features(dev);
5740 5741
5741 if (dev->netdev_ops->ndo_fix_features) 5742 if (dev->netdev_ops->ndo_fix_features)
5742 features = dev->netdev_ops->ndo_fix_features(dev, features); 5743 features = dev->netdev_ops->ndo_fix_features(dev, features);
5743 5744
5744 /* driver might be less strict about feature dependencies */ 5745 /* driver might be less strict about feature dependencies */
5745 features = netdev_fix_features(dev, features); 5746 features = netdev_fix_features(dev, features);
5746 5747
5747 if (dev->features == features) 5748 if (dev->features == features)
5748 return 0; 5749 return 0;
5749 5750
5750 netdev_dbg(dev, "Features changed: %pNF -> %pNF\n", 5751 netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
5751 &dev->features, &features); 5752 &dev->features, &features);
5752 5753
5753 if (dev->netdev_ops->ndo_set_features) 5754 if (dev->netdev_ops->ndo_set_features)
5754 err = dev->netdev_ops->ndo_set_features(dev, features); 5755 err = dev->netdev_ops->ndo_set_features(dev, features);
5755 5756
5756 if (unlikely(err < 0)) { 5757 if (unlikely(err < 0)) {
5757 netdev_err(dev, 5758 netdev_err(dev,
5758 "set_features() failed (%d); wanted %pNF, left %pNF\n", 5759 "set_features() failed (%d); wanted %pNF, left %pNF\n",
5759 err, &features, &dev->features); 5760 err, &features, &dev->features);
5760 return -1; 5761 return -1;
5761 } 5762 }
5762 5763
5763 if (!err) 5764 if (!err)
5764 dev->features = features; 5765 dev->features = features;
5765 5766
5766 return 1; 5767 return 1;
5767 } 5768 }
5768 5769
5769 /** 5770 /**
5770 * netdev_update_features - recalculate device features 5771 * netdev_update_features - recalculate device features
5771 * @dev: the device to check 5772 * @dev: the device to check
5772 * 5773 *
5773 * Recalculate dev->features set and send notifications if it 5774 * Recalculate dev->features set and send notifications if it
5774 * has changed. Should be called after driver or hardware dependent 5775 * has changed. Should be called after driver or hardware dependent
5775 * conditions might have changed that influence the features. 5776 * conditions might have changed that influence the features.
5776 */ 5777 */
5777 void netdev_update_features(struct net_device *dev) 5778 void netdev_update_features(struct net_device *dev)
5778 { 5779 {
5779 if (__netdev_update_features(dev)) 5780 if (__netdev_update_features(dev))
5780 netdev_features_change(dev); 5781 netdev_features_change(dev);
5781 } 5782 }
5782 EXPORT_SYMBOL(netdev_update_features); 5783 EXPORT_SYMBOL(netdev_update_features);
5783 5784
5784 /** 5785 /**
5785 * netdev_change_features - recalculate device features 5786 * netdev_change_features - recalculate device features
5786 * @dev: the device to check 5787 * @dev: the device to check
5787 * 5788 *
5788 * Recalculate dev->features set and send notifications even 5789 * Recalculate dev->features set and send notifications even
5789 * if they have not changed. Should be called instead of 5790 * if they have not changed. Should be called instead of
5790 * netdev_update_features() if also dev->vlan_features might 5791 * netdev_update_features() if also dev->vlan_features might
5791 * have changed to allow the changes to be propagated to stacked 5792 * have changed to allow the changes to be propagated to stacked
5792 * VLAN devices. 5793 * VLAN devices.
5793 */ 5794 */
5794 void netdev_change_features(struct net_device *dev) 5795 void netdev_change_features(struct net_device *dev)
5795 { 5796 {
5796 __netdev_update_features(dev); 5797 __netdev_update_features(dev);
5797 netdev_features_change(dev); 5798 netdev_features_change(dev);
5798 } 5799 }
5799 EXPORT_SYMBOL(netdev_change_features); 5800 EXPORT_SYMBOL(netdev_change_features);
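A minimal sketch (hypothetical driver): when a hardware or firmware condition invalidates an offload, clear it from hw_features and let the core recompute dev->features; use netdev_change_features() instead if vlan_features changed too, so stacked VLAN devices are refreshed even when dev->features itself did not change.

static void mydrv_fw_lost_csum(struct net_device *dev)
{
        rtnl_lock();
        dev->hw_features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
        netdev_update_features(dev);    /* re-runs the fix-ups above, notifies if changed */
        rtnl_unlock();
}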
5800 5801
5801 /** 5802 /**
5802 * netif_stacked_transfer_operstate - transfer operstate 5803 * netif_stacked_transfer_operstate - transfer operstate
5803 * @rootdev: the root or lower level device to transfer state from 5804 * @rootdev: the root or lower level device to transfer state from
5804 * @dev: the device to transfer operstate to 5805 * @dev: the device to transfer operstate to
5805 * 5806 *
5806 * Transfer operational state from root to device. This is normally 5807 * Transfer operational state from root to device. This is normally
5807 * called when a stacking relationship exists between the root 5808 * called when a stacking relationship exists between the root
5808 * device and the device(a leaf device). 5809 * device and the device(a leaf device).
5809 */ 5810 */
5810 void netif_stacked_transfer_operstate(const struct net_device *rootdev, 5811 void netif_stacked_transfer_operstate(const struct net_device *rootdev,
5811 struct net_device *dev) 5812 struct net_device *dev)
5812 { 5813 {
5813 if (rootdev->operstate == IF_OPER_DORMANT) 5814 if (rootdev->operstate == IF_OPER_DORMANT)
5814 netif_dormant_on(dev); 5815 netif_dormant_on(dev);
5815 else 5816 else
5816 netif_dormant_off(dev); 5817 netif_dormant_off(dev);
5817 5818
5818 if (netif_carrier_ok(rootdev)) { 5819 if (netif_carrier_ok(rootdev)) {
5819 if (!netif_carrier_ok(dev)) 5820 if (!netif_carrier_ok(dev))
5820 netif_carrier_on(dev); 5821 netif_carrier_on(dev);
5821 } else { 5822 } else {
5822 if (netif_carrier_ok(dev)) 5823 if (netif_carrier_ok(dev))
5823 netif_carrier_off(dev); 5824 netif_carrier_off(dev);
5824 } 5825 }
5825 } 5826 }
5826 EXPORT_SYMBOL(netif_stacked_transfer_operstate); 5827 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
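A minimal sketch: a VLAN-like driver mirrors its lower device's state from its netdevice notifier; mydrv_upper_of() is a hypothetical lookup from the lower device to the stacked device built on top of it.

static int mydrv_netdev_event(struct notifier_block *nb,
                              unsigned long event, void *ptr)
{
        struct net_device *lower = netdev_notifier_info_to_dev(ptr);
        struct net_device *upper = mydrv_upper_of(lower);       /* hypothetical */

        if (upper && (event == NETDEV_UP || event == NETDEV_CHANGE))
                netif_stacked_transfer_operstate(lower, upper);

        return NOTIFY_DONE;
}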
5827 5828
5828 #ifdef CONFIG_SYSFS 5829 #ifdef CONFIG_SYSFS
5829 static int netif_alloc_rx_queues(struct net_device *dev) 5830 static int netif_alloc_rx_queues(struct net_device *dev)
5830 { 5831 {
5831 unsigned int i, count = dev->num_rx_queues; 5832 unsigned int i, count = dev->num_rx_queues;
5832 struct netdev_rx_queue *rx; 5833 struct netdev_rx_queue *rx;
5833 5834
5834 BUG_ON(count < 1); 5835 BUG_ON(count < 1);
5835 5836
5836 rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL); 5837 rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
5837 if (!rx) 5838 if (!rx)
5838 return -ENOMEM; 5839 return -ENOMEM;
5839 5840
5840 dev->_rx = rx; 5841 dev->_rx = rx;
5841 5842
5842 for (i = 0; i < count; i++) 5843 for (i = 0; i < count; i++)
5843 rx[i].dev = dev; 5844 rx[i].dev = dev;
5844 return 0; 5845 return 0;
5845 } 5846 }
5846 #endif 5847 #endif
5847 5848
5848 static void netdev_init_one_queue(struct net_device *dev, 5849 static void netdev_init_one_queue(struct net_device *dev,
5849 struct netdev_queue *queue, void *_unused) 5850 struct netdev_queue *queue, void *_unused)
5850 { 5851 {
5851 /* Initialize queue lock */ 5852 /* Initialize queue lock */
5852 spin_lock_init(&queue->_xmit_lock); 5853 spin_lock_init(&queue->_xmit_lock);
5853 netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); 5854 netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
5854 queue->xmit_lock_owner = -1; 5855 queue->xmit_lock_owner = -1;
5855 netdev_queue_numa_node_write(queue, NUMA_NO_NODE); 5856 netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
5856 queue->dev = dev; 5857 queue->dev = dev;
5857 #ifdef CONFIG_BQL 5858 #ifdef CONFIG_BQL
5858 dql_init(&queue->dql, HZ); 5859 dql_init(&queue->dql, HZ);
5859 #endif 5860 #endif
5860 } 5861 }
5861 5862
5862 static void netif_free_tx_queues(struct net_device *dev) 5863 static void netif_free_tx_queues(struct net_device *dev)
5863 { 5864 {
5864 if (is_vmalloc_addr(dev->_tx)) 5865 if (is_vmalloc_addr(dev->_tx))
5865 vfree(dev->_tx); 5866 vfree(dev->_tx);
5866 else 5867 else
5867 kfree(dev->_tx); 5868 kfree(dev->_tx);
5868 } 5869 }
5869 5870
5870 static int netif_alloc_netdev_queues(struct net_device *dev) 5871 static int netif_alloc_netdev_queues(struct net_device *dev)
5871 { 5872 {
5872 unsigned int count = dev->num_tx_queues; 5873 unsigned int count = dev->num_tx_queues;
5873 struct netdev_queue *tx; 5874 struct netdev_queue *tx;
5874 size_t sz = count * sizeof(*tx); 5875 size_t sz = count * sizeof(*tx);
5875 5876
5876 BUG_ON(count < 1 || count > 0xffff); 5877 BUG_ON(count < 1 || count > 0xffff);
5877 5878
5878 tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); 5879 tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
5879 if (!tx) { 5880 if (!tx) {
5880 tx = vzalloc(sz); 5881 tx = vzalloc(sz);
5881 if (!tx) 5882 if (!tx)
5882 return -ENOMEM; 5883 return -ENOMEM;
5883 } 5884 }
5884 dev->_tx = tx; 5885 dev->_tx = tx;
5885 5886
5886 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); 5887 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
5887 spin_lock_init(&dev->tx_global_lock); 5888 spin_lock_init(&dev->tx_global_lock);
5888 5889
5889 return 0; 5890 return 0;
5890 } 5891 }
5891 5892
5892 /** 5893 /**
5893 * register_netdevice - register a network device 5894 * register_netdevice - register a network device
5894 * @dev: device to register 5895 * @dev: device to register
5895 * 5896 *
5896 * Take a completed network device structure and add it to the kernel 5897 * Take a completed network device structure and add it to the kernel
5897 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier 5898 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
5898 * chain. 0 is returned on success. A negative errno code is returned 5899 * chain. 0 is returned on success. A negative errno code is returned
5899 * on a failure to set up the device, or if the name is a duplicate. 5900 * on a failure to set up the device, or if the name is a duplicate.
5900 * 5901 *
5901 * Callers must hold the rtnl semaphore. You may want 5902 * Callers must hold the rtnl semaphore. You may want
5902 * register_netdev() instead of this. 5903 * register_netdev() instead of this.
5903 * 5904 *
5904 * BUGS: 5905 * BUGS:
5905 * The locking appears insufficient to guarantee two parallel registers 5906 * The locking appears insufficient to guarantee two parallel registers
5906 * will not get the same name. 5907 * will not get the same name.
5907 */ 5908 */
5908 5909
5909 int register_netdevice(struct net_device *dev) 5910 int register_netdevice(struct net_device *dev)
5910 { 5911 {
5911 int ret; 5912 int ret;
5912 struct net *net = dev_net(dev); 5913 struct net *net = dev_net(dev);
5913 5914
5914 BUG_ON(dev_boot_phase); 5915 BUG_ON(dev_boot_phase);
5915 ASSERT_RTNL(); 5916 ASSERT_RTNL();
5916 5917
5917 might_sleep(); 5918 might_sleep();
5918 5919
5919 /* When net_device's are persistent, this will be fatal. */ 5920 /* When net_device's are persistent, this will be fatal. */
5920 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); 5921 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
5921 BUG_ON(!net); 5922 BUG_ON(!net);
5922 5923
5923 spin_lock_init(&dev->addr_list_lock); 5924 spin_lock_init(&dev->addr_list_lock);
5924 netdev_set_addr_lockdep_class(dev); 5925 netdev_set_addr_lockdep_class(dev);
5925 5926
5926 dev->iflink = -1; 5927 dev->iflink = -1;
5927 5928
5928 ret = dev_get_valid_name(net, dev, dev->name); 5929 ret = dev_get_valid_name(net, dev, dev->name);
5929 if (ret < 0) 5930 if (ret < 0)
5930 goto out; 5931 goto out;
5931 5932
5932 /* Init, if this function is available */ 5933 /* Init, if this function is available */
5933 if (dev->netdev_ops->ndo_init) { 5934 if (dev->netdev_ops->ndo_init) {
5934 ret = dev->netdev_ops->ndo_init(dev); 5935 ret = dev->netdev_ops->ndo_init(dev);
5935 if (ret) { 5936 if (ret) {
5936 if (ret > 0) 5937 if (ret > 0)
5937 ret = -EIO; 5938 ret = -EIO;
5938 goto out; 5939 goto out;
5939 } 5940 }
5940 } 5941 }
5941 5942
5942 if (((dev->hw_features | dev->features) & 5943 if (((dev->hw_features | dev->features) &
5943 NETIF_F_HW_VLAN_CTAG_FILTER) && 5944 NETIF_F_HW_VLAN_CTAG_FILTER) &&
5944 (!dev->netdev_ops->ndo_vlan_rx_add_vid || 5945 (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
5945 !dev->netdev_ops->ndo_vlan_rx_kill_vid)) { 5946 !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
5946 netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n"); 5947 netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
5947 ret = -EINVAL; 5948 ret = -EINVAL;
5948 goto err_uninit; 5949 goto err_uninit;
5949 } 5950 }
5950 5951
5951 ret = -EBUSY; 5952 ret = -EBUSY;
5952 if (!dev->ifindex) 5953 if (!dev->ifindex)
5953 dev->ifindex = dev_new_index(net); 5954 dev->ifindex = dev_new_index(net);
5954 else if (__dev_get_by_index(net, dev->ifindex)) 5955 else if (__dev_get_by_index(net, dev->ifindex))
5955 goto err_uninit; 5956 goto err_uninit;
5956 5957
5957 if (dev->iflink == -1) 5958 if (dev->iflink == -1)
5958 dev->iflink = dev->ifindex; 5959 dev->iflink = dev->ifindex;
5959 5960
5960 /* Transfer changeable features to wanted_features and enable 5961 /* Transfer changeable features to wanted_features and enable
5961 * software offloads (GSO and GRO). 5962 * software offloads (GSO and GRO).
5962 */ 5963 */
5963 dev->hw_features |= NETIF_F_SOFT_FEATURES; 5964 dev->hw_features |= NETIF_F_SOFT_FEATURES;
5964 dev->features |= NETIF_F_SOFT_FEATURES; 5965 dev->features |= NETIF_F_SOFT_FEATURES;
5965 dev->wanted_features = dev->features & dev->hw_features; 5966 dev->wanted_features = dev->features & dev->hw_features;
5966 5967
5967 if (!(dev->flags & IFF_LOOPBACK)) { 5968 if (!(dev->flags & IFF_LOOPBACK)) {
5968 dev->hw_features |= NETIF_F_NOCACHE_COPY; 5969 dev->hw_features |= NETIF_F_NOCACHE_COPY;
5969 } 5970 }
5970 5971
5971 /* Make NETIF_F_HIGHDMA inheritable to VLAN devices. 5972 /* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
5972 */ 5973 */
5973 dev->vlan_features |= NETIF_F_HIGHDMA; 5974 dev->vlan_features |= NETIF_F_HIGHDMA;
5974 5975
5975 /* Make NETIF_F_SG inheritable to tunnel devices. 5976 /* Make NETIF_F_SG inheritable to tunnel devices.
5976 */ 5977 */
5977 dev->hw_enc_features |= NETIF_F_SG; 5978 dev->hw_enc_features |= NETIF_F_SG;
5978 5979
5979 /* Make NETIF_F_SG inheritable to MPLS. 5980 /* Make NETIF_F_SG inheritable to MPLS.
5980 */ 5981 */
5981 dev->mpls_features |= NETIF_F_SG; 5982 dev->mpls_features |= NETIF_F_SG;
5982 5983
5983 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); 5984 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5984 ret = notifier_to_errno(ret); 5985 ret = notifier_to_errno(ret);
5985 if (ret) 5986 if (ret)
5986 goto err_uninit; 5987 goto err_uninit;
5987 5988
5988 ret = netdev_register_kobject(dev); 5989 ret = netdev_register_kobject(dev);
5989 if (ret) 5990 if (ret)
5990 goto err_uninit; 5991 goto err_uninit;
5991 dev->reg_state = NETREG_REGISTERED; 5992 dev->reg_state = NETREG_REGISTERED;
5992 5993
5993 __netdev_update_features(dev); 5994 __netdev_update_features(dev);
5994 5995
5995 /* 5996 /*
5996 * Default initial state at registration is that the 5997 * Default initial state at registration is that the
5997 * device is present. 5998 * device is present.
5998 */ 5999 */
5999 6000
6000 set_bit(__LINK_STATE_PRESENT, &dev->state); 6001 set_bit(__LINK_STATE_PRESENT, &dev->state);
6001 6002
6002 linkwatch_init_dev(dev); 6003 linkwatch_init_dev(dev);
6003 6004
6004 dev_init_scheduler(dev); 6005 dev_init_scheduler(dev);
6005 dev_hold(dev); 6006 dev_hold(dev);
6006 list_netdevice(dev); 6007 list_netdevice(dev);
6007 add_device_randomness(dev->dev_addr, dev->addr_len); 6008 add_device_randomness(dev->dev_addr, dev->addr_len);
6008 6009
6009 /* If the device has a permanent device address, the driver should 6010 /* If the device has a permanent device address, the driver should
6010 * set dev_addr, and addr_assign_type should be set to 6011 * set dev_addr, and addr_assign_type should be set to
6011 * NET_ADDR_PERM (default value). 6012 * NET_ADDR_PERM (default value).
6012 */ 6013 */
6013 if (dev->addr_assign_type == NET_ADDR_PERM) 6014 if (dev->addr_assign_type == NET_ADDR_PERM)
6014 memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); 6015 memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
6015 6016
6016 /* Notify protocols that a new device appeared. */ 6017 /* Notify protocols that a new device appeared. */
6017 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); 6018 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
6018 ret = notifier_to_errno(ret); 6019 ret = notifier_to_errno(ret);
6019 if (ret) { 6020 if (ret) {
6020 rollback_registered(dev); 6021 rollback_registered(dev);
6021 dev->reg_state = NETREG_UNREGISTERED; 6022 dev->reg_state = NETREG_UNREGISTERED;
6022 } 6023 }
6023 /* 6024 /*
6024 * Prevent userspace races by waiting until the network 6025 * Prevent userspace races by waiting until the network
6025 * device is fully setup before sending notifications. 6026 * device is fully setup before sending notifications.
6026 */ 6027 */
6027 if (!dev->rtnl_link_ops || 6028 if (!dev->rtnl_link_ops ||
6028 dev->rtnl_link_state == RTNL_LINK_INITIALIZED) 6029 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
6029 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL); 6030 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
6030 6031
6031 out: 6032 out:
6032 return ret; 6033 return ret;
6033 6034
6034 err_uninit: 6035 err_uninit:
6035 if (dev->netdev_ops->ndo_uninit) 6036 if (dev->netdev_ops->ndo_uninit)
6036 dev->netdev_ops->ndo_uninit(dev); 6037 dev->netdev_ops->ndo_uninit(dev);
6037 goto out; 6038 goto out;
6038 } 6039 }
6039 EXPORT_SYMBOL(register_netdevice); 6040 EXPORT_SYMBOL(register_netdevice);
6040 6041
6041 /** 6042 /**
6042 * init_dummy_netdev - init a dummy network device for NAPI 6043 * init_dummy_netdev - init a dummy network device for NAPI
6043 * @dev: device to init 6044 * @dev: device to init
6044 * 6045 *
6045 * This takes a network device structure and initializes the minimum 6046 * This takes a network device structure and initializes the minimum
6046 * number of fields so it can be used to schedule NAPI polls without 6047 * number of fields so it can be used to schedule NAPI polls without
6047 * registering a full blown interface. This is to be used by drivers 6048 * registering a full blown interface. This is to be used by drivers
6048 * that need to tie several hardware interfaces to a single NAPI 6049 * that need to tie several hardware interfaces to a single NAPI
6049 * poll scheduler due to HW limitations. 6050 * poll scheduler due to HW limitations.
6050 */ 6051 */
6051 int init_dummy_netdev(struct net_device *dev) 6052 int init_dummy_netdev(struct net_device *dev)
6052 { 6053 {
6053 /* Clear everything. Note we don't initialize spinlocks 6054 /* Clear everything. Note we don't initialize spinlocks
6054 * as they aren't supposed to be taken by any of the 6055 * as they aren't supposed to be taken by any of the
6055 * NAPI code and this dummy netdev is supposed to be 6056 * NAPI code and this dummy netdev is supposed to be
6056 * only ever used for NAPI polls 6057 * only ever used for NAPI polls
6057 */ 6058 */
6058 memset(dev, 0, sizeof(struct net_device)); 6059 memset(dev, 0, sizeof(struct net_device));
6059 6060
6060 /* make sure we BUG if trying to hit standard 6061 /* make sure we BUG if trying to hit standard
6061 * register/unregister code path 6062 * register/unregister code path
6062 */ 6063 */
6063 dev->reg_state = NETREG_DUMMY; 6064 dev->reg_state = NETREG_DUMMY;
6064 6065
6065 /* NAPI wants this */ 6066 /* NAPI wants this */
6066 INIT_LIST_HEAD(&dev->napi_list); 6067 INIT_LIST_HEAD(&dev->napi_list);
6067 6068
6068 /* a dummy interface is started by default */ 6069 /* a dummy interface is started by default */
6069 set_bit(__LINK_STATE_PRESENT, &dev->state); 6070 set_bit(__LINK_STATE_PRESENT, &dev->state);
6070 set_bit(__LINK_STATE_START, &dev->state); 6071 set_bit(__LINK_STATE_START, &dev->state);
6071 6072
6072 /* Note: We don't allocate pcpu_refcnt for dummy devices, 6073 /* Note: We don't allocate pcpu_refcnt for dummy devices,
6073 * because users of this 'device' don't need to change 6074 * because users of this 'device' don't need to change
6074 * its refcount. 6075 * its refcount.
6075 */ 6076 */
6076 6077
6077 return 0; 6078 return 0;
6078 } 6079 }
6079 EXPORT_SYMBOL_GPL(init_dummy_netdev); 6080 EXPORT_SYMBOL_GPL(init_dummy_netdev);
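
As a usage sketch of init_dummy_netdev() (hypothetical names, not from this commit): a driver that multiplexes several hardware ports into one poll loop can embed a never-registered dummy netdev purely to anchor its napi_struct:

#include <linux/netdevice.h>

/* Hypothetical driver state; not part of net/core/dev.c. */
struct my_hw {
        struct net_device napi_dev;     /* dummy device, never registered */
        struct napi_struct napi;
};

static int my_poll(struct napi_struct *napi, int budget)
{
        int work = 0;

        /* ... process up to @budget packets across all ports ... */
        if (work < budget)
                napi_complete(napi);
        return work;
}

static void my_hw_init_napi(struct my_hw *hw)
{
        init_dummy_netdev(&hw->napi_dev);
        netif_napi_add(&hw->napi_dev, &hw->napi, my_poll, 64);
        napi_enable(&hw->napi);
}
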
6080 6081
6081 6082
6082 /** 6083 /**
6083 * register_netdev - register a network device 6084 * register_netdev - register a network device
6084 * @dev: device to register 6085 * @dev: device to register
6085 * 6086 *
6086 * Take a completed network device structure and add it to the kernel 6087 * Take a completed network device structure and add it to the kernel
6087 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier 6088 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
6088 * chain. 0 is returned on success. A negative errno code is returned 6089 * chain. 0 is returned on success. A negative errno code is returned
6089 * on a failure to set up the device, or if the name is a duplicate. 6090 * on a failure to set up the device, or if the name is a duplicate.
6090 * 6091 *
6091 * This is a wrapper around register_netdevice that takes the rtnl semaphore 6092 * This is a wrapper around register_netdevice that takes the rtnl semaphore
6092 * and expands the device name if you passed a format string to 6093 * and expands the device name if you passed a format string to
6093 * alloc_netdev. 6094 * alloc_netdev.
6094 */ 6095 */
6095 int register_netdev(struct net_device *dev) 6096 int register_netdev(struct net_device *dev)
6096 { 6097 {
6097 int err; 6098 int err;
6098 6099
6099 rtnl_lock(); 6100 rtnl_lock();
6100 err = register_netdevice(dev); 6101 err = register_netdevice(dev);
6101 rtnl_unlock(); 6102 rtnl_unlock();
6102 return err; 6103 return err;
6103 } 6104 }
6104 EXPORT_SYMBOL(register_netdev); 6105 EXPORT_SYMBOL(register_netdev);
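
A minimal driver-side sketch of the registration API exported above, assuming the three-argument alloc_netdev() of this tree (pre-3.17); my_setup()/my_probe()/my_remove() are hypothetical and error handling is reduced to the essentials:

#include <linux/netdevice.h>
#include <linux/etherdevice.h>

/* Hypothetical driver skeleton; only the calls into the core are real. */
static struct net_device *my_netdev;

static void my_setup(struct net_device *dev)
{
        ether_setup(dev);               /* Ethernet-style defaults */
        /* dev->netdev_ops = &my_netdev_ops;   driver-specific ops */
}

static int my_probe(void)
{
        int err;

        my_netdev = alloc_netdev(0, "my%d", my_setup);
        if (!my_netdev)
                return -ENOMEM;

        eth_hw_addr_random(my_netdev);

        err = register_netdev(my_netdev);       /* takes rtnl internally */
        if (err) {
                free_netdev(my_netdev);
                return err;
        }
        return 0;
}

static void my_remove(void)
{
        unregister_netdev(my_netdev);
        free_netdev(my_netdev);
}
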
6105 6106
6106 int netdev_refcnt_read(const struct net_device *dev) 6107 int netdev_refcnt_read(const struct net_device *dev)
6107 { 6108 {
6108 int i, refcnt = 0; 6109 int i, refcnt = 0;
6109 6110
6110 for_each_possible_cpu(i) 6111 for_each_possible_cpu(i)
6111 refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i); 6112 refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
6112 return refcnt; 6113 return refcnt;
6113 } 6114 }
6114 EXPORT_SYMBOL(netdev_refcnt_read); 6115 EXPORT_SYMBOL(netdev_refcnt_read);
6115 6116
6116 /** 6117 /**
6117 * netdev_wait_allrefs - wait until all references are gone. 6118 * netdev_wait_allrefs - wait until all references are gone.
6118 * @dev: target net_device 6119 * @dev: target net_device
6119 * 6120 *
6120 * This is called when unregistering network devices. 6121 * This is called when unregistering network devices.
6121 * 6122 *
6122 * Any protocol or device that holds a reference should register 6123 * Any protocol or device that holds a reference should register
6123 * for netdevice notification, and cleanup and put back the 6124 * for netdevice notification, and cleanup and put back the
6124 * reference if they receive an UNREGISTER event. 6125 * reference if they receive an UNREGISTER event.
6125 * We can get stuck here if buggy protocols don't correctly 6126 * We can get stuck here if buggy protocols don't correctly
6126 * call dev_put. 6127 * call dev_put.
6127 */ 6128 */
6128 static void netdev_wait_allrefs(struct net_device *dev) 6129 static void netdev_wait_allrefs(struct net_device *dev)
6129 { 6130 {
6130 unsigned long rebroadcast_time, warning_time; 6131 unsigned long rebroadcast_time, warning_time;
6131 int refcnt; 6132 int refcnt;
6132 6133
6133 linkwatch_forget_dev(dev); 6134 linkwatch_forget_dev(dev);
6134 6135
6135 rebroadcast_time = warning_time = jiffies; 6136 rebroadcast_time = warning_time = jiffies;
6136 refcnt = netdev_refcnt_read(dev); 6137 refcnt = netdev_refcnt_read(dev);
6137 6138
6138 while (refcnt != 0) { 6139 while (refcnt != 0) {
6139 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { 6140 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
6140 rtnl_lock(); 6141 rtnl_lock();
6141 6142
6142 /* Rebroadcast unregister notification */ 6143 /* Rebroadcast unregister notification */
6143 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 6144 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
6144 6145
6145 __rtnl_unlock(); 6146 __rtnl_unlock();
6146 rcu_barrier(); 6147 rcu_barrier();
6147 rtnl_lock(); 6148 rtnl_lock();
6148 6149
6149 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); 6150 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
6150 if (test_bit(__LINK_STATE_LINKWATCH_PENDING, 6151 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
6151 &dev->state)) { 6152 &dev->state)) {
6152 /* We must not have linkwatch events 6153 /* We must not have linkwatch events
6153 * pending on unregister. If this 6154 * pending on unregister. If this
6154 * happens, we simply run the queue 6155 * happens, we simply run the queue
6155 * unscheduled, resulting in a noop 6156 * unscheduled, resulting in a noop
6156 * for this device. 6157 * for this device.
6157 */ 6158 */
6158 linkwatch_run_queue(); 6159 linkwatch_run_queue();
6159 } 6160 }
6160 6161
6161 __rtnl_unlock(); 6162 __rtnl_unlock();
6162 6163
6163 rebroadcast_time = jiffies; 6164 rebroadcast_time = jiffies;
6164 } 6165 }
6165 6166
6166 msleep(250); 6167 msleep(250);
6167 6168
6168 refcnt = netdev_refcnt_read(dev); 6169 refcnt = netdev_refcnt_read(dev);
6169 6170
6170 if (time_after(jiffies, warning_time + 10 * HZ)) { 6171 if (time_after(jiffies, warning_time + 10 * HZ)) {
6171 pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n", 6172 pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
6172 dev->name, refcnt); 6173 dev->name, refcnt);
6173 warning_time = jiffies; 6174 warning_time = jiffies;
6174 } 6175 }
6175 } 6176 }
6176 } 6177 }
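
The contract netdev_wait_allrefs() relies on is spelled out in its kernel-doc: any subsystem holding a long-lived device reference should register a netdevice notifier and drop that reference on NETDEV_UNREGISTER, otherwise the loop above keeps printing the "waiting for %s to become free" warning. A hedged sketch of such a notifier (my_cached_dev and my_notifier are hypothetical):

#include <linux/netdevice.h>
#include <linux/notifier.h>

/* Hypothetical subsystem that caches one dev_hold()'d device. */
static struct net_device *my_cached_dev;

static int my_netdev_event(struct notifier_block *nb,
                           unsigned long event, void *ptr)
{
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);

        if (event == NETDEV_UNREGISTER && dev == my_cached_dev) {
                dev_put(my_cached_dev);         /* let the refcount reach 0 */
                my_cached_dev = NULL;
        }
        return NOTIFY_DONE;
}

static struct notifier_block my_notifier = {
        .notifier_call = my_netdev_event,
};

/* During init: register_netdevice_notifier(&my_notifier); */
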
6177 6178
6178 /* The sequence is: 6179 /* The sequence is:
6179 * 6180 *
6180 * rtnl_lock(); 6181 * rtnl_lock();
6181 * ... 6182 * ...
6182 * register_netdevice(x1); 6183 * register_netdevice(x1);
6183 * register_netdevice(x2); 6184 * register_netdevice(x2);
6184 * ... 6185 * ...
6185 * unregister_netdevice(y1); 6186 * unregister_netdevice(y1);
6186 * unregister_netdevice(y2); 6187 * unregister_netdevice(y2);
6187 * ... 6188 * ...
6188 * rtnl_unlock(); 6189 * rtnl_unlock();
6189 * free_netdev(y1); 6190 * free_netdev(y1);
6190 * free_netdev(y2); 6191 * free_netdev(y2);
6191 * 6192 *
6192 * We are invoked by rtnl_unlock(). 6193 * We are invoked by rtnl_unlock().
6193 * This allows us to deal with problems: 6194 * This allows us to deal with problems:
6194 * 1) We can delete sysfs objects which invoke hotplug 6195 * 1) We can delete sysfs objects which invoke hotplug
6195 * without deadlocking with linkwatch via keventd. 6196 * without deadlocking with linkwatch via keventd.
6196 * 2) Since we run with the RTNL semaphore not held, we can sleep 6197 * 2) Since we run with the RTNL semaphore not held, we can sleep
6197 * safely in order to wait for the netdev refcnt to drop to zero. 6198 * safely in order to wait for the netdev refcnt to drop to zero.
6198 * 6199 *
6199 * We must not return until all unregister events added during 6200 * We must not return until all unregister events added during
6200 * the interval the lock was held have been completed. 6201 * the interval the lock was held have been completed.
6201 */ 6202 */
6202 void netdev_run_todo(void) 6203 void netdev_run_todo(void)
6203 { 6204 {
6204 struct list_head list; 6205 struct list_head list;
6205 6206
6206 /* Snapshot list, allow later requests */ 6207 /* Snapshot list, allow later requests */
6207 list_replace_init(&net_todo_list, &list); 6208 list_replace_init(&net_todo_list, &list);
6208 6209
6209 __rtnl_unlock(); 6210 __rtnl_unlock();
6210 6211
6211 6212
6212 /* Wait for rcu callbacks to finish before next phase */ 6213 /* Wait for rcu callbacks to finish before next phase */
6213 if (!list_empty(&list)) 6214 if (!list_empty(&list))
6214 rcu_barrier(); 6215 rcu_barrier();
6215 6216
6216 while (!list_empty(&list)) { 6217 while (!list_empty(&list)) {
6217 struct net_device *dev 6218 struct net_device *dev
6218 = list_first_entry(&list, struct net_device, todo_list); 6219 = list_first_entry(&list, struct net_device, todo_list);
6219 list_del(&dev->todo_list); 6220 list_del(&dev->todo_list);
6220 6221
6221 rtnl_lock(); 6222 rtnl_lock();
6222 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); 6223 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
6223 __rtnl_unlock(); 6224 __rtnl_unlock();
6224 6225
6225 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { 6226 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
6226 pr_err("network todo '%s' but state %d\n", 6227 pr_err("network todo '%s' but state %d\n",
6227 dev->name, dev->reg_state); 6228 dev->name, dev->reg_state);
6228 dump_stack(); 6229 dump_stack();
6229 continue; 6230 continue;
6230 } 6231 }
6231 6232
6232 dev->reg_state = NETREG_UNREGISTERED; 6233 dev->reg_state = NETREG_UNREGISTERED;
6233 6234
6234 on_each_cpu(flush_backlog, dev, 1); 6235 on_each_cpu(flush_backlog, dev, 1);
6235 6236
6236 netdev_wait_allrefs(dev); 6237 netdev_wait_allrefs(dev);
6237 6238
6238 /* paranoia */ 6239 /* paranoia */
6239 BUG_ON(netdev_refcnt_read(dev)); 6240 BUG_ON(netdev_refcnt_read(dev));
6240 WARN_ON(rcu_access_pointer(dev->ip_ptr)); 6241 WARN_ON(rcu_access_pointer(dev->ip_ptr));
6241 WARN_ON(rcu_access_pointer(dev->ip6_ptr)); 6242 WARN_ON(rcu_access_pointer(dev->ip6_ptr));
6242 WARN_ON(dev->dn_ptr); 6243 WARN_ON(dev->dn_ptr);
6243 6244
6244 if (dev->destructor) 6245 if (dev->destructor)
6245 dev->destructor(dev); 6246 dev->destructor(dev);
6246 6247
6247 /* Report a network device has been unregistered */ 6248 /* Report a network device has been unregistered */
6248 rtnl_lock(); 6249 rtnl_lock();
6249 dev_net(dev)->dev_unreg_count--; 6250 dev_net(dev)->dev_unreg_count--;
6250 __rtnl_unlock(); 6251 __rtnl_unlock();
6251 wake_up(&netdev_unregistering_wq); 6252 wake_up(&netdev_unregistering_wq);
6252 6253
6253 /* Free network device */ 6254 /* Free network device */
6254 kobject_put(&dev->dev.kobj); 6255 kobject_put(&dev->dev.kobj);
6255 } 6256 }
6256 } 6257 }
6257 6258
6258 /* Convert net_device_stats to rtnl_link_stats64. They have the same 6259 /* Convert net_device_stats to rtnl_link_stats64. They have the same
6259 * fields in the same order, with only the type differing. 6260 * fields in the same order, with only the type differing.
6260 */ 6261 */
6261 void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, 6262 void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
6262 const struct net_device_stats *netdev_stats) 6263 const struct net_device_stats *netdev_stats)
6263 { 6264 {
6264 #if BITS_PER_LONG == 64 6265 #if BITS_PER_LONG == 64
6265 BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats)); 6266 BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
6266 memcpy(stats64, netdev_stats, sizeof(*stats64)); 6267 memcpy(stats64, netdev_stats, sizeof(*stats64));
6267 #else 6268 #else
6268 size_t i, n = sizeof(*stats64) / sizeof(u64); 6269 size_t i, n = sizeof(*stats64) / sizeof(u64);
6269 const unsigned long *src = (const unsigned long *)netdev_stats; 6270 const unsigned long *src = (const unsigned long *)netdev_stats;
6270 u64 *dst = (u64 *)stats64; 6271 u64 *dst = (u64 *)stats64;
6271 6272
6272 BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) != 6273 BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
6273 sizeof(*stats64) / sizeof(u64)); 6274 sizeof(*stats64) / sizeof(u64));
6274 for (i = 0; i < n; i++) 6275 for (i = 0; i < n; i++)
6275 dst[i] = src[i]; 6276 dst[i] = src[i];
6276 #endif 6277 #endif
6277 } 6278 }
6278 EXPORT_SYMBOL(netdev_stats_to_stats64); 6279 EXPORT_SYMBOL(netdev_stats_to_stats64);
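
netdev_stats_to_stats64() is typically called from a driver's ndo_get_stats64() to fold the legacy unsigned-long counters in dev->stats into the 64-bit structure before overlaying hardware counters. A sketch using the 3.x-era signature (which returns the storage pointer); my_get_stats64() and the hardware counter are hypothetical:

/* Hypothetical ndo_get_stats64() implementation. */
static struct rtnl_link_stats64 *my_get_stats64(struct net_device *dev,
                                                struct rtnl_link_stats64 *storage)
{
        /* Start from the legacy unsigned-long counters in dev->stats ... */
        netdev_stats_to_stats64(storage, &dev->stats);

        /* ... then overlay whatever the hardware keeps as full 64-bit
         * counters, e.g. storage->rx_bytes = my_hw_read_rx_bytes(dev);
         */
        return storage;
}
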
6279 6280
6280 /** 6281 /**
6281 * dev_get_stats - get network device statistics 6282 * dev_get_stats - get network device statistics
6282 * @dev: device to get statistics from 6283 * @dev: device to get statistics from
6283 * @storage: place to store stats 6284 * @storage: place to store stats
6284 * 6285 *
6285 * Get network statistics from device. Return @storage. 6286 * Get network statistics from device. Return @storage.
6286 * The device driver may provide its own method by setting 6287 * The device driver may provide its own method by setting
6287 * dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats; 6288 * dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats;
6288 * otherwise the internal statistics structure is used. 6289 * otherwise the internal statistics structure is used.
6289 */ 6290 */
6290 struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, 6291 struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
6291 struct rtnl_link_stats64 *storage) 6292 struct rtnl_link_stats64 *storage)
6292 { 6293 {
6293 const struct net_device_ops *ops = dev->netdev_ops; 6294 const struct net_device_ops *ops = dev->netdev_ops;
6294 6295
6295 if (ops->ndo_get_stats64) { 6296 if (ops->ndo_get_stats64) {
6296 memset(storage, 0, sizeof(*storage)); 6297 memset(storage, 0, sizeof(*storage));
6297 ops->ndo_get_stats64(dev, storage); 6298 ops->ndo_get_stats64(dev, storage);
6298 } else if (ops->ndo_get_stats) { 6299 } else if (ops->ndo_get_stats) {
6299 netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev)); 6300 netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
6300 } else { 6301 } else {
6301 netdev_stats_to_stats64(storage, &dev->stats); 6302 netdev_stats_to_stats64(storage, &dev->stats);
6302 } 6303 }
6303 storage->rx_dropped += atomic_long_read(&dev->rx_dropped); 6304 storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
6304 return storage; 6305 return storage;
6305 } 6306 }
6306 EXPORT_SYMBOL(dev_get_stats); 6307 EXPORT_SYMBOL(dev_get_stats);
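
Caller-side usage of dev_get_stats() is equally small: the caller supplies the storage and, as in the in-tree callers, provides its own RTNL or RCU protection. A hypothetical example:

/* Hypothetical caller; the surrounding code is assumed to hold rtnl or
 * rcu_read_lock, as the in-tree callers do.
 */
static void my_dump_rx_packets(struct net_device *dev)
{
        struct rtnl_link_stats64 temp;
        const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);

        pr_info("%s: %llu packets received\n", dev->name,
                (unsigned long long)stats->rx_packets);
}
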
6307 6308
6308 struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) 6309 struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
6309 { 6310 {
6310 struct netdev_queue *queue = dev_ingress_queue(dev); 6311 struct netdev_queue *queue = dev_ingress_queue(dev);
6311 6312
6312 #ifdef CONFIG_NET_CLS_ACT 6313 #ifdef CONFIG_NET_CLS_ACT
6313 if (queue) 6314 if (queue)
6314 return queue; 6315 return queue;
6315 queue = kzalloc(sizeof(*queue), GFP_KERNEL); 6316 queue = kzalloc(sizeof(*queue), GFP_KERNEL);
6316 if (!queue) 6317 if (!queue)
6317 return NULL; 6318 return NULL;
6318 netdev_init_one_queue(dev, queue, NULL); 6319 netdev_init_one_queue(dev, queue, NULL);
6319 queue->qdisc = &noop_qdisc; 6320 queue->qdisc = &noop_qdisc;
6320 queue->qdisc_sleeping = &noop_qdisc; 6321 queue->qdisc_sleeping = &noop_qdisc;
6321 rcu_assign_pointer(dev->ingress_queue, queue); 6322 rcu_assign_pointer(dev->ingress_queue, queue);
6322 #endif 6323 #endif
6323 return queue; 6324 return queue;
6324 } 6325 }
6325 6326
6326 static const struct ethtool_ops default_ethtool_ops; 6327 static const struct ethtool_ops default_ethtool_ops;
6327 6328
6328 void netdev_set_default_ethtool_ops(struct net_device *dev, 6329 void netdev_set_default_ethtool_ops(struct net_device *dev,
6329 const struct ethtool_ops *ops) 6330 const struct ethtool_ops *ops)
6330 { 6331 {
6331 if (dev->ethtool_ops == &default_ethtool_ops) 6332 if (dev->ethtool_ops == &default_ethtool_ops)
6332 dev->ethtool_ops = ops; 6333 dev->ethtool_ops = ops;
6333 } 6334 }
6334 EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops); 6335 EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
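
netdev_set_default_ethtool_ops() is aimed at intermediate layers that create net_devices on behalf of many drivers: it installs fallback ops only while the device still points at the core's empty default, so a driver's own ethtool_ops is never clobbered. A sketch (the my_lib_* names are hypothetical):

#include <linux/ethtool.h>
#include <linux/netdevice.h>

/* Hypothetical mid-layer defaults; a driver that sets its own
 * dev->ethtool_ops keeps them.
 */
static const struct ethtool_ops my_lib_default_ethtool_ops = {
        .get_link = ethtool_op_get_link,
};

static void my_lib_attach(struct net_device *dev)
{
        netdev_set_default_ethtool_ops(dev, &my_lib_default_ethtool_ops);
}
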
6335 6336
6336 void netdev_freemem(struct net_device *dev) 6337 void netdev_freemem(struct net_device *dev)
6337 { 6338 {
6338 char *addr = (char *)dev - dev->padded; 6339 char *addr = (char *)dev - dev->padded;
6339 6340
6340 if (is_vmalloc_addr(addr)) 6341 if (is_vmalloc_addr(addr))
6341 vfree(addr); 6342 vfree(addr);
6342 else 6343 else
6343 kfree(addr); 6344 kfree(addr);
6344 } 6345 }
6345 6346
6346 /** 6347 /**
6347 * alloc_netdev_mqs - allocate network device 6348 * alloc_netdev_mqs - allocate network device
6348 * @sizeof_priv: size of private data to allocate space for 6349 * @sizeof_priv: size of private data to allocate space for
6349 * @name: device name format string 6350 * @name: device name format string
6350 * @setup: callback to initialize device 6351 * @setup: callback to initialize device
6351 * @txqs: the number of TX subqueues to allocate 6352 * @txqs: the number of TX subqueues to allocate
6352 * @rxqs: the number of RX subqueues to allocate 6353 * @rxqs: the number of RX subqueues to allocate
6353 * 6354 *
6354 * Allocates a struct net_device with private data area for driver use 6355 * Allocates a struct net_device with private data area for driver use
6355 * and performs basic initialization. Also allocates subqueue structs 6356 * and performs basic initialization. Also allocates subqueue structs
6356 * for each queue on the device. 6357 * for each queue on the device.
6357 */ 6358 */
6358 struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, 6359 struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
6359 void (*setup)(struct net_device *), 6360 void (*setup)(struct net_device *),
6360 unsigned int txqs, unsigned int rxqs) 6361 unsigned int txqs, unsigned int rxqs)
6361 { 6362 {
6362 struct net_device *dev; 6363 struct net_device *dev;
6363 size_t alloc_size; 6364 size_t alloc_size;
6364 struct net_device *p; 6365 struct net_device *p;
6365 6366
6366 BUG_ON(strlen(name) >= sizeof(dev->name)); 6367 BUG_ON(strlen(name) >= sizeof(dev->name));
6367 6368
6368 if (txqs < 1) { 6369 if (txqs < 1) {
6369 pr_err("alloc_netdev: Unable to allocate device with zero queues\n"); 6370 pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
6370 return NULL; 6371 return NULL;
6371 } 6372 }
6372 6373
6373 #ifdef CONFIG_SYSFS 6374 #ifdef CONFIG_SYSFS
6374 if (rxqs < 1) { 6375 if (rxqs < 1) {
6375 pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n"); 6376 pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
6376 return NULL; 6377 return NULL;
6377 } 6378 }
6378 #endif 6379 #endif
6379 6380
6380 alloc_size = sizeof(struct net_device); 6381 alloc_size = sizeof(struct net_device);
6381 if (sizeof_priv) { 6382 if (sizeof_priv) {
6382 /* ensure 32-byte alignment of private area */ 6383 /* ensure 32-byte alignment of private area */
6383 alloc_size = ALIGN(alloc_size, NETDEV_ALIGN); 6384 alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
6384 alloc_size += sizeof_priv; 6385 alloc_size += sizeof_priv;
6385 } 6386 }
6386 /* ensure 32-byte alignment of whole construct */ 6387 /* ensure 32-byte alignment of whole construct */
6387 alloc_size += NETDEV_ALIGN - 1; 6388 alloc_size += NETDEV_ALIGN - 1;
6388 6389
6389 p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); 6390 p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
6390 if (!p) 6391 if (!p)
6391 p = vzalloc(alloc_size); 6392 p = vzalloc(alloc_size);
6392 if (!p) 6393 if (!p)
6393 return NULL; 6394 return NULL;
6394 6395
6395 dev = PTR_ALIGN(p, NETDEV_ALIGN); 6396 dev = PTR_ALIGN(p, NETDEV_ALIGN);
6396 dev->padded = (char *)dev - (char *)p; 6397 dev->padded = (char *)dev - (char *)p;
6397 6398
6398 dev->pcpu_refcnt = alloc_percpu(int); 6399 dev->pcpu_refcnt = alloc_percpu(int);
6399 if (!dev->pcpu_refcnt) 6400 if (!dev->pcpu_refcnt)
6400 goto free_dev; 6401 goto free_dev;
6401 6402
6402 if (dev_addr_init(dev)) 6403 if (dev_addr_init(dev))
6403 goto free_pcpu; 6404 goto free_pcpu;
6404 6405
6405 dev_mc_init(dev); 6406 dev_mc_init(dev);
6406 dev_uc_init(dev); 6407 dev_uc_init(dev);
6407 6408
6408 dev_net_set(dev, &init_net); 6409 dev_net_set(dev, &init_net);
6409 6410
6410 dev->gso_max_size = GSO_MAX_SIZE; 6411 dev->gso_max_size = GSO_MAX_SIZE;
6411 dev->gso_max_segs = GSO_MAX_SEGS; 6412 dev->gso_max_segs = GSO_MAX_SEGS;
6412 6413
6413 INIT_LIST_HEAD(&dev->napi_list); 6414 INIT_LIST_HEAD(&dev->napi_list);
6414 INIT_LIST_HEAD(&dev->unreg_list); 6415 INIT_LIST_HEAD(&dev->unreg_list);
6415 INIT_LIST_HEAD(&dev->close_list); 6416 INIT_LIST_HEAD(&dev->close_list);
6416 INIT_LIST_HEAD(&dev->link_watch_list); 6417 INIT_LIST_HEAD(&dev->link_watch_list);
6417 INIT_LIST_HEAD(&dev->adj_list.upper); 6418 INIT_LIST_HEAD(&dev->adj_list.upper);
6418 INIT_LIST_HEAD(&dev->adj_list.lower); 6419 INIT_LIST_HEAD(&dev->adj_list.lower);
6419 INIT_LIST_HEAD(&dev->all_adj_list.upper); 6420 INIT_LIST_HEAD(&dev->all_adj_list.upper);
6420 INIT_LIST_HEAD(&dev->all_adj_list.lower); 6421 INIT_LIST_HEAD(&dev->all_adj_list.lower);
6421 dev->priv_flags = IFF_XMIT_DST_RELEASE; 6422 dev->priv_flags = IFF_XMIT_DST_RELEASE;
6422 setup(dev); 6423 setup(dev);
6423 6424
6424 dev->num_tx_queues = txqs; 6425 dev->num_tx_queues = txqs;
6425 dev->real_num_tx_queues = txqs; 6426 dev->real_num_tx_queues = txqs;
6426 if (netif_alloc_netdev_queues(dev)) 6427 if (netif_alloc_netdev_queues(dev))
6427 goto free_all; 6428 goto free_all;
6428 6429
6429 #ifdef CONFIG_SYSFS 6430 #ifdef CONFIG_SYSFS
6430 dev->num_rx_queues = rxqs; 6431 dev->num_rx_queues = rxqs;
6431 dev->real_num_rx_queues = rxqs; 6432 dev->real_num_rx_queues = rxqs;
6432 if (netif_alloc_rx_queues(dev)) 6433 if (netif_alloc_rx_queues(dev))
6433 goto free_all; 6434 goto free_all;
6434 #endif 6435 #endif
6435 6436
6436 strcpy(dev->name, name); 6437 strcpy(dev->name, name);
6437 dev->group = INIT_NETDEV_GROUP; 6438 dev->group = INIT_NETDEV_GROUP;
6438 if (!dev->ethtool_ops) 6439 if (!dev->ethtool_ops)
6439 dev->ethtool_ops = &default_ethtool_ops; 6440 dev->ethtool_ops = &default_ethtool_ops;
6440 return dev; 6441 return dev;
6441 6442
6442 free_all: 6443 free_all:
6443 free_netdev(dev); 6444 free_netdev(dev);
6444 return NULL; 6445 return NULL;
6445 6446
6446 free_pcpu: 6447 free_pcpu:
6447 free_percpu(dev->pcpu_refcnt); 6448 free_percpu(dev->pcpu_refcnt);
6448 netif_free_tx_queues(dev); 6449 netif_free_tx_queues(dev);
6449 #ifdef CONFIG_SYSFS 6450 #ifdef CONFIG_SYSFS
6450 kfree(dev->_rx); 6451 kfree(dev->_rx);
6451 #endif 6452 #endif
6452 6453
6453 free_dev: 6454 free_dev:
6454 netdev_freemem(dev); 6455 netdev_freemem(dev);
6455 return NULL; 6456 return NULL;
6456 } 6457 }
6457 EXPORT_SYMBOL(alloc_netdev_mqs); 6458 EXPORT_SYMBOL(alloc_netdev_mqs);
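
Most drivers reach alloc_netdev_mqs() through a wrapper such as alloc_etherdev_mqs(), which supplies ether_setup as the setup callback. A sketch of a multiqueue allocation; struct my_priv and the queue counts are made up for illustration:

#include <linux/etherdevice.h>

struct my_priv {
        int dummy;                      /* hypothetical private state */
};

static struct net_device *my_alloc(void)
{
        /* 8 TX and 8 RX queue structures plus our aligned private area */
        struct net_device *dev = alloc_etherdev_mqs(sizeof(struct my_priv), 8, 8);

        if (!dev)
                return NULL;

        /* netdev_priv(dev) now points at the NETDEV_ALIGN'ed private area */
        return dev;
}
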
6458 6459
6459 /** 6460 /**
6460 * free_netdev - free network device 6461 * free_netdev - free network device
6461 * @dev: device 6462 * @dev: device
6462 * 6463 *
6463 * This function does the last stage of destroying an allocated device 6464 * This function does the last stage of destroying an allocated device
6464 * interface. The reference to the device object is released. 6465 * interface. The reference to the device object is released.
6465 * If this is the last reference then it will be freed. 6466 * If this is the last reference then it will be freed.
6466 */ 6467 */
6467 void free_netdev(struct net_device *dev) 6468 void free_netdev(struct net_device *dev)
6468 { 6469 {
6469 struct napi_struct *p, *n; 6470 struct napi_struct *p, *n;
6470 6471
6471 release_net(dev_net(dev)); 6472 release_net(dev_net(dev));
6472 6473
6473 netif_free_tx_queues(dev); 6474 netif_free_tx_queues(dev);
6474 #ifdef CONFIG_SYSFS 6475 #ifdef CONFIG_SYSFS
6475 kfree(dev->_rx); 6476 kfree(dev->_rx);
6476 #endif 6477 #endif
6477 6478
6478 kfree(rcu_dereference_protected(dev->ingress_queue, 1)); 6479 kfree(rcu_dereference_protected(dev->ingress_queue, 1));
6479 6480
6480 /* Flush device addresses */ 6481 /* Flush device addresses */
6481 dev_addr_flush(dev); 6482 dev_addr_flush(dev);
6482 6483
6483 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) 6484 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
6484 netif_napi_del(p); 6485 netif_napi_del(p);
6485 6486
6486 free_percpu(dev->pcpu_refcnt); 6487 free_percpu(dev->pcpu_refcnt);
6487 dev->pcpu_refcnt = NULL; 6488 dev->pcpu_refcnt = NULL;
6488 6489
6489 /* Compatibility with error handling in drivers */ 6490 /* Compatibility with error handling in drivers */
6490 if (dev->reg_state == NETREG_UNINITIALIZED) { 6491 if (dev->reg_state == NETREG_UNINITIALIZED) {
6491 netdev_freemem(dev); 6492 netdev_freemem(dev);
6492 return; 6493 return;
6493 } 6494 }
6494 6495
6495 BUG_ON(dev->reg_state != NETREG_UNREGISTERED); 6496 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
6496 dev->reg_state = NETREG_RELEASED; 6497 dev->reg_state = NETREG_RELEASED;
6497 6498
6498 /* will free via device release */ 6499 /* will free via device release */
6499 put_device(&dev->dev); 6500 put_device(&dev->dev);
6500 } 6501 }
6501 EXPORT_SYMBOL(free_netdev); 6502 EXPORT_SYMBOL(free_netdev);
6502 6503
6503 /** 6504 /**
6504 * synchronize_net - Synchronize with packet receive processing 6505 * synchronize_net - Synchronize with packet receive processing
6505 * 6506 *
6506 * Wait for packets currently being received to be done. 6507 * Wait for packets currently being received to be done.
6507 * Does not block later packets from starting. 6508 * Does not block later packets from starting.
6508 */ 6509 */
6509 void synchronize_net(void) 6510 void synchronize_net(void)
6510 { 6511 {
6511 might_sleep(); 6512 might_sleep();
6512 if (rtnl_is_locked()) 6513 if (rtnl_is_locked())
6513 synchronize_rcu_expedited(); 6514 synchronize_rcu_expedited();
6514 else 6515 else
6515 synchronize_rcu(); 6516 synchronize_rcu();
6516 } 6517 }
6517 EXPORT_SYMBOL(synchronize_net); 6518 EXPORT_SYMBOL(synchronize_net);
6518 6519
6519 /** 6520 /**
6520 * unregister_netdevice_queue - remove device from the kernel 6521 * unregister_netdevice_queue - remove device from the kernel
6521 * @dev: device 6522 * @dev: device
6522 * @head: list 6523 * @head: list
6523 * 6524 *
6524 * This function shuts down a device interface and removes it 6525 * This function shuts down a device interface and removes it
6525 * from the kernel tables. 6526 * from the kernel tables.
6526 * If head is not NULL, the device is queued to be unregistered later. 6527 * If head is not NULL, the device is queued to be unregistered later.
6527 * 6528 *
6528 * Callers must hold the rtnl semaphore. You may want 6529 * Callers must hold the rtnl semaphore. You may want
6529 * unregister_netdev() instead of this. 6530 * unregister_netdev() instead of this.
6530 */ 6531 */
6531 6532
6532 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head) 6533 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
6533 { 6534 {
6534 ASSERT_RTNL(); 6535 ASSERT_RTNL();
6535 6536
6536 if (head) { 6537 if (head) {
6537 list_move_tail(&dev->unreg_list, head); 6538 list_move_tail(&dev->unreg_list, head);
6538 } else { 6539 } else {
6539 rollback_registered(dev); 6540 rollback_registered(dev);
6540 /* Finish processing unregister after unlock */ 6541 /* Finish processing unregister after unlock */
6541 net_set_todo(dev); 6542 net_set_todo(dev);
6542 } 6543 }
6543 } 6544 }
6544 EXPORT_SYMBOL(unregister_netdevice_queue); 6545 EXPORT_SYMBOL(unregister_netdevice_queue);
6545 6546
6546 /** 6547 /**
6547 * unregister_netdevice_many - unregister many devices 6548 * unregister_netdevice_many - unregister many devices
6548 * @head: list of devices 6549 * @head: list of devices
6549 */ 6550 */
6550 void unregister_netdevice_many(struct list_head *head) 6551 void unregister_netdevice_many(struct list_head *head)
6551 { 6552 {
6552 struct net_device *dev; 6553 struct net_device *dev;
6553 6554
6554 if (!list_empty(head)) { 6555 if (!list_empty(head)) {
6555 rollback_registered_many(head); 6556 rollback_registered_many(head);
6556 list_for_each_entry(dev, head, unreg_list) 6557 list_for_each_entry(dev, head, unreg_list)
6557 net_set_todo(dev); 6558 net_set_todo(dev);
6558 } 6559 }
6559 } 6560 }
6560 EXPORT_SYMBOL(unregister_netdevice_many); 6561 EXPORT_SYMBOL(unregister_netdevice_many);
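
Together, unregister_netdevice_queue() and unregister_netdevice_many() let a driver tear down several devices in one batch, paying the notifier/RCU synchronisation cost once per batch instead of once per device. A sketch, assuming the devices are not already freed by a dev->destructor of their own (my_remove_all() is hypothetical):

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

/* Hypothetical batched teardown of @n already-registered devices. */
static void my_remove_all(struct net_device **my_devs, int n)
{
        LIST_HEAD(unreg_list);
        int i;

        rtnl_lock();
        for (i = 0; i < n; i++)
                unregister_netdevice_queue(my_devs[i], &unreg_list);
        unregister_netdevice_many(&unreg_list);
        rtnl_unlock();                  /* runs netdev_run_todo() */

        /* Safe only because these devices are not freed by a destructor
         * of their own.
         */
        for (i = 0; i < n; i++)
                free_netdev(my_devs[i]);
}
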
6561 6562
6562 /** 6563 /**
6563 * unregister_netdev - remove device from the kernel 6564 * unregister_netdev - remove device from the kernel
6564 * @dev: device 6565 * @dev: device
6565 * 6566 *
6566 * This function shuts down a device interface and removes it 6567 * This function shuts down a device interface and removes it
6567 * from the kernel tables. 6568 * from the kernel tables.
6568 * 6569 *
6569 * This is just a wrapper for unregister_netdevice that takes 6570 * This is just a wrapper for unregister_netdevice that takes
6570 * the rtnl semaphore. In general you want to use this and not 6571 * the rtnl semaphore. In general you want to use this and not
6571 * unregister_netdevice. 6572 * unregister_netdevice.
6572 */ 6573 */
6573 void unregister_netdev(struct net_device *dev) 6574 void unregister_netdev(struct net_device *dev)
6574 { 6575 {
6575 rtnl_lock(); 6576 rtnl_lock();
6576 unregister_netdevice(dev); 6577 unregister_netdevice(dev);
6577 rtnl_unlock(); 6578 rtnl_unlock();
6578 } 6579 }
6579 EXPORT_SYMBOL(unregister_netdev); 6580 EXPORT_SYMBOL(unregister_netdev);
6580 6581
6581 /** 6582 /**
6582 * dev_change_net_namespace - move device to a different network namespace 6583 * dev_change_net_namespace - move device to a different network namespace
6583 * @dev: device 6584 * @dev: device
6584 * @net: network namespace 6585 * @net: network namespace
6585 * @pat: If not NULL name pattern to try if the current device name 6586 * @pat: If not NULL name pattern to try if the current device name
6586 * is already taken in the destination network namespace. 6587 * is already taken in the destination network namespace.
6587 * 6588 *
6588 * This function shuts down a device interface and moves it 6589 * This function shuts down a device interface and moves it
6589 * to a new network namespace. On success 0 is returned, on 6590 * to a new network namespace. On success 0 is returned, on
6590 * a failure a negative errno code is returned. 6591 * a failure a negative errno code is returned.
6591 * 6592 *
6592 * Callers must hold the rtnl semaphore. 6593 * Callers must hold the rtnl semaphore.
6593 */ 6594 */
6594 6595
6595 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) 6596 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
6596 { 6597 {
6597 int err; 6598 int err;
6598 6599
6599 ASSERT_RTNL(); 6600 ASSERT_RTNL();
6600 6601
6601 /* Don't allow namespace local devices to be moved. */ 6602 /* Don't allow namespace local devices to be moved. */
6602 err = -EINVAL; 6603 err = -EINVAL;
6603 if (dev->features & NETIF_F_NETNS_LOCAL) 6604 if (dev->features & NETIF_F_NETNS_LOCAL)
6604 goto out; 6605 goto out;
6605 6606
6606 /* Ensure the device has been registered */ 6607 /* Ensure the device has been registered */
6607 if (dev->reg_state != NETREG_REGISTERED) 6608 if (dev->reg_state != NETREG_REGISTERED)
6608 goto out; 6609 goto out;
6609 6610
6610 /* Get out if there is nothing to do */ 6611 /* Get out if there is nothing to do */
6611 err = 0; 6612 err = 0;
6612 if (net_eq(dev_net(dev), net)) 6613 if (net_eq(dev_net(dev), net))
6613 goto out; 6614 goto out;
6614 6615
6615 /* Pick the destination device name, and ensure 6616 /* Pick the destination device name, and ensure
6616 * we can use it in the destination network namespace. 6617 * we can use it in the destination network namespace.
6617 */ 6618 */
6618 err = -EEXIST; 6619 err = -EEXIST;
6619 if (__dev_get_by_name(net, dev->name)) { 6620 if (__dev_get_by_name(net, dev->name)) {
6620 /* We get here if we can't use the current device name */ 6621 /* We get here if we can't use the current device name */
6621 if (!pat) 6622 if (!pat)
6622 goto out; 6623 goto out;
6623 if (dev_get_valid_name(net, dev, pat) < 0) 6624 if (dev_get_valid_name(net, dev, pat) < 0)
6624 goto out; 6625 goto out;
6625 } 6626 }
6626 6627
6627 /* 6628 /*
6628 * And now a mini version of register_netdevice and unregister_netdevice. 6629 * And now a mini version of register_netdevice and unregister_netdevice.
6629 */ 6630 */
6630 6631
6631 /* If device is running close it first. */ 6632 /* If device is running close it first. */
6632 dev_close(dev); 6633 dev_close(dev);
6633 6634
6634 /* And unlink it from device chain */ 6635 /* And unlink it from device chain */
6635 err = -ENODEV; 6636 err = -ENODEV;
6636 unlist_netdevice(dev); 6637 unlist_netdevice(dev);
6637 6638
6638 synchronize_net(); 6639 synchronize_net();
6639 6640
6640 /* Shutdown queueing discipline. */ 6641 /* Shutdown queueing discipline. */
6641 dev_shutdown(dev); 6642 dev_shutdown(dev);
6642 6643
6643 /* Notify protocols that we are about to destroy 6644 /* Notify protocols that we are about to destroy
6644 this device. They should clean all the things. 6645 this device. They should clean all the things.
6645 6646
6646 Note that dev->reg_state stays at NETREG_REGISTERED. 6647 Note that dev->reg_state stays at NETREG_REGISTERED.
6647 This is wanted because this way 8021q and macvlan know 6648 This is wanted because this way 8021q and macvlan know
6648 the device is just moving and can keep their slaves up. 6649 the device is just moving and can keep their slaves up.
6649 */ 6650 */
6650 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 6651 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
6651 rcu_barrier(); 6652 rcu_barrier();
6652 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); 6653 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
6653 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL); 6654 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
6654 6655
6655 /* 6656 /*
6656 * Flush the unicast and multicast chains 6657 * Flush the unicast and multicast chains
6657 */ 6658 */
6658 dev_uc_flush(dev); 6659 dev_uc_flush(dev);
6659 dev_mc_flush(dev); 6660 dev_mc_flush(dev);
6660 6661
6661 /* Send a netdev-removed uevent to the old namespace */ 6662 /* Send a netdev-removed uevent to the old namespace */
6662 kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE); 6663 kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE);
6663 6664
6664 /* Actually switch the network namespace */ 6665 /* Actually switch the network namespace */
6665 dev_net_set(dev, net); 6666 dev_net_set(dev, net);
6666 6667
6667 /* If there is an ifindex conflict assign a new one */ 6668 /* If there is an ifindex conflict assign a new one */
6668 if (__dev_get_by_index(net, dev->ifindex)) { 6669 if (__dev_get_by_index(net, dev->ifindex)) {
6669 int iflink = (dev->iflink == dev->ifindex); 6670 int iflink = (dev->iflink == dev->ifindex);
6670 dev->ifindex = dev_new_index(net); 6671 dev->ifindex = dev_new_index(net);
6671 if (iflink) 6672 if (iflink)
6672 dev->iflink = dev->ifindex; 6673 dev->iflink = dev->ifindex;
6673 } 6674 }
6674 6675
6675 /* Send a netdev-add uevent to the new namespace */ 6676 /* Send a netdev-add uevent to the new namespace */
6676 kobject_uevent(&dev->dev.kobj, KOBJ_ADD); 6677 kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
6677 6678
6678 /* Fixup kobjects */ 6679 /* Fixup kobjects */
6679 err = device_rename(&dev->dev, dev->name); 6680 err = device_rename(&dev->dev, dev->name);
6680 WARN_ON(err); 6681 WARN_ON(err);
6681 6682
6682 /* Add the device back in the hashes */ 6683 /* Add the device back in the hashes */
6683 list_netdevice(dev); 6684 list_netdevice(dev);
6684 6685
6685 /* Notify protocols that a new device appeared. */ 6686 /* Notify protocols that a new device appeared. */
6686 call_netdevice_notifiers(NETDEV_REGISTER, dev); 6687 call_netdevice_notifiers(NETDEV_REGISTER, dev);
6687 6688
6688 /* 6689 /*
6689 * Prevent userspace races by waiting until the network 6690 * Prevent userspace races by waiting until the network
6690 * device is fully setup before sending notifications. 6691 * device is fully setup before sending notifications.
6691 */ 6692 */
6692 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL); 6693 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
6693 6694
6694 synchronize_net(); 6695 synchronize_net();
6695 err = 0; 6696 err = 0;
6696 out: 6697 out:
6697 return err; 6698 return err;
6698 } 6699 }
6699 EXPORT_SYMBOL_GPL(dev_change_net_namespace); 6700 EXPORT_SYMBOL_GPL(dev_change_net_namespace);
6700 6701
6701 static int dev_cpu_callback(struct notifier_block *nfb, 6702 static int dev_cpu_callback(struct notifier_block *nfb,
6702 unsigned long action, 6703 unsigned long action,
6703 void *ocpu) 6704 void *ocpu)
6704 { 6705 {
6705 struct sk_buff **list_skb; 6706 struct sk_buff **list_skb;
6706 struct sk_buff *skb; 6707 struct sk_buff *skb;
6707 unsigned int cpu, oldcpu = (unsigned long)ocpu; 6708 unsigned int cpu, oldcpu = (unsigned long)ocpu;
6708 struct softnet_data *sd, *oldsd; 6709 struct softnet_data *sd, *oldsd;
6709 6710
6710 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) 6711 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
6711 return NOTIFY_OK; 6712 return NOTIFY_OK;
6712 6713
6713 local_irq_disable(); 6714 local_irq_disable();
6714 cpu = smp_processor_id(); 6715 cpu = smp_processor_id();
6715 sd = &per_cpu(softnet_data, cpu); 6716 sd = &per_cpu(softnet_data, cpu);
6716 oldsd = &per_cpu(softnet_data, oldcpu); 6717 oldsd = &per_cpu(softnet_data, oldcpu);
6717 6718
6718 /* Find end of our completion_queue. */ 6719 /* Find end of our completion_queue. */
6719 list_skb = &sd->completion_queue; 6720 list_skb = &sd->completion_queue;
6720 while (*list_skb) 6721 while (*list_skb)
6721 list_skb = &(*list_skb)->next; 6722 list_skb = &(*list_skb)->next;
6722 /* Append completion queue from offline CPU. */ 6723 /* Append completion queue from offline CPU. */
6723 *list_skb = oldsd->completion_queue; 6724 *list_skb = oldsd->completion_queue;
6724 oldsd->completion_queue = NULL; 6725 oldsd->completion_queue = NULL;
6725 6726
6726 /* Append output queue from offline CPU. */ 6727 /* Append output queue from offline CPU. */
6727 if (oldsd->output_queue) { 6728 if (oldsd->output_queue) {
6728 *sd->output_queue_tailp = oldsd->output_queue; 6729 *sd->output_queue_tailp = oldsd->output_queue;
6729 sd->output_queue_tailp = oldsd->output_queue_tailp; 6730 sd->output_queue_tailp = oldsd->output_queue_tailp;
6730 oldsd->output_queue = NULL; 6731 oldsd->output_queue = NULL;
6731 oldsd->output_queue_tailp = &oldsd->output_queue; 6732 oldsd->output_queue_tailp = &oldsd->output_queue;
6732 } 6733 }
6733 /* Append NAPI poll list from offline CPU. */ 6734 /* Append NAPI poll list from offline CPU. */
6734 if (!list_empty(&oldsd->poll_list)) { 6735 if (!list_empty(&oldsd->poll_list)) {
6735 list_splice_init(&oldsd->poll_list, &sd->poll_list); 6736 list_splice_init(&oldsd->poll_list, &sd->poll_list);
6736 raise_softirq_irqoff(NET_RX_SOFTIRQ); 6737 raise_softirq_irqoff(NET_RX_SOFTIRQ);
6737 } 6738 }
6738 6739
6739 raise_softirq_irqoff(NET_TX_SOFTIRQ); 6740 raise_softirq_irqoff(NET_TX_SOFTIRQ);
6740 local_irq_enable(); 6741 local_irq_enable();
6741 6742
6742 /* Process offline CPU's input_pkt_queue */ 6743 /* Process offline CPU's input_pkt_queue */
6743 while ((skb = __skb_dequeue(&oldsd->process_queue))) { 6744 while ((skb = __skb_dequeue(&oldsd->process_queue))) {
6744 netif_rx_internal(skb); 6745 netif_rx_internal(skb);
6745 input_queue_head_incr(oldsd); 6746 input_queue_head_incr(oldsd);
6746 } 6747 }
6747 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { 6748 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
6748 netif_rx_internal(skb); 6749 netif_rx_internal(skb);
6749 input_queue_head_incr(oldsd); 6750 input_queue_head_incr(oldsd);
6750 } 6751 }
6751 6752
6752 return NOTIFY_OK; 6753 return NOTIFY_OK;
6753 } 6754 }
6754 6755
6755 6756
6756 /** 6757 /**
6757 * netdev_increment_features - increment feature set by one 6758 * netdev_increment_features - increment feature set by one
6758 * @all: current feature set 6759 * @all: current feature set
6759 * @one: new feature set 6760 * @one: new feature set
6760 * @mask: mask feature set 6761 * @mask: mask feature set
6761 * 6762 *
6762 * Computes a new feature set after adding a device with feature set 6763 * Computes a new feature set after adding a device with feature set
6763 * @one to the master device with current feature set @all. Will not 6764 * @one to the master device with current feature set @all. Will not
6764 * enable anything that is off in @mask. Returns the new feature set. 6765 * enable anything that is off in @mask. Returns the new feature set.
6765 */ 6766 */
6766 netdev_features_t netdev_increment_features(netdev_features_t all, 6767 netdev_features_t netdev_increment_features(netdev_features_t all,
6767 netdev_features_t one, netdev_features_t mask) 6768 netdev_features_t one, netdev_features_t mask)
6768 { 6769 {
6769 if (mask & NETIF_F_GEN_CSUM) 6770 if (mask & NETIF_F_GEN_CSUM)
6770 mask |= NETIF_F_ALL_CSUM; 6771 mask |= NETIF_F_ALL_CSUM;
6771 mask |= NETIF_F_VLAN_CHALLENGED; 6772 mask |= NETIF_F_VLAN_CHALLENGED;
6772 6773
6773 all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask; 6774 all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
6774 all &= one | ~NETIF_F_ALL_FOR_ALL; 6775 all &= one | ~NETIF_F_ALL_FOR_ALL;
6775 6776
6776 /* If one device supports hw checksumming, set for all. */ 6777 /* If one device supports hw checksumming, set for all. */
6777 if (all & NETIF_F_GEN_CSUM) 6778 if (all & NETIF_F_GEN_CSUM)
6778 all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); 6779 all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
6779 6780
6780 return all; 6781 return all;
6781 } 6782 }
6782 EXPORT_SYMBOL(netdev_increment_features); 6783 EXPORT_SYMBOL(netdev_increment_features);
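
netdev_increment_features() is the helper that aggregating devices (bonding, bridge, team) use to derive a master's feature set from all of its lower devices. A loose sketch of that caller pattern with a hypothetical slave list; the real users seed and mask the computation with their own policies:

#include <linux/netdevice.h>

/* Hypothetical lower-device bookkeeping. */
struct my_slave {
        struct list_head list;
        struct net_device *dev;
};

static netdev_features_t my_fix_features(struct list_head *slaves,
                                         netdev_features_t mask)
{
        netdev_features_t features = mask & ~NETIF_F_ONE_FOR_ALL;
        struct my_slave *s;

        list_for_each_entry(s, slaves, list)
                features = netdev_increment_features(features,
                                                     s->dev->features, mask);
        return features;
}
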
6783 6784
6784 static struct hlist_head * __net_init netdev_create_hash(void) 6785 static struct hlist_head * __net_init netdev_create_hash(void)
6785 { 6786 {
6786 int i; 6787 int i;
6787 struct hlist_head *hash; 6788 struct hlist_head *hash;
6788 6789
6789 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL); 6790 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
6790 if (hash != NULL) 6791 if (hash != NULL)
6791 for (i = 0; i < NETDEV_HASHENTRIES; i++) 6792 for (i = 0; i < NETDEV_HASHENTRIES; i++)
6792 INIT_HLIST_HEAD(&hash[i]); 6793 INIT_HLIST_HEAD(&hash[i]);
6793 6794
6794 return hash; 6795 return hash;
6795 } 6796 }
6796 6797
6797 /* Initialize per network namespace state */ 6798 /* Initialize per network namespace state */
6798 static int __net_init netdev_init(struct net *net) 6799 static int __net_init netdev_init(struct net *net)
6799 { 6800 {
6800 if (net != &init_net) 6801 if (net != &init_net)
6801 INIT_LIST_HEAD(&net->dev_base_head); 6802 INIT_LIST_HEAD(&net->dev_base_head);
6802 6803
6803 net->dev_name_head = netdev_create_hash(); 6804 net->dev_name_head = netdev_create_hash();
6804 if (net->dev_name_head == NULL) 6805 if (net->dev_name_head == NULL)
6805 goto err_name; 6806 goto err_name;
6806 6807
6807 net->dev_index_head = netdev_create_hash(); 6808 net->dev_index_head = netdev_create_hash();
6808 if (net->dev_index_head == NULL) 6809 if (net->dev_index_head == NULL)
6809 goto err_idx; 6810 goto err_idx;
6810 6811
6811 return 0; 6812 return 0;
6812 6813
6813 err_idx: 6814 err_idx:
6814 kfree(net->dev_name_head); 6815 kfree(net->dev_name_head);
6815 err_name: 6816 err_name:
6816 return -ENOMEM; 6817 return -ENOMEM;
6817 } 6818 }
6818 6819
6819 /** 6820 /**
6820 * netdev_drivername - network driver for the device 6821 * netdev_drivername - network driver for the device
6821 * @dev: network device 6822 * @dev: network device
6822 * 6823 *
6823 * Determine network driver for device. 6824 * Determine network driver for device.
6824 */ 6825 */
6825 const char *netdev_drivername(const struct net_device *dev) 6826 const char *netdev_drivername(const struct net_device *dev)
6826 { 6827 {
6827 const struct device_driver *driver; 6828 const struct device_driver *driver;
6828 const struct device *parent; 6829 const struct device *parent;
6829 const char *empty = ""; 6830 const char *empty = "";
6830 6831
6831 parent = dev->dev.parent; 6832 parent = dev->dev.parent;
6832 if (!parent) 6833 if (!parent)
6833 return empty; 6834 return empty;
6834 6835
6835 driver = parent->driver; 6836 driver = parent->driver;
6836 if (driver && driver->name) 6837 if (driver && driver->name)
6837 return driver->name; 6838 return driver->name;
6838 return empty; 6839 return empty;
6839 } 6840 }
6840 6841
6841 static int __netdev_printk(const char *level, const struct net_device *dev, 6842 static int __netdev_printk(const char *level, const struct net_device *dev,
6842 struct va_format *vaf) 6843 struct va_format *vaf)
6843 { 6844 {
6844 int r; 6845 int r;
6845 6846
6846 if (dev && dev->dev.parent) { 6847 if (dev && dev->dev.parent) {
6847 r = dev_printk_emit(level[1] - '0', 6848 r = dev_printk_emit(level[1] - '0',
6848 dev->dev.parent, 6849 dev->dev.parent,
6849 "%s %s %s: %pV", 6850 "%s %s %s: %pV",
6850 dev_driver_string(dev->dev.parent), 6851 dev_driver_string(dev->dev.parent),
6851 dev_name(dev->dev.parent), 6852 dev_name(dev->dev.parent),
6852 netdev_name(dev), vaf); 6853 netdev_name(dev), vaf);
6853 } else if (dev) { 6854 } else if (dev) {
6854 r = printk("%s%s: %pV", level, netdev_name(dev), vaf); 6855 r = printk("%s%s: %pV", level, netdev_name(dev), vaf);
6855 } else { 6856 } else {
6856 r = printk("%s(NULL net_device): %pV", level, vaf); 6857 r = printk("%s(NULL net_device): %pV", level, vaf);
6857 } 6858 }
6858 6859
6859 return r; 6860 return r;
6860 } 6861 }
6861 6862
6862 int netdev_printk(const char *level, const struct net_device *dev, 6863 int netdev_printk(const char *level, const struct net_device *dev,
6863 const char *format, ...) 6864 const char *format, ...)
6864 { 6865 {
6865 struct va_format vaf; 6866 struct va_format vaf;
6866 va_list args; 6867 va_list args;
6867 int r; 6868 int r;
6868 6869
6869 va_start(args, format); 6870 va_start(args, format);
6870 6871
6871 vaf.fmt = format; 6872 vaf.fmt = format;
6872 vaf.va = &args; 6873 vaf.va = &args;
6873 6874
6874 r = __netdev_printk(level, dev, &vaf); 6875 r = __netdev_printk(level, dev, &vaf);
6875 6876
6876 va_end(args); 6877 va_end(args);
6877 6878
6878 return r; 6879 return r;
6879 } 6880 }
6880 EXPORT_SYMBOL(netdev_printk); 6881 EXPORT_SYMBOL(netdev_printk);
6881 6882
6882 #define define_netdev_printk_level(func, level) \ 6883 #define define_netdev_printk_level(func, level) \
6883 int func(const struct net_device *dev, const char *fmt, ...) \ 6884 int func(const struct net_device *dev, const char *fmt, ...) \
6884 { \ 6885 { \
6885 int r; \ 6886 int r; \
6886 struct va_format vaf; \ 6887 struct va_format vaf; \
6887 va_list args; \ 6888 va_list args; \
6888 \ 6889 \
6889 va_start(args, fmt); \ 6890 va_start(args, fmt); \
6890 \ 6891 \
6891 vaf.fmt = fmt; \ 6892 vaf.fmt = fmt; \
6892 vaf.va = &args; \ 6893 vaf.va = &args; \
6893 \ 6894 \
6894 r = __netdev_printk(level, dev, &vaf); \ 6895 r = __netdev_printk(level, dev, &vaf); \
6895 \ 6896 \
6896 va_end(args); \ 6897 va_end(args); \
6897 \ 6898 \
6898 return r; \ 6899 return r; \
6899 } \ 6900 } \
6900 EXPORT_SYMBOL(func); 6901 EXPORT_SYMBOL(func);
6901 6902
6902 define_netdev_printk_level(netdev_emerg, KERN_EMERG); 6903 define_netdev_printk_level(netdev_emerg, KERN_EMERG);
6903 define_netdev_printk_level(netdev_alert, KERN_ALERT); 6904 define_netdev_printk_level(netdev_alert, KERN_ALERT);
6904 define_netdev_printk_level(netdev_crit, KERN_CRIT); 6905 define_netdev_printk_level(netdev_crit, KERN_CRIT);
6905 define_netdev_printk_level(netdev_err, KERN_ERR); 6906 define_netdev_printk_level(netdev_err, KERN_ERR);
6906 define_netdev_printk_level(netdev_warn, KERN_WARNING); 6907 define_netdev_printk_level(netdev_warn, KERN_WARNING);
6907 define_netdev_printk_level(netdev_notice, KERN_NOTICE); 6908 define_netdev_printk_level(netdev_notice, KERN_NOTICE);
6908 define_netdev_printk_level(netdev_info, KERN_INFO); 6909 define_netdev_printk_level(netdev_info, KERN_INFO);
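
The macro above generates the familiar netdev_err()/netdev_warn()/netdev_info() helpers; when the device has a parent struct device, the message goes through dev_printk_emit() and carries driver, bus-id and interface-name prefixes. A trivial usage example (my_link_report() is hypothetical):

/* Hypothetical helper in a driver. */
static void my_link_report(struct net_device *dev, bool up)
{
        if (up)
                netdev_info(dev, "link is up\n");
        else
                netdev_warn(dev, "link is down\n");
}
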
6909 6910
6910 static void __net_exit netdev_exit(struct net *net) 6911 static void __net_exit netdev_exit(struct net *net)
6911 { 6912 {
6912 kfree(net->dev_name_head); 6913 kfree(net->dev_name_head);
6913 kfree(net->dev_index_head); 6914 kfree(net->dev_index_head);
6914 } 6915 }
6915 6916
6916 static struct pernet_operations __net_initdata netdev_net_ops = { 6917 static struct pernet_operations __net_initdata netdev_net_ops = {
6917 .init = netdev_init, 6918 .init = netdev_init,
6918 .exit = netdev_exit, 6919 .exit = netdev_exit,
6919 }; 6920 };
6920 6921
6921 static void __net_exit default_device_exit(struct net *net) 6922 static void __net_exit default_device_exit(struct net *net)
6922 { 6923 {
6923 struct net_device *dev, *aux; 6924 struct net_device *dev, *aux;
6924 /* 6925 /*
6925 * Push all migratable network devices back to the 6926 * Push all migratable network devices back to the
6926 * initial network namespace 6927 * initial network namespace
6927 */ 6928 */
6928 rtnl_lock(); 6929 rtnl_lock();
6929 for_each_netdev_safe(net, dev, aux) { 6930 for_each_netdev_safe(net, dev, aux) {
6930 int err; 6931 int err;
6931 char fb_name[IFNAMSIZ]; 6932 char fb_name[IFNAMSIZ];
6932 6933
6933 /* Ignore unmoveable devices (i.e. loopback) */ 6934 /* Ignore unmoveable devices (i.e. loopback) */
6934 if (dev->features & NETIF_F_NETNS_LOCAL) 6935 if (dev->features & NETIF_F_NETNS_LOCAL)
6935 continue; 6936 continue;
6936 6937
6937 /* Leave virtual devices for the generic cleanup */ 6938 /* Leave virtual devices for the generic cleanup */
6938 if (dev->rtnl_link_ops) 6939 if (dev->rtnl_link_ops)
6939 continue; 6940 continue;
6940 6941
6941 /* Push remaining network devices to init_net */ 6942 /* Push remaining network devices to init_net */
6942 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); 6943 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
6943 err = dev_change_net_namespace(dev, &init_net, fb_name); 6944 err = dev_change_net_namespace(dev, &init_net, fb_name);
6944 if (err) { 6945 if (err) {
6945 pr_emerg("%s: failed to move %s to init_net: %d\n", 6946 pr_emerg("%s: failed to move %s to init_net: %d\n",
6946 __func__, dev->name, err); 6947 __func__, dev->name, err);
6947 BUG(); 6948 BUG();
6948 } 6949 }
6949 } 6950 }
6950 rtnl_unlock(); 6951 rtnl_unlock();
6951 } 6952 }
6952 6953
6953 static void __net_exit rtnl_lock_unregistering(struct list_head *net_list) 6954 static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
6954 { 6955 {
6955 /* Return with the rtnl_lock held when there are no network 6956 /* Return with the rtnl_lock held when there are no network
6956 * devices unregistering in any network namespace in net_list. 6957 * devices unregistering in any network namespace in net_list.
6957 */ 6958 */
6958 struct net *net; 6959 struct net *net;
6959 bool unregistering; 6960 bool unregistering;
6960 DEFINE_WAIT(wait); 6961 DEFINE_WAIT(wait);
6961 6962
6962 for (;;) { 6963 for (;;) {
6963 prepare_to_wait(&netdev_unregistering_wq, &wait, 6964 prepare_to_wait(&netdev_unregistering_wq, &wait,
6964 TASK_UNINTERRUPTIBLE); 6965 TASK_UNINTERRUPTIBLE);
6965 unregistering = false; 6966 unregistering = false;
6966 rtnl_lock(); 6967 rtnl_lock();
6967 list_for_each_entry(net, net_list, exit_list) { 6968 list_for_each_entry(net, net_list, exit_list) {
6968 if (net->dev_unreg_count > 0) { 6969 if (net->dev_unreg_count > 0) {
6969 unregistering = true; 6970 unregistering = true;
6970 break; 6971 break;
6971 } 6972 }
6972 } 6973 }
6973 if (!unregistering) 6974 if (!unregistering)
6974 break; 6975 break;
6975 __rtnl_unlock(); 6976 __rtnl_unlock();
6976 schedule(); 6977 schedule();
6977 } 6978 }
6978 finish_wait(&netdev_unregistering_wq, &wait); 6979 finish_wait(&netdev_unregistering_wq, &wait);
6979 } 6980 }
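/* Descriptive note on the wait loop above: it is the classic
 * prepare_to_wait()/schedule() pattern. Each iteration retakes the rtnl
 * lock, rescans the namespaces on net_list for pending device
 * unregistrations, and only returns (with the rtnl lock still held) once
 * none remain; otherwise it drops the lock and sleeps until woken through
 * netdev_unregistering_wq.
 */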
6980 6981
6981 static void __net_exit default_device_exit_batch(struct list_head *net_list) 6982 static void __net_exit default_device_exit_batch(struct list_head *net_list)
6982 { 6983 {
6983 /* At exit all network devices must be removed from a network 6984 /* At exit all network devices must be removed from a network
6984 * namespace. Do this in the reverse order of registration. 6985 * namespace. Do this in the reverse order of registration.
6985 * Do this across as many network namespaces as possible to 6986 * Do this across as many network namespaces as possible to
6986 * improve batching efficiency. 6987 * improve batching efficiency.
6987 */ 6988 */
6988 struct net_device *dev; 6989 struct net_device *dev;
6989 struct net *net; 6990 struct net *net;
6990 LIST_HEAD(dev_kill_list); 6991 LIST_HEAD(dev_kill_list);
6991 6992
6992 /* To prevent network device cleanup code from dereferencing 6993 /* To prevent network device cleanup code from dereferencing
6993 * loopback devices or network devices that have been freed 6994 * loopback devices or network devices that have been freed
6994 * wait here for all pending unregistrations to complete, 6995 * wait here for all pending unregistrations to complete,
6995 * before unregistering the loopback device and allowing the 6996 * before unregistering the loopback device and allowing the
6996 * network namespace to be freed. 6997 * network namespace to be freed.
6997 * 6998 *
6998 * The netdev todo list containing all network devices 6999 * The netdev todo list containing all network devices
6999 * unregistrations that happen in default_device_exit_batch 7000 * unregistrations that happen in default_device_exit_batch
7000 * will run in the rtnl_unlock() at the end of 7001 * will run in the rtnl_unlock() at the end of
7001 * default_device_exit_batch. 7002 * default_device_exit_batch.
7002 */ 7003 */
7003 rtnl_lock_unregistering(net_list); 7004 rtnl_lock_unregistering(net_list);
7004 list_for_each_entry(net, net_list, exit_list) { 7005 list_for_each_entry(net, net_list, exit_list) {
7005 for_each_netdev_reverse(net, dev) { 7006 for_each_netdev_reverse(net, dev) {
7006 if (dev->rtnl_link_ops) 7007 if (dev->rtnl_link_ops)
7007 dev->rtnl_link_ops->dellink(dev, &dev_kill_list); 7008 dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
7008 else 7009 else
7009 unregister_netdevice_queue(dev, &dev_kill_list); 7010 unregister_netdevice_queue(dev, &dev_kill_list);
7010 } 7011 }
7011 } 7012 }
7012 unregister_netdevice_many(&dev_kill_list); 7013 unregister_netdevice_many(&dev_kill_list);
7013 list_del(&dev_kill_list); 7014 list_del(&dev_kill_list);
7014 rtnl_unlock(); 7015 rtnl_unlock();
7015 } 7016 }
7016 7017
7017 static struct pernet_operations __net_initdata default_device_ops = { 7018 static struct pernet_operations __net_initdata default_device_ops = {
7018 .exit = default_device_exit, 7019 .exit = default_device_exit,
7019 .exit_batch = default_device_exit_batch, 7020 .exit_batch = default_device_exit_batch,
7020 }; 7021 };
7021 7022
7022 /* 7023 /*
7023 * Initialize the DEV module. At boot time this walks the device list and 7024 * Initialize the DEV module. At boot time this walks the device list and
7024 * unhooks any devices that fail to initialise (normally hardware not 7025 * unhooks any devices that fail to initialise (normally hardware not
7025 * present) and leaves us with a valid list of present and active devices. 7026 * present) and leaves us with a valid list of present and active devices.
7026 * 7027 *
7027 */ 7028 */
7028 7029
7029 /* 7030 /*
7030 * This is called single threaded during boot, so no need 7031 * This is called single threaded during boot, so no need
7031 * to take the rtnl semaphore. 7032 * to take the rtnl semaphore.
7032 */ 7033 */
7033 static int __init net_dev_init(void) 7034 static int __init net_dev_init(void)
7034 { 7035 {
7035 int i, rc = -ENOMEM; 7036 int i, rc = -ENOMEM;
7036 7037
7037 BUG_ON(!dev_boot_phase); 7038 BUG_ON(!dev_boot_phase);
7038 7039
7039 if (dev_proc_init()) 7040 if (dev_proc_init())
7040 goto out; 7041 goto out;
7041 7042
7042 if (netdev_kobject_init()) 7043 if (netdev_kobject_init())
7043 goto out; 7044 goto out;
7044 7045
7045 INIT_LIST_HEAD(&ptype_all); 7046 INIT_LIST_HEAD(&ptype_all);
7046 for (i = 0; i < PTYPE_HASH_SIZE; i++) 7047 for (i = 0; i < PTYPE_HASH_SIZE; i++)
7047 INIT_LIST_HEAD(&ptype_base[i]); 7048 INIT_LIST_HEAD(&ptype_base[i]);
7048 7049
7049 INIT_LIST_HEAD(&offload_base); 7050 INIT_LIST_HEAD(&offload_base);
7050 7051
7051 if (register_pernet_subsys(&netdev_net_ops)) 7052 if (register_pernet_subsys(&netdev_net_ops))
7052 goto out; 7053 goto out;
7053 7054
7054 /* 7055 /*
7055 * Initialise the packet receive queues. 7056 * Initialise the packet receive queues.
7056 */ 7057 */
7057 7058
7058 for_each_possible_cpu(i) { 7059 for_each_possible_cpu(i) {
7059 struct softnet_data *sd = &per_cpu(softnet_data, i); 7060 struct softnet_data *sd = &per_cpu(softnet_data, i);
7060 7061
7061 skb_queue_head_init(&sd->input_pkt_queue); 7062 skb_queue_head_init(&sd->input_pkt_queue);
7062 skb_queue_head_init(&sd->process_queue); 7063 skb_queue_head_init(&sd->process_queue);
7063 INIT_LIST_HEAD(&sd->poll_list); 7064 INIT_LIST_HEAD(&sd->poll_list);
7064 sd->output_queue_tailp = &sd->output_queue; 7065 sd->output_queue_tailp = &sd->output_queue;
7065 #ifdef CONFIG_RPS 7066 #ifdef CONFIG_RPS
7066 sd->csd.func = rps_trigger_softirq; 7067 sd->csd.func = rps_trigger_softirq;
7067 sd->csd.info = sd; 7068 sd->csd.info = sd;
7068 sd->cpu = i; 7069 sd->cpu = i;
7069 #endif 7070 #endif
7070 7071
7071 sd->backlog.poll = process_backlog; 7072 sd->backlog.poll = process_backlog;
7072 sd->backlog.weight = weight_p; 7073 sd->backlog.weight = weight_p;
7073 } 7074 }
7074 7075
7075 dev_boot_phase = 0; 7076 dev_boot_phase = 0;
7076 7077
7077 /* The loopback device is special: if any other network device 7078 /* The loopback device is special: if any other network device
7078 * is present in a network namespace, the loopback device must 7079 * is present in a network namespace, the loopback device must
7079 * be present. Since we now dynamically allocate and free the 7080 * be present. Since we now dynamically allocate and free the
7080 * loopback device, ensure this invariant is maintained by 7081 * loopback device, ensure this invariant is maintained by
7081 * keeping the loopback device as the first device on the 7082 * keeping the loopback device as the first device on the
7082 * list of network devices, ensuring the loopback device 7083 * list of network devices, ensuring the loopback device
7083 * is the first device that appears and the last network device 7084 * is the first device that appears and the last network device
7084 * that disappears. 7085 * that disappears.
7085 */ 7086 */
7086 if (register_pernet_device(&loopback_net_ops)) 7087 if (register_pernet_device(&loopback_net_ops))
7087 goto out; 7088 goto out;
7088 7089
7089 if (register_pernet_device(&default_device_ops)) 7090 if (register_pernet_device(&default_device_ops))
7090 goto out; 7091 goto out;
7091 7092
7092 open_softirq(NET_TX_SOFTIRQ, net_tx_action); 7093 open_softirq(NET_TX_SOFTIRQ, net_tx_action);
7093 open_softirq(NET_RX_SOFTIRQ, net_rx_action); 7094 open_softirq(NET_RX_SOFTIRQ, net_rx_action);
7094 7095
7095 hotcpu_notifier(dev_cpu_callback, 0); 7096 hotcpu_notifier(dev_cpu_callback, 0);
7096 dst_init(); 7097 dst_init();
7097 rc = 0; 7098 rc = 0;
7098 out: 7099 out:
7099 return rc; 7100 return rc;
7100 } 7101 }
7101 7102
7102 subsys_initcall(net_dev_init); 7103 subsys_initcall(net_dev_init);
7103 7104
1 /* 1 /*
2 * Routines having to do with the 'struct sk_buff' memory handlers. 2 * Routines having to do with the 'struct sk_buff' memory handlers.
3 * 3 *
4 * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk> 4 * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>
5 * Florian La Roche <rzsfl@rz.uni-sb.de> 5 * Florian La Roche <rzsfl@rz.uni-sb.de>
6 * 6 *
7 * Fixes: 7 * Fixes:
8 * Alan Cox : Fixed the worst of the load 8 * Alan Cox : Fixed the worst of the load
9 * balancer bugs. 9 * balancer bugs.
10 * Dave Platt : Interrupt stacking fix. 10 * Dave Platt : Interrupt stacking fix.
11 * Richard Kooijman : Timestamp fixes. 11 * Richard Kooijman : Timestamp fixes.
12 * Alan Cox : Changed buffer format. 12 * Alan Cox : Changed buffer format.
13 * Alan Cox : destructor hook for AF_UNIX etc. 13 * Alan Cox : destructor hook for AF_UNIX etc.
14 * Linus Torvalds : Better skb_clone. 14 * Linus Torvalds : Better skb_clone.
15 * Alan Cox : Added skb_copy. 15 * Alan Cox : Added skb_copy.
16 * Alan Cox : Added all the changed routines Linus 16 * Alan Cox : Added all the changed routines Linus
17 * only put in the headers 17 * only put in the headers
18 * Ray VanTassle : Fixed --skb->lock in free 18 * Ray VanTassle : Fixed --skb->lock in free
19 * Alan Cox : skb_copy copy arp field 19 * Alan Cox : skb_copy copy arp field
20 * Andi Kleen : slabified it. 20 * Andi Kleen : slabified it.
21 * Robert Olsson : Removed skb_head_pool 21 * Robert Olsson : Removed skb_head_pool
22 * 22 *
23 * NOTE: 23 * NOTE:
24 * The __skb_ routines should be called with interrupts 24 * The __skb_ routines should be called with interrupts
25 * disabled, or you better be *real* sure that the operation is atomic 25 * disabled, or you better be *real* sure that the operation is atomic
26 * with respect to whatever list is being frobbed (e.g. via lock_sock() 26 * with respect to whatever list is being frobbed (e.g. via lock_sock()
27 * or via disabling bottom half handlers, etc). 27 * or via disabling bottom half handlers, etc).
28 * 28 *
29 * This program is free software; you can redistribute it and/or 29 * This program is free software; you can redistribute it and/or
30 * modify it under the terms of the GNU General Public License 30 * modify it under the terms of the GNU General Public License
31 * as published by the Free Software Foundation; either version 31 * as published by the Free Software Foundation; either version
32 * 2 of the License, or (at your option) any later version. 32 * 2 of the License, or (at your option) any later version.
33 */ 33 */
34 34
35 /* 35 /*
36 * The functions in this file will not compile correctly with gcc 2.4.x 36 * The functions in this file will not compile correctly with gcc 2.4.x
37 */ 37 */
38 38
39 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 39 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
40 40
41 #include <linux/module.h> 41 #include <linux/module.h>
42 #include <linux/types.h> 42 #include <linux/types.h>
43 #include <linux/kernel.h> 43 #include <linux/kernel.h>
44 #include <linux/kmemcheck.h> 44 #include <linux/kmemcheck.h>
45 #include <linux/mm.h> 45 #include <linux/mm.h>
46 #include <linux/interrupt.h> 46 #include <linux/interrupt.h>
47 #include <linux/in.h> 47 #include <linux/in.h>
48 #include <linux/inet.h> 48 #include <linux/inet.h>
49 #include <linux/slab.h> 49 #include <linux/slab.h>
50 #include <linux/tcp.h> 50 #include <linux/tcp.h>
51 #include <linux/udp.h> 51 #include <linux/udp.h>
52 #include <linux/netdevice.h> 52 #include <linux/netdevice.h>
53 #ifdef CONFIG_NET_CLS_ACT 53 #ifdef CONFIG_NET_CLS_ACT
54 #include <net/pkt_sched.h> 54 #include <net/pkt_sched.h>
55 #endif 55 #endif
56 #include <linux/string.h> 56 #include <linux/string.h>
57 #include <linux/skbuff.h> 57 #include <linux/skbuff.h>
58 #include <linux/splice.h> 58 #include <linux/splice.h>
59 #include <linux/cache.h> 59 #include <linux/cache.h>
60 #include <linux/rtnetlink.h> 60 #include <linux/rtnetlink.h>
61 #include <linux/init.h> 61 #include <linux/init.h>
62 #include <linux/scatterlist.h> 62 #include <linux/scatterlist.h>
63 #include <linux/errqueue.h> 63 #include <linux/errqueue.h>
64 #include <linux/prefetch.h> 64 #include <linux/prefetch.h>
65 65
66 #include <net/protocol.h> 66 #include <net/protocol.h>
67 #include <net/dst.h> 67 #include <net/dst.h>
68 #include <net/sock.h> 68 #include <net/sock.h>
69 #include <net/checksum.h> 69 #include <net/checksum.h>
70 #include <net/ip6_checksum.h> 70 #include <net/ip6_checksum.h>
71 #include <net/xfrm.h> 71 #include <net/xfrm.h>
72 72
73 #include <asm/uaccess.h> 73 #include <asm/uaccess.h>
74 #include <trace/events/skb.h> 74 #include <trace/events/skb.h>
75 #include <linux/highmem.h> 75 #include <linux/highmem.h>
76 76
77 struct kmem_cache *skbuff_head_cache __read_mostly; 77 struct kmem_cache *skbuff_head_cache __read_mostly;
78 static struct kmem_cache *skbuff_fclone_cache __read_mostly; 78 static struct kmem_cache *skbuff_fclone_cache __read_mostly;
79 79
80 /** 80 /**
81 * skb_panic - private function for out-of-line support 81 * skb_panic - private function for out-of-line support
82 * @skb: buffer 82 * @skb: buffer
83 * @sz: size 83 * @sz: size
84 * @addr: address 84 * @addr: address
85 * @msg: skb_over_panic or skb_under_panic 85 * @msg: skb_over_panic or skb_under_panic
86 * 86 *
87 * Out-of-line support for skb_put() and skb_push(). 87 * Out-of-line support for skb_put() and skb_push().
88 * Called via the wrapper skb_over_panic() or skb_under_panic(). 88 * Called via the wrapper skb_over_panic() or skb_under_panic().
89 * Keep out of line to prevent kernel bloat. 89 * Keep out of line to prevent kernel bloat.
90 * __builtin_return_address is not used because it is not always reliable. 90 * __builtin_return_address is not used because it is not always reliable.
91 */ 91 */
92 static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr, 92 static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr,
93 const char msg[]) 93 const char msg[])
94 { 94 {
95 pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n", 95 pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n",
96 msg, addr, skb->len, sz, skb->head, skb->data, 96 msg, addr, skb->len, sz, skb->head, skb->data,
97 (unsigned long)skb->tail, (unsigned long)skb->end, 97 (unsigned long)skb->tail, (unsigned long)skb->end,
98 skb->dev ? skb->dev->name : "<NULL>"); 98 skb->dev ? skb->dev->name : "<NULL>");
99 BUG(); 99 BUG();
100 } 100 }
101 101
102 static void skb_over_panic(struct sk_buff *skb, unsigned int sz, void *addr) 102 static void skb_over_panic(struct sk_buff *skb, unsigned int sz, void *addr)
103 { 103 {
104 skb_panic(skb, sz, addr, __func__); 104 skb_panic(skb, sz, addr, __func__);
105 } 105 }
106 106
107 static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr) 107 static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
108 { 108 {
109 skb_panic(skb, sz, addr, __func__); 109 skb_panic(skb, sz, addr, __func__);
110 } 110 }
111 111
112 /* 112 /*
113 * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells 113 * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
114 * the caller if emergency pfmemalloc reserves are being used. If they are, and 114 * the caller if emergency pfmemalloc reserves are being used. If they are, and
115 * the socket is later found to be SOCK_MEMALLOC, then PFMEMALLOC reserves 115 * the socket is later found to be SOCK_MEMALLOC, then PFMEMALLOC reserves
116 * may be used. Otherwise, the packet data may be discarded until enough 116 * may be used. Otherwise, the packet data may be discarded until enough
117 * memory is free. 117 * memory is free.
118 */ 118 */
119 #define kmalloc_reserve(size, gfp, node, pfmemalloc) \ 119 #define kmalloc_reserve(size, gfp, node, pfmemalloc) \
120 __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc) 120 __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc)
121 121
122 static void *__kmalloc_reserve(size_t size, gfp_t flags, int node, 122 static void *__kmalloc_reserve(size_t size, gfp_t flags, int node,
123 unsigned long ip, bool *pfmemalloc) 123 unsigned long ip, bool *pfmemalloc)
124 { 124 {
125 void *obj; 125 void *obj;
126 bool ret_pfmemalloc = false; 126 bool ret_pfmemalloc = false;
127 127
128 /* 128 /*
129 * Try a regular allocation, when that fails and we're not entitled 129 * Try a regular allocation, when that fails and we're not entitled
130 * to the reserves, fail. 130 * to the reserves, fail.
131 */ 131 */
132 obj = kmalloc_node_track_caller(size, 132 obj = kmalloc_node_track_caller(size,
133 flags | __GFP_NOMEMALLOC | __GFP_NOWARN, 133 flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
134 node); 134 node);
135 if (obj || !(gfp_pfmemalloc_allowed(flags))) 135 if (obj || !(gfp_pfmemalloc_allowed(flags)))
136 goto out; 136 goto out;
137 137
138 /* Try again but now we are using pfmemalloc reserves */ 138 /* Try again but now we are using pfmemalloc reserves */
139 ret_pfmemalloc = true; 139 ret_pfmemalloc = true;
140 obj = kmalloc_node_track_caller(size, flags, node); 140 obj = kmalloc_node_track_caller(size, flags, node);
141 141
142 out: 142 out:
143 if (pfmemalloc) 143 if (pfmemalloc)
144 *pfmemalloc = ret_pfmemalloc; 144 *pfmemalloc = ret_pfmemalloc;
145 145
146 return obj; 146 return obj;
147 } 147 }
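/* Descriptive note on __kmalloc_reserve() above: it is a two-step
 * allocator. The first attempt adds __GFP_NOMEMALLOC | __GFP_NOWARN so it
 * cannot dip into the emergency reserves; only when that fails and
 * gfp_pfmemalloc_allowed(flags) permits it does it retry with the original
 * flags, reporting the fact through *pfmemalloc so callers such as
 * __alloc_skb() can mark the resulting skb as pfmemalloc.
 */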
148 148
149 /* Allocate a new skbuff. We do this ourselves so we can fill in a few 149 /* Allocate a new skbuff. We do this ourselves so we can fill in a few
150 * 'private' fields and also do memory statistics to find all the 150 * 'private' fields and also do memory statistics to find all the
151 * [BEEP] leaks. 151 * [BEEP] leaks.
152 * 152 *
153 */ 153 */
154 154
155 struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node) 155 struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node)
156 { 156 {
157 struct sk_buff *skb; 157 struct sk_buff *skb;
158 158
159 /* Get the HEAD */ 159 /* Get the HEAD */
160 skb = kmem_cache_alloc_node(skbuff_head_cache, 160 skb = kmem_cache_alloc_node(skbuff_head_cache,
161 gfp_mask & ~__GFP_DMA, node); 161 gfp_mask & ~__GFP_DMA, node);
162 if (!skb) 162 if (!skb)
163 goto out; 163 goto out;
164 164
165 /* 165 /*
166 * Only clear those fields we need to clear, not those that we will 166 * Only clear those fields we need to clear, not those that we will
167 * actually initialise below. Hence, don't put any more fields after 167 * actually initialise below. Hence, don't put any more fields after
168 * the tail pointer in struct sk_buff! 168 * the tail pointer in struct sk_buff!
169 */ 169 */
170 memset(skb, 0, offsetof(struct sk_buff, tail)); 170 memset(skb, 0, offsetof(struct sk_buff, tail));
171 skb->head = NULL; 171 skb->head = NULL;
172 skb->truesize = sizeof(struct sk_buff); 172 skb->truesize = sizeof(struct sk_buff);
173 atomic_set(&skb->users, 1); 173 atomic_set(&skb->users, 1);
174 174
175 skb->mac_header = (typeof(skb->mac_header))~0U; 175 skb->mac_header = (typeof(skb->mac_header))~0U;
176 out: 176 out:
177 return skb; 177 return skb;
178 } 178 }
179 179
180 /** 180 /**
181 * __alloc_skb - allocate a network buffer 181 * __alloc_skb - allocate a network buffer
182 * @size: size to allocate 182 * @size: size to allocate
183 * @gfp_mask: allocation mask 183 * @gfp_mask: allocation mask
184 * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache 184 * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache
185 * instead of head cache and allocate a cloned (child) skb. 185 * instead of head cache and allocate a cloned (child) skb.
186 * If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for 186 * If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
187 * allocations in case the data is required for writeback 187 * allocations in case the data is required for writeback
188 * @node: numa node to allocate memory on 188 * @node: numa node to allocate memory on
189 * 189 *
190 * Allocate a new &sk_buff. The returned buffer has no headroom and a 190 * Allocate a new &sk_buff. The returned buffer has no headroom and a
191 * tail room of at least size bytes. The object has a reference count 191 * tail room of at least size bytes. The object has a reference count
192 * of one. The return is the buffer. On a failure the return is %NULL. 192 * of one. The return is the buffer. On a failure the return is %NULL.
193 * 193 *
194 * Buffers may only be allocated from interrupts using a @gfp_mask of 194 * Buffers may only be allocated from interrupts using a @gfp_mask of
195 * %GFP_ATOMIC. 195 * %GFP_ATOMIC.
196 */ 196 */
197 struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, 197 struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
198 int flags, int node) 198 int flags, int node)
199 { 199 {
200 struct kmem_cache *cache; 200 struct kmem_cache *cache;
201 struct skb_shared_info *shinfo; 201 struct skb_shared_info *shinfo;
202 struct sk_buff *skb; 202 struct sk_buff *skb;
203 u8 *data; 203 u8 *data;
204 bool pfmemalloc; 204 bool pfmemalloc;
205 205
206 cache = (flags & SKB_ALLOC_FCLONE) 206 cache = (flags & SKB_ALLOC_FCLONE)
207 ? skbuff_fclone_cache : skbuff_head_cache; 207 ? skbuff_fclone_cache : skbuff_head_cache;
208 208
209 if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX)) 209 if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
210 gfp_mask |= __GFP_MEMALLOC; 210 gfp_mask |= __GFP_MEMALLOC;
211 211
212 /* Get the HEAD */ 212 /* Get the HEAD */
213 skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); 213 skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
214 if (!skb) 214 if (!skb)
215 goto out; 215 goto out;
216 prefetchw(skb); 216 prefetchw(skb);
217 217
218 /* We do our best to align skb_shared_info on a separate cache 218 /* We do our best to align skb_shared_info on a separate cache
219 * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives 219 * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
220 * aligned memory blocks, unless SLUB/SLAB debug is enabled. 220 * aligned memory blocks, unless SLUB/SLAB debug is enabled.
221 * Both skb->head and skb_shared_info are cache line aligned. 221 * Both skb->head and skb_shared_info are cache line aligned.
222 */ 222 */
223 size = SKB_DATA_ALIGN(size); 223 size = SKB_DATA_ALIGN(size);
224 size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 224 size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
225 data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc); 225 data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
226 if (!data) 226 if (!data)
227 goto nodata; 227 goto nodata;
228 /* kmalloc(size) might give us more room than requested. 228 /* kmalloc(size) might give us more room than requested.
229 * Put skb_shared_info exactly at the end of allocated zone, 229 * Put skb_shared_info exactly at the end of allocated zone,
230 * to allow max possible filling before reallocation. 230 * to allow max possible filling before reallocation.
231 */ 231 */
232 size = SKB_WITH_OVERHEAD(ksize(data)); 232 size = SKB_WITH_OVERHEAD(ksize(data));
233 prefetchw(data + size); 233 prefetchw(data + size);
234 234
235 /* 235 /*
236 * Only clear those fields we need to clear, not those that we will 236 * Only clear those fields we need to clear, not those that we will
237 * actually initialise below. Hence, don't put any more fields after 237 * actually initialise below. Hence, don't put any more fields after
238 * the tail pointer in struct sk_buff! 238 * the tail pointer in struct sk_buff!
239 */ 239 */
240 memset(skb, 0, offsetof(struct sk_buff, tail)); 240 memset(skb, 0, offsetof(struct sk_buff, tail));
241 /* Account for allocated memory : skb + skb->head */ 241 /* Account for allocated memory : skb + skb->head */
242 skb->truesize = SKB_TRUESIZE(size); 242 skb->truesize = SKB_TRUESIZE(size);
243 skb->pfmemalloc = pfmemalloc; 243 skb->pfmemalloc = pfmemalloc;
244 atomic_set(&skb->users, 1); 244 atomic_set(&skb->users, 1);
245 skb->head = data; 245 skb->head = data;
246 skb->data = data; 246 skb->data = data;
247 skb_reset_tail_pointer(skb); 247 skb_reset_tail_pointer(skb);
248 skb->end = skb->tail + size; 248 skb->end = skb->tail + size;
249 skb->mac_header = (typeof(skb->mac_header))~0U; 249 skb->mac_header = (typeof(skb->mac_header))~0U;
250 skb->transport_header = (typeof(skb->transport_header))~0U; 250 skb->transport_header = (typeof(skb->transport_header))~0U;
251 251
252 /* make sure we initialize shinfo sequentially */ 252 /* make sure we initialize shinfo sequentially */
253 shinfo = skb_shinfo(skb); 253 shinfo = skb_shinfo(skb);
254 memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); 254 memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
255 atomic_set(&shinfo->dataref, 1); 255 atomic_set(&shinfo->dataref, 1);
256 kmemcheck_annotate_variable(shinfo->destructor_arg); 256 kmemcheck_annotate_variable(shinfo->destructor_arg);
257 257
258 if (flags & SKB_ALLOC_FCLONE) { 258 if (flags & SKB_ALLOC_FCLONE) {
259 struct sk_buff *child = skb + 1; 259 struct sk_buff *child = skb + 1;
260 atomic_t *fclone_ref = (atomic_t *) (child + 1); 260 atomic_t *fclone_ref = (atomic_t *) (child + 1);
261 261
262 kmemcheck_annotate_bitfield(child, flags1); 262 kmemcheck_annotate_bitfield(child, flags1);
263 kmemcheck_annotate_bitfield(child, flags2); 263 kmemcheck_annotate_bitfield(child, flags2);
264 skb->fclone = SKB_FCLONE_ORIG; 264 skb->fclone = SKB_FCLONE_ORIG;
265 atomic_set(fclone_ref, 1); 265 atomic_set(fclone_ref, 1);
266 266
267 child->fclone = SKB_FCLONE_UNAVAILABLE; 267 child->fclone = SKB_FCLONE_UNAVAILABLE;
268 child->pfmemalloc = pfmemalloc; 268 child->pfmemalloc = pfmemalloc;
269 } 269 }
270 out: 270 out:
271 return skb; 271 return skb;
272 nodata: 272 nodata:
273 kmem_cache_free(cache, skb); 273 kmem_cache_free(cache, skb);
274 skb = NULL; 274 skb = NULL;
275 goto out; 275 goto out;
276 } 276 }
277 EXPORT_SYMBOL(__alloc_skb); 277 EXPORT_SYMBOL(__alloc_skb);
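/* Illustrative sketch (not from the kernel sources): a minimal caller of
 * the allocator above. Most code goes through the alloc_skb() wrapper,
 * which passes flags == 0 and NUMA_NO_NODE down to __alloc_skb(). The
 * function name and sizes here are examples only.
 */
static struct sk_buff *example_alloc_packet(unsigned int payload_len)
{
	/* one allocation covers headroom, payload and skb_shared_info */
	struct sk_buff *skb = alloc_skb(payload_len + 128, GFP_ATOMIC);

	if (!skb)
		return NULL;
	skb_reserve(skb, 128);		/* leave headroom for protocol headers */
	skb_put(skb, payload_len);	/* claim tail room as packet data */
	return skb;
}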
278 278
279 /** 279 /**
280 * build_skb - build a network buffer 280 * build_skb - build a network buffer
281 * @data: data buffer provided by caller 281 * @data: data buffer provided by caller
282 * @frag_size: size of fragment, or 0 if head was kmalloced 282 * @frag_size: size of fragment, or 0 if head was kmalloced
283 * 283 *
284 * Allocate a new &sk_buff. Caller provides space holding head and 284 * Allocate a new &sk_buff. Caller provides space holding head and
285 * skb_shared_info. @data must have been allocated by kmalloc() only if 285 * skb_shared_info. @data must have been allocated by kmalloc() only if
286 * @frag_size is 0, otherwise data should come from the page allocator. 286 * @frag_size is 0, otherwise data should come from the page allocator.
287 * The return is the new skb buffer. 287 * The return is the new skb buffer.
288 * On a failure the return is %NULL, and @data is not freed. 288 * On a failure the return is %NULL, and @data is not freed.
289 * Notes : 289 * Notes :
290 * Before IO, the driver allocates only the data buffer where the NIC puts the incoming frame 290 * Before IO, the driver allocates only the data buffer where the NIC puts the incoming frame
291 * Driver should add room at head (NET_SKB_PAD) and 291 * Driver should add room at head (NET_SKB_PAD) and
292 * MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info)) 292 * MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info))
293 * After IO, driver calls build_skb(), to allocate sk_buff and populate it 293 * After IO, driver calls build_skb(), to allocate sk_buff and populate it
294 * before giving packet to stack. 294 * before giving packet to stack.
295 * RX rings contain only data buffers, not full skbs. 295 * RX rings contain only data buffers, not full skbs.
296 */ 296 */
297 struct sk_buff *build_skb(void *data, unsigned int frag_size) 297 struct sk_buff *build_skb(void *data, unsigned int frag_size)
298 { 298 {
299 struct skb_shared_info *shinfo; 299 struct skb_shared_info *shinfo;
300 struct sk_buff *skb; 300 struct sk_buff *skb;
301 unsigned int size = frag_size ? : ksize(data); 301 unsigned int size = frag_size ? : ksize(data);
302 302
303 skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC); 303 skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
304 if (!skb) 304 if (!skb)
305 return NULL; 305 return NULL;
306 306
307 size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 307 size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
308 308
309 memset(skb, 0, offsetof(struct sk_buff, tail)); 309 memset(skb, 0, offsetof(struct sk_buff, tail));
310 skb->truesize = SKB_TRUESIZE(size); 310 skb->truesize = SKB_TRUESIZE(size);
311 skb->head_frag = frag_size != 0; 311 skb->head_frag = frag_size != 0;
312 atomic_set(&skb->users, 1); 312 atomic_set(&skb->users, 1);
313 skb->head = data; 313 skb->head = data;
314 skb->data = data; 314 skb->data = data;
315 skb_reset_tail_pointer(skb); 315 skb_reset_tail_pointer(skb);
316 skb->end = skb->tail + size; 316 skb->end = skb->tail + size;
317 skb->mac_header = (typeof(skb->mac_header))~0U; 317 skb->mac_header = (typeof(skb->mac_header))~0U;
318 skb->transport_header = (typeof(skb->transport_header))~0U; 318 skb->transport_header = (typeof(skb->transport_header))~0U;
319 319
320 /* make sure we initialize shinfo sequentially */ 320 /* make sure we initialize shinfo sequentially */
321 shinfo = skb_shinfo(skb); 321 shinfo = skb_shinfo(skb);
322 memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); 322 memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
323 atomic_set(&shinfo->dataref, 1); 323 atomic_set(&shinfo->dataref, 1);
324 kmemcheck_annotate_variable(shinfo->destructor_arg); 324 kmemcheck_annotate_variable(shinfo->destructor_arg);
325 325
326 return skb; 326 return skb;
327 } 327 }
328 EXPORT_SYMBOL(build_skb); 328 EXPORT_SYMBOL(build_skb);
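/* Illustrative sketch (not from the kernel sources): the RX-ring flow
 * described in the kernel-doc above. The driver sizes the fragment to cover
 * the frame plus NET_SKB_PAD headroom plus tail room for skb_shared_info,
 * then turns the filled buffer into an skb after DMA completes. Names are
 * examples only.
 */
static struct sk_buff *example_frag_to_skb(void *data, unsigned int fragsz,
					   unsigned int frame_len)
{
	struct sk_buff *skb = build_skb(data, fragsz);

	if (unlikely(!skb))
		return NULL;
	skb_reserve(skb, NET_SKB_PAD);	/* skip the headroom left by the driver */
	skb_put(skb, frame_len);	/* the NIC already wrote frame_len bytes */
	return skb;
}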
329 329
330 struct netdev_alloc_cache { 330 struct netdev_alloc_cache {
331 struct page_frag frag; 331 struct page_frag frag;
332 /* we maintain a pagecount bias, so that we dont dirty cache line 332 /* we maintain a pagecount bias, so that we dont dirty cache line
333 * containing page->_count every time we allocate a fragment. 333 * containing page->_count every time we allocate a fragment.
334 */ 334 */
335 unsigned int pagecnt_bias; 335 unsigned int pagecnt_bias;
336 }; 336 };
337 static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); 337 static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
338 338
339 static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) 339 static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
340 { 340 {
341 struct netdev_alloc_cache *nc; 341 struct netdev_alloc_cache *nc;
342 void *data = NULL; 342 void *data = NULL;
343 int order; 343 int order;
344 unsigned long flags; 344 unsigned long flags;
345 345
346 local_irq_save(flags); 346 local_irq_save(flags);
347 nc = &__get_cpu_var(netdev_alloc_cache); 347 nc = &__get_cpu_var(netdev_alloc_cache);
348 if (unlikely(!nc->frag.page)) { 348 if (unlikely(!nc->frag.page)) {
349 refill: 349 refill:
350 for (order = NETDEV_FRAG_PAGE_MAX_ORDER; ;) { 350 for (order = NETDEV_FRAG_PAGE_MAX_ORDER; ;) {
351 gfp_t gfp = gfp_mask; 351 gfp_t gfp = gfp_mask;
352 352
353 if (order) 353 if (order)
354 gfp |= __GFP_COMP | __GFP_NOWARN; 354 gfp |= __GFP_COMP | __GFP_NOWARN;
355 nc->frag.page = alloc_pages(gfp, order); 355 nc->frag.page = alloc_pages(gfp, order);
356 if (likely(nc->frag.page)) 356 if (likely(nc->frag.page))
357 break; 357 break;
358 if (--order < 0) 358 if (--order < 0)
359 goto end; 359 goto end;
360 } 360 }
361 nc->frag.size = PAGE_SIZE << order; 361 nc->frag.size = PAGE_SIZE << order;
362 recycle: 362 recycle:
363 atomic_set(&nc->frag.page->_count, NETDEV_PAGECNT_MAX_BIAS); 363 atomic_set(&nc->frag.page->_count, NETDEV_PAGECNT_MAX_BIAS);
364 nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS; 364 nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
365 nc->frag.offset = 0; 365 nc->frag.offset = 0;
366 } 366 }
367 367
368 if (nc->frag.offset + fragsz > nc->frag.size) { 368 if (nc->frag.offset + fragsz > nc->frag.size) {
369 /* avoid unnecessary locked operations if possible */ 369 /* avoid unnecessary locked operations if possible */
370 if ((atomic_read(&nc->frag.page->_count) == nc->pagecnt_bias) || 370 if ((atomic_read(&nc->frag.page->_count) == nc->pagecnt_bias) ||
371 atomic_sub_and_test(nc->pagecnt_bias, &nc->frag.page->_count)) 371 atomic_sub_and_test(nc->pagecnt_bias, &nc->frag.page->_count))
372 goto recycle; 372 goto recycle;
373 goto refill; 373 goto refill;
374 } 374 }
375 375
376 data = page_address(nc->frag.page) + nc->frag.offset; 376 data = page_address(nc->frag.page) + nc->frag.offset;
377 nc->frag.offset += fragsz; 377 nc->frag.offset += fragsz;
378 nc->pagecnt_bias--; 378 nc->pagecnt_bias--;
379 end: 379 end:
380 local_irq_restore(flags); 380 local_irq_restore(flags);
381 return data; 381 return data;
382 } 382 }
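/* Descriptive note on the per-cpu cache used above: it carves linear
 * fragments out of one high-order page. page->_count is biased up front to
 * NETDEV_PAGECNT_MAX_BIAS and nc->pagecnt_bias is decremented locally per
 * fragment, so the shared _count cache line is only touched again on the
 * recycle/refill paths once the page is exhausted.
 */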
383 383
384 /** 384 /**
385 * netdev_alloc_frag - allocate a page fragment 385 * netdev_alloc_frag - allocate a page fragment
386 * @fragsz: fragment size 386 * @fragsz: fragment size
387 * 387 *
388 * Allocates a frag from a page for receive buffer. 388 * Allocates a frag from a page for receive buffer.
389 * Uses GFP_ATOMIC allocations. 389 * Uses GFP_ATOMIC allocations.
390 */ 390 */
391 void *netdev_alloc_frag(unsigned int fragsz) 391 void *netdev_alloc_frag(unsigned int fragsz)
392 { 392 {
393 return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD); 393 return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
394 } 394 }
395 EXPORT_SYMBOL(netdev_alloc_frag); 395 EXPORT_SYMBOL(netdev_alloc_frag);
396 396
397 /** 397 /**
398 * __netdev_alloc_skb - allocate an skbuff for rx on a specific device 398 * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
399 * @dev: network device to receive on 399 * @dev: network device to receive on
400 * @length: length to allocate 400 * @length: length to allocate
401 * @gfp_mask: get_free_pages mask, passed to alloc_skb 401 * @gfp_mask: get_free_pages mask, passed to alloc_skb
402 * 402 *
403 * Allocate a new &sk_buff and assign it a usage count of one. The 403 * Allocate a new &sk_buff and assign it a usage count of one. The
404 * buffer has unspecified headroom built in. Users should allocate 404 * buffer has unspecified headroom built in. Users should allocate
405 * the headroom they think they need without accounting for the 405 * the headroom they think they need without accounting for the
406 * built in space. The built in space is used for optimisations. 406 * built in space. The built in space is used for optimisations.
407 * 407 *
408 * %NULL is returned if there is no free memory. 408 * %NULL is returned if there is no free memory.
409 */ 409 */
410 struct sk_buff *__netdev_alloc_skb(struct net_device *dev, 410 struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
411 unsigned int length, gfp_t gfp_mask) 411 unsigned int length, gfp_t gfp_mask)
412 { 412 {
413 struct sk_buff *skb = NULL; 413 struct sk_buff *skb = NULL;
414 unsigned int fragsz = SKB_DATA_ALIGN(length + NET_SKB_PAD) + 414 unsigned int fragsz = SKB_DATA_ALIGN(length + NET_SKB_PAD) +
415 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 415 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
416 416
417 if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) { 417 if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) {
418 void *data; 418 void *data;
419 419
420 if (sk_memalloc_socks()) 420 if (sk_memalloc_socks())
421 gfp_mask |= __GFP_MEMALLOC; 421 gfp_mask |= __GFP_MEMALLOC;
422 422
423 data = __netdev_alloc_frag(fragsz, gfp_mask); 423 data = __netdev_alloc_frag(fragsz, gfp_mask);
424 424
425 if (likely(data)) { 425 if (likely(data)) {
426 skb = build_skb(data, fragsz); 426 skb = build_skb(data, fragsz);
427 if (unlikely(!skb)) 427 if (unlikely(!skb))
428 put_page(virt_to_head_page(data)); 428 put_page(virt_to_head_page(data));
429 } 429 }
430 } else { 430 } else {
431 skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 431 skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
432 SKB_ALLOC_RX, NUMA_NO_NODE); 432 SKB_ALLOC_RX, NUMA_NO_NODE);
433 } 433 }
434 if (likely(skb)) { 434 if (likely(skb)) {
435 skb_reserve(skb, NET_SKB_PAD); 435 skb_reserve(skb, NET_SKB_PAD);
436 skb->dev = dev; 436 skb->dev = dev;
437 } 437 }
438 return skb; 438 return skb;
439 } 439 }
440 EXPORT_SYMBOL(__netdev_alloc_skb); 440 EXPORT_SYMBOL(__netdev_alloc_skb);
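/* Illustrative sketch (not from the kernel sources): drivers normally call
 * the netdev_alloc_skb() wrapper, which invokes __netdev_alloc_skb() with
 * GFP_ATOMIC. The length below is an arbitrary example.
 */
static struct sk_buff *example_rx_refill(struct net_device *dev)
{
	struct sk_buff *skb = netdev_alloc_skb(dev, 1536);

	if (!skb)
		return NULL;
	/* NET_SKB_PAD headroom has already been reserved by the helper */
	return skb;
}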
441 441
442 void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, 442 void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
443 int size, unsigned int truesize) 443 int size, unsigned int truesize)
444 { 444 {
445 skb_fill_page_desc(skb, i, page, off, size); 445 skb_fill_page_desc(skb, i, page, off, size);
446 skb->len += size; 446 skb->len += size;
447 skb->data_len += size; 447 skb->data_len += size;
448 skb->truesize += truesize; 448 skb->truesize += truesize;
449 } 449 }
450 EXPORT_SYMBOL(skb_add_rx_frag); 450 EXPORT_SYMBOL(skb_add_rx_frag);
451 451
452 void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, 452 void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
453 unsigned int truesize) 453 unsigned int truesize)
454 { 454 {
455 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 455 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
456 456
457 skb_frag_size_add(frag, size); 457 skb_frag_size_add(frag, size);
458 skb->len += size; 458 skb->len += size;
459 skb->data_len += size; 459 skb->data_len += size;
460 skb->truesize += truesize; 460 skb->truesize += truesize;
461 } 461 }
462 EXPORT_SYMBOL(skb_coalesce_rx_frag); 462 EXPORT_SYMBOL(skb_coalesce_rx_frag);
463 463
464 static void skb_drop_list(struct sk_buff **listp) 464 static void skb_drop_list(struct sk_buff **listp)
465 { 465 {
466 kfree_skb_list(*listp); 466 kfree_skb_list(*listp);
467 *listp = NULL; 467 *listp = NULL;
468 } 468 }
469 469
470 static inline void skb_drop_fraglist(struct sk_buff *skb) 470 static inline void skb_drop_fraglist(struct sk_buff *skb)
471 { 471 {
472 skb_drop_list(&skb_shinfo(skb)->frag_list); 472 skb_drop_list(&skb_shinfo(skb)->frag_list);
473 } 473 }
474 474
475 static void skb_clone_fraglist(struct sk_buff *skb) 475 static void skb_clone_fraglist(struct sk_buff *skb)
476 { 476 {
477 struct sk_buff *list; 477 struct sk_buff *list;
478 478
479 skb_walk_frags(skb, list) 479 skb_walk_frags(skb, list)
480 skb_get(list); 480 skb_get(list);
481 } 481 }
482 482
483 static void skb_free_head(struct sk_buff *skb) 483 static void skb_free_head(struct sk_buff *skb)
484 { 484 {
485 if (skb->head_frag) 485 if (skb->head_frag)
486 put_page(virt_to_head_page(skb->head)); 486 put_page(virt_to_head_page(skb->head));
487 else 487 else
488 kfree(skb->head); 488 kfree(skb->head);
489 } 489 }
490 490
491 static void skb_release_data(struct sk_buff *skb) 491 static void skb_release_data(struct sk_buff *skb)
492 { 492 {
493 if (!skb->cloned || 493 if (!skb->cloned ||
494 !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1, 494 !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
495 &skb_shinfo(skb)->dataref)) { 495 &skb_shinfo(skb)->dataref)) {
496 if (skb_shinfo(skb)->nr_frags) { 496 if (skb_shinfo(skb)->nr_frags) {
497 int i; 497 int i;
498 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 498 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
499 skb_frag_unref(skb, i); 499 skb_frag_unref(skb, i);
500 } 500 }
501 501
502 /* 502 /*
503 * If the skb buffer is from userspace, we need to notify the caller 503 * If the skb buffer is from userspace, we need to notify the caller
504 * that the lower device's DMA is done; 504 * that the lower device's DMA is done;
505 */ 505 */
506 if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { 506 if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
507 struct ubuf_info *uarg; 507 struct ubuf_info *uarg;
508 508
509 uarg = skb_shinfo(skb)->destructor_arg; 509 uarg = skb_shinfo(skb)->destructor_arg;
510 if (uarg->callback) 510 if (uarg->callback)
511 uarg->callback(uarg, true); 511 uarg->callback(uarg, true);
512 } 512 }
513 513
514 if (skb_has_frag_list(skb)) 514 if (skb_has_frag_list(skb))
515 skb_drop_fraglist(skb); 515 skb_drop_fraglist(skb);
516 516
517 skb_free_head(skb); 517 skb_free_head(skb);
518 } 518 }
519 } 519 }
520 520
521 /* 521 /*
522 * Free an skbuff by memory without cleaning the state. 522 * Free an skbuff by memory without cleaning the state.
523 */ 523 */
524 static void kfree_skbmem(struct sk_buff *skb) 524 static void kfree_skbmem(struct sk_buff *skb)
525 { 525 {
526 struct sk_buff *other; 526 struct sk_buff *other;
527 atomic_t *fclone_ref; 527 atomic_t *fclone_ref;
528 528
529 switch (skb->fclone) { 529 switch (skb->fclone) {
530 case SKB_FCLONE_UNAVAILABLE: 530 case SKB_FCLONE_UNAVAILABLE:
531 kmem_cache_free(skbuff_head_cache, skb); 531 kmem_cache_free(skbuff_head_cache, skb);
532 break; 532 break;
533 533
534 case SKB_FCLONE_ORIG: 534 case SKB_FCLONE_ORIG:
535 fclone_ref = (atomic_t *) (skb + 2); 535 fclone_ref = (atomic_t *) (skb + 2);
536 if (atomic_dec_and_test(fclone_ref)) 536 if (atomic_dec_and_test(fclone_ref))
537 kmem_cache_free(skbuff_fclone_cache, skb); 537 kmem_cache_free(skbuff_fclone_cache, skb);
538 break; 538 break;
539 539
540 case SKB_FCLONE_CLONE: 540 case SKB_FCLONE_CLONE:
541 fclone_ref = (atomic_t *) (skb + 1); 541 fclone_ref = (atomic_t *) (skb + 1);
542 other = skb - 1; 542 other = skb - 1;
543 543
544 /* The clone portion is available for 544 /* The clone portion is available for
545 * fast-cloning again. 545 * fast-cloning again.
546 */ 546 */
547 skb->fclone = SKB_FCLONE_UNAVAILABLE; 547 skb->fclone = SKB_FCLONE_UNAVAILABLE;
548 548
549 if (atomic_dec_and_test(fclone_ref)) 549 if (atomic_dec_and_test(fclone_ref))
550 kmem_cache_free(skbuff_fclone_cache, other); 550 kmem_cache_free(skbuff_fclone_cache, other);
551 break; 551 break;
552 } 552 }
553 } 553 }
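/* Descriptive note on the pointer arithmetic in kfree_skbmem() above: it
 * mirrors the layout of skbuff_fclone_cache objects set up in __alloc_skb().
 * The parent sk_buff, its fast-clone child and the shared atomic reference
 * count sit back to back, so (skb + 2) from the parent, or (skb + 1) and
 * (skb - 1) from the child, reach the other members.
 */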
554 554
555 static void skb_release_head_state(struct sk_buff *skb) 555 static void skb_release_head_state(struct sk_buff *skb)
556 { 556 {
557 skb_dst_drop(skb); 557 skb_dst_drop(skb);
558 #ifdef CONFIG_XFRM 558 #ifdef CONFIG_XFRM
559 secpath_put(skb->sp); 559 secpath_put(skb->sp);
560 #endif 560 #endif
561 if (skb->destructor) { 561 if (skb->destructor) {
562 WARN_ON(in_irq()); 562 WARN_ON(in_irq());
563 skb->destructor(skb); 563 skb->destructor(skb);
564 } 564 }
565 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 565 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
566 nf_conntrack_put(skb->nfct); 566 nf_conntrack_put(skb->nfct);
567 #endif 567 #endif
568 #ifdef CONFIG_BRIDGE_NETFILTER 568 #ifdef CONFIG_BRIDGE_NETFILTER
569 nf_bridge_put(skb->nf_bridge); 569 nf_bridge_put(skb->nf_bridge);
570 #endif 570 #endif
571 /* XXX: Is this still necessary? - JHS */ 571 /* XXX: Is this still necessary? - JHS */
572 #ifdef CONFIG_NET_SCHED 572 #ifdef CONFIG_NET_SCHED
573 skb->tc_index = 0; 573 skb->tc_index = 0;
574 #ifdef CONFIG_NET_CLS_ACT 574 #ifdef CONFIG_NET_CLS_ACT
575 skb->tc_verd = 0; 575 skb->tc_verd = 0;
576 #endif 576 #endif
577 #endif 577 #endif
578 } 578 }
579 579
580 /* Free everything but the sk_buff shell. */ 580 /* Free everything but the sk_buff shell. */
581 static void skb_release_all(struct sk_buff *skb) 581 static void skb_release_all(struct sk_buff *skb)
582 { 582 {
583 skb_release_head_state(skb); 583 skb_release_head_state(skb);
584 if (likely(skb->head)) 584 if (likely(skb->head))
585 skb_release_data(skb); 585 skb_release_data(skb);
586 } 586 }
587 587
588 /** 588 /**
589 * __kfree_skb - private function 589 * __kfree_skb - private function
590 * @skb: buffer 590 * @skb: buffer
591 * 591 *
592 * Free an sk_buff. Release anything attached to the buffer. 592 * Free an sk_buff. Release anything attached to the buffer.
593 * Clean the state. This is an internal helper function. Users should 593 * Clean the state. This is an internal helper function. Users should
594 * always call kfree_skb 594 * always call kfree_skb
595 */ 595 */
596 596
597 void __kfree_skb(struct sk_buff *skb) 597 void __kfree_skb(struct sk_buff *skb)
598 { 598 {
599 skb_release_all(skb); 599 skb_release_all(skb);
600 kfree_skbmem(skb); 600 kfree_skbmem(skb);
601 } 601 }
602 EXPORT_SYMBOL(__kfree_skb); 602 EXPORT_SYMBOL(__kfree_skb);
603 603
604 /** 604 /**
605 * kfree_skb - free an sk_buff 605 * kfree_skb - free an sk_buff
606 * @skb: buffer to free 606 * @skb: buffer to free
607 * 607 *
608 * Drop a reference to the buffer and free it if the usage count has 608 * Drop a reference to the buffer and free it if the usage count has
609 * hit zero. 609 * hit zero.
610 */ 610 */
611 void kfree_skb(struct sk_buff *skb) 611 void kfree_skb(struct sk_buff *skb)
612 { 612 {
613 if (unlikely(!skb)) 613 if (unlikely(!skb))
614 return; 614 return;
615 if (likely(atomic_read(&skb->users) == 1)) 615 if (likely(atomic_read(&skb->users) == 1))
616 smp_rmb(); 616 smp_rmb();
617 else if (likely(!atomic_dec_and_test(&skb->users))) 617 else if (likely(!atomic_dec_and_test(&skb->users)))
618 return; 618 return;
619 trace_kfree_skb(skb, __builtin_return_address(0)); 619 trace_kfree_skb(skb, __builtin_return_address(0));
620 __kfree_skb(skb); 620 __kfree_skb(skb);
621 } 621 }
622 EXPORT_SYMBOL(kfree_skb); 622 EXPORT_SYMBOL(kfree_skb);
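/* Descriptive note: kfree_skb() above and consume_skb() below share a fast
 * path. When skb->users is already 1 the caller is the sole owner, so only
 * an smp_rmb() is needed before tearing the buffer down; otherwise the
 * reference count is atomically decremented and the skb is freed only by
 * whoever drops it to zero.
 */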
623 623
624 void kfree_skb_list(struct sk_buff *segs) 624 void kfree_skb_list(struct sk_buff *segs)
625 { 625 {
626 while (segs) { 626 while (segs) {
627 struct sk_buff *next = segs->next; 627 struct sk_buff *next = segs->next;
628 628
629 kfree_skb(segs); 629 kfree_skb(segs);
630 segs = next; 630 segs = next;
631 } 631 }
632 } 632 }
633 EXPORT_SYMBOL(kfree_skb_list); 633 EXPORT_SYMBOL(kfree_skb_list);
634 634
635 /** 635 /**
636 * skb_tx_error - report an sk_buff xmit error 636 * skb_tx_error - report an sk_buff xmit error
637 * @skb: buffer that triggered an error 637 * @skb: buffer that triggered an error
638 * 638 *
639 * Report xmit error if a device callback is tracking this skb. 639 * Report xmit error if a device callback is tracking this skb.
640 * skb must be freed afterwards. 640 * skb must be freed afterwards.
641 */ 641 */
642 void skb_tx_error(struct sk_buff *skb) 642 void skb_tx_error(struct sk_buff *skb)
643 { 643 {
644 if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { 644 if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
645 struct ubuf_info *uarg; 645 struct ubuf_info *uarg;
646 646
647 uarg = skb_shinfo(skb)->destructor_arg; 647 uarg = skb_shinfo(skb)->destructor_arg;
648 if (uarg->callback) 648 if (uarg->callback)
649 uarg->callback(uarg, false); 649 uarg->callback(uarg, false);
650 skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; 650 skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
651 } 651 }
652 } 652 }
653 EXPORT_SYMBOL(skb_tx_error); 653 EXPORT_SYMBOL(skb_tx_error);
654 654
655 /** 655 /**
656 * consume_skb - free an skbuff 656 * consume_skb - free an skbuff
657 * @skb: buffer to free 657 * @skb: buffer to free
658 * 658 *
659 * Drop a ref to the buffer and free it if the usage count has hit zero. 659 * Drop a ref to the buffer and free it if the usage count has hit zero.
660 * Functions identically to kfree_skb, but kfree_skb assumes that the frame 660 * Functions identically to kfree_skb, but kfree_skb assumes that the frame
661 * is being dropped after a failure and notes that. 661 * is being dropped after a failure and notes that.
662 */ 662 */
663 void consume_skb(struct sk_buff *skb) 663 void consume_skb(struct sk_buff *skb)
664 { 664 {
665 if (unlikely(!skb)) 665 if (unlikely(!skb))
666 return; 666 return;
667 if (likely(atomic_read(&skb->users) == 1)) 667 if (likely(atomic_read(&skb->users) == 1))
668 smp_rmb(); 668 smp_rmb();
669 else if (likely(!atomic_dec_and_test(&skb->users))) 669 else if (likely(!atomic_dec_and_test(&skb->users)))
670 return; 670 return;
671 trace_consume_skb(skb); 671 trace_consume_skb(skb);
672 __kfree_skb(skb); 672 __kfree_skb(skb);
673 } 673 }
674 EXPORT_SYMBOL(consume_skb); 674 EXPORT_SYMBOL(consume_skb);
675 675
676 static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) 676 static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
677 { 677 {
678 new->tstamp = old->tstamp; 678 new->tstamp = old->tstamp;
679 new->dev = old->dev; 679 new->dev = old->dev;
680 new->transport_header = old->transport_header; 680 new->transport_header = old->transport_header;
681 new->network_header = old->network_header; 681 new->network_header = old->network_header;
682 new->mac_header = old->mac_header; 682 new->mac_header = old->mac_header;
683 new->inner_protocol = old->inner_protocol; 683 new->inner_protocol = old->inner_protocol;
684 new->inner_transport_header = old->inner_transport_header; 684 new->inner_transport_header = old->inner_transport_header;
685 new->inner_network_header = old->inner_network_header; 685 new->inner_network_header = old->inner_network_header;
686 new->inner_mac_header = old->inner_mac_header; 686 new->inner_mac_header = old->inner_mac_header;
687 skb_dst_copy(new, old); 687 skb_dst_copy(new, old);
688 skb_copy_hash(new, old); 688 skb_copy_hash(new, old);
689 new->ooo_okay = old->ooo_okay; 689 new->ooo_okay = old->ooo_okay;
690 new->no_fcs = old->no_fcs; 690 new->no_fcs = old->no_fcs;
691 new->encapsulation = old->encapsulation; 691 new->encapsulation = old->encapsulation;
692 #ifdef CONFIG_XFRM 692 #ifdef CONFIG_XFRM
693 new->sp = secpath_get(old->sp); 693 new->sp = secpath_get(old->sp);
694 #endif 694 #endif
695 memcpy(new->cb, old->cb, sizeof(old->cb)); 695 memcpy(new->cb, old->cb, sizeof(old->cb));
696 new->csum = old->csum; 696 new->csum = old->csum;
697 new->local_df = old->local_df; 697 new->local_df = old->local_df;
698 new->pkt_type = old->pkt_type; 698 new->pkt_type = old->pkt_type;
699 new->ip_summed = old->ip_summed; 699 new->ip_summed = old->ip_summed;
700 skb_copy_queue_mapping(new, old); 700 skb_copy_queue_mapping(new, old);
701 new->priority = old->priority; 701 new->priority = old->priority;
702 #if IS_ENABLED(CONFIG_IP_VS) 702 #if IS_ENABLED(CONFIG_IP_VS)
703 new->ipvs_property = old->ipvs_property; 703 new->ipvs_property = old->ipvs_property;
704 #endif 704 #endif
705 new->pfmemalloc = old->pfmemalloc; 705 new->pfmemalloc = old->pfmemalloc;
706 new->protocol = old->protocol; 706 new->protocol = old->protocol;
707 new->mark = old->mark; 707 new->mark = old->mark;
708 new->skb_iif = old->skb_iif; 708 new->skb_iif = old->skb_iif;
709 __nf_copy(new, old); 709 __nf_copy(new, old);
710 #ifdef CONFIG_NET_SCHED 710 #ifdef CONFIG_NET_SCHED
711 new->tc_index = old->tc_index; 711 new->tc_index = old->tc_index;
712 #ifdef CONFIG_NET_CLS_ACT 712 #ifdef CONFIG_NET_CLS_ACT
713 new->tc_verd = old->tc_verd; 713 new->tc_verd = old->tc_verd;
714 #endif 714 #endif
715 #endif 715 #endif
716 new->vlan_proto = old->vlan_proto; 716 new->vlan_proto = old->vlan_proto;
717 new->vlan_tci = old->vlan_tci; 717 new->vlan_tci = old->vlan_tci;
718 718
719 skb_copy_secmark(new, old); 719 skb_copy_secmark(new, old);
720 720
721 #ifdef CONFIG_NET_RX_BUSY_POLL 721 #ifdef CONFIG_NET_RX_BUSY_POLL
722 new->napi_id = old->napi_id; 722 new->napi_id = old->napi_id;
723 #endif 723 #endif
724 } 724 }
725 725
726 /* 726 /*
727 * You should not add any new code to this function. Add it to 727 * You should not add any new code to this function. Add it to
728 * __copy_skb_header above instead. 728 * __copy_skb_header above instead.
729 */ 729 */
730 static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) 730 static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
731 { 731 {
732 #define C(x) n->x = skb->x 732 #define C(x) n->x = skb->x
733 733
734 n->next = n->prev = NULL; 734 n->next = n->prev = NULL;
735 n->sk = NULL; 735 n->sk = NULL;
736 __copy_skb_header(n, skb); 736 __copy_skb_header(n, skb);
737 737
738 C(len); 738 C(len);
739 C(data_len); 739 C(data_len);
740 C(mac_len); 740 C(mac_len);
741 n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; 741 n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
742 n->cloned = 1; 742 n->cloned = 1;
743 n->nohdr = 0; 743 n->nohdr = 0;
744 n->destructor = NULL; 744 n->destructor = NULL;
745 C(tail); 745 C(tail);
746 C(end); 746 C(end);
747 C(head); 747 C(head);
748 C(head_frag); 748 C(head_frag);
749 C(data); 749 C(data);
750 C(truesize); 750 C(truesize);
751 atomic_set(&n->users, 1); 751 atomic_set(&n->users, 1);
752 752
753 atomic_inc(&(skb_shinfo(skb)->dataref)); 753 atomic_inc(&(skb_shinfo(skb)->dataref));
754 skb->cloned = 1; 754 skb->cloned = 1;
755 755
756 return n; 756 return n;
757 #undef C 757 #undef C
758 } 758 }
759 759
760 /** 760 /**
761 * skb_morph - morph one skb into another 761 * skb_morph - morph one skb into another
762 * @dst: the skb to receive the contents 762 * @dst: the skb to receive the contents
763 * @src: the skb to supply the contents 763 * @src: the skb to supply the contents
764 * 764 *
765 * This is identical to skb_clone except that the target skb is 765 * This is identical to skb_clone except that the target skb is
766 * supplied by the user. 766 * supplied by the user.
767 * 767 *
768 * The target skb is returned upon exit. 768 * The target skb is returned upon exit.
769 */ 769 */
770 struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) 770 struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
771 { 771 {
772 skb_release_all(dst); 772 skb_release_all(dst);
773 return __skb_clone(dst, src); 773 return __skb_clone(dst, src);
774 } 774 }
775 EXPORT_SYMBOL_GPL(skb_morph); 775 EXPORT_SYMBOL_GPL(skb_morph);

/**
 * skb_copy_ubufs - copy userspace skb frag buffers to kernel
 * @skb: the skb to modify
 * @gfp_mask: allocation priority
 *
 * This must be called on an SKBTX_DEV_ZEROCOPY skb.
 * It will copy all frags into kernel memory and drop the reference
 * to the userspace pages.
 *
 * If this function is called from an interrupt, @gfp_mask must be
 * %GFP_ATOMIC.
 *
 * Returns 0 on success or a negative error code on failure
 * to allocate kernel memory to copy to.
 */
int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
{
        int i;
        int num_frags = skb_shinfo(skb)->nr_frags;
        struct page *page, *head = NULL;
        struct ubuf_info *uarg = skb_shinfo(skb)->destructor_arg;

        for (i = 0; i < num_frags; i++) {
                u8 *vaddr;
                skb_frag_t *f = &skb_shinfo(skb)->frags[i];

                page = alloc_page(gfp_mask);
                if (!page) {
                        while (head) {
                                struct page *next = (struct page *)page_private(head);
                                put_page(head);
                                head = next;
                        }
                        return -ENOMEM;
                }
                vaddr = kmap_atomic(skb_frag_page(f));
                memcpy(page_address(page),
                       vaddr + f->page_offset, skb_frag_size(f));
                kunmap_atomic(vaddr);
                set_page_private(page, (unsigned long)head);
                head = page;
        }

        /* skb frags release userspace buffers */
        for (i = 0; i < num_frags; i++)
                skb_frag_unref(skb, i);

        uarg->callback(uarg, false);

        /* skb frags point to kernel buffers */
        for (i = num_frags - 1; i >= 0; i--) {
                __skb_fill_page_desc(skb, i, head, 0,
                                     skb_shinfo(skb)->frags[i].size);
                head = (struct page *)page_private(head);
        }

        skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
        return 0;
}
EXPORT_SYMBOL_GPL(skb_copy_ubufs);

/**
 * skb_clone - duplicate an sk_buff
 * @skb: buffer to clone
 * @gfp_mask: allocation priority
 *
 * Duplicate an &sk_buff. The new one is not owned by a socket. Both
 * copies share the same packet data but not the structure. The new
 * buffer has a reference count of 1. If the allocation fails, the
 * function returns %NULL; otherwise the new buffer is returned.
 *
 * If this function is called from an interrupt, @gfp_mask must be
 * %GFP_ATOMIC.
 */

struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
{
        struct sk_buff *n;

        if (skb_orphan_frags(skb, gfp_mask))
                return NULL;

        n = skb + 1;
        if (skb->fclone == SKB_FCLONE_ORIG &&
            n->fclone == SKB_FCLONE_UNAVAILABLE) {
                atomic_t *fclone_ref = (atomic_t *) (n + 1);
                n->fclone = SKB_FCLONE_CLONE;
                atomic_inc(fclone_ref);
        } else {
                if (skb_pfmemalloc(skb))
                        gfp_mask |= __GFP_MEMALLOC;

                n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
                if (!n)
                        return NULL;

                kmemcheck_annotate_bitfield(n, flags1);
                kmemcheck_annotate_bitfield(n, flags2);
                n->fclone = SKB_FCLONE_UNAVAILABLE;
        }

        return __skb_clone(n, skb);
}
EXPORT_SYMBOL(skb_clone);
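
/* Illustrative sketch, not part of the upstream file: a typical skb_clone()
 * pattern is handing a second, reference-counted view of the same packet
 * data to another consumer without copying the payload.  The helper names
 * monitor_rx() and mirror_to_monitor() are invented for illustration.
 */
static void monitor_rx(struct sk_buff *skb)
{
        /* stand-in consumer: a real one would queue the clone somewhere */
        consume_skb(skb);
}

static void mirror_to_monitor(struct sk_buff *skb)
{
        struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);

        if (!clone)
                return;         /* mirroring is best effort */

        /* The clone shares skb's data, so it must only be read; anyone
         * who needs to modify it should take a private copy first.
         */
        monitor_rx(clone);      /* the consumer now owns the clone */
}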

static void skb_headers_offset_update(struct sk_buff *skb, int off)
{
        /* Only adjust this if it actually is csum_start rather than csum */
        if (skb->ip_summed == CHECKSUM_PARTIAL)
                skb->csum_start += off;
        /* {transport,network,mac}_header and tail are relative to skb->head */
        skb->transport_header += off;
        skb->network_header += off;
        if (skb_mac_header_was_set(skb))
                skb->mac_header += off;
        skb->inner_transport_header += off;
        skb->inner_network_header += off;
        skb->inner_mac_header += off;
}

static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
        __copy_skb_header(new, old);

        skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
        skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
        skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
}

static inline int skb_alloc_rx_flag(const struct sk_buff *skb)
{
        if (skb_pfmemalloc(skb))
                return SKB_ALLOC_RX;
        return 0;
}

/**
 * skb_copy - create private copy of an sk_buff
 * @skb: buffer to copy
 * @gfp_mask: allocation priority
 *
 * Make a copy of both an &sk_buff and its data. This is used when the
 * caller wishes to modify the data and needs a private copy of the
 * data to alter. Returns %NULL on failure or the pointer to the buffer
 * on success. The returned buffer has a reference count of 1.
 *
 * As a by-product, this function converts a non-linear &sk_buff into a
 * linear one, so the &sk_buff becomes completely private and the caller
 * is allowed to modify all the data of the returned buffer. This means
 * that this function is not recommended for use in circumstances when
 * only the header is going to be modified. Use pskb_copy() instead.
 */

struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
{
        int headerlen = skb_headroom(skb);
        unsigned int size = skb_end_offset(skb) + skb->data_len;
        struct sk_buff *n = __alloc_skb(size, gfp_mask,
                                        skb_alloc_rx_flag(skb), NUMA_NO_NODE);

        if (!n)
                return NULL;

        /* Set the data pointer */
        skb_reserve(n, headerlen);
        /* Set the tail pointer and length */
        skb_put(n, skb->len);

        if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
                BUG();

        copy_skb_header(n, skb);
        return n;
}
EXPORT_SYMBOL(skb_copy);
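
/* Illustrative sketch, not part of the upstream file: skb_copy() yields a
 * fully private, linear copy, so every byte of it may be rewritten without
 * affecting other holders of the original skb.  rewrite_payload_copy() is
 * an invented name and the memset() stands in for a real modification.
 */
static struct sk_buff *rewrite_payload_copy(const struct sk_buff *orig)
{
        struct sk_buff *copy = skb_copy(orig, GFP_ATOMIC);

        if (!copy)
                return NULL;

        /* Safe: the whole range copy->data .. copy->tail is private and
         * linear after skb_copy().
         */
        memset(copy->data, 0, min_t(unsigned int, copy->len, 16));
        return copy;
}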

/**
 * __pskb_copy - create copy of an sk_buff with private head.
 * @skb: buffer to copy
 * @headroom: headroom of new skb
 * @gfp_mask: allocation priority
 *
 * Make a copy of both an &sk_buff and part of its data, located
 * in the header. Fragmented data remain shared. This is used when
 * the caller wishes to modify only the header of the &sk_buff and
 * needs a private copy of the header to alter. Returns %NULL on
 * failure or the pointer to the buffer on success.
 * The returned buffer has a reference count of 1.
 */

struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
{
        unsigned int size = skb_headlen(skb) + headroom;
        struct sk_buff *n = __alloc_skb(size, gfp_mask,
                                        skb_alloc_rx_flag(skb), NUMA_NO_NODE);

        if (!n)
                goto out;

        /* Set the data pointer */
        skb_reserve(n, headroom);
        /* Set the tail pointer and length */
        skb_put(n, skb_headlen(skb));
        /* Copy the bytes */
        skb_copy_from_linear_data(skb, n->data, n->len);

        n->truesize += skb->data_len;
        n->data_len = skb->data_len;
        n->len = skb->len;

        if (skb_shinfo(skb)->nr_frags) {
                int i;

                if (skb_orphan_frags(skb, gfp_mask)) {
                        kfree_skb(n);
                        n = NULL;
                        goto out;
                }
                for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                        skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
                        skb_frag_ref(skb, i);
                }
                skb_shinfo(n)->nr_frags = i;
        }

        if (skb_has_frag_list(skb)) {
                skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
                skb_clone_fraglist(n);
        }

        copy_skb_header(n, skb);
out:
        return n;
}
EXPORT_SYMBOL(__pskb_copy);
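
/* Illustrative sketch, not part of the upstream file: pskb_copy() (a wrapper
 * around __pskb_copy() that keeps the original headroom) is the cheaper
 * choice when only header bytes need rewriting, because the linear header is
 * duplicated while page frags stay shared.  private_header_copy() is an
 * invented name, and the example assumes the MAC header lies in the linear
 * area, as it does for ordinary received frames.
 */
static struct sk_buff *private_header_copy(struct sk_buff *orig,
                                           const u8 *new_dest)
{
        struct sk_buff *copy = pskb_copy(orig, GFP_ATOMIC);

        if (!copy)
                return NULL;

        /* Only the linear bytes (skb_headlen()) are private; frag pages
         * are still shared with @orig, so paged data must not be touched.
         */
        if (skb_headlen(copy) >= ETH_HLEN)
                memcpy(eth_hdr(copy)->h_dest, new_dest, ETH_ALEN);
        return copy;
}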

/**
 * pskb_expand_head - reallocate header of &sk_buff
 * @skb: buffer to reallocate
 * @nhead: room to add at head
 * @ntail: room to add at tail
 * @gfp_mask: allocation priority
 *
 * Expands (or creates an identical copy, if @nhead and @ntail are zero)
 * the header of @skb. The &sk_buff itself is not changed. The &sk_buff
 * MUST have a reference count of 1. Returns zero on success, or an
 * error if expansion failed; in the latter case the &sk_buff is not
 * changed.
 *
 * All the pointers pointing into the skb header may change and must be
 * reloaded after a call to this function.
 */

int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
                     gfp_t gfp_mask)
{
        int i;
        u8 *data;
        int size = nhead + skb_end_offset(skb) + ntail;
        long off;

        BUG_ON(nhead < 0);

        if (skb_shared(skb))
                BUG();

        size = SKB_DATA_ALIGN(size);

        if (skb_pfmemalloc(skb))
                gfp_mask |= __GFP_MEMALLOC;
        data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
                               gfp_mask, NUMA_NO_NODE, NULL);
        if (!data)
                goto nodata;
        size = SKB_WITH_OVERHEAD(ksize(data));

        /* Copy only real data... and, alas, the header. This should be
         * optimized for the cases when the header is void.
         */
        memcpy(data + nhead, skb->head, skb_tail_pointer(skb) - skb->head);

        memcpy((struct skb_shared_info *)(data + size),
               skb_shinfo(skb),
               offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));

        /*
         * If shinfo is shared we must drop the old head gracefully, but if
         * it is not we can just drop the old head and let the existing
         * refcount be, since all we did was relocate the values.
         */
        if (skb_cloned(skb)) {
                /* copy this zero-copy skb's frags */
                if (skb_orphan_frags(skb, gfp_mask))
                        goto nofrags;
                for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
                        skb_frag_ref(skb, i);

                if (skb_has_frag_list(skb))
                        skb_clone_fraglist(skb);

                skb_release_data(skb);
        } else {
                skb_free_head(skb);
        }
        off = (data + nhead) - skb->head;

        skb->head = data;
        skb->head_frag = 0;
        skb->data += off;
#ifdef NET_SKBUFF_DATA_USES_OFFSET
        skb->end = size;
        off = nhead;
#else
        skb->end = skb->head + size;
#endif
        skb->tail += off;
        skb_headers_offset_update(skb, nhead);
        skb->cloned = 0;
        skb->hdr_len = 0;
        skb->nohdr = 0;
        atomic_set(&skb_shinfo(skb)->dataref, 1);
        return 0;

nofrags:
        kfree(data);
nodata:
        return -ENOMEM;
}
EXPORT_SYMBOL(pskb_expand_head);
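
/* Illustrative sketch, not part of the upstream file: a common
 * pskb_expand_head() pattern is making sure there is enough headroom (and a
 * private, writable head) before pushing a new header.  Real code usually
 * goes through helpers such as skb_cow_head(); the explicit form is shown
 * here.  ENCAP_HLEN and push_encap_header() are invented names.
 */
#define ENCAP_HLEN 8

static int push_encap_header(struct sk_buff *skb)
{
        if (skb_headroom(skb) < ENCAP_HLEN || skb_cloned(skb)) {
                /* Any cached pointers into skb->head are stale after a
                 * successful pskb_expand_head().
                 */
                if (pskb_expand_head(skb, ENCAP_HLEN, 0, GFP_ATOMIC))
                        return -ENOMEM;
        }
        memset(skb_push(skb, ENCAP_HLEN), 0, ENCAP_HLEN);
        return 0;
}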

/* Make private copy of skb with writable head and some headroom */

struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
{
        struct sk_buff *skb2;
        int delta = headroom - skb_headroom(skb);

        if (delta <= 0)
                skb2 = pskb_copy(skb, GFP_ATOMIC);
        else {
                skb2 = skb_clone(skb, GFP_ATOMIC);
                if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0,
                                             GFP_ATOMIC)) {
                        kfree_skb(skb2);
                        skb2 = NULL;
                }
        }
        return skb2;
}
EXPORT_SYMBOL(skb_realloc_headroom);

/**
 * skb_copy_expand - copy and expand sk_buff
 * @skb: buffer to copy
 * @newheadroom: new free bytes at head
 * @newtailroom: new free bytes at tail
 * @gfp_mask: allocation priority
 *
 * Make a copy of both an &sk_buff and its data and while doing so
 * allocate additional space.
 *
 * This is used when the caller wishes to modify the data and needs a
 * private copy of the data to alter as well as more space for new fields.
 * Returns %NULL on failure or the pointer to the buffer
 * on success. The returned buffer has a reference count of 1.
 *
 * You must pass %GFP_ATOMIC as the allocation priority if this function
 * is called from an interrupt.
 */
struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
                                int newheadroom, int newtailroom,
                                gfp_t gfp_mask)
{
        /*
         * Allocate the copy buffer
         */
        struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom,
                                        gfp_mask, skb_alloc_rx_flag(skb),
                                        NUMA_NO_NODE);
        int oldheadroom = skb_headroom(skb);
        int head_copy_len, head_copy_off;

        if (!n)
                return NULL;

        skb_reserve(n, newheadroom);

        /* Set the tail pointer and length */
        skb_put(n, skb->len);

        head_copy_len = oldheadroom;
        head_copy_off = 0;
        if (newheadroom <= head_copy_len)
                head_copy_len = newheadroom;
        else
                head_copy_off = newheadroom - head_copy_len;

        /* Copy the linear header and data. */
        if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
                          skb->len + head_copy_len))
                BUG();

        copy_skb_header(n, skb);

        skb_headers_offset_update(n, newheadroom - oldheadroom);

        return n;
}
EXPORT_SYMBOL(skb_copy_expand);
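
/* Illustrative sketch, not part of the upstream file: skb_copy_expand() fits
 * when the original skb may not be modified at all (for instance because it
 * is shared), yet a fully private copy with extra headroom for a new header
 * is needed.  HDR_ROOM and copy_with_room() are invented names.
 */
#define HDR_ROOM 16

static struct sk_buff *copy_with_room(const struct sk_buff *orig)
{
        struct sk_buff *copy = skb_copy_expand(orig, HDR_ROOM, 0, GFP_ATOMIC);

        if (!copy)
                return NULL;

        /* The copy is private and has at least HDR_ROOM bytes of headroom,
         * so prepending is safe now.
         */
        memset(skb_push(copy, HDR_ROOM), 0, HDR_ROOM);
        return copy;
}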

/**
 * skb_pad - zero pad the tail of an skb
 * @skb: buffer to pad
 * @pad: space to pad
 *
 * Ensure that a buffer is followed by a padding area that is zero
 * filled. Used by network drivers which may DMA or transfer data
 * beyond the buffer end onto the wire.
 *
 * May return an error in out-of-memory cases. The skb is freed on error.
 */

int skb_pad(struct sk_buff *skb, int pad)
{
        int err;
        int ntail;

        /* If the skbuff is non-linear, tailroom is always zero. */
        if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
                memset(skb->data + skb->len, 0, pad);
                return 0;
        }

        ntail = skb->data_len + pad - (skb->end - skb->tail);
        if (likely(skb_cloned(skb) || ntail > 0)) {
                err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC);
                if (unlikely(err))
                        goto free_skb;
        }

        /* FIXME: The use of this function with non-linear skb's really needs
         * to be audited.
         */
        err = skb_linearize(skb);
        if (unlikely(err))
                goto free_skb;

        memset(skb->data + skb->len, 0, pad);
        return 0;

free_skb:
        kfree_skb(skb);
        return err;
}
EXPORT_SYMBOL(skb_pad);
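
/* Illustrative sketch, not part of the upstream file: drivers whose hardware
 * does not pad short frames typically zero-pad the tail up to the minimum
 * Ethernet frame size before starting DMA.  Note that skb_pad() frees the
 * skb on failure, so it must not be touched afterwards in that case.
 * tx_pad_short_frame() is an invented name.
 */
static int tx_pad_short_frame(struct sk_buff *skb)
{
        int err;

        if (skb->len >= ETH_ZLEN)
                return 0;

        err = skb_pad(skb, ETH_ZLEN - skb->len);
        if (err)
                return err;     /* skb has already been freed */

        /* skb->len is unchanged; the caller would tell the hardware to
         * transmit ETH_ZLEN bytes, the extra tail bytes now being zero.
         */
        return 0;
}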

/**
 * pskb_put - add data to the tail of a potentially fragmented buffer
 * @skb: start of the buffer to use
 * @tail: tail fragment of the buffer to use
 * @len: amount of data to add
 *
 * This function extends the used data area of the potentially
 * fragmented buffer. @tail must be the last fragment of @skb -- or
 * @skb itself. If this would exceed the total buffer size the kernel
 * will panic. A pointer to the first byte of the extra data is
 * returned.
 */

unsigned char *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
{
        if (tail != skb) {
                skb->data_len += len;
                skb->len += len;
        }
        return skb_put(tail, len);
}
EXPORT_SYMBOL_GPL(pskb_put);

/**
 * skb_put - add data to a buffer
 * @skb: buffer to use
 * @len: amount of data to add
 *
 * This function extends the used data area of the buffer. If this would
 * exceed the total buffer size the kernel will panic. A pointer to the
 * first byte of the extra data is returned.
 */
unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
{
        unsigned char *tmp = skb_tail_pointer(skb);
        SKB_LINEAR_ASSERT(skb);
        skb->tail += len;
        skb->len += len;
        if (unlikely(skb->tail > skb->end))
                skb_over_panic(skb, len, __builtin_return_address(0));
        return tmp;
}
EXPORT_SYMBOL(skb_put);

/**
 * skb_push - add data to the start of a buffer
 * @skb: buffer to use
 * @len: amount of data to add
 *
 * This function extends the used data area of the buffer at the buffer
 * start. If this would exceed the total buffer headroom the kernel will
 * panic. A pointer to the first byte of the extra data is returned.
 */
unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
{
        skb->data -= len;
        skb->len += len;
        if (unlikely(skb->data < skb->head))
                skb_under_panic(skb, len, __builtin_return_address(0));
        return skb->data;
}
EXPORT_SYMBOL(skb_push);

/**
 * skb_pull - remove data from the start of a buffer
 * @skb: buffer to use
 * @len: amount of data to remove
 *
 * This function removes data from the start of a buffer, returning
 * the memory to the headroom. A pointer to the next data in the buffer
 * is returned. Once the data has been pulled future pushes will overwrite
 * the old data.
 */
unsigned char *skb_pull(struct sk_buff *skb, unsigned int len)
{
        return skb_pull_inline(skb, len);
}
EXPORT_SYMBOL(skb_pull);

/**
 * skb_trim - remove end from a buffer
 * @skb: buffer to alter
 * @len: new length
 *
 * Cut the length of a buffer down by removing data from the tail. If
 * the buffer is already under the length specified it is not modified.
 * The skb must be linear.
 */
void skb_trim(struct sk_buff *skb, unsigned int len)
{
        if (skb->len > len)
                __skb_trim(skb, len);
}
EXPORT_SYMBOL(skb_trim);
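
/* Illustrative sketch, not part of the upstream file: how the basic
 * data-area operations compose when building and then parsing a simple
 * frame.  The sizes and the build_demo_frame() name are invented, and
 * error handling is reduced to the bare minimum.
 */
static struct sk_buff *build_demo_frame(const void *payload, unsigned int plen)
{
        struct sk_buff *skb = alloc_skb(ETH_HLEN + plen + 64, GFP_ATOMIC);

        if (!skb)
                return NULL;

        skb_reserve(skb, ETH_HLEN);                   /* headroom for the L2 header */
        memcpy(skb_put(skb, plen), payload, plen);    /* append the payload at the tail */
        memset(skb_push(skb, ETH_HLEN), 0, ETH_HLEN); /* prepend a blank L2 header */

        skb_pull(skb, ETH_HLEN); /* a receiver would strip the L2 header again */
        skb_trim(skb, plen);     /* and could drop trailing padding, if any */
        return skb;
}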

/* Trims skb to length len. It can change skb pointers.
 */

int ___pskb_trim(struct sk_buff *skb, unsigned int len)
{
        struct sk_buff **fragp;
        struct sk_buff *frag;
        int offset = skb_headlen(skb);
        int nfrags = skb_shinfo(skb)->nr_frags;
        int i;
        int err;

        if (skb_cloned(skb) &&
            unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))))
                return err;

        i = 0;
        if (offset >= len)
                goto drop_pages;

        for (; i < nfrags; i++) {
                int end = offset + skb_frag_size(&skb_shinfo(skb)->frags[i]);

                if (end < len) {
                        offset = end;
                        continue;
                }

                skb_frag_size_set(&skb_shinfo(skb)->frags[i++], len - offset);

drop_pages:
                skb_shinfo(skb)->nr_frags = i;

                for (; i < nfrags; i++)
                        skb_frag_unref(skb, i);

                if (skb_has_frag_list(skb))
                        skb_drop_fraglist(skb);
                goto done;
        }

        for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp);
             fragp = &frag->next) {
                int end = offset + frag->len;

                if (skb_shared(frag)) {
                        struct sk_buff *nfrag;

                        nfrag = skb_clone(frag, GFP_ATOMIC);
                        if (unlikely(!nfrag))
                                return -ENOMEM;

                        nfrag->next = frag->next;
                        consume_skb(frag);
                        frag = nfrag;
                        *fragp = frag;
                }

                if (end < len) {
                        offset = end;
                        continue;
                }

                if (end > len &&
                    unlikely((err = pskb_trim(frag, len - offset))))
                        return err;

                if (frag->next)
                        skb_drop_list(&frag->next);
                break;
        }

done:
        if (len > skb_headlen(skb)) {
                skb->data_len -= skb->len - len;
                skb->len = len;
        } else {
                skb->len = len;
                skb->data_len = 0;
                skb_set_tail_pointer(skb, len);
        }

        return 0;
}
EXPORT_SYMBOL(___pskb_trim);

/**
 * __pskb_pull_tail - advance tail of skb header
 * @skb: buffer to reallocate
 * @delta: number of bytes to advance tail
 *
 * This function only makes sense on a fragmented &sk_buff: it expands
 * the header by moving its tail forward and copying the necessary data
 * from the fragmented part.
 *
 * The &sk_buff MUST have a reference count of 1.
 *
 * Returns %NULL (and the &sk_buff does not change) if the pull failed,
 * or the value of the new tail of the skb on success.
 *
 * All the pointers pointing into the skb header may change and must be
 * reloaded after a call to this function.
 */

/* Moves the tail of the skb head forward, copying data from the
 * fragmented part when necessary.
 * 1. It may fail due to malloc failure.
 * 2. It may change skb pointers.
 *
 * It is pretty complicated. Luckily, it is called only in exceptional cases.
 */
unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
{
        /* If the skb does not have enough free space at the tail, get a
         * new one plus 128 bytes for future expansions. If we have enough
         * room at the tail, reallocate without expansion only if the skb
         * is cloned.
         */
        int i, k, eat = (skb->tail + delta) - skb->end;

        if (eat > 0 || skb_cloned(skb)) {
                if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
                                     GFP_ATOMIC))
                        return NULL;
        }

        if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta))
                BUG();

        /* Optimization: no fragments, no reason to pre-estimate the
         * size of the pulled pages. Superb.
         */
        if (!skb_has_frag_list(skb))
                goto pull_pages;

        /* Estimate size of pulled pages. */
        eat = delta;
        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);

                if (size >= eat)
                        goto pull_pages;
                eat -= size;
        }

        /* If we need to update the frag list, we are in trouble.
         * Certainly, it is possible to add an offset to the skb data,
         * but taking into account that pulling is expected to be a very
         * rare operation, it is worth fighting against further bloating
         * of the skb head and crucifying ourselves here instead.
         * Pure masochism, indeed. 8)8)
         */
        if (eat) {
                struct sk_buff *list = skb_shinfo(skb)->frag_list;
                struct sk_buff *clone = NULL;
                struct sk_buff *insp = NULL;

                do {
                        BUG_ON(!list);

                        if (list->len <= eat) {
                                /* Eaten as whole. */
                                eat -= list->len;
                                list = list->next;
                                insp = list;
                        } else {
                                /* Eaten partially. */

                                if (skb_shared(list)) {
                                        /* Sucks! We need to fork list. :-( */
                                        clone = skb_clone(list, GFP_ATOMIC);
                                        if (!clone)
                                                return NULL;
                                        insp = list->next;
                                        list = clone;
                                } else {
                                        /* This may be pulled without
                                         * problems. */
                                        insp = list;
                                }
                                if (!pskb_pull(list, eat)) {
                                        kfree_skb(clone);
                                        return NULL;
                                }
                                break;
                        }
                } while (eat);

                /* Free pulled out fragments. */
                while ((list = skb_shinfo(skb)->frag_list) != insp) {
                        skb_shinfo(skb)->frag_list = list->next;
                        kfree_skb(list);
                }
                /* And insert new clone at head. */
                if (clone) {
                        clone->next = list;
                        skb_shinfo(skb)->frag_list = clone;
                }
        }
        /* Success! Now we may commit changes to skb data. */

pull_pages:
        eat = delta;
        k = 0;
        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);

                if (size <= eat) {
                        skb_frag_unref(skb, i);
                        eat -= size;
                } else {
                        skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
                        if (eat) {
                                skb_shinfo(skb)->frags[k].page_offset += eat;
                                skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
                                eat = 0;
                        }
                        k++;
                }
        }
        skb_shinfo(skb)->nr_frags = k;

        skb->tail += delta;
        skb->data_len -= delta;

        return skb_tail_pointer(skb);
}
EXPORT_SYMBOL(__pskb_pull_tail);
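
/* Illustrative sketch, not part of the upstream file: protocol code rarely
 * calls __pskb_pull_tail() directly; it normally goes through
 * pskb_may_pull(), which falls back to this function only when the bytes it
 * wants are not yet in the linear header.  struct demohdr and
 * parse_demo_header() are invented for illustration.
 */
struct demohdr {
        __be16 type;
        __be16 len;
};

static int parse_demo_header(struct sk_buff *skb)
{
        const struct demohdr *dh;

        /* Ensure the first sizeof(*dh) bytes are linear; this may
         * reallocate the head, so older data pointers are now stale.
         */
        if (!pskb_may_pull(skb, sizeof(*dh)))
                return -EINVAL;

        dh = (const struct demohdr *)skb->data;
        return ntohs(dh->len);
}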

/**
 * skb_copy_bits - copy bits from skb to kernel buffer
 * @skb: source skb
 * @offset: offset in source
 * @to: destination buffer
 * @len: number of bytes to copy
 *
 * Copy the specified number of bytes from the source skb to the
 * destination buffer.
 *
 * CAUTION ! :
 * If its prototype is ever changed,
 * check arch/{*}/net/{*}.S files,
 * since it is called from BPF assembly code.
 */
int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
{
        int start = skb_headlen(skb);
        struct sk_buff *frag_iter;
        int i, copy;

        if (offset > (int)skb->len - len)
                goto fault;

        /* Copy header. */
        if ((copy = start - offset) > 0) {
                if (copy > len)
                        copy = len;
                skb_copy_from_linear_data_offset(skb, offset, to, copy);
                if ((len -= copy) == 0)
                        return 0;
                offset += copy;
                to += copy;
        }

        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                int end;
                skb_frag_t *f = &skb_shinfo(skb)->frags[i];

                WARN_ON(start > offset + len);

                end = start + skb_frag_size(f);
                if ((copy = end - offset) > 0) {
                        u8 *vaddr;

                        if (copy > len)
                                copy = len;

                        vaddr = kmap_atomic(skb_frag_page(f));
                        memcpy(to,
                               vaddr + f->page_offset + offset - start,
                               copy);
                        kunmap_atomic(vaddr);

                        if ((len -= copy) == 0)
                                return 0;
                        offset += copy;
                        to += copy;
                }
                start = end;
        }

        skb_walk_frags(skb, frag_iter) {
                int end;

                WARN_ON(start > offset + len);

                end = start + frag_iter->len;
                if ((copy = end - offset) > 0) {
                        if (copy > len)
                                copy = len;
                        if (skb_copy_bits(frag_iter, offset - start, to, copy))
                                goto fault;
                        if ((len -= copy) == 0)
                                return 0;
                        offset += copy;
                        to += copy;
                }
                start = end;
        }

        if (!len)
                return 0;

fault:
        return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_bits);
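
/* Illustrative sketch, not part of the upstream file: skb_copy_bits() hides
 * whether the requested bytes live in the linear area, in page frags or in
 * the frag list, which makes it convenient for peeking at a fixed-size
 * header of an arbitrary skb without linearizing it.  peek_skb_bytes() is
 * an invented name.
 */
static int peek_skb_bytes(const struct sk_buff *skb, unsigned int offset,
                          void *buf, unsigned int len)
{
        if (offset + len > skb->len)
                return -EINVAL;

        /* Works for linear and non-linear skbs alike; @buf must be a
         * kernel buffer of at least @len bytes.
         */
        return skb_copy_bits(skb, offset, buf, len);
}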

/*
 * Callback from splice_to_pipe(), if we need to release some pages
 * at the end of the spd in case we error'ed out in filling the pipe.
 */
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
        put_page(spd->pages[i]);
}

static struct page *linear_to_page(struct page *page, unsigned int *len,
                                   unsigned int *offset,
                                   struct sock *sk)
{
        struct page_frag *pfrag = sk_page_frag(sk);

        if (!sk_page_frag_refill(sk, pfrag))
                return NULL;

        *len = min_t(unsigned int, *len, pfrag->size - pfrag->offset);

        memcpy(page_address(pfrag->page) + pfrag->offset,
               page_address(page) + *offset, *len);
        *offset = pfrag->offset;
        pfrag->offset += *len;

        return pfrag->page;
}

static bool spd_can_coalesce(const struct splice_pipe_desc *spd,
                             struct page *page,
                             unsigned int offset)
{
        return spd->nr_pages &&
               spd->pages[spd->nr_pages - 1] == page &&
               (spd->partial[spd->nr_pages - 1].offset +
                spd->partial[spd->nr_pages - 1].len == offset);
}

/*
 * Fill page/offset/length into spd, if it can hold more pages.
 */
static bool spd_fill_page(struct splice_pipe_desc *spd,
                          struct pipe_inode_info *pipe, struct page *page,
                          unsigned int *len, unsigned int offset,
                          bool linear,
                          struct sock *sk)
{
        if (unlikely(spd->nr_pages == MAX_SKB_FRAGS))
                return true;

        if (linear) {
                page = linear_to_page(page, len, &offset, sk);
                if (!page)
                        return true;
        }
        if (spd_can_coalesce(spd, page, offset)) {
                spd->partial[spd->nr_pages - 1].len += *len;
                return false;
        }
        get_page(page);
        spd->pages[spd->nr_pages] = page;
        spd->partial[spd->nr_pages].len = *len;
        spd->partial[spd->nr_pages].offset = offset;
        spd->nr_pages++;

        return false;
}

static bool __splice_segment(struct page *page, unsigned int poff,
                             unsigned int plen, unsigned int *off,
                             unsigned int *len,
                             struct splice_pipe_desc *spd, bool linear,
                             struct sock *sk,
                             struct pipe_inode_info *pipe)
{
        if (!*len)
                return true;

        /* skip this segment if already processed */
        if (*off >= plen) {
                *off -= plen;
                return false;
        }

        /* ignore any bits we already processed */
        poff += *off;
        plen -= *off;
        *off = 0;

        do {
                unsigned int flen = min(*len, plen);

                if (spd_fill_page(spd, pipe, page, &flen, poff,
                                  linear, sk))
                        return true;
                poff += flen;
                plen -= flen;
                *len -= flen;
        } while (*len && plen);

        return false;
}

/*
 * Map linear and fragment data from the skb to spd. It reports true if the
 * pipe is full or if we already spliced the requested length.
 */
static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
                              unsigned int *offset, unsigned int *len,
                              struct splice_pipe_desc *spd, struct sock *sk)
{
        int seg;

        /* map the linear part :
         * If skb->head_frag is set, this 'linear' part is backed by a
         * fragment, and if the head is not shared with any clones then
         * we can avoid a copy since we own the head portion of this page.
         */
        if (__splice_segment(virt_to_page(skb->data),
                             (unsigned long) skb->data & (PAGE_SIZE - 1),
                             skb_headlen(skb),
                             offset, len, spd,
                             skb_head_is_locked(skb),
                             sk, pipe))
                return true;

        /*
         * then map the fragments
         */
        for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
                const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];

                if (__splice_segment(skb_frag_page(f),
                                     f->page_offset, skb_frag_size(f),
                                     offset, len, spd, false, sk, pipe))
                        return true;
        }

        return false;
}

/*
 * Map data from the skb to a pipe. Should handle both the linear part,
 * the fragments, and the frag list. It does NOT handle frag lists within
 * the frag list, if such a thing exists. We'd probably need to recurse to
 * handle that cleanly.
 */
int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
                    struct pipe_inode_info *pipe, unsigned int tlen,
                    unsigned int flags)
{
        struct partial_page partial[MAX_SKB_FRAGS];
        struct page *pages[MAX_SKB_FRAGS];
        struct splice_pipe_desc spd = {
                .pages = pages,
                .partial = partial,
                .nr_pages_max = MAX_SKB_FRAGS,
                .flags = flags,
                .ops = &nosteal_pipe_buf_ops,
                .spd_release = sock_spd_release,
        };
        struct sk_buff *frag_iter;
        struct sock *sk = skb->sk;
        int ret = 0;

        /*
         * __skb_splice_bits() only fails if the output has no room left,
         * so no point in going over the frag_list for the error case.
         */
        if (__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk))
                goto done;
        else if (!tlen)
                goto done;

        /*
         * now see if we have a frag_list to map
         */
        skb_walk_frags(skb, frag_iter) {
                if (!tlen)
                        break;
                if (__skb_splice_bits(frag_iter, pipe, &offset, &tlen, &spd, sk))
                        break;
        }

done:
        if (spd.nr_pages) {
                /*
                 * Drop the socket lock, otherwise we have reverse
                 * locking dependencies between sk_lock and i_mutex
                 * here as compared to sendfile(). We enter here
                 * with the socket lock held, and splice_to_pipe() will
                 * grab the pipe inode lock. For sendfile() emulation,
                 * we call into ->sendpage() with the i_mutex lock held
                 * and networking will grab the socket lock.
                 */
                release_sock(sk);
                ret = splice_to_pipe(pipe, &spd);
                lock_sock(sk);
        }

1844 return ret; 1844 return ret;
1845 } 1845 }
1846 1846
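/*
 * A minimal usage sketch, not taken from the sources above: a hypothetical
 * protocol helper that feeds one queued skb into a pipe.  The surrounding
 * ->splice_read() machinery (receive-queue walk, pipe setup, splice flags)
 * is assumed to exist elsewhere; example_splice_one_skb() is made up.
 */
#include <linux/skbuff.h>
#include <linux/splice.h>
#include <net/sock.h>

static int example_splice_one_skb(struct sock *sk, struct sk_buff *skb,
				  struct pipe_inode_info *pipe,
				  unsigned int len, unsigned int flags)
{
	/* skb_splice_bits() temporarily drops the socket lock around
	 * splice_to_pipe(), so @sk must be locked by the caller.
	 */
	return skb_splice_bits(skb, 0, pipe, len, flags);
}
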
1847 /** 1847 /**
1848 * skb_store_bits - store bits from kernel buffer to skb 1848 * skb_store_bits - store bits from kernel buffer to skb
1849 * @skb: destination buffer 1849 * @skb: destination buffer
1850 * @offset: offset in destination 1850 * @offset: offset in destination
1851 * @from: source buffer 1851 * @from: source buffer
1852 * @len: number of bytes to copy 1852 * @len: number of bytes to copy
1853 * 1853 *
1854 * Copy the specified number of bytes from the source buffer to the 1854 * Copy the specified number of bytes from the source buffer to the
1855 * destination skb. This function handles all the messy bits of 1855 * destination skb. This function handles all the messy bits of
1856 * traversing fragment lists and such. 1856 * traversing fragment lists and such.
1857 */ 1857 */
1858 1858
1859 int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) 1859 int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
1860 { 1860 {
1861 int start = skb_headlen(skb); 1861 int start = skb_headlen(skb);
1862 struct sk_buff *frag_iter; 1862 struct sk_buff *frag_iter;
1863 int i, copy; 1863 int i, copy;
1864 1864
1865 if (offset > (int)skb->len - len) 1865 if (offset > (int)skb->len - len)
1866 goto fault; 1866 goto fault;
1867 1867
1868 if ((copy = start - offset) > 0) { 1868 if ((copy = start - offset) > 0) {
1869 if (copy > len) 1869 if (copy > len)
1870 copy = len; 1870 copy = len;
1871 skb_copy_to_linear_data_offset(skb, offset, from, copy); 1871 skb_copy_to_linear_data_offset(skb, offset, from, copy);
1872 if ((len -= copy) == 0) 1872 if ((len -= copy) == 0)
1873 return 0; 1873 return 0;
1874 offset += copy; 1874 offset += copy;
1875 from += copy; 1875 from += copy;
1876 } 1876 }
1877 1877
1878 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1878 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1879 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 1879 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1880 int end; 1880 int end;
1881 1881
1882 WARN_ON(start > offset + len); 1882 WARN_ON(start > offset + len);
1883 1883
1884 end = start + skb_frag_size(frag); 1884 end = start + skb_frag_size(frag);
1885 if ((copy = end - offset) > 0) { 1885 if ((copy = end - offset) > 0) {
1886 u8 *vaddr; 1886 u8 *vaddr;
1887 1887
1888 if (copy > len) 1888 if (copy > len)
1889 copy = len; 1889 copy = len;
1890 1890
1891 vaddr = kmap_atomic(skb_frag_page(frag)); 1891 vaddr = kmap_atomic(skb_frag_page(frag));
1892 memcpy(vaddr + frag->page_offset + offset - start, 1892 memcpy(vaddr + frag->page_offset + offset - start,
1893 from, copy); 1893 from, copy);
1894 kunmap_atomic(vaddr); 1894 kunmap_atomic(vaddr);
1895 1895
1896 if ((len -= copy) == 0) 1896 if ((len -= copy) == 0)
1897 return 0; 1897 return 0;
1898 offset += copy; 1898 offset += copy;
1899 from += copy; 1899 from += copy;
1900 } 1900 }
1901 start = end; 1901 start = end;
1902 } 1902 }
1903 1903
1904 skb_walk_frags(skb, frag_iter) { 1904 skb_walk_frags(skb, frag_iter) {
1905 int end; 1905 int end;
1906 1906
1907 WARN_ON(start > offset + len); 1907 WARN_ON(start > offset + len);
1908 1908
1909 end = start + frag_iter->len; 1909 end = start + frag_iter->len;
1910 if ((copy = end - offset) > 0) { 1910 if ((copy = end - offset) > 0) {
1911 if (copy > len) 1911 if (copy > len)
1912 copy = len; 1912 copy = len;
1913 if (skb_store_bits(frag_iter, offset - start, 1913 if (skb_store_bits(frag_iter, offset - start,
1914 from, copy)) 1914 from, copy))
1915 goto fault; 1915 goto fault;
1916 if ((len -= copy) == 0) 1916 if ((len -= copy) == 0)
1917 return 0; 1917 return 0;
1918 offset += copy; 1918 offset += copy;
1919 from += copy; 1919 from += copy;
1920 } 1920 }
1921 start = end; 1921 start = end;
1922 } 1922 }
1923 if (!len) 1923 if (!len)
1924 return 0; 1924 return 0;
1925 1925
1926 fault: 1926 fault:
1927 return -EFAULT; 1927 return -EFAULT;
1928 } 1928 }
1929 EXPORT_SYMBOL(skb_store_bits); 1929 EXPORT_SYMBOL(skb_store_bits);
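
/*
 * A minimal usage sketch (example_rewrite_field() is a hypothetical helper,
 * not part of the code above): overwrite a small field at @offset even when
 * it lands in a paged fragment or the frag_list, letting skb_store_bits()
 * do the traversal.
 */
#include <linux/skbuff.h>
#include <linux/errno.h>

static int example_rewrite_field(struct sk_buff *skb, int offset,
				 const void *val, int len)
{
	if (offset + len > skb->len)
		return -EINVAL;

	return skb_store_bits(skb, offset, val, len);	/* 0 or -EFAULT */
}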
1930 1930
1931 /* Checksum skb data. */ 1931 /* Checksum skb data. */
1932 __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, 1932 __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
1933 __wsum csum, const struct skb_checksum_ops *ops) 1933 __wsum csum, const struct skb_checksum_ops *ops)
1934 { 1934 {
1935 int start = skb_headlen(skb); 1935 int start = skb_headlen(skb);
1936 int i, copy = start - offset; 1936 int i, copy = start - offset;
1937 struct sk_buff *frag_iter; 1937 struct sk_buff *frag_iter;
1938 int pos = 0; 1938 int pos = 0;
1939 1939
1940 /* Checksum header. */ 1940 /* Checksum header. */
1941 if (copy > 0) { 1941 if (copy > 0) {
1942 if (copy > len) 1942 if (copy > len)
1943 copy = len; 1943 copy = len;
1944 csum = ops->update(skb->data + offset, copy, csum); 1944 csum = ops->update(skb->data + offset, copy, csum);
1945 if ((len -= copy) == 0) 1945 if ((len -= copy) == 0)
1946 return csum; 1946 return csum;
1947 offset += copy; 1947 offset += copy;
1948 pos = copy; 1948 pos = copy;
1949 } 1949 }
1950 1950
1951 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1951 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1952 int end; 1952 int end;
1953 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 1953 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1954 1954
1955 WARN_ON(start > offset + len); 1955 WARN_ON(start > offset + len);
1956 1956
1957 end = start + skb_frag_size(frag); 1957 end = start + skb_frag_size(frag);
1958 if ((copy = end - offset) > 0) { 1958 if ((copy = end - offset) > 0) {
1959 __wsum csum2; 1959 __wsum csum2;
1960 u8 *vaddr; 1960 u8 *vaddr;
1961 1961
1962 if (copy > len) 1962 if (copy > len)
1963 copy = len; 1963 copy = len;
1964 vaddr = kmap_atomic(skb_frag_page(frag)); 1964 vaddr = kmap_atomic(skb_frag_page(frag));
1965 csum2 = ops->update(vaddr + frag->page_offset + 1965 csum2 = ops->update(vaddr + frag->page_offset +
1966 offset - start, copy, 0); 1966 offset - start, copy, 0);
1967 kunmap_atomic(vaddr); 1967 kunmap_atomic(vaddr);
1968 csum = ops->combine(csum, csum2, pos, copy); 1968 csum = ops->combine(csum, csum2, pos, copy);
1969 if (!(len -= copy)) 1969 if (!(len -= copy))
1970 return csum; 1970 return csum;
1971 offset += copy; 1971 offset += copy;
1972 pos += copy; 1972 pos += copy;
1973 } 1973 }
1974 start = end; 1974 start = end;
1975 } 1975 }
1976 1976
1977 skb_walk_frags(skb, frag_iter) { 1977 skb_walk_frags(skb, frag_iter) {
1978 int end; 1978 int end;
1979 1979
1980 WARN_ON(start > offset + len); 1980 WARN_ON(start > offset + len);
1981 1981
1982 end = start + frag_iter->len; 1982 end = start + frag_iter->len;
1983 if ((copy = end - offset) > 0) { 1983 if ((copy = end - offset) > 0) {
1984 __wsum csum2; 1984 __wsum csum2;
1985 if (copy > len) 1985 if (copy > len)
1986 copy = len; 1986 copy = len;
1987 csum2 = __skb_checksum(frag_iter, offset - start, 1987 csum2 = __skb_checksum(frag_iter, offset - start,
1988 copy, 0, ops); 1988 copy, 0, ops);
1989 csum = ops->combine(csum, csum2, pos, copy); 1989 csum = ops->combine(csum, csum2, pos, copy);
1990 if ((len -= copy) == 0) 1990 if ((len -= copy) == 0)
1991 return csum; 1991 return csum;
1992 offset += copy; 1992 offset += copy;
1993 pos += copy; 1993 pos += copy;
1994 } 1994 }
1995 start = end; 1995 start = end;
1996 } 1996 }
1997 BUG_ON(len); 1997 BUG_ON(len);
1998 1998
1999 return csum; 1999 return csum;
2000 } 2000 }
2001 EXPORT_SYMBOL(__skb_checksum); 2001 EXPORT_SYMBOL(__skb_checksum);
2002 2002
2003 __wsum skb_checksum(const struct sk_buff *skb, int offset, 2003 __wsum skb_checksum(const struct sk_buff *skb, int offset,
2004 int len, __wsum csum) 2004 int len, __wsum csum)
2005 { 2005 {
2006 const struct skb_checksum_ops ops = { 2006 const struct skb_checksum_ops ops = {
2007 .update = csum_partial_ext, 2007 .update = csum_partial_ext,
2008 .combine = csum_block_add_ext, 2008 .combine = csum_block_add_ext,
2009 }; 2009 };
2010 2010
2011 return __skb_checksum(skb, offset, len, csum, &ops); 2011 return __skb_checksum(skb, offset, len, csum, &ops);
2012 } 2012 }
2013 EXPORT_SYMBOL(skb_checksum); 2013 EXPORT_SYMBOL(skb_checksum);
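
/*
 * A minimal usage sketch (hypothetical helper): checksum @len bytes of the
 * skb starting at @offset and fold the running 32-bit partial sum into the
 * final 16-bit ones'-complement form, as a receive path might do when it
 * has to verify a checksum in software.
 */
#include <linux/skbuff.h>
#include <net/checksum.h>

static __sum16 example_fold_csum(const struct sk_buff *skb, int offset, int len)
{
	return csum_fold(skb_checksum(skb, offset, len, 0));
}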
2014 2014
2015 /* Both of above in one bottle. */ 2015 /* Both of above in one bottle. */
2016 2016
2017 __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, 2017 __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
2018 u8 *to, int len, __wsum csum) 2018 u8 *to, int len, __wsum csum)
2019 { 2019 {
2020 int start = skb_headlen(skb); 2020 int start = skb_headlen(skb);
2021 int i, copy = start - offset; 2021 int i, copy = start - offset;
2022 struct sk_buff *frag_iter; 2022 struct sk_buff *frag_iter;
2023 int pos = 0; 2023 int pos = 0;
2024 2024
2025 /* Copy header. */ 2025 /* Copy header. */
2026 if (copy > 0) { 2026 if (copy > 0) {
2027 if (copy > len) 2027 if (copy > len)
2028 copy = len; 2028 copy = len;
2029 csum = csum_partial_copy_nocheck(skb->data + offset, to, 2029 csum = csum_partial_copy_nocheck(skb->data + offset, to,
2030 copy, csum); 2030 copy, csum);
2031 if ((len -= copy) == 0) 2031 if ((len -= copy) == 0)
2032 return csum; 2032 return csum;
2033 offset += copy; 2033 offset += copy;
2034 to += copy; 2034 to += copy;
2035 pos = copy; 2035 pos = copy;
2036 } 2036 }
2037 2037
2038 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 2038 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2039 int end; 2039 int end;
2040 2040
2041 WARN_ON(start > offset + len); 2041 WARN_ON(start > offset + len);
2042 2042
2043 end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); 2043 end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
2044 if ((copy = end - offset) > 0) { 2044 if ((copy = end - offset) > 0) {
2045 __wsum csum2; 2045 __wsum csum2;
2046 u8 *vaddr; 2046 u8 *vaddr;
2047 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 2047 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2048 2048
2049 if (copy > len) 2049 if (copy > len)
2050 copy = len; 2050 copy = len;
2051 vaddr = kmap_atomic(skb_frag_page(frag)); 2051 vaddr = kmap_atomic(skb_frag_page(frag));
2052 csum2 = csum_partial_copy_nocheck(vaddr + 2052 csum2 = csum_partial_copy_nocheck(vaddr +
2053 frag->page_offset + 2053 frag->page_offset +
2054 offset - start, to, 2054 offset - start, to,
2055 copy, 0); 2055 copy, 0);
2056 kunmap_atomic(vaddr); 2056 kunmap_atomic(vaddr);
2057 csum = csum_block_add(csum, csum2, pos); 2057 csum = csum_block_add(csum, csum2, pos);
2058 if (!(len -= copy)) 2058 if (!(len -= copy))
2059 return csum; 2059 return csum;
2060 offset += copy; 2060 offset += copy;
2061 to += copy; 2061 to += copy;
2062 pos += copy; 2062 pos += copy;
2063 } 2063 }
2064 start = end; 2064 start = end;
2065 } 2065 }
2066 2066
2067 skb_walk_frags(skb, frag_iter) { 2067 skb_walk_frags(skb, frag_iter) {
2068 __wsum csum2; 2068 __wsum csum2;
2069 int end; 2069 int end;
2070 2070
2071 WARN_ON(start > offset + len); 2071 WARN_ON(start > offset + len);
2072 2072
2073 end = start + frag_iter->len; 2073 end = start + frag_iter->len;
2074 if ((copy = end - offset) > 0) { 2074 if ((copy = end - offset) > 0) {
2075 if (copy > len) 2075 if (copy > len)
2076 copy = len; 2076 copy = len;
2077 csum2 = skb_copy_and_csum_bits(frag_iter, 2077 csum2 = skb_copy_and_csum_bits(frag_iter,
2078 offset - start, 2078 offset - start,
2079 to, copy, 0); 2079 to, copy, 0);
2080 csum = csum_block_add(csum, csum2, pos); 2080 csum = csum_block_add(csum, csum2, pos);
2081 if ((len -= copy) == 0) 2081 if ((len -= copy) == 0)
2082 return csum; 2082 return csum;
2083 offset += copy; 2083 offset += copy;
2084 to += copy; 2084 to += copy;
2085 pos += copy; 2085 pos += copy;
2086 } 2086 }
2087 start = end; 2087 start = end;
2088 } 2088 }
2089 BUG_ON(len); 2089 BUG_ON(len);
2090 return csum; 2090 return csum;
2091 } 2091 }
2092 EXPORT_SYMBOL(skb_copy_and_csum_bits); 2092 EXPORT_SYMBOL(skb_copy_and_csum_bits);
2093 2093
2094 /** 2094 /**
2095 * skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy() 2095 * skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy()
2096 * @from: source buffer 2096 * @from: source buffer
2097 * 2097 *
2098 * Calculates the amount of linear headroom needed in the 'to' skb passed 2098 * Calculates the amount of linear headroom needed in the 'to' skb passed
2099 * into skb_zerocopy(). 2099 * into skb_zerocopy().
2100 */ 2100 */
2101 unsigned int 2101 unsigned int
2102 skb_zerocopy_headlen(const struct sk_buff *from) 2102 skb_zerocopy_headlen(const struct sk_buff *from)
2103 { 2103 {
2104 unsigned int hlen = 0; 2104 unsigned int hlen = 0;
2105 2105
2106 if (!from->head_frag || 2106 if (!from->head_frag ||
2107 skb_headlen(from) < L1_CACHE_BYTES || 2107 skb_headlen(from) < L1_CACHE_BYTES ||
2108 skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) 2108 skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
2109 hlen = skb_headlen(from); 2109 hlen = skb_headlen(from);
2110 2110
2111 if (skb_has_frag_list(from)) 2111 if (skb_has_frag_list(from))
2112 hlen = from->len; 2112 hlen = from->len;
2113 2113
2114 return hlen; 2114 return hlen;
2115 } 2115 }
2116 EXPORT_SYMBOL_GPL(skb_zerocopy_headlen); 2116 EXPORT_SYMBOL_GPL(skb_zerocopy_headlen);
2117 2117
2118 /** 2118 /**
2119 * skb_zerocopy - Zero copy skb to skb 2119 * skb_zerocopy - Zero copy skb to skb
2120 * @to: destination buffer 2120 * @to: destination buffer
2121 * @from: source buffer 2121 * @from: source buffer
2122 * @len: number of bytes to copy from source buffer 2122 * @len: number of bytes to copy from source buffer
2123 * @hlen: size of linear headroom in destination buffer 2123 * @hlen: size of linear headroom in destination buffer
2124 * 2124 *
2125 * Copies up to `len` bytes from `from` to `to` by creating references 2125 * Copies up to `len` bytes from `from` to `to` by creating references
2126 * to the frags in the source buffer. 2126 * to the frags in the source buffer.
2127 * 2127 *
2128 * The `hlen` as calculated by skb_zerocopy_headlen() specifies the 2128 * The `hlen` as calculated by skb_zerocopy_headlen() specifies the
2129 * headroom in the `to` buffer. 2129 * headroom in the `to` buffer.
2130 * 2130 *
2131 * Return value: 2131 * Return value:
2132 * 0: everything is OK 2132 * 0: everything is OK
2133 * -ENOMEM: couldn't orphan frags of @from due to lack of memory 2133 * -ENOMEM: couldn't orphan frags of @from due to lack of memory
2134 * -EFAULT: skb_copy_bits() found some problem with skb geometry 2134 * -EFAULT: skb_copy_bits() found some problem with skb geometry
2135 */ 2135 */
2136 int 2136 int
2137 skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen) 2137 skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
2138 { 2138 {
2139 int i, j = 0; 2139 int i, j = 0;
2140 int plen = 0; /* length of skb->head fragment */ 2140 int plen = 0; /* length of skb->head fragment */
2141 int ret; 2141 int ret;
2142 struct page *page; 2142 struct page *page;
2143 unsigned int offset; 2143 unsigned int offset;
2144 2144
2145 BUG_ON(!from->head_frag && !hlen); 2145 BUG_ON(!from->head_frag && !hlen);
2146 2146
2147 /* don't bother with small payloads */ 2147 /* don't bother with small payloads */
2148 if (len <= skb_tailroom(to)) 2148 if (len <= skb_tailroom(to))
2149 return skb_copy_bits(from, 0, skb_put(to, len), len); 2149 return skb_copy_bits(from, 0, skb_put(to, len), len);
2150 2150
2151 if (hlen) { 2151 if (hlen) {
2152 ret = skb_copy_bits(from, 0, skb_put(to, hlen), hlen); 2152 ret = skb_copy_bits(from, 0, skb_put(to, hlen), hlen);
2153 if (unlikely(ret)) 2153 if (unlikely(ret))
2154 return ret; 2154 return ret;
2155 len -= hlen; 2155 len -= hlen;
2156 } else { 2156 } else {
2157 plen = min_t(int, skb_headlen(from), len); 2157 plen = min_t(int, skb_headlen(from), len);
2158 if (plen) { 2158 if (plen) {
2159 page = virt_to_head_page(from->head); 2159 page = virt_to_head_page(from->head);
2160 offset = from->data - (unsigned char *)page_address(page); 2160 offset = from->data - (unsigned char *)page_address(page);
2161 __skb_fill_page_desc(to, 0, page, offset, plen); 2161 __skb_fill_page_desc(to, 0, page, offset, plen);
2162 get_page(page); 2162 get_page(page);
2163 j = 1; 2163 j = 1;
2164 len -= plen; 2164 len -= plen;
2165 } 2165 }
2166 } 2166 }
2167 2167
2168 to->truesize += len + plen; 2168 to->truesize += len + plen;
2169 to->len += len + plen; 2169 to->len += len + plen;
2170 to->data_len += len + plen; 2170 to->data_len += len + plen;
2171 2171
2172 if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) { 2172 if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) {
2173 skb_tx_error(from); 2173 skb_tx_error(from);
2174 return -ENOMEM; 2174 return -ENOMEM;
2175 } 2175 }
2176 2176
2177 for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { 2177 for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
2178 if (!len) 2178 if (!len)
2179 break; 2179 break;
2180 skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; 2180 skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
2181 skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len); 2181 skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len);
2182 len -= skb_shinfo(to)->frags[j].size; 2182 len -= skb_shinfo(to)->frags[j].size;
2183 skb_frag_ref(to, j); 2183 skb_frag_ref(to, j);
2184 j++; 2184 j++;
2185 } 2185 }
2186 skb_shinfo(to)->nr_frags = j; 2186 skb_shinfo(to)->nr_frags = j;
2187 2187
2188 return 0; 2188 return 0;
2189 } 2189 }
2190 EXPORT_SYMBOL_GPL(skb_zerocopy); 2190 EXPORT_SYMBOL_GPL(skb_zerocopy);
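
/*
 * A minimal usage sketch (hypothetical helper): pair skb_zerocopy_headlen()
 * with skb_zerocopy() to build a new skb that references @from's pages
 * instead of copying them.  GFP_ATOMIC and the bare alloc_skb() sizing are
 * assumptions; real callers typically reserve extra headroom of their own.
 */
#include <linux/skbuff.h>

static struct sk_buff *example_zerocopy_skb(struct sk_buff *from)
{
	unsigned int hlen = skb_zerocopy_headlen(from);
	struct sk_buff *to;

	to = alloc_skb(hlen, GFP_ATOMIC);
	if (!to)
		return NULL;

	if (skb_zerocopy(to, from, from->len, hlen)) {
		kfree_skb(to);
		return NULL;
	}
	return to;
}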
2191 2191
2192 void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) 2192 void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
2193 { 2193 {
2194 __wsum csum; 2194 __wsum csum;
2195 long csstart; 2195 long csstart;
2196 2196
2197 if (skb->ip_summed == CHECKSUM_PARTIAL) 2197 if (skb->ip_summed == CHECKSUM_PARTIAL)
2198 csstart = skb_checksum_start_offset(skb); 2198 csstart = skb_checksum_start_offset(skb);
2199 else 2199 else
2200 csstart = skb_headlen(skb); 2200 csstart = skb_headlen(skb);
2201 2201
2202 BUG_ON(csstart > skb_headlen(skb)); 2202 BUG_ON(csstart > skb_headlen(skb));
2203 2203
2204 skb_copy_from_linear_data(skb, to, csstart); 2204 skb_copy_from_linear_data(skb, to, csstart);
2205 2205
2206 csum = 0; 2206 csum = 0;
2207 if (csstart != skb->len) 2207 if (csstart != skb->len)
2208 csum = skb_copy_and_csum_bits(skb, csstart, to + csstart, 2208 csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
2209 skb->len - csstart, 0); 2209 skb->len - csstart, 0);
2210 2210
2211 if (skb->ip_summed == CHECKSUM_PARTIAL) { 2211 if (skb->ip_summed == CHECKSUM_PARTIAL) {
2212 long csstuff = csstart + skb->csum_offset; 2212 long csstuff = csstart + skb->csum_offset;
2213 2213
2214 *((__sum16 *)(to + csstuff)) = csum_fold(csum); 2214 *((__sum16 *)(to + csstuff)) = csum_fold(csum);
2215 } 2215 }
2216 } 2216 }
2217 EXPORT_SYMBOL(skb_copy_and_csum_dev); 2217 EXPORT_SYMBOL(skb_copy_and_csum_dev);
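
/*
 * A minimal usage sketch (hypothetical driver snippet): linearize a frame
 * into a bounce buffer for a device without scatter-gather support, letting
 * the helper fill in the checksum field when the stack left the skb as
 * CHECKSUM_PARTIAL.  @buf is assumed to hold at least skb->len bytes.
 */
#include <linux/skbuff.h>

static void example_tx_linearize(const struct sk_buff *skb, u8 *buf)
{
	skb_copy_and_csum_dev(skb, buf);
	/* hand buf / skb->len to the hardware from here */
}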
2218 2218
2219 /** 2219 /**
2220 * skb_dequeue - remove from the head of the queue 2220 * skb_dequeue - remove from the head of the queue
2221 * @list: list to dequeue from 2221 * @list: list to dequeue from
2222 * 2222 *
2223 * Remove the head of the list. The list lock is taken so the function 2223 * Remove the head of the list. The list lock is taken so the function
2224 * may be used safely with other locking list functions. The head item is 2224 * may be used safely with other locking list functions. The head item is
2225 * returned or %NULL if the list is empty. 2225 * returned or %NULL if the list is empty.
2226 */ 2226 */
2227 2227
2228 struct sk_buff *skb_dequeue(struct sk_buff_head *list) 2228 struct sk_buff *skb_dequeue(struct sk_buff_head *list)
2229 { 2229 {
2230 unsigned long flags; 2230 unsigned long flags;
2231 struct sk_buff *result; 2231 struct sk_buff *result;
2232 2232
2233 spin_lock_irqsave(&list->lock, flags); 2233 spin_lock_irqsave(&list->lock, flags);
2234 result = __skb_dequeue(list); 2234 result = __skb_dequeue(list);
2235 spin_unlock_irqrestore(&list->lock, flags); 2235 spin_unlock_irqrestore(&list->lock, flags);
2236 return result; 2236 return result;
2237 } 2237 }
2238 EXPORT_SYMBOL(skb_dequeue); 2238 EXPORT_SYMBOL(skb_dequeue);
2239 2239
2240 /** 2240 /**
2241 * skb_dequeue_tail - remove from the tail of the queue 2241 * skb_dequeue_tail - remove from the tail of the queue
2242 * @list: list to dequeue from 2242 * @list: list to dequeue from
2243 * 2243 *
2244 * Remove the tail of the list. The list lock is taken so the function 2244 * Remove the tail of the list. The list lock is taken so the function
2245 * may be used safely with other locking list functions. The tail item is 2245 * may be used safely with other locking list functions. The tail item is
2246 * returned or %NULL if the list is empty. 2246 * returned or %NULL if the list is empty.
2247 */ 2247 */
2248 struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list) 2248 struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
2249 { 2249 {
2250 unsigned long flags; 2250 unsigned long flags;
2251 struct sk_buff *result; 2251 struct sk_buff *result;
2252 2252
2253 spin_lock_irqsave(&list->lock, flags); 2253 spin_lock_irqsave(&list->lock, flags);
2254 result = __skb_dequeue_tail(list); 2254 result = __skb_dequeue_tail(list);
2255 spin_unlock_irqrestore(&list->lock, flags); 2255 spin_unlock_irqrestore(&list->lock, flags);
2256 return result; 2256 return result;
2257 } 2257 }
2258 EXPORT_SYMBOL(skb_dequeue_tail); 2258 EXPORT_SYMBOL(skb_dequeue_tail);
2259 2259
2260 /** 2260 /**
2261 * skb_queue_purge - empty a list 2261 * skb_queue_purge - empty a list
2262 * @list: list to empty 2262 * @list: list to empty
2263 * 2263 *
2264 * Delete all buffers on an &sk_buff list. Each buffer is removed from 2264 * Delete all buffers on an &sk_buff list. Each buffer is removed from
2265 * the list and one reference dropped. This function takes the list 2265 * the list and one reference dropped. This function takes the list
2266 * lock and is atomic with respect to other list locking functions. 2266 * lock and is atomic with respect to other list locking functions.
2267 */ 2267 */
2268 void skb_queue_purge(struct sk_buff_head *list) 2268 void skb_queue_purge(struct sk_buff_head *list)
2269 { 2269 {
2270 struct sk_buff *skb; 2270 struct sk_buff *skb;
2271 while ((skb = skb_dequeue(list)) != NULL) 2271 while ((skb = skb_dequeue(list)) != NULL)
2272 kfree_skb(skb); 2272 kfree_skb(skb);
2273 } 2273 }
2274 EXPORT_SYMBOL(skb_queue_purge); 2274 EXPORT_SYMBOL(skb_queue_purge);
2275 2275
2276 /** 2276 /**
2277 * skb_queue_head - queue a buffer at the list head 2277 * skb_queue_head - queue a buffer at the list head
2278 * @list: list to use 2278 * @list: list to use
2279 * @newsk: buffer to queue 2279 * @newsk: buffer to queue
2280 * 2280 *
2281 * Queue a buffer at the start of the list. This function takes the 2281 * Queue a buffer at the start of the list. This function takes the
2282 * list lock and can be used safely with other locking &sk_buff 2282 * list lock and can be used safely with other locking &sk_buff
2283 * functions. 2283 * functions.
2284 * 2284 *
2285 * A buffer cannot be placed on two lists at the same time. 2285 * A buffer cannot be placed on two lists at the same time.
2286 */ 2286 */
2287 void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) 2287 void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
2288 { 2288 {
2289 unsigned long flags; 2289 unsigned long flags;
2290 2290
2291 spin_lock_irqsave(&list->lock, flags); 2291 spin_lock_irqsave(&list->lock, flags);
2292 __skb_queue_head(list, newsk); 2292 __skb_queue_head(list, newsk);
2293 spin_unlock_irqrestore(&list->lock, flags); 2293 spin_unlock_irqrestore(&list->lock, flags);
2294 } 2294 }
2295 EXPORT_SYMBOL(skb_queue_head); 2295 EXPORT_SYMBOL(skb_queue_head);
2296 2296
2297 /** 2297 /**
2298 * skb_queue_tail - queue a buffer at the list tail 2298 * skb_queue_tail - queue a buffer at the list tail
2299 * @list: list to use 2299 * @list: list to use
2300 * @newsk: buffer to queue 2300 * @newsk: buffer to queue
2301 * 2301 *
2302 * Queue a buffer at the tail of the list. This function takes the 2302 * Queue a buffer at the tail of the list. This function takes the
2303 * list lock and can be used safely with other locking &sk_buff 2303 * list lock and can be used safely with other locking &sk_buff
2304 * functions. 2304 * functions.
2305 * 2305 *
2306 * A buffer cannot be placed on two lists at the same time. 2306 * A buffer cannot be placed on two lists at the same time.
2307 */ 2307 */
2308 void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) 2308 void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
2309 { 2309 {
2310 unsigned long flags; 2310 unsigned long flags;
2311 2311
2312 spin_lock_irqsave(&list->lock, flags); 2312 spin_lock_irqsave(&list->lock, flags);
2313 __skb_queue_tail(list, newsk); 2313 __skb_queue_tail(list, newsk);
2314 spin_unlock_irqrestore(&list->lock, flags); 2314 spin_unlock_irqrestore(&list->lock, flags);
2315 } 2315 }
2316 EXPORT_SYMBOL(skb_queue_tail); 2316 EXPORT_SYMBOL(skb_queue_tail);
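
/*
 * A minimal usage sketch of the locked-queue helpers (all names below are
 * hypothetical): the sk_buff_head carries its own spinlock, so producer and
 * consumer need no extra locking, and skb_queue_purge() drops whatever is
 * left at teardown.
 */
#include <linux/skbuff.h>

static struct sk_buff_head example_rxq;

static void example_rxq_init(void)
{
	skb_queue_head_init(&example_rxq);
}

static void example_rx(struct sk_buff *skb)
{
	skb_queue_tail(&example_rxq, skb);	/* takes list->lock */
}

static struct sk_buff *example_poll(void)
{
	return skb_dequeue(&example_rxq);	/* NULL when empty */
}

static void example_rxq_destroy(void)
{
	skb_queue_purge(&example_rxq);		/* frees every queued buffer */
}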
2317 2317
2318 /** 2318 /**
2319 * skb_unlink - remove a buffer from a list 2319 * skb_unlink - remove a buffer from a list
2320 * @skb: buffer to remove 2320 * @skb: buffer to remove
2321 * @list: list to use 2321 * @list: list to use
2322 * 2322 *
2323 * Remove a packet from a list. The list locks are taken and this 2323 * Remove a packet from a list. The list locks are taken and this
2324 * function is atomic with respect to other list locked calls. 2324 * function is atomic with respect to other list locked calls.
2325 * 2325 *
2326 * You must know what list the SKB is on. 2326 * You must know what list the SKB is on.
2327 */ 2327 */
2328 void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) 2328 void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
2329 { 2329 {
2330 unsigned long flags; 2330 unsigned long flags;
2331 2331
2332 spin_lock_irqsave(&list->lock, flags); 2332 spin_lock_irqsave(&list->lock, flags);
2333 __skb_unlink(skb, list); 2333 __skb_unlink(skb, list);
2334 spin_unlock_irqrestore(&list->lock, flags); 2334 spin_unlock_irqrestore(&list->lock, flags);
2335 } 2335 }
2336 EXPORT_SYMBOL(skb_unlink); 2336 EXPORT_SYMBOL(skb_unlink);
2337 2337
2338 /** 2338 /**
2339 * skb_append - append a buffer 2339 * skb_append - append a buffer
2340 * @old: buffer to insert after 2340 * @old: buffer to insert after
2341 * @newsk: buffer to insert 2341 * @newsk: buffer to insert
2342 * @list: list to use 2342 * @list: list to use
2343 * 2343 *
2344 * Place a packet after a given packet in a list. The list locks are taken 2344 * Place a packet after a given packet in a list. The list locks are taken
2345 * and this function is atomic with respect to other list locked calls. 2345 * and this function is atomic with respect to other list locked calls.
2346 * A buffer cannot be placed on two lists at the same time. 2346 * A buffer cannot be placed on two lists at the same time.
2347 */ 2347 */
2348 void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) 2348 void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
2349 { 2349 {
2350 unsigned long flags; 2350 unsigned long flags;
2351 2351
2352 spin_lock_irqsave(&list->lock, flags); 2352 spin_lock_irqsave(&list->lock, flags);
2353 __skb_queue_after(list, old, newsk); 2353 __skb_queue_after(list, old, newsk);
2354 spin_unlock_irqrestore(&list->lock, flags); 2354 spin_unlock_irqrestore(&list->lock, flags);
2355 } 2355 }
2356 EXPORT_SYMBOL(skb_append); 2356 EXPORT_SYMBOL(skb_append);
2357 2357
2358 /** 2358 /**
2359 * skb_insert - insert a buffer 2359 * skb_insert - insert a buffer
2360 * @old: buffer to insert before 2360 * @old: buffer to insert before
2361 * @newsk: buffer to insert 2361 * @newsk: buffer to insert
2362 * @list: list to use 2362 * @list: list to use
2363 * 2363 *
2364 * Place a packet before a given packet in a list. The list locks are 2364 * Place a packet before a given packet in a list. The list locks are
2365 * taken and this function is atomic with respect to other list locked 2365 * taken and this function is atomic with respect to other list locked
2366 * calls. 2366 * calls.
2367 * 2367 *
2368 * A buffer cannot be placed on two lists at the same time. 2368 * A buffer cannot be placed on two lists at the same time.
2369 */ 2369 */
2370 void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) 2370 void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
2371 { 2371 {
2372 unsigned long flags; 2372 unsigned long flags;
2373 2373
2374 spin_lock_irqsave(&list->lock, flags); 2374 spin_lock_irqsave(&list->lock, flags);
2375 __skb_insert(newsk, old->prev, old, list); 2375 __skb_insert(newsk, old->prev, old, list);
2376 spin_unlock_irqrestore(&list->lock, flags); 2376 spin_unlock_irqrestore(&list->lock, flags);
2377 } 2377 }
2378 EXPORT_SYMBOL(skb_insert); 2378 EXPORT_SYMBOL(skb_insert);
2379 2379
2380 static inline void skb_split_inside_header(struct sk_buff *skb, 2380 static inline void skb_split_inside_header(struct sk_buff *skb,
2381 struct sk_buff* skb1, 2381 struct sk_buff* skb1,
2382 const u32 len, const int pos) 2382 const u32 len, const int pos)
2383 { 2383 {
2384 int i; 2384 int i;
2385 2385
2386 skb_copy_from_linear_data_offset(skb, len, skb_put(skb1, pos - len), 2386 skb_copy_from_linear_data_offset(skb, len, skb_put(skb1, pos - len),
2387 pos - len); 2387 pos - len);
2388 /* And move data appendix as is. */ 2388 /* And move data appendix as is. */
2389 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 2389 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
2390 skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i]; 2390 skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
2391 2391
2392 skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags; 2392 skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
2393 skb_shinfo(skb)->nr_frags = 0; 2393 skb_shinfo(skb)->nr_frags = 0;
2394 skb1->data_len = skb->data_len; 2394 skb1->data_len = skb->data_len;
2395 skb1->len += skb1->data_len; 2395 skb1->len += skb1->data_len;
2396 skb->data_len = 0; 2396 skb->data_len = 0;
2397 skb->len = len; 2397 skb->len = len;
2398 skb_set_tail_pointer(skb, len); 2398 skb_set_tail_pointer(skb, len);
2399 } 2399 }
2400 2400
2401 static inline void skb_split_no_header(struct sk_buff *skb, 2401 static inline void skb_split_no_header(struct sk_buff *skb,
2402 struct sk_buff* skb1, 2402 struct sk_buff* skb1,
2403 const u32 len, int pos) 2403 const u32 len, int pos)
2404 { 2404 {
2405 int i, k = 0; 2405 int i, k = 0;
2406 const int nfrags = skb_shinfo(skb)->nr_frags; 2406 const int nfrags = skb_shinfo(skb)->nr_frags;
2407 2407
2408 skb_shinfo(skb)->nr_frags = 0; 2408 skb_shinfo(skb)->nr_frags = 0;
2409 skb1->len = skb1->data_len = skb->len - len; 2409 skb1->len = skb1->data_len = skb->len - len;
2410 skb->len = len; 2410 skb->len = len;
2411 skb->data_len = len - pos; 2411 skb->data_len = len - pos;
2412 2412
2413 for (i = 0; i < nfrags; i++) { 2413 for (i = 0; i < nfrags; i++) {
2414 int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); 2414 int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
2415 2415
2416 if (pos + size > len) { 2416 if (pos + size > len) {
2417 skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i]; 2417 skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];
2418 2418
2419 if (pos < len) { 2419 if (pos < len) {
2420 /* Split frag. 2420 /* Split frag.
2421 * We have two variants in this case: 2421 * We have two variants in this case:
2422 * 1. Move the whole frag to the second 2422 * 1. Move the whole frag to the second
2423 * part, if possible. E.g. this 2423 * part, if possible. E.g. this
2424 * approach is mandatory for TUX, 2424 * approach is mandatory for TUX,
2425 * where splitting is expensive. 2425 * where splitting is expensive.
2426 * 2. Split exactly at the boundary; this is what we do below. 2426 * 2. Split exactly at the boundary; this is what we do below.
2427 */ 2427 */
2428 skb_frag_ref(skb, i); 2428 skb_frag_ref(skb, i);
2429 skb_shinfo(skb1)->frags[0].page_offset += len - pos; 2429 skb_shinfo(skb1)->frags[0].page_offset += len - pos;
2430 skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos); 2430 skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos);
2431 skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos); 2431 skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos);
2432 skb_shinfo(skb)->nr_frags++; 2432 skb_shinfo(skb)->nr_frags++;
2433 } 2433 }
2434 k++; 2434 k++;
2435 } else 2435 } else
2436 skb_shinfo(skb)->nr_frags++; 2436 skb_shinfo(skb)->nr_frags++;
2437 pos += size; 2437 pos += size;
2438 } 2438 }
2439 skb_shinfo(skb1)->nr_frags = k; 2439 skb_shinfo(skb1)->nr_frags = k;
2440 } 2440 }
2441 2441
2442 /** 2442 /**
2443 * skb_split - Split fragmented skb to two parts at length len. 2443 * skb_split - Split fragmented skb to two parts at length len.
2444 * @skb: the buffer to split 2444 * @skb: the buffer to split
2445 * @skb1: the buffer to receive the second part 2445 * @skb1: the buffer to receive the second part
2446 * @len: new length for skb 2446 * @len: new length for skb
2447 */ 2447 */
2448 void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) 2448 void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
2449 { 2449 {
2450 int pos = skb_headlen(skb); 2450 int pos = skb_headlen(skb);
2451 2451
2452 skb_shinfo(skb1)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; 2452 skb_shinfo(skb1)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG;
2453 if (len < pos) /* Split line is inside header. */ 2453 if (len < pos) /* Split line is inside header. */
2454 skb_split_inside_header(skb, skb1, len, pos); 2454 skb_split_inside_header(skb, skb1, len, pos);
2455 else /* Second chunk has no header, nothing to copy. */ 2455 else /* Second chunk has no header, nothing to copy. */
2456 skb_split_no_header(skb, skb1, len, pos); 2456 skb_split_no_header(skb, skb1, len, pos);
2457 } 2457 }
2458 EXPORT_SYMBOL(skb_split); 2458 EXPORT_SYMBOL(skb_split);
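
/*
 * A minimal usage sketch (hypothetical helper): carve the first @len bytes
 * off @skb and return the remainder in a new buffer, roughly what the TCP
 * segmentation paths do around skb_split().  The second skb only needs
 * linear space when the split point falls inside skb's header; GFP_ATOMIC
 * is an assumption.
 */
#include <linux/skbuff.h>

static struct sk_buff *example_split_tail(struct sk_buff *skb, u32 len)
{
	unsigned int nsize = 0;
	struct sk_buff *rest;

	if (len >= skb->len)
		return NULL;

	if (skb_headlen(skb) > len)		/* split line inside the header */
		nsize = skb_headlen(skb) - len;

	rest = alloc_skb(nsize, GFP_ATOMIC);
	if (!rest)
		return NULL;

	skb_split(skb, rest, len);	/* skb keeps [0, len), rest gets the tail */
	return rest;
}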
2459 2459
2460 /* Shifting from/to a cloned skb is a no-go. 2460 /* Shifting from/to a cloned skb is a no-go.
2461 * 2461 *
2462 * Caller cannot keep skb_shinfo related pointers past calling here! 2462 * Caller cannot keep skb_shinfo related pointers past calling here!
2463 */ 2463 */
2464 static int skb_prepare_for_shift(struct sk_buff *skb) 2464 static int skb_prepare_for_shift(struct sk_buff *skb)
2465 { 2465 {
2466 return skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC); 2466 return skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
2467 } 2467 }
2468 2468
2469 /** 2469 /**
2470 * skb_shift - Shifts paged data partially from skb to another 2470 * skb_shift - Shifts paged data partially from skb to another
2471 * @tgt: buffer into which tail data gets added 2471 * @tgt: buffer into which tail data gets added
2472 * @skb: buffer from which the paged data comes from 2472 * @skb: buffer from which the paged data comes from
2473 * @shiftlen: shift up to this many bytes 2473 * @shiftlen: shift up to this many bytes
2474 * 2474 *
2475 * Attempts to shift up to shiftlen worth of bytes, which may be less than 2475 * Attempts to shift up to shiftlen worth of bytes, which may be less than
2476 * the length of the skb, from skb to tgt. Returns the number of bytes shifted. 2476 * the length of the skb, from skb to tgt. Returns the number of bytes shifted.
2477 * It's up to the caller to free skb if everything was shifted. 2477 * It's up to the caller to free skb if everything was shifted.
2478 * 2478 *
2479 * If @tgt runs out of frags, the whole operation is aborted. 2479 * If @tgt runs out of frags, the whole operation is aborted.
2480 * 2480 *
2481 * Skb cannot include anything else but paged data while tgt is allowed 2481 * Skb cannot include anything else but paged data while tgt is allowed
2482 * to have non-paged data as well. 2482 * to have non-paged data as well.
2483 * 2483 *
2484 * TODO: full sized shift could be optimized but that would need 2484 * TODO: full sized shift could be optimized but that would need
2485 * specialized skb free'er to handle frags without up-to-date nr_frags. 2485 * specialized skb free'er to handle frags without up-to-date nr_frags.
2486 */ 2486 */
2487 int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) 2487 int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
2488 { 2488 {
2489 int from, to, merge, todo; 2489 int from, to, merge, todo;
2490 struct skb_frag_struct *fragfrom, *fragto; 2490 struct skb_frag_struct *fragfrom, *fragto;
2491 2491
2492 BUG_ON(shiftlen > skb->len); 2492 BUG_ON(shiftlen > skb->len);
2493 BUG_ON(skb_headlen(skb)); /* Would corrupt stream */ 2493 BUG_ON(skb_headlen(skb)); /* Would corrupt stream */
2494 2494
2495 todo = shiftlen; 2495 todo = shiftlen;
2496 from = 0; 2496 from = 0;
2497 to = skb_shinfo(tgt)->nr_frags; 2497 to = skb_shinfo(tgt)->nr_frags;
2498 fragfrom = &skb_shinfo(skb)->frags[from]; 2498 fragfrom = &skb_shinfo(skb)->frags[from];
2499 2499
2500 /* Actual merge is delayed until the point when we know we can 2500 /* Actual merge is delayed until the point when we know we can
2501 * commit all, so that we don't have to undo partial changes 2501 * commit all, so that we don't have to undo partial changes
2502 */ 2502 */
2503 if (!to || 2503 if (!to ||
2504 !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom), 2504 !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom),
2505 fragfrom->page_offset)) { 2505 fragfrom->page_offset)) {
2506 merge = -1; 2506 merge = -1;
2507 } else { 2507 } else {
2508 merge = to - 1; 2508 merge = to - 1;
2509 2509
2510 todo -= skb_frag_size(fragfrom); 2510 todo -= skb_frag_size(fragfrom);
2511 if (todo < 0) { 2511 if (todo < 0) {
2512 if (skb_prepare_for_shift(skb) || 2512 if (skb_prepare_for_shift(skb) ||
2513 skb_prepare_for_shift(tgt)) 2513 skb_prepare_for_shift(tgt))
2514 return 0; 2514 return 0;
2515 2515
2516 /* All previous frag pointers might be stale! */ 2516 /* All previous frag pointers might be stale! */
2517 fragfrom = &skb_shinfo(skb)->frags[from]; 2517 fragfrom = &skb_shinfo(skb)->frags[from];
2518 fragto = &skb_shinfo(tgt)->frags[merge]; 2518 fragto = &skb_shinfo(tgt)->frags[merge];
2519 2519
2520 skb_frag_size_add(fragto, shiftlen); 2520 skb_frag_size_add(fragto, shiftlen);
2521 skb_frag_size_sub(fragfrom, shiftlen); 2521 skb_frag_size_sub(fragfrom, shiftlen);
2522 fragfrom->page_offset += shiftlen; 2522 fragfrom->page_offset += shiftlen;
2523 2523
2524 goto onlymerged; 2524 goto onlymerged;
2525 } 2525 }
2526 2526
2527 from++; 2527 from++;
2528 } 2528 }
2529 2529
2530 /* Skip full, not-fitting skb to avoid expensive operations */ 2530 /* Skip full, not-fitting skb to avoid expensive operations */
2531 if ((shiftlen == skb->len) && 2531 if ((shiftlen == skb->len) &&
2532 (skb_shinfo(skb)->nr_frags - from) > (MAX_SKB_FRAGS - to)) 2532 (skb_shinfo(skb)->nr_frags - from) > (MAX_SKB_FRAGS - to))
2533 return 0; 2533 return 0;
2534 2534
2535 if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt)) 2535 if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt))
2536 return 0; 2536 return 0;
2537 2537
2538 while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) { 2538 while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) {
2539 if (to == MAX_SKB_FRAGS) 2539 if (to == MAX_SKB_FRAGS)
2540 return 0; 2540 return 0;
2541 2541
2542 fragfrom = &skb_shinfo(skb)->frags[from]; 2542 fragfrom = &skb_shinfo(skb)->frags[from];
2543 fragto = &skb_shinfo(tgt)->frags[to]; 2543 fragto = &skb_shinfo(tgt)->frags[to];
2544 2544
2545 if (todo >= skb_frag_size(fragfrom)) { 2545 if (todo >= skb_frag_size(fragfrom)) {
2546 *fragto = *fragfrom; 2546 *fragto = *fragfrom;
2547 todo -= skb_frag_size(fragfrom); 2547 todo -= skb_frag_size(fragfrom);
2548 from++; 2548 from++;
2549 to++; 2549 to++;
2550 2550
2551 } else { 2551 } else {
2552 __skb_frag_ref(fragfrom); 2552 __skb_frag_ref(fragfrom);
2553 fragto->page = fragfrom->page; 2553 fragto->page = fragfrom->page;
2554 fragto->page_offset = fragfrom->page_offset; 2554 fragto->page_offset = fragfrom->page_offset;
2555 skb_frag_size_set(fragto, todo); 2555 skb_frag_size_set(fragto, todo);
2556 2556
2557 fragfrom->page_offset += todo; 2557 fragfrom->page_offset += todo;
2558 skb_frag_size_sub(fragfrom, todo); 2558 skb_frag_size_sub(fragfrom, todo);
2559 todo = 0; 2559 todo = 0;
2560 2560
2561 to++; 2561 to++;
2562 break; 2562 break;
2563 } 2563 }
2564 } 2564 }
2565 2565
2566 /* Ready to "commit" this state change to tgt */ 2566 /* Ready to "commit" this state change to tgt */
2567 skb_shinfo(tgt)->nr_frags = to; 2567 skb_shinfo(tgt)->nr_frags = to;
2568 2568
2569 if (merge >= 0) { 2569 if (merge >= 0) {
2570 fragfrom = &skb_shinfo(skb)->frags[0]; 2570 fragfrom = &skb_shinfo(skb)->frags[0];
2571 fragto = &skb_shinfo(tgt)->frags[merge]; 2571 fragto = &skb_shinfo(tgt)->frags[merge];
2572 2572
2573 skb_frag_size_add(fragto, skb_frag_size(fragfrom)); 2573 skb_frag_size_add(fragto, skb_frag_size(fragfrom));
2574 __skb_frag_unref(fragfrom); 2574 __skb_frag_unref(fragfrom);
2575 } 2575 }
2576 2576
2577 /* Reposition in the original skb */ 2577 /* Reposition in the original skb */
2578 to = 0; 2578 to = 0;
2579 while (from < skb_shinfo(skb)->nr_frags) 2579 while (from < skb_shinfo(skb)->nr_frags)
2580 skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++]; 2580 skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++];
2581 skb_shinfo(skb)->nr_frags = to; 2581 skb_shinfo(skb)->nr_frags = to;
2582 2582
2583 BUG_ON(todo > 0 && !skb_shinfo(skb)->nr_frags); 2583 BUG_ON(todo > 0 && !skb_shinfo(skb)->nr_frags);
2584 2584
2585 onlymerged: 2585 onlymerged:
2586 /* Most likely the tgt won't ever need its checksum anymore, skb on 2586 /* Most likely the tgt won't ever need its checksum anymore, skb on
2587 * the other hand might need it if it needs to be resent 2587 * the other hand might need it if it needs to be resent
2588 */ 2588 */
2589 tgt->ip_summed = CHECKSUM_PARTIAL; 2589 tgt->ip_summed = CHECKSUM_PARTIAL;
2590 skb->ip_summed = CHECKSUM_PARTIAL; 2590 skb->ip_summed = CHECKSUM_PARTIAL;
2591 2591
2592 /* Yak, is it really working this way? Some helper please? */ 2592 /* Yak, is it really working this way? Some helper please? */
2593 skb->len -= shiftlen; 2593 skb->len -= shiftlen;
2594 skb->data_len -= shiftlen; 2594 skb->data_len -= shiftlen;
2595 skb->truesize -= shiftlen; 2595 skb->truesize -= shiftlen;
2596 tgt->len += shiftlen; 2596 tgt->len += shiftlen;
2597 tgt->data_len += shiftlen; 2597 tgt->data_len += shiftlen;
2598 tgt->truesize += shiftlen; 2598 tgt->truesize += shiftlen;
2599 2599
2600 return shiftlen; 2600 return shiftlen;
2601 } 2601 }
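
/*
 * A minimal usage sketch (hypothetical helper): try to shift up to @bytes
 * of paged data from @skb into @tgt and free @skb when everything moved,
 * as the doc comment above leaves to the caller.  @skb must carry paged
 * data only (skb_headlen() == 0), otherwise skb_shift() hits its BUG_ON.
 */
#include <linux/skbuff.h>
#include <linux/kernel.h>

static int example_shift_into(struct sk_buff *tgt, struct sk_buff *skb,
			      int bytes)
{
	int shifted = skb_shift(tgt, skb, min_t(int, bytes, skb->len));

	if (shifted && !skb->len)
		kfree_skb(skb);		/* fully shifted, nothing left in skb */
	return shifted;
}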
2602 2602
2603 /** 2603 /**
2604 * skb_prepare_seq_read - Prepare a sequential read of skb data 2604 * skb_prepare_seq_read - Prepare a sequential read of skb data
2605 * @skb: the buffer to read 2605 * @skb: the buffer to read
2606 * @from: lower offset of data to be read 2606 * @from: lower offset of data to be read
2607 * @to: upper offset of data to be read 2607 * @to: upper offset of data to be read
2608 * @st: state variable 2608 * @st: state variable
2609 * 2609 *
2610 * Initializes the specified state variable. Must be called before 2610 * Initializes the specified state variable. Must be called before
2611 * invoking skb_seq_read() for the first time. 2611 * invoking skb_seq_read() for the first time.
2612 */ 2612 */
2613 void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, 2613 void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
2614 unsigned int to, struct skb_seq_state *st) 2614 unsigned int to, struct skb_seq_state *st)
2615 { 2615 {
2616 st->lower_offset = from; 2616 st->lower_offset = from;
2617 st->upper_offset = to; 2617 st->upper_offset = to;
2618 st->root_skb = st->cur_skb = skb; 2618 st->root_skb = st->cur_skb = skb;
2619 st->frag_idx = st->stepped_offset = 0; 2619 st->frag_idx = st->stepped_offset = 0;
2620 st->frag_data = NULL; 2620 st->frag_data = NULL;
2621 } 2621 }
2622 EXPORT_SYMBOL(skb_prepare_seq_read); 2622 EXPORT_SYMBOL(skb_prepare_seq_read);
2623 2623
2624 /** 2624 /**
2625 * skb_seq_read - Sequentially read skb data 2625 * skb_seq_read - Sequentially read skb data
2626 * @consumed: number of bytes consumed by the caller so far 2626 * @consumed: number of bytes consumed by the caller so far
2627 * @data: destination pointer for data to be returned 2627 * @data: destination pointer for data to be returned
2628 * @st: state variable 2628 * @st: state variable
2629 * 2629 *
2630 * Reads a block of skb data at @consumed relative to the 2630 * Reads a block of skb data at @consumed relative to the
2631 * lower offset specified to skb_prepare_seq_read(). Assigns 2631 * lower offset specified to skb_prepare_seq_read(). Assigns
2632 * the head of the data block to @data and returns the length 2632 * the head of the data block to @data and returns the length
2633 * of the block or 0 if the end of the skb data or the upper 2633 * of the block or 0 if the end of the skb data or the upper
2634 * offset has been reached. 2634 * offset has been reached.
2635 * 2635 *
2636 * The caller is not required to consume all of the data 2636 * The caller is not required to consume all of the data
2637 * returned, i.e. @consumed is typically set to the number 2637 * returned, i.e. @consumed is typically set to the number
2638 * of bytes already consumed and the next call to 2638 * of bytes already consumed and the next call to
2639 * skb_seq_read() will return the remaining part of the block. 2639 * skb_seq_read() will return the remaining part of the block.
2640 * 2640 *
2641 * Note 1: The size of each block of data returned can be arbitrary, 2641 * Note 1: The size of each block of data returned can be arbitrary,
2642 * this limitation is the cost for zerocopy sequential 2642 * this limitation is the cost for zerocopy sequential
2643 * reads of potentially non-linear data. 2643 * reads of potentially non-linear data.
2644 * 2644 *
2645 * Note 2: Fragment lists within fragments are not implemented 2645 * Note 2: Fragment lists within fragments are not implemented
2646 * at the moment, state->root_skb could be replaced with 2646 * at the moment, state->root_skb could be replaced with
2647 * a stack for this purpose. 2647 * a stack for this purpose.
2648 */ 2648 */
2649 unsigned int skb_seq_read(unsigned int consumed, const u8 **data, 2649 unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
2650 struct skb_seq_state *st) 2650 struct skb_seq_state *st)
2651 { 2651 {
2652 unsigned int block_limit, abs_offset = consumed + st->lower_offset; 2652 unsigned int block_limit, abs_offset = consumed + st->lower_offset;
2653 skb_frag_t *frag; 2653 skb_frag_t *frag;
2654 2654
2655 if (unlikely(abs_offset >= st->upper_offset)) { 2655 if (unlikely(abs_offset >= st->upper_offset)) {
2656 if (st->frag_data) { 2656 if (st->frag_data) {
2657 kunmap_atomic(st->frag_data); 2657 kunmap_atomic(st->frag_data);
2658 st->frag_data = NULL; 2658 st->frag_data = NULL;
2659 } 2659 }
2660 return 0; 2660 return 0;
2661 } 2661 }
2662 2662
2663 next_skb: 2663 next_skb:
2664 block_limit = skb_headlen(st->cur_skb) + st->stepped_offset; 2664 block_limit = skb_headlen(st->cur_skb) + st->stepped_offset;
2665 2665
2666 if (abs_offset < block_limit && !st->frag_data) { 2666 if (abs_offset < block_limit && !st->frag_data) {
2667 *data = st->cur_skb->data + (abs_offset - st->stepped_offset); 2667 *data = st->cur_skb->data + (abs_offset - st->stepped_offset);
2668 return block_limit - abs_offset; 2668 return block_limit - abs_offset;
2669 } 2669 }
2670 2670
2671 if (st->frag_idx == 0 && !st->frag_data) 2671 if (st->frag_idx == 0 && !st->frag_data)
2672 st->stepped_offset += skb_headlen(st->cur_skb); 2672 st->stepped_offset += skb_headlen(st->cur_skb);
2673 2673
2674 while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) { 2674 while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {
2675 frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx]; 2675 frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx];
2676 block_limit = skb_frag_size(frag) + st->stepped_offset; 2676 block_limit = skb_frag_size(frag) + st->stepped_offset;
2677 2677
2678 if (abs_offset < block_limit) { 2678 if (abs_offset < block_limit) {
2679 if (!st->frag_data) 2679 if (!st->frag_data)
2680 st->frag_data = kmap_atomic(skb_frag_page(frag)); 2680 st->frag_data = kmap_atomic(skb_frag_page(frag));
2681 2681
2682 *data = (u8 *) st->frag_data + frag->page_offset + 2682 *data = (u8 *) st->frag_data + frag->page_offset +
2683 (abs_offset - st->stepped_offset); 2683 (abs_offset - st->stepped_offset);
2684 2684
2685 return block_limit - abs_offset; 2685 return block_limit - abs_offset;
2686 } 2686 }
2687 2687
2688 if (st->frag_data) { 2688 if (st->frag_data) {
2689 kunmap_atomic(st->frag_data); 2689 kunmap_atomic(st->frag_data);
2690 st->frag_data = NULL; 2690 st->frag_data = NULL;
2691 } 2691 }
2692 2692
2693 st->frag_idx++; 2693 st->frag_idx++;
2694 st->stepped_offset += skb_frag_size(frag); 2694 st->stepped_offset += skb_frag_size(frag);
2695 } 2695 }
2696 2696
2697 if (st->frag_data) { 2697 if (st->frag_data) {
2698 kunmap_atomic(st->frag_data); 2698 kunmap_atomic(st->frag_data);
2699 st->frag_data = NULL; 2699 st->frag_data = NULL;
2700 } 2700 }
2701 2701
2702 if (st->root_skb == st->cur_skb && skb_has_frag_list(st->root_skb)) { 2702 if (st->root_skb == st->cur_skb && skb_has_frag_list(st->root_skb)) {
2703 st->cur_skb = skb_shinfo(st->root_skb)->frag_list; 2703 st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
2704 st->frag_idx = 0; 2704 st->frag_idx = 0;
2705 goto next_skb; 2705 goto next_skb;
2706 } else if (st->cur_skb->next) { 2706 } else if (st->cur_skb->next) {
2707 st->cur_skb = st->cur_skb->next; 2707 st->cur_skb = st->cur_skb->next;
2708 st->frag_idx = 0; 2708 st->frag_idx = 0;
2709 goto next_skb; 2709 goto next_skb;
2710 } 2710 }
2711 2711
2712 return 0; 2712 return 0;
2713 } 2713 }
2714 EXPORT_SYMBOL(skb_seq_read); 2714 EXPORT_SYMBOL(skb_seq_read);
2715 2715
2716 /** 2716 /**
2717 * skb_abort_seq_read - Abort a sequential read of skb data 2717 * skb_abort_seq_read - Abort a sequential read of skb data
2718 * @st: state variable 2718 * @st: state variable
2719 * 2719 *
2720 * Must be called if the sequential read was abandoned, i.e. if 2720 * Must be called if the sequential read was abandoned, i.e. if
2721 * skb_seq_read() was not called until it returned 0. 2721 * skb_seq_read() was not called until it returned 0.
2722 */ 2722 */
2723 void skb_abort_seq_read(struct skb_seq_state *st) 2723 void skb_abort_seq_read(struct skb_seq_state *st)
2724 { 2724 {
2725 if (st->frag_data) 2725 if (st->frag_data)
2726 kunmap_atomic(st->frag_data); 2726 kunmap_atomic(st->frag_data);
2727 } 2727 }
2728 EXPORT_SYMBOL(skb_abort_seq_read); 2728 EXPORT_SYMBOL(skb_abort_seq_read);
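
/*
 * A minimal usage sketch (hypothetical helper): walk every byte of a
 * possibly non-linear skb with the sequential-read API and count the
 * occurrences of @c.  Since the loop runs skb_seq_read() until it returns
 * 0, no skb_abort_seq_read() is needed afterwards.
 */
#include <linux/skbuff.h>

static unsigned int example_count_byte(struct sk_buff *skb, u8 c)
{
	struct skb_seq_state st;
	unsigned int consumed = 0, count = 0, len, i;
	const u8 *data;

	skb_prepare_seq_read(skb, 0, skb->len, &st);

	while ((len = skb_seq_read(consumed, &data, &st)) != 0) {
		for (i = 0; i < len; i++)
			if (data[i] == c)
				count++;
		consumed += len;
	}
	return count;
}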
2729 2729
2730 #define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb)) 2730 #define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb))
2731 2731
2732 static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text, 2732 static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text,
2733 struct ts_config *conf, 2733 struct ts_config *conf,
2734 struct ts_state *state) 2734 struct ts_state *state)
2735 { 2735 {
2736 return skb_seq_read(offset, text, TS_SKB_CB(state)); 2736 return skb_seq_read(offset, text, TS_SKB_CB(state));
2737 } 2737 }
2738 2738
2739 static void skb_ts_finish(struct ts_config *conf, struct ts_state *state) 2739 static void skb_ts_finish(struct ts_config *conf, struct ts_state *state)
2740 { 2740 {
2741 skb_abort_seq_read(TS_SKB_CB(state)); 2741 skb_abort_seq_read(TS_SKB_CB(state));
2742 } 2742 }
2743 2743
2744 /** 2744 /**
2745 * skb_find_text - Find a text pattern in skb data 2745 * skb_find_text - Find a text pattern in skb data
2746 * @skb: the buffer to look in 2746 * @skb: the buffer to look in
2747 * @from: search offset 2747 * @from: search offset
2748 * @to: search limit 2748 * @to: search limit
2749 * @config: textsearch configuration 2749 * @config: textsearch configuration
2750 * @state: uninitialized textsearch state variable 2750 * @state: uninitialized textsearch state variable
2751 * 2751 *
2752 * Finds a pattern in the skb data according to the specified 2752 * Finds a pattern in the skb data according to the specified
2753 * textsearch configuration. Use textsearch_next() to retrieve 2753 * textsearch configuration. Use textsearch_next() to retrieve
2754 * subsequent occurrences of the pattern. Returns the offset 2754 * subsequent occurrences of the pattern. Returns the offset
2755 * to the first occurrence or UINT_MAX if no match was found. 2755 * to the first occurrence or UINT_MAX if no match was found.
2756 */ 2756 */
2757 unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, 2757 unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
2758 unsigned int to, struct ts_config *config, 2758 unsigned int to, struct ts_config *config,
2759 struct ts_state *state) 2759 struct ts_state *state)
2760 { 2760 {
2761 unsigned int ret; 2761 unsigned int ret;
2762 2762
2763 config->get_next_block = skb_ts_get_next_block; 2763 config->get_next_block = skb_ts_get_next_block;
2764 config->finish = skb_ts_finish; 2764 config->finish = skb_ts_finish;
2765 2765
2766 skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state)); 2766 skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state));
2767 2767
2768 ret = textsearch_find(config, state); 2768 ret = textsearch_find(config, state);
2769 return (ret <= to - from ? ret : UINT_MAX); 2769 return (ret <= to - from ? ret : UINT_MAX);
2770 } 2770 }
2771 EXPORT_SYMBOL(skb_find_text); 2771 EXPORT_SYMBOL(skb_find_text);
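/* Illustrative sketch (editorial, not part of this file): searching an skb
 * for a fixed pattern with the textsearch infrastructure; the algorithm name
 * and pattern below are arbitrary examples.
 *
 *	struct ts_config *conf;
 *	struct ts_state state;
 *	unsigned int pos;
 *
 *	conf = textsearch_prepare("kmp", "HTTP", 4, GFP_KERNEL, TS_AUTOLOAD);
 *	if (IS_ERR(conf))
 *		return PTR_ERR(conf);
 *	pos = skb_find_text(skb, 0, skb->len, conf, &state);
 *	textsearch_destroy(conf);
 *	// pos == UINT_MAX means the pattern was not found
 */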
2772 2772
2773 /** 2773 /**
2774 * skb_append_datato_frags - append the user data to a skb 2774 * skb_append_datato_frags - append the user data to a skb
2775 * @sk: sock structure 2775 * @sk: sock structure
2776 * @skb: skb structure to be appended with user data. 2776 * @skb: skb structure to be appended with user data.
2777 * @getfrag: call back function to be used for getting the user data 2777 * @getfrag: call back function to be used for getting the user data
2778 * @from: pointer to user message iov 2778 * @from: pointer to user message iov
2779 * @length: length of the iov message 2779 * @length: length of the iov message
2780 * 2780 *
2781 * Description: This procedure appends the user data to the fragment part 2781 * Description: This procedure appends the user data to the fragment part
2782 * of the skb. If any page allocation fails, it returns -ENOMEM. 2782 * of the skb. If any page allocation fails, it returns -ENOMEM.
2783 */ 2783 */
2784 int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, 2784 int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
2785 int (*getfrag)(void *from, char *to, int offset, 2785 int (*getfrag)(void *from, char *to, int offset,
2786 int len, int odd, struct sk_buff *skb), 2786 int len, int odd, struct sk_buff *skb),
2787 void *from, int length) 2787 void *from, int length)
2788 { 2788 {
2789 int frg_cnt = skb_shinfo(skb)->nr_frags; 2789 int frg_cnt = skb_shinfo(skb)->nr_frags;
2790 int copy; 2790 int copy;
2791 int offset = 0; 2791 int offset = 0;
2792 int ret; 2792 int ret;
2793 struct page_frag *pfrag = &current->task_frag; 2793 struct page_frag *pfrag = &current->task_frag;
2794 2794
2795 do { 2795 do {
2796 /* Return error if we don't have space for new frag */ 2796 /* Return error if we don't have space for new frag */
2797 if (frg_cnt >= MAX_SKB_FRAGS) 2797 if (frg_cnt >= MAX_SKB_FRAGS)
2798 return -EMSGSIZE; 2798 return -EMSGSIZE;
2799 2799
2800 if (!sk_page_frag_refill(sk, pfrag)) 2800 if (!sk_page_frag_refill(sk, pfrag))
2801 return -ENOMEM; 2801 return -ENOMEM;
2802 2802
2803 /* copy the user data to page */ 2803 /* copy the user data to page */
2804 copy = min_t(int, length, pfrag->size - pfrag->offset); 2804 copy = min_t(int, length, pfrag->size - pfrag->offset);
2805 2805
2806 ret = getfrag(from, page_address(pfrag->page) + pfrag->offset, 2806 ret = getfrag(from, page_address(pfrag->page) + pfrag->offset,
2807 offset, copy, 0, skb); 2807 offset, copy, 0, skb);
2808 if (ret < 0) 2808 if (ret < 0)
2809 return -EFAULT; 2809 return -EFAULT;
2810 2810
2811 /* copy was successful so update the size parameters */ 2811 /* copy was successful so update the size parameters */
2812 skb_fill_page_desc(skb, frg_cnt, pfrag->page, pfrag->offset, 2812 skb_fill_page_desc(skb, frg_cnt, pfrag->page, pfrag->offset,
2813 copy); 2813 copy);
2814 frg_cnt++; 2814 frg_cnt++;
2815 pfrag->offset += copy; 2815 pfrag->offset += copy;
2816 get_page(pfrag->page); 2816 get_page(pfrag->page);
2817 2817
2818 skb->truesize += copy; 2818 skb->truesize += copy;
2819 atomic_add(copy, &sk->sk_wmem_alloc); 2819 atomic_add(copy, &sk->sk_wmem_alloc);
2820 skb->len += copy; 2820 skb->len += copy;
2821 skb->data_len += copy; 2821 skb->data_len += copy;
2822 offset += copy; 2822 offset += copy;
2823 length -= copy; 2823 length -= copy;
2824 2824
2825 } while (length > 0); 2825 } while (length > 0);
2826 2826
2827 return 0; 2827 return 0;
2828 } 2828 }
2829 EXPORT_SYMBOL(skb_append_datato_frags); 2829 EXPORT_SYMBOL(skb_append_datato_frags);
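/* Illustrative sketch (editorial, not part of this file): the getfrag
 * callback contract used above, shown with a hypothetical helper that copies
 * from a plain kernel buffer; real callers pass their own callback and
 * iov/message state instead of buf and buflen.
 *
 *	static int example_getfrag(void *from, char *to, int offset, int len,
 *				   int odd, struct sk_buff *skb)
 *	{
 *		memcpy(to, (char *)from + offset, len);
 *		return 0;
 *	}
 *
 *	err = skb_append_datato_frags(sk, skb, example_getfrag, buf, buflen);
 */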
2830 2830
2831 /** 2831 /**
2832 * skb_pull_rcsum - pull skb and update receive checksum 2832 * skb_pull_rcsum - pull skb and update receive checksum
2833 * @skb: buffer to update 2833 * @skb: buffer to update
2834 * @len: length of data pulled 2834 * @len: length of data pulled
2835 * 2835 *
2836 * This function performs an skb_pull on the packet and updates 2836 * This function performs an skb_pull on the packet and updates
2837 * the CHECKSUM_COMPLETE checksum. It should be used on 2837 * the CHECKSUM_COMPLETE checksum. It should be used on
2838 * receive path processing instead of skb_pull unless you know 2838 * receive path processing instead of skb_pull unless you know
2839 * that the checksum difference is zero (e.g., a valid IP header) 2839 * that the checksum difference is zero (e.g., a valid IP header)
2840 * or you are setting ip_summed to CHECKSUM_NONE. 2840 * or you are setting ip_summed to CHECKSUM_NONE.
2841 */ 2841 */
2842 unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) 2842 unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
2843 { 2843 {
2844 BUG_ON(len > skb->len); 2844 BUG_ON(len > skb->len);
2845 skb->len -= len; 2845 skb->len -= len;
2846 BUG_ON(skb->len < skb->data_len); 2846 BUG_ON(skb->len < skb->data_len);
2847 skb_postpull_rcsum(skb, skb->data, len); 2847 skb_postpull_rcsum(skb, skb->data, len);
2848 return skb->data += len; 2848 return skb->data += len;
2849 } 2849 }
2850 EXPORT_SYMBOL_GPL(skb_pull_rcsum); 2850 EXPORT_SYMBOL_GPL(skb_pull_rcsum);
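/* Illustrative sketch (editorial, not part of this file): stripping an
 * encapsulation header on the receive path while keeping a
 * CHECKSUM_COMPLETE value consistent, e.g. a 4-byte VLAN tag:
 *
 *	skb_pull_rcsum(skb, VLAN_HLEN);
 */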
2851 2851
2852 /** 2852 /**
2853 * skb_segment - Perform protocol segmentation on skb. 2853 * skb_segment - Perform protocol segmentation on skb.
2854 * @head_skb: buffer to segment 2854 * @head_skb: buffer to segment
2855 * @features: features for the output path (see dev->features) 2855 * @features: features for the output path (see dev->features)
2856 * 2856 *
2857 * This function performs segmentation on the given skb. It returns 2857 * This function performs segmentation on the given skb. It returns
2858 * a pointer to the first in a list of new skbs for the segments. 2858 * a pointer to the first in a list of new skbs for the segments.
2859 * In case of error it returns ERR_PTR(err). 2859 * In case of error it returns ERR_PTR(err).
2860 */ 2860 */
2861 struct sk_buff *skb_segment(struct sk_buff *head_skb, 2861 struct sk_buff *skb_segment(struct sk_buff *head_skb,
2862 netdev_features_t features) 2862 netdev_features_t features)
2863 { 2863 {
2864 struct sk_buff *segs = NULL; 2864 struct sk_buff *segs = NULL;
2865 struct sk_buff *tail = NULL; 2865 struct sk_buff *tail = NULL;
2866 struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list; 2866 struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
2867 skb_frag_t *frag = skb_shinfo(head_skb)->frags; 2867 skb_frag_t *frag = skb_shinfo(head_skb)->frags;
2868 unsigned int mss = skb_shinfo(head_skb)->gso_size; 2868 unsigned int mss = skb_shinfo(head_skb)->gso_size;
2869 unsigned int doffset = head_skb->data - skb_mac_header(head_skb); 2869 unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
2870 struct sk_buff *frag_skb = head_skb; 2870 struct sk_buff *frag_skb = head_skb;
2871 unsigned int offset = doffset; 2871 unsigned int offset = doffset;
2872 unsigned int tnl_hlen = skb_tnl_header_len(head_skb); 2872 unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
2873 unsigned int headroom; 2873 unsigned int headroom;
2874 unsigned int len; 2874 unsigned int len;
2875 __be16 proto; 2875 __be16 proto;
2876 bool csum; 2876 bool csum;
2877 int sg = !!(features & NETIF_F_SG); 2877 int sg = !!(features & NETIF_F_SG);
2878 int nfrags = skb_shinfo(head_skb)->nr_frags; 2878 int nfrags = skb_shinfo(head_skb)->nr_frags;
2879 int err = -ENOMEM; 2879 int err = -ENOMEM;
2880 int i = 0; 2880 int i = 0;
2881 int pos; 2881 int pos;
2882 int dummy; 2882 int dummy;
2883 2883
2884 proto = skb_network_protocol(head_skb, &dummy); 2884 proto = skb_network_protocol(head_skb, &dummy);
2885 if (unlikely(!proto)) 2885 if (unlikely(!proto))
2886 return ERR_PTR(-EINVAL); 2886 return ERR_PTR(-EINVAL);
2887 2887
2888 csum = !!can_checksum_protocol(features, proto); 2888 csum = !!can_checksum_protocol(features, proto);
2889 __skb_push(head_skb, doffset); 2889 __skb_push(head_skb, doffset);
2890 headroom = skb_headroom(head_skb); 2890 headroom = skb_headroom(head_skb);
2891 pos = skb_headlen(head_skb); 2891 pos = skb_headlen(head_skb);
2892 2892
2893 do { 2893 do {
2894 struct sk_buff *nskb; 2894 struct sk_buff *nskb;
2895 skb_frag_t *nskb_frag; 2895 skb_frag_t *nskb_frag;
2896 int hsize; 2896 int hsize;
2897 int size; 2897 int size;
2898 2898
2899 len = head_skb->len - offset; 2899 len = head_skb->len - offset;
2900 if (len > mss) 2900 if (len > mss)
2901 len = mss; 2901 len = mss;
2902 2902
2903 hsize = skb_headlen(head_skb) - offset; 2903 hsize = skb_headlen(head_skb) - offset;
2904 if (hsize < 0) 2904 if (hsize < 0)
2905 hsize = 0; 2905 hsize = 0;
2906 if (hsize > len || !sg) 2906 if (hsize > len || !sg)
2907 hsize = len; 2907 hsize = len;
2908 2908
2909 if (!hsize && i >= nfrags && skb_headlen(list_skb) && 2909 if (!hsize && i >= nfrags && skb_headlen(list_skb) &&
2910 (skb_headlen(list_skb) == len || sg)) { 2910 (skb_headlen(list_skb) == len || sg)) {
2911 BUG_ON(skb_headlen(list_skb) > len); 2911 BUG_ON(skb_headlen(list_skb) > len);
2912 2912
2913 i = 0; 2913 i = 0;
2914 nfrags = skb_shinfo(list_skb)->nr_frags; 2914 nfrags = skb_shinfo(list_skb)->nr_frags;
2915 frag = skb_shinfo(list_skb)->frags; 2915 frag = skb_shinfo(list_skb)->frags;
2916 frag_skb = list_skb; 2916 frag_skb = list_skb;
2917 pos += skb_headlen(list_skb); 2917 pos += skb_headlen(list_skb);
2918 2918
2919 while (pos < offset + len) { 2919 while (pos < offset + len) {
2920 BUG_ON(i >= nfrags); 2920 BUG_ON(i >= nfrags);
2921 2921
2922 size = skb_frag_size(frag); 2922 size = skb_frag_size(frag);
2923 if (pos + size > offset + len) 2923 if (pos + size > offset + len)
2924 break; 2924 break;
2925 2925
2926 i++; 2926 i++;
2927 pos += size; 2927 pos += size;
2928 frag++; 2928 frag++;
2929 } 2929 }
2930 2930
2931 nskb = skb_clone(list_skb, GFP_ATOMIC); 2931 nskb = skb_clone(list_skb, GFP_ATOMIC);
2932 list_skb = list_skb->next; 2932 list_skb = list_skb->next;
2933 2933
2934 if (unlikely(!nskb)) 2934 if (unlikely(!nskb))
2935 goto err; 2935 goto err;
2936 2936
2937 if (unlikely(pskb_trim(nskb, len))) { 2937 if (unlikely(pskb_trim(nskb, len))) {
2938 kfree_skb(nskb); 2938 kfree_skb(nskb);
2939 goto err; 2939 goto err;
2940 } 2940 }
2941 2941
2942 hsize = skb_end_offset(nskb); 2942 hsize = skb_end_offset(nskb);
2943 if (skb_cow_head(nskb, doffset + headroom)) { 2943 if (skb_cow_head(nskb, doffset + headroom)) {
2944 kfree_skb(nskb); 2944 kfree_skb(nskb);
2945 goto err; 2945 goto err;
2946 } 2946 }
2947 2947
2948 nskb->truesize += skb_end_offset(nskb) - hsize; 2948 nskb->truesize += skb_end_offset(nskb) - hsize;
2949 skb_release_head_state(nskb); 2949 skb_release_head_state(nskb);
2950 __skb_push(nskb, doffset); 2950 __skb_push(nskb, doffset);
2951 } else { 2951 } else {
2952 nskb = __alloc_skb(hsize + doffset + headroom, 2952 nskb = __alloc_skb(hsize + doffset + headroom,
2953 GFP_ATOMIC, skb_alloc_rx_flag(head_skb), 2953 GFP_ATOMIC, skb_alloc_rx_flag(head_skb),
2954 NUMA_NO_NODE); 2954 NUMA_NO_NODE);
2955 2955
2956 if (unlikely(!nskb)) 2956 if (unlikely(!nskb))
2957 goto err; 2957 goto err;
2958 2958
2959 skb_reserve(nskb, headroom); 2959 skb_reserve(nskb, headroom);
2960 __skb_put(nskb, doffset); 2960 __skb_put(nskb, doffset);
2961 } 2961 }
2962 2962
2963 if (segs) 2963 if (segs)
2964 tail->next = nskb; 2964 tail->next = nskb;
2965 else 2965 else
2966 segs = nskb; 2966 segs = nskb;
2967 tail = nskb; 2967 tail = nskb;
2968 2968
2969 __copy_skb_header(nskb, head_skb); 2969 __copy_skb_header(nskb, head_skb);
2970 nskb->mac_len = head_skb->mac_len; 2970 nskb->mac_len = head_skb->mac_len;
2971 2971
2972 skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom); 2972 skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom);
2973 2973
2974 skb_copy_from_linear_data_offset(head_skb, -tnl_hlen, 2974 skb_copy_from_linear_data_offset(head_skb, -tnl_hlen,
2975 nskb->data - tnl_hlen, 2975 nskb->data - tnl_hlen,
2976 doffset + tnl_hlen); 2976 doffset + tnl_hlen);
2977 2977
2978 if (nskb->len == len + doffset) 2978 if (nskb->len == len + doffset)
2979 goto perform_csum_check; 2979 goto perform_csum_check;
2980 2980
2981 if (!sg) { 2981 if (!sg) {
2982 nskb->ip_summed = CHECKSUM_NONE; 2982 nskb->ip_summed = CHECKSUM_NONE;
2983 nskb->csum = skb_copy_and_csum_bits(head_skb, offset, 2983 nskb->csum = skb_copy_and_csum_bits(head_skb, offset,
2984 skb_put(nskb, len), 2984 skb_put(nskb, len),
2985 len, 0); 2985 len, 0);
2986 continue; 2986 continue;
2987 } 2987 }
2988 2988
2989 nskb_frag = skb_shinfo(nskb)->frags; 2989 nskb_frag = skb_shinfo(nskb)->frags;
2990 2990
2991 skb_copy_from_linear_data_offset(head_skb, offset, 2991 skb_copy_from_linear_data_offset(head_skb, offset,
2992 skb_put(nskb, hsize), hsize); 2992 skb_put(nskb, hsize), hsize);
2993 2993
2994 skb_shinfo(nskb)->tx_flags = skb_shinfo(head_skb)->tx_flags & 2994 skb_shinfo(nskb)->tx_flags = skb_shinfo(head_skb)->tx_flags &
2995 SKBTX_SHARED_FRAG; 2995 SKBTX_SHARED_FRAG;
2996 2996
2997 while (pos < offset + len) { 2997 while (pos < offset + len) {
2998 if (i >= nfrags) { 2998 if (i >= nfrags) {
2999 BUG_ON(skb_headlen(list_skb)); 2999 BUG_ON(skb_headlen(list_skb));
3000 3000
3001 i = 0; 3001 i = 0;
3002 nfrags = skb_shinfo(list_skb)->nr_frags; 3002 nfrags = skb_shinfo(list_skb)->nr_frags;
3003 frag = skb_shinfo(list_skb)->frags; 3003 frag = skb_shinfo(list_skb)->frags;
3004 frag_skb = list_skb; 3004 frag_skb = list_skb;
3005 3005
3006 BUG_ON(!nfrags); 3006 BUG_ON(!nfrags);
3007 3007
3008 list_skb = list_skb->next; 3008 list_skb = list_skb->next;
3009 } 3009 }
3010 3010
3011 if (unlikely(skb_shinfo(nskb)->nr_frags >= 3011 if (unlikely(skb_shinfo(nskb)->nr_frags >=
3012 MAX_SKB_FRAGS)) { 3012 MAX_SKB_FRAGS)) {
3013 net_warn_ratelimited( 3013 net_warn_ratelimited(
3014 "skb_segment: too many frags: %u %u\n", 3014 "skb_segment: too many frags: %u %u\n",
3015 pos, mss); 3015 pos, mss);
3016 goto err; 3016 goto err;
3017 } 3017 }
3018 3018
3019 if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC))) 3019 if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
3020 goto err; 3020 goto err;
3021 3021
3022 *nskb_frag = *frag; 3022 *nskb_frag = *frag;
3023 __skb_frag_ref(nskb_frag); 3023 __skb_frag_ref(nskb_frag);
3024 size = skb_frag_size(nskb_frag); 3024 size = skb_frag_size(nskb_frag);
3025 3025
3026 if (pos < offset) { 3026 if (pos < offset) {
3027 nskb_frag->page_offset += offset - pos; 3027 nskb_frag->page_offset += offset - pos;
3028 skb_frag_size_sub(nskb_frag, offset - pos); 3028 skb_frag_size_sub(nskb_frag, offset - pos);
3029 } 3029 }
3030 3030
3031 skb_shinfo(nskb)->nr_frags++; 3031 skb_shinfo(nskb)->nr_frags++;
3032 3032
3033 if (pos + size <= offset + len) { 3033 if (pos + size <= offset + len) {
3034 i++; 3034 i++;
3035 frag++; 3035 frag++;
3036 pos += size; 3036 pos += size;
3037 } else { 3037 } else {
3038 skb_frag_size_sub(nskb_frag, pos + size - (offset + len)); 3038 skb_frag_size_sub(nskb_frag, pos + size - (offset + len));
3039 goto skip_fraglist; 3039 goto skip_fraglist;
3040 } 3040 }
3041 3041
3042 nskb_frag++; 3042 nskb_frag++;
3043 } 3043 }
3044 3044
3045 skip_fraglist: 3045 skip_fraglist:
3046 nskb->data_len = len - hsize; 3046 nskb->data_len = len - hsize;
3047 nskb->len += nskb->data_len; 3047 nskb->len += nskb->data_len;
3048 nskb->truesize += nskb->data_len; 3048 nskb->truesize += nskb->data_len;
3049 3049
3050 perform_csum_check: 3050 perform_csum_check:
3051 if (!csum) { 3051 if (!csum) {
3052 nskb->csum = skb_checksum(nskb, doffset, 3052 nskb->csum = skb_checksum(nskb, doffset,
3053 nskb->len - doffset, 0); 3053 nskb->len - doffset, 0);
3054 nskb->ip_summed = CHECKSUM_NONE; 3054 nskb->ip_summed = CHECKSUM_NONE;
3055 } 3055 }
3056 } while ((offset += len) < head_skb->len); 3056 } while ((offset += len) < head_skb->len);
3057 3057
3058 return segs; 3058 return segs;
3059 3059
3060 err: 3060 err:
3061 kfree_skb_list(segs); 3061 kfree_skb_list(segs);
3062 return ERR_PTR(err); 3062 return ERR_PTR(err);
3063 } 3063 }
3064 EXPORT_SYMBOL_GPL(skb_segment); 3064 EXPORT_SYMBOL_GPL(skb_segment);
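/* Illustrative sketch (editorial, not part of this file): software
 * segmentation fallback style usage; passing netif_skb_features() here is an
 * assumption, real callers receive the features value from the GSO layer.
 *
 *	struct sk_buff *segs, *nskb;
 *
 *	segs = skb_segment(skb, netif_skb_features(skb));
 *	if (IS_ERR(segs))
 *		return PTR_ERR(segs);
 *	while (segs) {
 *		nskb = segs;
 *		segs = segs->next;
 *		nskb->next = NULL;
 *		// hand nskb to the transmit path
 *	}
 */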
3065 3065
3066 int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) 3066 int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
3067 { 3067 {
3068 struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb); 3068 struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb);
3069 unsigned int offset = skb_gro_offset(skb); 3069 unsigned int offset = skb_gro_offset(skb);
3070 unsigned int headlen = skb_headlen(skb); 3070 unsigned int headlen = skb_headlen(skb);
3071 struct sk_buff *nskb, *lp, *p = *head; 3071 struct sk_buff *nskb, *lp, *p = *head;
3072 unsigned int len = skb_gro_len(skb); 3072 unsigned int len = skb_gro_len(skb);
3073 unsigned int delta_truesize; 3073 unsigned int delta_truesize;
3074 unsigned int headroom; 3074 unsigned int headroom;
3075 3075
3076 if (unlikely(p->len + len >= 65536)) 3076 if (unlikely(p->len + len >= 65536))
3077 return -E2BIG; 3077 return -E2BIG;
3078 3078
3079 lp = NAPI_GRO_CB(p)->last ?: p; 3079 lp = NAPI_GRO_CB(p)->last;
3080 pinfo = skb_shinfo(lp); 3080 pinfo = skb_shinfo(lp);
3081 3081
3082 if (headlen <= offset) { 3082 if (headlen <= offset) {
3083 skb_frag_t *frag; 3083 skb_frag_t *frag;
3084 skb_frag_t *frag2; 3084 skb_frag_t *frag2;
3085 int i = skbinfo->nr_frags; 3085 int i = skbinfo->nr_frags;
3086 int nr_frags = pinfo->nr_frags + i; 3086 int nr_frags = pinfo->nr_frags + i;
3087 3087
3088 if (nr_frags > MAX_SKB_FRAGS) 3088 if (nr_frags > MAX_SKB_FRAGS)
3089 goto merge; 3089 goto merge;
3090 3090
3091 offset -= headlen; 3091 offset -= headlen;
3092 pinfo->nr_frags = nr_frags; 3092 pinfo->nr_frags = nr_frags;
3093 skbinfo->nr_frags = 0; 3093 skbinfo->nr_frags = 0;
3094 3094
3095 frag = pinfo->frags + nr_frags; 3095 frag = pinfo->frags + nr_frags;
3096 frag2 = skbinfo->frags + i; 3096 frag2 = skbinfo->frags + i;
3097 do { 3097 do {
3098 *--frag = *--frag2; 3098 *--frag = *--frag2;
3099 } while (--i); 3099 } while (--i);
3100 3100
3101 frag->page_offset += offset; 3101 frag->page_offset += offset;
3102 skb_frag_size_sub(frag, offset); 3102 skb_frag_size_sub(frag, offset);
3103 3103
3104 /* all fragments truesize : remove (head size + sk_buff) */ 3104 /* all fragments truesize : remove (head size + sk_buff) */
3105 delta_truesize = skb->truesize - 3105 delta_truesize = skb->truesize -
3106 SKB_TRUESIZE(skb_end_offset(skb)); 3106 SKB_TRUESIZE(skb_end_offset(skb));
3107 3107
3108 skb->truesize -= skb->data_len; 3108 skb->truesize -= skb->data_len;
3109 skb->len -= skb->data_len; 3109 skb->len -= skb->data_len;
3110 skb->data_len = 0; 3110 skb->data_len = 0;
3111 3111
3112 NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE; 3112 NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE;
3113 goto done; 3113 goto done;
3114 } else if (skb->head_frag) { 3114 } else if (skb->head_frag) {
3115 int nr_frags = pinfo->nr_frags; 3115 int nr_frags = pinfo->nr_frags;
3116 skb_frag_t *frag = pinfo->frags + nr_frags; 3116 skb_frag_t *frag = pinfo->frags + nr_frags;
3117 struct page *page = virt_to_head_page(skb->head); 3117 struct page *page = virt_to_head_page(skb->head);
3118 unsigned int first_size = headlen - offset; 3118 unsigned int first_size = headlen - offset;
3119 unsigned int first_offset; 3119 unsigned int first_offset;
3120 3120
3121 if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS) 3121 if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS)
3122 goto merge; 3122 goto merge;
3123 3123
3124 first_offset = skb->data - 3124 first_offset = skb->data -
3125 (unsigned char *)page_address(page) + 3125 (unsigned char *)page_address(page) +
3126 offset; 3126 offset;
3127 3127
3128 pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags; 3128 pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags;
3129 3129
3130 frag->page.p = page; 3130 frag->page.p = page;
3131 frag->page_offset = first_offset; 3131 frag->page_offset = first_offset;
3132 skb_frag_size_set(frag, first_size); 3132 skb_frag_size_set(frag, first_size);
3133 3133
3134 memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags); 3134 memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
3135 /* We don't need to clear skbinfo->nr_frags here */ 3135 /* We don't need to clear skbinfo->nr_frags here */
3136 3136
3137 delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff)); 3137 delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
3138 NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD; 3138 NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
3139 goto done; 3139 goto done;
3140 } 3140 }
3141 if (pinfo->frag_list) 3141 if (pinfo->frag_list)
3142 goto merge; 3142 goto merge;
3143 if (skb_gro_len(p) != pinfo->gso_size) 3143 if (skb_gro_len(p) != pinfo->gso_size)
3144 return -E2BIG; 3144 return -E2BIG;
3145 3145
3146 headroom = skb_headroom(p); 3146 headroom = skb_headroom(p);
3147 nskb = alloc_skb(headroom + skb_gro_offset(p), GFP_ATOMIC); 3147 nskb = alloc_skb(headroom + skb_gro_offset(p), GFP_ATOMIC);
3148 if (unlikely(!nskb)) 3148 if (unlikely(!nskb))
3149 return -ENOMEM; 3149 return -ENOMEM;
3150 3150
3151 __copy_skb_header(nskb, p); 3151 __copy_skb_header(nskb, p);
3152 nskb->mac_len = p->mac_len; 3152 nskb->mac_len = p->mac_len;
3153 3153
3154 skb_reserve(nskb, headroom); 3154 skb_reserve(nskb, headroom);
3155 __skb_put(nskb, skb_gro_offset(p)); 3155 __skb_put(nskb, skb_gro_offset(p));
3156 3156
3157 skb_set_mac_header(nskb, skb_mac_header(p) - p->data); 3157 skb_set_mac_header(nskb, skb_mac_header(p) - p->data);
3158 skb_set_network_header(nskb, skb_network_offset(p)); 3158 skb_set_network_header(nskb, skb_network_offset(p));
3159 skb_set_transport_header(nskb, skb_transport_offset(p)); 3159 skb_set_transport_header(nskb, skb_transport_offset(p));
3160 3160
3161 __skb_pull(p, skb_gro_offset(p)); 3161 __skb_pull(p, skb_gro_offset(p));
3162 memcpy(skb_mac_header(nskb), skb_mac_header(p), 3162 memcpy(skb_mac_header(nskb), skb_mac_header(p),
3163 p->data - skb_mac_header(p)); 3163 p->data - skb_mac_header(p));
3164 3164
3165 skb_shinfo(nskb)->frag_list = p; 3165 skb_shinfo(nskb)->frag_list = p;
3166 skb_shinfo(nskb)->gso_size = pinfo->gso_size; 3166 skb_shinfo(nskb)->gso_size = pinfo->gso_size;
3167 pinfo->gso_size = 0; 3167 pinfo->gso_size = 0;
3168 skb_header_release(p); 3168 skb_header_release(p);
3169 NAPI_GRO_CB(nskb)->last = p; 3169 NAPI_GRO_CB(nskb)->last = p;
3170 3170
3171 nskb->data_len += p->len; 3171 nskb->data_len += p->len;
3172 nskb->truesize += p->truesize; 3172 nskb->truesize += p->truesize;
3173 nskb->len += p->len; 3173 nskb->len += p->len;
3174 3174
3175 *head = nskb; 3175 *head = nskb;
3176 nskb->next = p->next; 3176 nskb->next = p->next;
3177 p->next = NULL; 3177 p->next = NULL;
3178 3178
3179 p = nskb; 3179 p = nskb;
3180 3180
3181 merge: 3181 merge:
3182 delta_truesize = skb->truesize; 3182 delta_truesize = skb->truesize;
3183 if (offset > headlen) { 3183 if (offset > headlen) {
3184 unsigned int eat = offset - headlen; 3184 unsigned int eat = offset - headlen;
3185 3185
3186 skbinfo->frags[0].page_offset += eat; 3186 skbinfo->frags[0].page_offset += eat;
3187 skb_frag_size_sub(&skbinfo->frags[0], eat); 3187 skb_frag_size_sub(&skbinfo->frags[0], eat);
3188 skb->data_len -= eat; 3188 skb->data_len -= eat;
3189 skb->len -= eat; 3189 skb->len -= eat;
3190 offset = headlen; 3190 offset = headlen;
3191 } 3191 }
3192 3192
3193 __skb_pull(skb, offset); 3193 __skb_pull(skb, offset);
3194 3194
3195 if (!NAPI_GRO_CB(p)->last) 3195 if (NAPI_GRO_CB(p)->last == p)
3196 skb_shinfo(p)->frag_list = skb; 3196 skb_shinfo(p)->frag_list = skb;
3197 else 3197 else
3198 NAPI_GRO_CB(p)->last->next = skb; 3198 NAPI_GRO_CB(p)->last->next = skb;
3199 NAPI_GRO_CB(p)->last = skb; 3199 NAPI_GRO_CB(p)->last = skb;
3200 skb_header_release(skb); 3200 skb_header_release(skb);
3201 lp = p; 3201 lp = p;
3202 3202
3203 done: 3203 done:
3204 NAPI_GRO_CB(p)->count++; 3204 NAPI_GRO_CB(p)->count++;
3205 p->data_len += len; 3205 p->data_len += len;
3206 p->truesize += delta_truesize; 3206 p->truesize += delta_truesize;
3207 p->len += len; 3207 p->len += len;
3208 if (lp != p) { 3208 if (lp != p) {
3209 lp->data_len += len; 3209 lp->data_len += len;
3210 lp->truesize += delta_truesize; 3210 lp->truesize += delta_truesize;
3211 lp->len += len; 3211 lp->len += len;
3212 } 3212 }
3213 NAPI_GRO_CB(skb)->same_flow = 1; 3213 NAPI_GRO_CB(skb)->same_flow = 1;
3214 return 0; 3214 return 0;
3215 } 3215 }
3216 EXPORT_SYMBOL_GPL(skb_gro_receive); 3216 EXPORT_SYMBOL_GPL(skb_gro_receive);
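/* Illustrative sketch (editorial, not part of this commit): a protocol
 * ->gro_receive callback that has located the matching flow entry @head in
 * the gro_list commonly tries to merge and, on failure, lets the flow be
 * flushed; the exact recovery policy is protocol specific and only sketched
 * here, and out_flush is a hypothetical label.
 *
 *	if (flush || skb_gro_receive(head, skb))
 *		goto out_flush;	// could not merge into the existing flow
 */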
3217 3217
3218 void __init skb_init(void) 3218 void __init skb_init(void)
3219 { 3219 {
3220 skbuff_head_cache = kmem_cache_create("skbuff_head_cache", 3220 skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
3221 sizeof(struct sk_buff), 3221 sizeof(struct sk_buff),
3222 0, 3222 0,
3223 SLAB_HWCACHE_ALIGN|SLAB_PANIC, 3223 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
3224 NULL); 3224 NULL);
3225 skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", 3225 skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
3226 (2*sizeof(struct sk_buff)) + 3226 (2*sizeof(struct sk_buff)) +
3227 sizeof(atomic_t), 3227 sizeof(atomic_t),
3228 0, 3228 0,
3229 SLAB_HWCACHE_ALIGN|SLAB_PANIC, 3229 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
3230 NULL); 3230 NULL);
3231 } 3231 }
3232 3232
3233 /** 3233 /**
3234 * skb_to_sgvec - Fill a scatter-gather list from a socket buffer 3234 * skb_to_sgvec - Fill a scatter-gather list from a socket buffer
3235 * @skb: Socket buffer containing the buffers to be mapped 3235 * @skb: Socket buffer containing the buffers to be mapped
3236 * @sg: The scatter-gather list to map into 3236 * @sg: The scatter-gather list to map into
3237 * @offset: The offset into the buffer's contents to start mapping 3237 * @offset: The offset into the buffer's contents to start mapping
3238 * @len: Length of buffer space to be mapped 3238 * @len: Length of buffer space to be mapped
3239 * 3239 *
3240 * Fill the specified scatter-gather list with mappings/pointers into a 3240 * Fill the specified scatter-gather list with mappings/pointers into a
3241 * region of the buffer space attached to a socket buffer. 3241 * region of the buffer space attached to a socket buffer.
3242 */ 3242 */
3243 static int 3243 static int
3244 __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) 3244 __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
3245 { 3245 {
3246 int start = skb_headlen(skb); 3246 int start = skb_headlen(skb);
3247 int i, copy = start - offset; 3247 int i, copy = start - offset;
3248 struct sk_buff *frag_iter; 3248 struct sk_buff *frag_iter;
3249 int elt = 0; 3249 int elt = 0;
3250 3250
3251 if (copy > 0) { 3251 if (copy > 0) {
3252 if (copy > len) 3252 if (copy > len)
3253 copy = len; 3253 copy = len;
3254 sg_set_buf(sg, skb->data + offset, copy); 3254 sg_set_buf(sg, skb->data + offset, copy);
3255 elt++; 3255 elt++;
3256 if ((len -= copy) == 0) 3256 if ((len -= copy) == 0)
3257 return elt; 3257 return elt;
3258 offset += copy; 3258 offset += copy;
3259 } 3259 }
3260 3260
3261 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 3261 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
3262 int end; 3262 int end;
3263 3263
3264 WARN_ON(start > offset + len); 3264 WARN_ON(start > offset + len);
3265 3265
3266 end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); 3266 end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
3267 if ((copy = end - offset) > 0) { 3267 if ((copy = end - offset) > 0) {
3268 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 3268 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
3269 3269
3270 if (copy > len) 3270 if (copy > len)
3271 copy = len; 3271 copy = len;
3272 sg_set_page(&sg[elt], skb_frag_page(frag), copy, 3272 sg_set_page(&sg[elt], skb_frag_page(frag), copy,
3273 frag->page_offset+offset-start); 3273 frag->page_offset+offset-start);
3274 elt++; 3274 elt++;
3275 if (!(len -= copy)) 3275 if (!(len -= copy))
3276 return elt; 3276 return elt;
3277 offset += copy; 3277 offset += copy;
3278 } 3278 }
3279 start = end; 3279 start = end;
3280 } 3280 }
3281 3281
3282 skb_walk_frags(skb, frag_iter) { 3282 skb_walk_frags(skb, frag_iter) {
3283 int end; 3283 int end;
3284 3284
3285 WARN_ON(start > offset + len); 3285 WARN_ON(start > offset + len);
3286 3286
3287 end = start + frag_iter->len; 3287 end = start + frag_iter->len;
3288 if ((copy = end - offset) > 0) { 3288 if ((copy = end - offset) > 0) {
3289 if (copy > len) 3289 if (copy > len)
3290 copy = len; 3290 copy = len;
3291 elt += __skb_to_sgvec(frag_iter, sg+elt, offset - start, 3291 elt += __skb_to_sgvec(frag_iter, sg+elt, offset - start,
3292 copy); 3292 copy);
3293 if ((len -= copy) == 0) 3293 if ((len -= copy) == 0)
3294 return elt; 3294 return elt;
3295 offset += copy; 3295 offset += copy;
3296 } 3296 }
3297 start = end; 3297 start = end;
3298 } 3298 }
3299 BUG_ON(len); 3299 BUG_ON(len);
3300 return elt; 3300 return elt;
3301 } 3301 }
3302 3302
3303 int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) 3303 int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
3304 { 3304 {
3305 int nsg = __skb_to_sgvec(skb, sg, offset, len); 3305 int nsg = __skb_to_sgvec(skb, sg, offset, len);
3306 3306
3307 sg_mark_end(&sg[nsg - 1]); 3307 sg_mark_end(&sg[nsg - 1]);
3308 3308
3309 return nsg; 3309 return nsg;
3310 } 3310 }
3311 EXPORT_SYMBOL_GPL(skb_to_sgvec); 3311 EXPORT_SYMBOL_GPL(skb_to_sgvec);
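/* Illustrative sketch (editorial, not part of this file): mapping a linear
 * plus paged skb into a scatterlist, assuming the buffer has no frag_list so
 * MAX_SKB_FRAGS + 1 entries are enough.
 *
 *	struct scatterlist sg[MAX_SKB_FRAGS + 1];
 *	int nsg;
 *
 *	sg_init_table(sg, ARRAY_SIZE(sg));
 *	nsg = skb_to_sgvec(skb, sg, 0, skb->len);
 *	// sg[0..nsg-1] now describe the skb payload
 */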
3312 3312
3313 /** 3313 /**
3314 * skb_cow_data - Check that a socket buffer's data buffers are writable 3314 * skb_cow_data - Check that a socket buffer's data buffers are writable
3315 * @skb: The socket buffer to check. 3315 * @skb: The socket buffer to check.
3316 * @tailbits: Amount of trailing space to be added 3316 * @tailbits: Amount of trailing space to be added
3317 * @trailer: Returned pointer to the skb where the @tailbits space begins 3317 * @trailer: Returned pointer to the skb where the @tailbits space begins
3318 * 3318 *
3319 * Make sure that the data buffers attached to a socket buffer are 3319 * Make sure that the data buffers attached to a socket buffer are
3320 * writable. If they are not, private copies are made of the data buffers 3320 * writable. If they are not, private copies are made of the data buffers
3321 * and the socket buffer is set to use these instead. 3321 * and the socket buffer is set to use these instead.
3322 * 3322 *
3323 * If @tailbits is given, make sure that there is space to write @tailbits 3323 * If @tailbits is given, make sure that there is space to write @tailbits
3324 * bytes of data beyond current end of socket buffer. @trailer will be 3324 * bytes of data beyond current end of socket buffer. @trailer will be
3325 * set to point to the skb in which this space begins. 3325 * set to point to the skb in which this space begins.
3326 * 3326 *
3327 * The number of scatterlist elements required to completely map the 3327 * The number of scatterlist elements required to completely map the
3328 * COW'd and extended socket buffer will be returned. 3328 * COW'd and extended socket buffer will be returned.
3329 */ 3329 */
3330 int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) 3330 int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
3331 { 3331 {
3332 int copyflag; 3332 int copyflag;
3333 int elt; 3333 int elt;
3334 struct sk_buff *skb1, **skb_p; 3334 struct sk_buff *skb1, **skb_p;
3335 3335
3336 /* If skb is cloned or its head is paged, reallocate 3336 /* If skb is cloned or its head is paged, reallocate
3337 * head pulling out all the pages (pages are considered not writable 3337 * head pulling out all the pages (pages are considered not writable
3338 * at the moment even if they are anonymous). 3338 * at the moment even if they are anonymous).
3339 */ 3339 */
3340 if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) && 3340 if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
3341 __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL) 3341 __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
3342 return -ENOMEM; 3342 return -ENOMEM;
3343 3343
3344 /* Easy case. Most of packets will go this way. */ 3344 /* Easy case. Most of packets will go this way. */
3345 if (!skb_has_frag_list(skb)) { 3345 if (!skb_has_frag_list(skb)) {
3346 /* A bit of trouble: not enough space for the trailer. 3346 /* A bit of trouble: not enough space for the trailer.
3347 * This should not happen when the stack is tuned to generate 3347 * This should not happen when the stack is tuned to generate
3348 * good frames. OK, on a miss we reallocate and reserve even more 3348 * good frames. OK, on a miss we reallocate and reserve even more
3349 * space; 128 bytes is fair. */ 3349 * space; 128 bytes is fair. */
3350 3350
3351 if (skb_tailroom(skb) < tailbits && 3351 if (skb_tailroom(skb) < tailbits &&
3352 pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC)) 3352 pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC))
3353 return -ENOMEM; 3353 return -ENOMEM;
3354 3354
3355 /* Voila! */ 3355 /* Voila! */
3356 *trailer = skb; 3356 *trailer = skb;
3357 return 1; 3357 return 1;
3358 } 3358 }
3359 3359
3360 /* Misery. We are in trouble, going to mince fragments... */ 3360 /* Misery. We are in trouble, going to mince fragments... */
3361 3361
3362 elt = 1; 3362 elt = 1;
3363 skb_p = &skb_shinfo(skb)->frag_list; 3363 skb_p = &skb_shinfo(skb)->frag_list;
3364 copyflag = 0; 3364 copyflag = 0;
3365 3365
3366 while ((skb1 = *skb_p) != NULL) { 3366 while ((skb1 = *skb_p) != NULL) {
3367 int ntail = 0; 3367 int ntail = 0;
3368 3368
3369 /* The fragment is partially pulled by someone; 3369 /* The fragment is partially pulled by someone;
3370 * this can happen on input. Copy it and everything 3370 * this can happen on input. Copy it and everything
3371 * after it. */ 3371 * after it. */
3372 3372
3373 if (skb_shared(skb1)) 3373 if (skb_shared(skb1))
3374 copyflag = 1; 3374 copyflag = 1;
3375 3375
3376 /* If the skb is the last, worry about trailer. */ 3376 /* If the skb is the last, worry about trailer. */
3377 3377
3378 if (skb1->next == NULL && tailbits) { 3378 if (skb1->next == NULL && tailbits) {
3379 if (skb_shinfo(skb1)->nr_frags || 3379 if (skb_shinfo(skb1)->nr_frags ||
3380 skb_has_frag_list(skb1) || 3380 skb_has_frag_list(skb1) ||
3381 skb_tailroom(skb1) < tailbits) 3381 skb_tailroom(skb1) < tailbits)
3382 ntail = tailbits + 128; 3382 ntail = tailbits + 128;
3383 } 3383 }
3384 3384
3385 if (copyflag || 3385 if (copyflag ||
3386 skb_cloned(skb1) || 3386 skb_cloned(skb1) ||
3387 ntail || 3387 ntail ||
3388 skb_shinfo(skb1)->nr_frags || 3388 skb_shinfo(skb1)->nr_frags ||
3389 skb_has_frag_list(skb1)) { 3389 skb_has_frag_list(skb1)) {
3390 struct sk_buff *skb2; 3390 struct sk_buff *skb2;
3391 3391
3392 /* Fuck, we are miserable poor guys... */ 3392 /* Fuck, we are miserable poor guys... */
3393 if (ntail == 0) 3393 if (ntail == 0)
3394 skb2 = skb_copy(skb1, GFP_ATOMIC); 3394 skb2 = skb_copy(skb1, GFP_ATOMIC);
3395 else 3395 else
3396 skb2 = skb_copy_expand(skb1, 3396 skb2 = skb_copy_expand(skb1,
3397 skb_headroom(skb1), 3397 skb_headroom(skb1),
3398 ntail, 3398 ntail,
3399 GFP_ATOMIC); 3399 GFP_ATOMIC);
3400 if (unlikely(skb2 == NULL)) 3400 if (unlikely(skb2 == NULL))
3401 return -ENOMEM; 3401 return -ENOMEM;
3402 3402
3403 if (skb1->sk) 3403 if (skb1->sk)
3404 skb_set_owner_w(skb2, skb1->sk); 3404 skb_set_owner_w(skb2, skb1->sk);
3405 3405
3406 /* Looking around. Are we still alive? 3406 /* Looking around. Are we still alive?
3407 * OK, link new skb, drop old one */ 3407 * OK, link new skb, drop old one */
3408 3408
3409 skb2->next = skb1->next; 3409 skb2->next = skb1->next;
3410 *skb_p = skb2; 3410 *skb_p = skb2;
3411 kfree_skb(skb1); 3411 kfree_skb(skb1);
3412 skb1 = skb2; 3412 skb1 = skb2;
3413 } 3413 }
3414 elt++; 3414 elt++;
3415 *trailer = skb1; 3415 *trailer = skb1;
3416 skb_p = &skb1->next; 3416 skb_p = &skb1->next;
3417 } 3417 }
3418 3418
3419 return elt; 3419 return elt;
3420 } 3420 }
3421 EXPORT_SYMBOL_GPL(skb_cow_data); 3421 EXPORT_SYMBOL_GPL(skb_cow_data);
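/* Illustrative sketch (editorial, not part of this file): IPsec-style use,
 * making the payload writable and reserving room for a trailer before
 * building a scatterlist; trailer_len is an assumed caller variable.
 *
 *	struct sk_buff *trailer;
 *	int nfrags;
 *
 *	nfrags = skb_cow_data(skb, trailer_len, &trailer);
 *	if (nfrags < 0)
 *		goto error;
 *	// trailer_len bytes of tailroom now live in *trailer, and a
 *	// scatterlist of nfrags entries can map the whole writable buffer
 */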
3422 3422
3423 static void sock_rmem_free(struct sk_buff *skb) 3423 static void sock_rmem_free(struct sk_buff *skb)
3424 { 3424 {
3425 struct sock *sk = skb->sk; 3425 struct sock *sk = skb->sk;
3426 3426
3427 atomic_sub(skb->truesize, &sk->sk_rmem_alloc); 3427 atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
3428 } 3428 }
3429 3429
3430 /* 3430 /*
3431 * Note: We don't mem charge error packets (no sk_forward_alloc changes) 3431 * Note: We don't mem charge error packets (no sk_forward_alloc changes)
3432 */ 3432 */
3433 int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) 3433 int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
3434 { 3434 {
3435 int len = skb->len; 3435 int len = skb->len;
3436 3436
3437 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= 3437 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
3438 (unsigned int)sk->sk_rcvbuf) 3438 (unsigned int)sk->sk_rcvbuf)
3439 return -ENOMEM; 3439 return -ENOMEM;
3440 3440
3441 skb_orphan(skb); 3441 skb_orphan(skb);
3442 skb->sk = sk; 3442 skb->sk = sk;
3443 skb->destructor = sock_rmem_free; 3443 skb->destructor = sock_rmem_free;
3444 atomic_add(skb->truesize, &sk->sk_rmem_alloc); 3444 atomic_add(skb->truesize, &sk->sk_rmem_alloc);
3445 3445
3446 /* before exiting rcu section, make sure dst is refcounted */ 3446 /* before exiting rcu section, make sure dst is refcounted */
3447 skb_dst_force(skb); 3447 skb_dst_force(skb);
3448 3448
3449 skb_queue_tail(&sk->sk_error_queue, skb); 3449 skb_queue_tail(&sk->sk_error_queue, skb);
3450 if (!sock_flag(sk, SOCK_DEAD)) 3450 if (!sock_flag(sk, SOCK_DEAD))
3451 sk->sk_data_ready(sk, len); 3451 sk->sk_data_ready(sk, len);
3452 return 0; 3452 return 0;
3453 } 3453 }
3454 EXPORT_SYMBOL(sock_queue_err_skb); 3454 EXPORT_SYMBOL(sock_queue_err_skb);
3455 3455
3456 void skb_tstamp_tx(struct sk_buff *orig_skb, 3456 void skb_tstamp_tx(struct sk_buff *orig_skb,
3457 struct skb_shared_hwtstamps *hwtstamps) 3457 struct skb_shared_hwtstamps *hwtstamps)
3458 { 3458 {
3459 struct sock *sk = orig_skb->sk; 3459 struct sock *sk = orig_skb->sk;
3460 struct sock_exterr_skb *serr; 3460 struct sock_exterr_skb *serr;
3461 struct sk_buff *skb; 3461 struct sk_buff *skb;
3462 int err; 3462 int err;
3463 3463
3464 if (!sk) 3464 if (!sk)
3465 return; 3465 return;
3466 3466
3467 if (hwtstamps) { 3467 if (hwtstamps) {
3468 *skb_hwtstamps(orig_skb) = 3468 *skb_hwtstamps(orig_skb) =
3469 *hwtstamps; 3469 *hwtstamps;
3470 } else { 3470 } else {
3471 /* 3471 /*
3472 * no hardware time stamps available, 3472 * no hardware time stamps available,
3473 * so keep the shared tx_flags and only 3473 * so keep the shared tx_flags and only
3474 * store software time stamp 3474 * store software time stamp
3475 */ 3475 */
3476 orig_skb->tstamp = ktime_get_real(); 3476 orig_skb->tstamp = ktime_get_real();
3477 } 3477 }
3478 3478
3479 skb = skb_clone(orig_skb, GFP_ATOMIC); 3479 skb = skb_clone(orig_skb, GFP_ATOMIC);
3480 if (!skb) 3480 if (!skb)
3481 return; 3481 return;
3482 3482
3483 serr = SKB_EXT_ERR(skb); 3483 serr = SKB_EXT_ERR(skb);
3484 memset(serr, 0, sizeof(*serr)); 3484 memset(serr, 0, sizeof(*serr));
3485 serr->ee.ee_errno = ENOMSG; 3485 serr->ee.ee_errno = ENOMSG;
3486 serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; 3486 serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
3487 3487
3488 err = sock_queue_err_skb(sk, skb); 3488 err = sock_queue_err_skb(sk, skb);
3489 3489
3490 if (err) 3490 if (err)
3491 kfree_skb(skb); 3491 kfree_skb(skb);
3492 } 3492 }
3493 EXPORT_SYMBOL_GPL(skb_tstamp_tx); 3493 EXPORT_SYMBOL_GPL(skb_tstamp_tx);
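/* Illustrative sketch (editorial, not part of this file): a driver TX
 * completion handler reporting a hardware timestamp; hw_ns is an assumed
 * nanosecond value read from the NIC.
 *
 *	struct skb_shared_hwtstamps shhwtstamps;
 *
 *	memset(&shhwtstamps, 0, sizeof(shhwtstamps));
 *	shhwtstamps.hwtstamp = ns_to_ktime(hw_ns);
 *	skb_tstamp_tx(skb, &shhwtstamps);
 */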
3494 3494
3495 void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) 3495 void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
3496 { 3496 {
3497 struct sock *sk = skb->sk; 3497 struct sock *sk = skb->sk;
3498 struct sock_exterr_skb *serr; 3498 struct sock_exterr_skb *serr;
3499 int err; 3499 int err;
3500 3500
3501 skb->wifi_acked_valid = 1; 3501 skb->wifi_acked_valid = 1;
3502 skb->wifi_acked = acked; 3502 skb->wifi_acked = acked;
3503 3503
3504 serr = SKB_EXT_ERR(skb); 3504 serr = SKB_EXT_ERR(skb);
3505 memset(serr, 0, sizeof(*serr)); 3505 memset(serr, 0, sizeof(*serr));
3506 serr->ee.ee_errno = ENOMSG; 3506 serr->ee.ee_errno = ENOMSG;
3507 serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS; 3507 serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS;
3508 3508
3509 err = sock_queue_err_skb(sk, skb); 3509 err = sock_queue_err_skb(sk, skb);
3510 if (err) 3510 if (err)
3511 kfree_skb(skb); 3511 kfree_skb(skb);
3512 } 3512 }
3513 EXPORT_SYMBOL_GPL(skb_complete_wifi_ack); 3513 EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);
3514 3514
3515 3515
3516 /** 3516 /**
3517 * skb_partial_csum_set - set up and verify partial csum values for packet 3517 * skb_partial_csum_set - set up and verify partial csum values for packet
3518 * @skb: the skb to set 3518 * @skb: the skb to set
3519 * @start: the number of bytes after skb->data to start checksumming. 3519 * @start: the number of bytes after skb->data to start checksumming.
3520 * @off: the offset from start to place the checksum. 3520 * @off: the offset from start to place the checksum.
3521 * 3521 *
3522 * For untrusted partially-checksummed packets, we need to make sure the values 3522 * For untrusted partially-checksummed packets, we need to make sure the values
3523 * for skb->csum_start and skb->csum_offset are valid so we don't oops. 3523 * for skb->csum_start and skb->csum_offset are valid so we don't oops.
3524 * 3524 *
3525 * This function checks and sets those values and skb->ip_summed: if this 3525 * This function checks and sets those values and skb->ip_summed: if this
3526 * returns false you should drop the packet. 3526 * returns false you should drop the packet.
3527 */ 3527 */
3528 bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) 3528 bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
3529 { 3529 {
3530 if (unlikely(start > skb_headlen(skb)) || 3530 if (unlikely(start > skb_headlen(skb)) ||
3531 unlikely((int)start + off > skb_headlen(skb) - 2)) { 3531 unlikely((int)start + off > skb_headlen(skb) - 2)) {
3532 net_warn_ratelimited("bad partial csum: csum=%u/%u len=%u\n", 3532 net_warn_ratelimited("bad partial csum: csum=%u/%u len=%u\n",
3533 start, off, skb_headlen(skb)); 3533 start, off, skb_headlen(skb));
3534 return false; 3534 return false;
3535 } 3535 }
3536 skb->ip_summed = CHECKSUM_PARTIAL; 3536 skb->ip_summed = CHECKSUM_PARTIAL;
3537 skb->csum_start = skb_headroom(skb) + start; 3537 skb->csum_start = skb_headroom(skb) + start;
3538 skb->csum_offset = off; 3538 skb->csum_offset = off;
3539 skb_set_transport_header(skb, start); 3539 skb_set_transport_header(skb, start);
3540 return true; 3540 return true;
3541 } 3541 }
3542 EXPORT_SYMBOL_GPL(skb_partial_csum_set); 3542 EXPORT_SYMBOL_GPL(skb_partial_csum_set);
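/* Illustrative sketch (editorial, not part of this file): validating
 * checksum metadata received from an untrusted source (e.g. a paravirtual
 * device header); csum_start and csum_offset are assumed to come from that
 * metadata.
 *
 *	if (!skb_partial_csum_set(skb, csum_start, csum_offset))
 *		goto drop;	// malformed offsets, discard the packet
 */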
3543 3543
3544 static int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len, 3544 static int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len,
3545 unsigned int max) 3545 unsigned int max)
3546 { 3546 {
3547 if (skb_headlen(skb) >= len) 3547 if (skb_headlen(skb) >= len)
3548 return 0; 3548 return 0;
3549 3549
3550 /* If we need to pullup then pullup to the max, so we 3550 /* If we need to pullup then pullup to the max, so we
3551 * won't need to do it again. 3551 * won't need to do it again.
3552 */ 3552 */
3553 if (max > skb->len) 3553 if (max > skb->len)
3554 max = skb->len; 3554 max = skb->len;
3555 3555
3556 if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL) 3556 if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL)
3557 return -ENOMEM; 3557 return -ENOMEM;
3558 3558
3559 if (skb_headlen(skb) < len) 3559 if (skb_headlen(skb) < len)
3560 return -EPROTO; 3560 return -EPROTO;
3561 3561
3562 return 0; 3562 return 0;
3563 } 3563 }
3564 3564
3565 /* This value should be large enough to cover a tagged ethernet header plus 3565 /* This value should be large enough to cover a tagged ethernet header plus
3566 * maximally sized IP and TCP or UDP headers. 3566 * maximally sized IP and TCP or UDP headers.
3567 */ 3567 */
3568 #define MAX_IP_HDR_LEN 128 3568 #define MAX_IP_HDR_LEN 128
3569 3569
3570 static int skb_checksum_setup_ip(struct sk_buff *skb, bool recalculate) 3570 static int skb_checksum_setup_ip(struct sk_buff *skb, bool recalculate)
3571 { 3571 {
3572 unsigned int off; 3572 unsigned int off;
3573 bool fragment; 3573 bool fragment;
3574 int err; 3574 int err;
3575 3575
3576 fragment = false; 3576 fragment = false;
3577 3577
3578 err = skb_maybe_pull_tail(skb, 3578 err = skb_maybe_pull_tail(skb,
3579 sizeof(struct iphdr), 3579 sizeof(struct iphdr),
3580 MAX_IP_HDR_LEN); 3580 MAX_IP_HDR_LEN);
3581 if (err < 0) 3581 if (err < 0)
3582 goto out; 3582 goto out;
3583 3583
3584 if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF)) 3584 if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF))
3585 fragment = true; 3585 fragment = true;
3586 3586
3587 off = ip_hdrlen(skb); 3587 off = ip_hdrlen(skb);
3588 3588
3589 err = -EPROTO; 3589 err = -EPROTO;
3590 3590
3591 if (fragment) 3591 if (fragment)
3592 goto out; 3592 goto out;
3593 3593
3594 switch (ip_hdr(skb)->protocol) { 3594 switch (ip_hdr(skb)->protocol) {
3595 case IPPROTO_TCP: 3595 case IPPROTO_TCP:
3596 err = skb_maybe_pull_tail(skb, 3596 err = skb_maybe_pull_tail(skb,
3597 off + sizeof(struct tcphdr), 3597 off + sizeof(struct tcphdr),
3598 MAX_IP_HDR_LEN); 3598 MAX_IP_HDR_LEN);
3599 if (err < 0) 3599 if (err < 0)
3600 goto out; 3600 goto out;
3601 3601
3602 if (!skb_partial_csum_set(skb, off, 3602 if (!skb_partial_csum_set(skb, off,
3603 offsetof(struct tcphdr, check))) { 3603 offsetof(struct tcphdr, check))) {
3604 err = -EPROTO; 3604 err = -EPROTO;
3605 goto out; 3605 goto out;
3606 } 3606 }
3607 3607
3608 if (recalculate) 3608 if (recalculate)
3609 tcp_hdr(skb)->check = 3609 tcp_hdr(skb)->check =
3610 ~csum_tcpudp_magic(ip_hdr(skb)->saddr, 3610 ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
3611 ip_hdr(skb)->daddr, 3611 ip_hdr(skb)->daddr,
3612 skb->len - off, 3612 skb->len - off,
3613 IPPROTO_TCP, 0); 3613 IPPROTO_TCP, 0);
3614 break; 3614 break;
3615 case IPPROTO_UDP: 3615 case IPPROTO_UDP:
3616 err = skb_maybe_pull_tail(skb, 3616 err = skb_maybe_pull_tail(skb,
3617 off + sizeof(struct udphdr), 3617 off + sizeof(struct udphdr),
3618 MAX_IP_HDR_LEN); 3618 MAX_IP_HDR_LEN);
3619 if (err < 0) 3619 if (err < 0)
3620 goto out; 3620 goto out;
3621 3621
3622 if (!skb_partial_csum_set(skb, off, 3622 if (!skb_partial_csum_set(skb, off,
3623 offsetof(struct udphdr, check))) { 3623 offsetof(struct udphdr, check))) {
3624 err = -EPROTO; 3624 err = -EPROTO;
3625 goto out; 3625 goto out;
3626 } 3626 }
3627 3627
3628 if (recalculate) 3628 if (recalculate)
3629 udp_hdr(skb)->check = 3629 udp_hdr(skb)->check =
3630 ~csum_tcpudp_magic(ip_hdr(skb)->saddr, 3630 ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
3631 ip_hdr(skb)->daddr, 3631 ip_hdr(skb)->daddr,
3632 skb->len - off, 3632 skb->len - off,
3633 IPPROTO_UDP, 0); 3633 IPPROTO_UDP, 0);
3634 break; 3634 break;
3635 default: 3635 default:
3636 goto out; 3636 goto out;
3637 } 3637 }
3638 3638
3639 err = 0; 3639 err = 0;
3640 3640
3641 out: 3641 out:
3642 return err; 3642 return err;
3643 } 3643 }
3644 3644
3645 /* This value should be large enough to cover a tagged ethernet header plus 3645 /* This value should be large enough to cover a tagged ethernet header plus
3646 * an IPv6 header, all options, and a maximal TCP or UDP header. 3646 * an IPv6 header, all options, and a maximal TCP or UDP header.
3647 */ 3647 */
3648 #define MAX_IPV6_HDR_LEN 256 3648 #define MAX_IPV6_HDR_LEN 256
3649 3649
3650 #define OPT_HDR(type, skb, off) \ 3650 #define OPT_HDR(type, skb, off) \
3651 (type *)(skb_network_header(skb) + (off)) 3651 (type *)(skb_network_header(skb) + (off))
3652 3652
3653 static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate) 3653 static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate)
3654 { 3654 {
3655 int err; 3655 int err;
3656 u8 nexthdr; 3656 u8 nexthdr;
3657 unsigned int off; 3657 unsigned int off;
3658 unsigned int len; 3658 unsigned int len;
3659 bool fragment; 3659 bool fragment;
3660 bool done; 3660 bool done;
3661 3661
3662 fragment = false; 3662 fragment = false;
3663 done = false; 3663 done = false;
3664 3664
3665 off = sizeof(struct ipv6hdr); 3665 off = sizeof(struct ipv6hdr);
3666 3666
3667 err = skb_maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN); 3667 err = skb_maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN);
3668 if (err < 0) 3668 if (err < 0)
3669 goto out; 3669 goto out;
3670 3670
3671 nexthdr = ipv6_hdr(skb)->nexthdr; 3671 nexthdr = ipv6_hdr(skb)->nexthdr;
3672 3672
3673 len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len); 3673 len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
3674 while (off <= len && !done) { 3674 while (off <= len && !done) {
3675 switch (nexthdr) { 3675 switch (nexthdr) {
3676 case IPPROTO_DSTOPTS: 3676 case IPPROTO_DSTOPTS:
3677 case IPPROTO_HOPOPTS: 3677 case IPPROTO_HOPOPTS:
3678 case IPPROTO_ROUTING: { 3678 case IPPROTO_ROUTING: {
3679 struct ipv6_opt_hdr *hp; 3679 struct ipv6_opt_hdr *hp;
3680 3680
3681 err = skb_maybe_pull_tail(skb, 3681 err = skb_maybe_pull_tail(skb,
3682 off + 3682 off +
3683 sizeof(struct ipv6_opt_hdr), 3683 sizeof(struct ipv6_opt_hdr),
3684 MAX_IPV6_HDR_LEN); 3684 MAX_IPV6_HDR_LEN);
3685 if (err < 0) 3685 if (err < 0)
3686 goto out; 3686 goto out;
3687 3687
3688 hp = OPT_HDR(struct ipv6_opt_hdr, skb, off); 3688 hp = OPT_HDR(struct ipv6_opt_hdr, skb, off);
3689 nexthdr = hp->nexthdr; 3689 nexthdr = hp->nexthdr;
3690 off += ipv6_optlen(hp); 3690 off += ipv6_optlen(hp);
3691 break; 3691 break;
3692 } 3692 }
3693 case IPPROTO_AH: { 3693 case IPPROTO_AH: {
3694 struct ip_auth_hdr *hp; 3694 struct ip_auth_hdr *hp;
3695 3695
3696 err = skb_maybe_pull_tail(skb, 3696 err = skb_maybe_pull_tail(skb,
3697 off + 3697 off +
3698 sizeof(struct ip_auth_hdr), 3698 sizeof(struct ip_auth_hdr),
3699 MAX_IPV6_HDR_LEN); 3699 MAX_IPV6_HDR_LEN);
3700 if (err < 0) 3700 if (err < 0)
3701 goto out; 3701 goto out;
3702 3702
3703 hp = OPT_HDR(struct ip_auth_hdr, skb, off); 3703 hp = OPT_HDR(struct ip_auth_hdr, skb, off);
3704 nexthdr = hp->nexthdr; 3704 nexthdr = hp->nexthdr;
3705 off += ipv6_authlen(hp); 3705 off += ipv6_authlen(hp);
3706 break; 3706 break;
3707 } 3707 }
3708 case IPPROTO_FRAGMENT: { 3708 case IPPROTO_FRAGMENT: {
3709 struct frag_hdr *hp; 3709 struct frag_hdr *hp;
3710 3710
3711 err = skb_maybe_pull_tail(skb, 3711 err = skb_maybe_pull_tail(skb,
3712 off + 3712 off +
3713 sizeof(struct frag_hdr), 3713 sizeof(struct frag_hdr),
3714 MAX_IPV6_HDR_LEN); 3714 MAX_IPV6_HDR_LEN);
3715 if (err < 0) 3715 if (err < 0)
3716 goto out; 3716 goto out;
3717 3717
3718 hp = OPT_HDR(struct frag_hdr, skb, off); 3718 hp = OPT_HDR(struct frag_hdr, skb, off);
3719 3719
3720 if (hp->frag_off & htons(IP6_OFFSET | IP6_MF)) 3720 if (hp->frag_off & htons(IP6_OFFSET | IP6_MF))
3721 fragment = true; 3721 fragment = true;
3722 3722
3723 nexthdr = hp->nexthdr; 3723 nexthdr = hp->nexthdr;
3724 off += sizeof(struct frag_hdr); 3724 off += sizeof(struct frag_hdr);
3725 break; 3725 break;
3726 } 3726 }
3727 default: 3727 default:
3728 done = true; 3728 done = true;
3729 break; 3729 break;
3730 } 3730 }
3731 } 3731 }
3732 3732
3733 err = -EPROTO; 3733 err = -EPROTO;
3734 3734
3735 if (!done || fragment) 3735 if (!done || fragment)
3736 goto out; 3736 goto out;
3737 3737
3738 switch (nexthdr) { 3738 switch (nexthdr) {
3739 case IPPROTO_TCP: 3739 case IPPROTO_TCP:
3740 err = skb_maybe_pull_tail(skb, 3740 err = skb_maybe_pull_tail(skb,
3741 off + sizeof(struct tcphdr), 3741 off + sizeof(struct tcphdr),
3742 MAX_IPV6_HDR_LEN); 3742 MAX_IPV6_HDR_LEN);
3743 if (err < 0) 3743 if (err < 0)
3744 goto out; 3744 goto out;
3745 3745
3746 if (!skb_partial_csum_set(skb, off, 3746 if (!skb_partial_csum_set(skb, off,
3747 offsetof(struct tcphdr, check))) { 3747 offsetof(struct tcphdr, check))) {
3748 err = -EPROTO; 3748 err = -EPROTO;
3749 goto out; 3749 goto out;
3750 } 3750 }
3751 3751
3752 if (recalculate) 3752 if (recalculate)
3753 tcp_hdr(skb)->check = 3753 tcp_hdr(skb)->check =
3754 ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, 3754 ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3755 &ipv6_hdr(skb)->daddr, 3755 &ipv6_hdr(skb)->daddr,
3756 skb->len - off, 3756 skb->len - off,
3757 IPPROTO_TCP, 0); 3757 IPPROTO_TCP, 0);
3758 break; 3758 break;
3759 case IPPROTO_UDP: 3759 case IPPROTO_UDP:
3760 err = skb_maybe_pull_tail(skb, 3760 err = skb_maybe_pull_tail(skb,
3761 off + sizeof(struct udphdr), 3761 off + sizeof(struct udphdr),
3762 MAX_IPV6_HDR_LEN); 3762 MAX_IPV6_HDR_LEN);
3763 if (err < 0) 3763 if (err < 0)
3764 goto out; 3764 goto out;
3765 3765
3766 if (!skb_partial_csum_set(skb, off, 3766 if (!skb_partial_csum_set(skb, off,
3767 offsetof(struct udphdr, check))) { 3767 offsetof(struct udphdr, check))) {
3768 err = -EPROTO; 3768 err = -EPROTO;
3769 goto out; 3769 goto out;
3770 } 3770 }
3771 3771
3772 if (recalculate) 3772 if (recalculate)
3773 udp_hdr(skb)->check = 3773 udp_hdr(skb)->check =
3774 ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, 3774 ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3775 &ipv6_hdr(skb)->daddr, 3775 &ipv6_hdr(skb)->daddr,
3776 skb->len - off, 3776 skb->len - off,
3777 IPPROTO_UDP, 0); 3777 IPPROTO_UDP, 0);
3778 break; 3778 break;
3779 default: 3779 default:
3780 goto out; 3780 goto out;
3781 } 3781 }
3782 3782
3783 err = 0; 3783 err = 0;
3784 3784
3785 out: 3785 out:
3786 return err; 3786 return err;
3787 } 3787 }
3788 3788
3789 /** 3789 /**
3790 * skb_checksum_setup - set up partial checksum offset 3790 * skb_checksum_setup - set up partial checksum offset
3791 * @skb: the skb to set up 3791 * @skb: the skb to set up
3792 * @recalculate: if true the pseudo-header checksum will be recalculated 3792 * @recalculate: if true the pseudo-header checksum will be recalculated
3793 */ 3793 */
3794 int skb_checksum_setup(struct sk_buff *skb, bool recalculate) 3794 int skb_checksum_setup(struct sk_buff *skb, bool recalculate)
3795 { 3795 {
3796 int err; 3796 int err;
3797 3797
3798 switch (skb->protocol) { 3798 switch (skb->protocol) {
3799 case htons(ETH_P_IP): 3799 case htons(ETH_P_IP):
3800 err = skb_checksum_setup_ip(skb, recalculate); 3800 err = skb_checksum_setup_ip(skb, recalculate);
3801 break; 3801 break;
3802 3802
3803 case htons(ETH_P_IPV6): 3803 case htons(ETH_P_IPV6):
3804 err = skb_checksum_setup_ipv6(skb, recalculate); 3804 err = skb_checksum_setup_ipv6(skb, recalculate);
3805 break; 3805 break;
3806 3806
3807 default: 3807 default:
3808 err = -EPROTO; 3808 err = -EPROTO;
3809 break; 3809 break;
3810 } 3810 }
3811 3811
3812 return err; 3812 return err;
3813 } 3813 }
3814 EXPORT_SYMBOL(skb_checksum_setup); 3814 EXPORT_SYMBOL(skb_checksum_setup);
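/* Illustrative sketch (editorial, not part of this file): a backend driver
 * fixing up guest-supplied checksum state before handing the skb to the
 * stack; recalculation of the pseudo-header checksum is requested here.
 *
 *	if (skb_checksum_setup(skb, true)) {
 *		kfree_skb(skb);
 *		return;		// unsupported or malformed protocol headers
 *	}
 */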
3815
3816 void __skb_warn_lro_forwarding(const struct sk_buff *skb)
3817 {
3818 	net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n",
3819 			     skb->dev->name);
3820 }
3821 EXPORT_SYMBOL(__skb_warn_lro_forwarding);
3822
3823 void kfree_skb_partial(struct sk_buff *skb, bool head_stolen)
3824 {
3825 	if (head_stolen) {
3826 		skb_release_head_state(skb);
3827 		kmem_cache_free(skbuff_head_cache, skb);
3828 	} else {
3829 		__kfree_skb(skb);
3830 	}
3831 }
3832 EXPORT_SYMBOL(kfree_skb_partial);
3833
3834 /**
3835  * skb_try_coalesce - try to merge skb to prior one
3836  * @to: prior buffer
3837  * @from: buffer to add
3838  * @fragstolen: pointer to boolean
3839  * @delta_truesize: how much more was allocated than was requested
3840  */
3841 bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
3842 		      bool *fragstolen, int *delta_truesize)
3843 {
3844 	int i, delta, len = from->len;
3845
3846 	*fragstolen = false;
3847
3848 	if (skb_cloned(to))
3849 		return false;
3850
3851 	if (len <= skb_tailroom(to)) {
3852 		BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
3853 		*delta_truesize = 0;
3854 		return true;
3855 	}
3856
3857 	if (skb_has_frag_list(to) || skb_has_frag_list(from))
3858 		return false;
3859
3860 	if (skb_headlen(from) != 0) {
3861 		struct page *page;
3862 		unsigned int offset;
3863
3864 		if (skb_shinfo(to)->nr_frags +
3865 		    skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
3866 			return false;
3867
3868 		if (skb_head_is_locked(from))
3869 			return false;
3870
3871 		delta = from->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
3872
3873 		page = virt_to_head_page(from->head);
3874 		offset = from->data - (unsigned char *)page_address(page);
3875
3876 		skb_fill_page_desc(to, skb_shinfo(to)->nr_frags,
3877 				   page, offset, skb_headlen(from));
3878 		*fragstolen = true;
3879 	} else {
3880 		if (skb_shinfo(to)->nr_frags +
3881 		    skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS)
3882 			return false;
3883
3884 		delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from));
3885 	}
3886
3887 	WARN_ON_ONCE(delta < len);
3888
3889 	memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags,
3890 	       skb_shinfo(from)->frags,
3891 	       skb_shinfo(from)->nr_frags * sizeof(skb_frag_t));
3892 	skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags;
3893
3894 	if (!skb_cloned(from))
3895 		skb_shinfo(from)->nr_frags = 0;
3896
3897 	/* if the skb is not cloned this does nothing
3898 	 * since we set nr_frags to 0.
3899 	 */
3900 	for (i = 0; i < skb_shinfo(from)->nr_frags; i++)
3901 		skb_frag_ref(from, i);
3902
3903 	to->truesize += delta;
3904 	to->len += len;
3905 	to->data_len += len;
3906
3907 	*delta_truesize = delta;
3908 	return true;
3909 }
3910 EXPORT_SYMBOL(skb_try_coalesce);
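Editor's note, not part of the diff: skb_try_coalesce() and kfree_skb_partial() are designed to be used together. Below is a hedged sketch of the typical pattern, merging a newly received skb into the tail of a receive queue; queue_rcv_skb_example() is invented for illustration, and truesize/memory accounting of the returned delta is omitted.

    /* Hypothetical caller: append @skb's payload to the last queued
     * skb when possible, otherwise queue it as-is.
     */
    static void queue_rcv_skb_example(struct sk_buff_head *queue,
    				      struct sk_buff *skb)
    {
    	struct sk_buff *tail = skb_peek_tail(queue);
    	bool fragstolen;
    	int delta;

    	if (tail && skb_try_coalesce(tail, skb, &fragstolen, &delta)) {
    		/* Payload now lives in @tail; free @skb, keeping its
    		 * head if skb_try_coalesce() stole it as a page frag.
    		 */
    		kfree_skb_partial(skb, fragstolen);
    		return;
    	}

    	__skb_queue_tail(queue, skb);
    }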
3911
3912 /**
3913  * skb_scrub_packet - scrub an skb
3914  *
3915  * @skb: buffer to clean
3916  * @xnet: packet is crossing netns
3917  *
3918  * skb_scrub_packet can be used after encapsulating or decapsulating a packet
3919  * into/from a tunnel. Some information has to be cleared during these
3920  * operations.
3921  * skb_scrub_packet can also be used to clean an skb before injecting it in
3922  * another namespace (@xnet == true). We have to clear all information in the
3923  * skb that could impact namespace isolation.
3924  */
3925 void skb_scrub_packet(struct sk_buff *skb, bool xnet)
3926 {
3927 	if (xnet)
3928 		skb_orphan(skb);
3929 	skb->tstamp.tv64 = 0;
3930 	skb->pkt_type = PACKET_HOST;
3931 	skb->skb_iif = 0;
3932 	skb->local_df = 0;
3933 	skb_dst_drop(skb);
3934 	skb->mark = 0;
3935 	secpath_reset(skb);
3936 	nf_reset(skb);
3937 	nf_reset_trace(skb);
3938 }
3939 EXPORT_SYMBOL_GPL(skb_scrub_packet);
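Editor's note, not part of the diff: a rough, hypothetical sketch of the pattern described in the kernel-doc above, scrubbing a packet before injecting it into a peer device that may live in a different network namespace. inject_to_peer_example() is invented for illustration, and the length/headroom checks a real forwarding path would perform are omitted.

    /* Hypothetical caller: hand @skb to @peer's receive path, scrubbing
     * namespace-sensitive state only when a netns boundary is crossed.
     */
    static int inject_to_peer_example(struct sk_buff *skb,
    				      struct net_device *peer)
    {
    	bool xnet = !net_eq(dev_net(skb->dev), dev_net(peer));

    	skb_scrub_packet(skb, xnet);

    	/* Re-parse the link-layer header for the receiving device. */
    	skb->protocol = eth_type_trans(skb, peer);

    	return netif_rx(skb);
    }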
3940
3941 /**
3942  * skb_gso_transport_seglen - Return length of individual segments of a gso packet
3943  *
3944  * @skb: GSO skb
3945  *
3946  * skb_gso_transport_seglen is used to determine the real size of the
3947  * individual segments, including Layer4 headers (TCP/UDP).
3948  *
3949  * The MAC/L2 or network (IP, IPv6) headers are not accounted for.
3950  */
3951 unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
3952 {
3953 	const struct skb_shared_info *shinfo = skb_shinfo(skb);
3954
3955 	if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
3956 		return tcp_hdrlen(skb) + shinfo->gso_size;
3957
3958 	/* UFO sets gso_size to the size of the fragmentation
3959 	 * payload, i.e. the size of the L4 (UDP) header is already
3960 	 * accounted for.
3961 	 */
3962 	return shinfo->gso_size;
3963 }
3964 EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
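Editor's note, not part of the diff: a hedged sketch of how a qdisc or forwarding path might use skb_gso_transport_seglen() to check whether the segments produced from a GSO skb fit a transport-level size budget. gso_segs_fit_example() and max_payload are invented for illustration.

    /* Hypothetical caller: return true if every resulting segment's
     * L4 header plus payload fits within @max_payload.
     */
    static bool gso_segs_fit_example(const struct sk_buff *skb,
    				     unsigned int max_payload)
    {
    	if (!skb_is_gso(skb))
    		return skb->len <= max_payload;

    	/* Per-segment size excludes L2/L3 headers, so compare against
    	 * a transport-level budget rather than the device MTU.
    	 */
    	return skb_gso_transport_seglen(skb) <= max_payload;
    }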
3965